github.com/hechain20/hechain@v0.0.0-20220316014945-b544036ba106/orderer/consensus/etcdraft/chain_test.go (about)

     1  /*
     2  Copyright hechain. All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package etcdraft_test
     8  
     9  import (
    10  	"encoding/pem"
    11  	"fmt"
    12  	"io/ioutil"
    13  	"os"
    14  	"os/user"
    15  	"path"
    16  	"sync"
    17  	"time"
    18  
    19  	"code.cloudfoundry.org/clock/fakeclock"
    20  	"github.com/golang/protobuf/proto"
    21  	"github.com/hechain20/hechain/bccsp"
    22  	"github.com/hechain20/hechain/bccsp/factory"
    23  	"github.com/hechain20/hechain/bccsp/sw"
    24  	"github.com/hechain20/hechain/common/channelconfig"
    25  	"github.com/hechain20/hechain/common/crypto/tlsgen"
    26  	"github.com/hechain20/hechain/common/flogging"
    27  	"github.com/hechain20/hechain/orderer/common/cluster"
    28  	orderer_types "github.com/hechain20/hechain/orderer/common/types"
    29  	"github.com/hechain20/hechain/orderer/consensus/etcdraft"
    30  	"github.com/hechain20/hechain/orderer/consensus/etcdraft/mocks"
    31  	consensusmocks "github.com/hechain20/hechain/orderer/consensus/mocks"
    32  	mockblockcutter "github.com/hechain20/hechain/orderer/mocks/common/blockcutter"
    33  	"github.com/hechain20/hechain/protoutil"
    34  	"github.com/hyperledger/fabric-protos-go/common"
    35  	"github.com/hyperledger/fabric-protos-go/orderer"
    36  	raftprotos "github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
    37  	. "github.com/onsi/ginkgo"
    38  	. "github.com/onsi/gomega"
    39  	"github.com/onsi/gomega/types"
    40  	"github.com/pkg/errors"
    41  	"go.etcd.io/etcd/raft"
    42  	"go.etcd.io/etcd/raft/raftpb"
    43  	"go.uber.org/zap"
    44  )
    45  
    46  const (
    47  	interval            = 100 * time.Millisecond
    48  	LongEventualTimeout = 10 * time.Second
    49  
     50  	// 10 is the default setting of ELECTION_TICK.
     51  	// We used to have a small number here (2) to reduce test time - we don't
     52  	// need to tick the node 10 times to trigger an election - however, we now use
     53  	// another mechanism to trigger it that does not depend on time: sending an
     54  	// artificial MsgTimeoutNow to the node.
    55  	ELECTION_TICK  = 10
    56  	HEARTBEAT_TICK = 1
    57  )
    58  
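         // With ElectionTick = 10 and TickInterval = interval (100ms), a full raft
         // election timeout corresponds to roughly one second of clock time; the tests
         // below avoid waiting for it by injecting an artificial MsgTimeoutNow instead
         // (see the campaign helper).
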
    59  //go:generate counterfeiter -o mocks/halt_callbacker.go --fake-name HaltCallbacker . haltCallbacker
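         // haltCallbacker is mocked in these tests to observe whether the chain invokes
         // the halt callback; for example, it must not be invoked when the chain is
         // halted externally via Halt().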
    60  type haltCallbacker interface {
    61  	HaltCallback()
    62  }
    63  
    64  func init() {
    65  	factory.InitFactories(nil)
    66  }
    67  
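         // mockOrderer returns an OrdererConfig fake with a one-second batch timeout
         // and the given consensus metadata.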
    68  func mockOrderer(metadata []byte) *mocks.OrdererConfig {
    69  	return mockOrdererWithBatchTimeout(time.Second, metadata)
    70  }
    71  
    72  func mockOrdererWithBatchTimeout(batchTimeout time.Duration, metadata []byte) *mocks.OrdererConfig {
    73  	mockOrderer := &mocks.OrdererConfig{}
    74  	mockOrderer.BatchTimeoutReturns(batchTimeout)
    75  	mockOrderer.ConsensusMetadataReturns(metadata)
    76  	return mockOrderer
    77  }
    78  
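         // mockOrdererWithTLSRootCert additionally wires a fake orderer organization
         // whose MSP reports the given TLS CA certificate as its TLS root cert.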
    79  func mockOrdererWithTLSRootCert(batchTimeout time.Duration, metadata []byte, tlsCA tlsgen.CA) *mocks.OrdererConfig {
    80  	mockOrderer := mockOrdererWithBatchTimeout(batchTimeout, metadata)
    81  	mockOrg := &mocks.OrdererOrg{}
    82  	mockMSP := &mocks.MSP{}
    83  	mockMSP.GetTLSRootCertsReturns([][]byte{tlsCA.CertBytes()})
    84  	mockOrg.MSPReturns(mockMSP)
    85  	mockOrderer.OrganizationsReturns(map[string]channelconfig.OrdererOrg{
    86  		"fake-org": mockOrg,
    87  	})
    88  	return mockOrderer
    89  }
    90  
     91  // For some test cases we chmod a file/dir to test failures caused by exotic permissions.
     92  // However, this does not work if the tests are running as root, e.g. in a container.
    93  func skipIfRoot() {
    94  	u, err := user.Current()
    95  	Expect(err).NotTo(HaveOccurred())
    96  	if u.Uid == "0" {
    97  		Skip("you are running test as root, there's no way to make files unreadable")
    98  	}
    99  }
   100  
   101  var _ = Describe("Chain", func() {
   102  	var (
   103  		env       *common.Envelope
   104  		channelID string
   105  		tlsCA     tlsgen.CA
   106  		logger    *flogging.FabricLogger
   107  	)
   108  
   109  	BeforeEach(func() {
   110  		tlsCA, _ = tlsgen.NewCA()
   111  		channelID = "test-channel"
   112  		logger = flogging.NewFabricLogger(zap.NewExample())
   113  		env = &common.Envelope{
   114  			Payload: marshalOrPanic(&common.Payload{
   115  				Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
   116  				Data:   []byte("TEST_MESSAGE"),
   117  			}),
   118  		}
   119  	})
   120  
   121  	Describe("Single Raft node", func() {
   122  		var (
   123  			configurator       *mocks.FakeConfigurator
   124  			consenterMetadata  *raftprotos.ConfigMetadata
   125  			consenters         map[uint64]*raftprotos.Consenter
   126  			clock              *fakeclock.FakeClock
   127  			opts               etcdraft.Options
   128  			support            *consensusmocks.FakeConsenterSupport
   129  			cutter             *mockblockcutter.Receiver
   130  			storage            *raft.MemoryStorage
   131  			observeC           chan raft.SoftState
   132  			chain              *etcdraft.Chain
   133  			dataDir            string
   134  			walDir             string
   135  			snapDir            string
   136  			err                error
   137  			fakeFields         *fakeMetricsFields
   138  			cryptoProvider     bccsp.BCCSP
   139  			fakeHaltCallbacker *mocks.HaltCallbacker
   140  		)
   141  
   142  		BeforeEach(func() {
   143  			cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore())
   144  			Expect(err).NotTo(HaveOccurred())
   145  
   146  			configurator = &mocks.FakeConfigurator{}
   147  			clock = fakeclock.NewFakeClock(time.Now())
   148  			storage = raft.NewMemoryStorage()
   149  
   150  			dataDir, err = ioutil.TempDir("", "wal-")
   151  			Expect(err).NotTo(HaveOccurred())
   152  			walDir = path.Join(dataDir, "wal")
   153  			snapDir = path.Join(dataDir, "snapshot")
   154  
   155  			observeC = make(chan raft.SoftState, 1)
   156  
   157  			support = &consensusmocks.FakeConsenterSupport{}
   158  			support.ChannelIDReturns(channelID)
   159  			consenterMetadata = createMetadata(1, tlsCA)
   160  			support.SharedConfigReturns(mockOrdererWithTLSRootCert(time.Hour, marshalOrPanic(consenterMetadata), tlsCA))
   161  
   162  			cutter = mockblockcutter.NewReceiver()
   163  			support.BlockCutterReturns(cutter)
   164  
   165  			// for block creator initialization
   166  			support.HeightReturns(1)
   167  			support.BlockReturns(getSeedBlock())
   168  
   169  			meta := &raftprotos.BlockMetadata{
   170  				ConsenterIds:    make([]uint64, len(consenterMetadata.Consenters)),
   171  				NextConsenterId: 1,
   172  			}
   173  
   174  			for i := range meta.ConsenterIds {
   175  				meta.ConsenterIds[i] = meta.NextConsenterId
   176  				meta.NextConsenterId++
   177  			}
   178  
   179  			consenters = map[uint64]*raftprotos.Consenter{}
   180  			for i, c := range consenterMetadata.Consenters {
   181  				consenters[meta.ConsenterIds[i]] = c
   182  			}
   183  
   184  			fakeFields = newFakeMetricsFields()
   185  
   186  			opts = etcdraft.Options{
   187  				RPCTimeout:        time.Second * 5,
   188  				RaftID:            1,
   189  				Clock:             clock,
   190  				TickInterval:      interval,
   191  				ElectionTick:      ELECTION_TICK,
   192  				HeartbeatTick:     HEARTBEAT_TICK,
   193  				MaxSizePerMsg:     1024 * 1024,
   194  				MaxInflightBlocks: 256,
   195  				BlockMetadata:     meta,
   196  				Consenters:        consenters,
   197  				Logger:            logger,
   198  				MemoryStorage:     storage,
   199  				WALDir:            walDir,
   200  				SnapDir:           snapDir,
   201  				Metrics:           newFakeMetrics(fakeFields),
   202  			}
   203  
   204  			fakeHaltCallbacker = &mocks.HaltCallbacker{}
   205  		})
   206  
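         		// campaign drives the chain to leadership without waiting for the election
         		// timeout: it repeatedly injects an artificial MsgTimeoutNow addressed to
         		// node 1 until the observer channel reports that node 1 is StateLeader.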
   207  		campaign := func(c *etcdraft.Chain, observeC <-chan raft.SoftState) {
   208  			Eventually(func() <-chan raft.SoftState {
   209  				c.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow, To: 1})}, 0)
   210  				return observeC
   211  			}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
   212  		}
   213  
   214  		JustBeforeEach(func() {
   215  			chain, err = etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, fakeHaltCallbacker.HaltCallback, observeC)
   216  			Expect(err).NotTo(HaveOccurred())
   217  
   218  			chain.Start()
   219  			cRel, status := chain.StatusReport()
   220  			Expect(cRel).To(Equal(orderer_types.ConsensusRelationConsenter))
   221  			Expect(status).To(Equal(orderer_types.StatusActive))
   222  
   223  			// When the Raft node bootstraps, it produces a ConfChange
   224  			// to add itself, which needs to be consumed with Ready().
   225  			// If there are pending configuration changes in raft,
   226  			// it refuses to campaign, no matter how many ticks elapse.
   227  			// This is not a problem in the production code because raft.Ready
   228  			// will be consumed eventually, as the wall clock advances.
   229  			//
   230  			// However, this is problematic when using the fake clock and
   231  			// artificial ticks. Instead of ticking raft indefinitely until
   232  			// raft.Ready is consumed, this check is added to indirectly guarantee
   233  			// that the first ConfChange is actually consumed and we can safely
   234  			// proceed to tick the Raft FSM.
   235  			Eventually(func() error {
   236  				_, err := storage.Entries(1, 1, 1)
   237  				return err
   238  			}, LongEventualTimeout).ShouldNot(HaveOccurred())
   239  		})
   240  
   241  		AfterEach(func() {
   242  			chain.Halt()
   243  			Eventually(chain.Errored, LongEventualTimeout).Should(BeClosed())
   244  			// Make sure no timer leak
   245  			Eventually(clock.WatcherCount, LongEventualTimeout).Should(BeZero())
   246  			os.RemoveAll(dataDir)
   247  		})
   248  
   249  		Context("when a node starts up", func() {
   250  			It("properly configures the communication layer", func() {
   251  				expectedNodeConfig := nodeConfigFromMetadata(consenterMetadata)
   252  				Eventually(configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(1))
   253  				_, arg2 := configurator.ConfigureArgsForCall(0)
   254  				Expect(arg2).To(Equal(expectedNodeConfig))
   255  			})
   256  
   257  			It("correctly sets the metrics labels and publishes requisite metrics", func() {
   258  				type withImplementers interface {
   259  					WithCallCount() int
   260  					WithArgsForCall(int) []string
   261  				}
   262  				metricsList := []withImplementers{
   263  					fakeFields.fakeClusterSize,
   264  					fakeFields.fakeIsLeader,
   265  					fakeFields.fakeActiveNodes,
   266  					fakeFields.fakeCommittedBlockNumber,
   267  					fakeFields.fakeSnapshotBlockNumber,
   268  					fakeFields.fakeLeaderChanges,
   269  					fakeFields.fakeProposalFailures,
   270  					fakeFields.fakeDataPersistDuration,
   271  					fakeFields.fakeNormalProposalsReceived,
   272  					fakeFields.fakeConfigProposalsReceived,
   273  				}
   274  				for _, m := range metricsList {
   275  					Expect(m.WithCallCount()).To(Equal(1))
   276  					Expect(func() string {
   277  						return m.WithArgsForCall(0)[1]
   278  					}()).To(Equal(channelID))
   279  				}
   280  
   281  				Expect(fakeFields.fakeClusterSize.SetCallCount()).To(Equal(1))
   282  				Expect(fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(1)))
   283  				Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(1))
   284  				Expect(fakeFields.fakeIsLeader.SetArgsForCall(0)).To(Equal(float64(0)))
   285  				Expect(fakeFields.fakeActiveNodes.SetCallCount()).To(Equal(1))
   286  				Expect(fakeFields.fakeActiveNodes.SetArgsForCall(0)).To(Equal(float64(0)))
   287  			})
   288  		})
   289  
   290  		Context("when no Raft leader is elected", func() {
   291  			It("fails to order envelope", func() {
   292  				err := chain.Order(env, 0)
   293  				Expect(err).To(MatchError("no Raft leader"))
   294  				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
   295  				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   296  				Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(0))
   297  				Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1))
   298  				Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1)))
   299  			})
   300  
   301  			It("starts proactive campaign", func() {
    302  				// assert that even if the number of ticks supplied is less than the election timeout,
    303  				// a leader can still be successfully elected.
   304  				for i := 0; i < ELECTION_TICK; i++ {
   305  					clock.Increment(interval)
   306  					time.Sleep(10 * time.Millisecond)
   307  				}
   308  				Eventually(observeC, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
   309  			})
   310  		})
   311  
   312  		Context("when Raft leader is elected", func() {
   313  			JustBeforeEach(func() {
   314  				campaign(chain, observeC)
   315  			})
   316  
   317  			It("updates metrics upon leader election", func() {
   318  				Expect(fakeFields.fakeIsLeader.SetCallCount()).To(Equal(2))
   319  				Expect(fakeFields.fakeIsLeader.SetArgsForCall(1)).To(Equal(float64(1)))
   320  				Expect(fakeFields.fakeLeaderChanges.AddCallCount()).To(Equal(1))
   321  				Expect(fakeFields.fakeLeaderChanges.AddArgsForCall(0)).To(Equal(float64(1)))
   322  			})
   323  
   324  			It("fails to order envelope if chain is halted", func() {
   325  				chain.Halt()
   326  				err := chain.Order(env, 0)
   327  				Expect(err).To(MatchError("chain is stopped"))
   328  				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
   329  				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   330  				Expect(fakeFields.fakeProposalFailures.AddCallCount()).To(Equal(1))
   331  				Expect(fakeFields.fakeProposalFailures.AddArgsForCall(0)).To(Equal(float64(1)))
   332  			})
   333  
   334  			It("produces blocks following batch rules", func() {
   335  				close(cutter.Block)
   336  
   337  				By("cutting next batch directly")
   338  				cutter.CutNext = true
   339  				err := chain.Order(env, 0)
   340  				Expect(err).NotTo(HaveOccurred())
   341  				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
   342  				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   343  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   344  				Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call
   345  				Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1)))
   346  
   347  				// There are three calls to DataPersistDuration by now corresponding to the following three
   348  				// arriving on the Ready channel:
   349  				// 1. an EntryConfChange to let this node join the Raft cluster
   350  				// 2. a SoftState and an associated increase of term in the HardState due to the node being elected leader
   351  				// 3. a block being committed
   352  				// The duration being emitted is zero since we don't tick the fake clock during this time
   353  				Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(3))
   354  				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(0)).Should(Equal(float64(0)))
   355  				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(1)).Should(Equal(float64(0)))
   356  				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(2)).Should(Equal(float64(0)))
   357  
   358  				By("respecting batch timeout")
   359  				cutter.CutNext = false
   360  				timeout := time.Second
   361  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   362  				err = chain.Order(env, 0)
   363  				Expect(err).NotTo(HaveOccurred())
   364  				Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2))
   365  				Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1)))
   366  
   367  				clock.WaitForNWatchersAndIncrement(timeout, 2)
   368  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   369  				Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call
   370  				Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2)))
   371  				Expect(fakeFields.fakeDataPersistDuration.ObserveCallCount()).Should(Equal(4))
   372  				Expect(fakeFields.fakeDataPersistDuration.ObserveArgsForCall(3)).Should(Equal(float64(0)))
   373  			})
   374  
   375  			It("does not reset timer for every envelope", func() {
   376  				close(cutter.Block)
   377  
   378  				timeout := time.Second
   379  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   380  
   381  				err := chain.Order(env, 0)
   382  				Expect(err).NotTo(HaveOccurred())
   383  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   384  
   385  				clock.WaitForNWatchersAndIncrement(timeout/2, 2)
   386  
   387  				err = chain.Order(env, 0)
   388  				Expect(err).NotTo(HaveOccurred())
   389  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(2))
   390  
   391  				// the second envelope should not reset the timer; it should
   392  				// therefore expire if we increment it by just timeout/2
   393  				clock.Increment(timeout / 2)
   394  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   395  			})
   396  
   397  			It("does not write a block if halted before timeout", func() {
   398  				close(cutter.Block)
   399  				timeout := time.Second
   400  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   401  
   402  				err := chain.Order(env, 0)
   403  				Expect(err).NotTo(HaveOccurred())
   404  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   405  
   406  				// wait for timer to start
   407  				Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2))
   408  
   409  				chain.Halt()
   410  				Consistently(support.WriteBlockCallCount).Should(Equal(0))
   411  			})
   412  
   413  			It("stops the timer if a batch is cut", func() {
   414  				close(cutter.Block)
   415  
   416  				timeout := time.Second
   417  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   418  
   419  				err := chain.Order(env, 0)
   420  				Expect(err).NotTo(HaveOccurred())
   421  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   422  
   423  				clock.WaitForNWatchersAndIncrement(timeout/2, 2)
   424  
    425  				By("forcing a batch to be cut before the timer expires")
   426  				cutter.CutNext = true
   427  				err = chain.Order(env, 0)
   428  				Expect(err).NotTo(HaveOccurred())
   429  
   430  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   431  				b, _ := support.WriteBlockArgsForCall(0)
   432  				Expect(b.Data.Data).To(HaveLen(2))
   433  				Expect(cutter.CurBatch()).To(HaveLen(0))
   434  
   435  				// this should start a fresh timer
   436  				cutter.CutNext = false
   437  				err = chain.Order(env, 0)
   438  				Expect(err).NotTo(HaveOccurred())
   439  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   440  
   441  				clock.WaitForNWatchersAndIncrement(timeout/2, 2)
   442  				Consistently(support.WriteBlockCallCount).Should(Equal(1))
   443  
   444  				clock.Increment(timeout / 2)
   445  
   446  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   447  				b, _ = support.WriteBlockArgsForCall(1)
   448  				Expect(b.Data.Data).To(HaveLen(1))
   449  			})
   450  
    451  			It("cuts two batches if the incoming envelope does not fit into the first batch", func() {
   452  				close(cutter.Block)
   453  
   454  				timeout := time.Second
   455  				support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   456  
   457  				err := chain.Order(env, 0)
   458  				Expect(err).NotTo(HaveOccurred())
   459  				Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   460  
   461  				cutter.IsolatedTx = true
   462  				err = chain.Order(env, 0)
   463  				Expect(err).NotTo(HaveOccurred())
   464  
   465  				Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   466  			})
   467  
   468  			Context("revalidation", func() {
   469  				BeforeEach(func() {
   470  					close(cutter.Block)
   471  
   472  					timeout := time.Hour
   473  					support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
   474  					support.SequenceReturns(1)
   475  				})
   476  
    477  				It("enqueues if the envelope is still valid", func() {
   478  					support.ProcessNormalMsgReturns(1, nil)
   479  
   480  					err := chain.Order(env, 0)
   481  					Expect(err).NotTo(HaveOccurred())
   482  					Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   483  					Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2))
   484  				})
   485  
   486  				It("does not enqueue if envelope is not valid", func() {
   487  					support.ProcessNormalMsgReturns(1, errors.Errorf("Envelope is invalid"))
   488  
   489  					err := chain.Order(env, 0)
   490  					Expect(err).NotTo(HaveOccurred())
   491  					Consistently(cutter.CurBatch).Should(HaveLen(0))
   492  					Consistently(clock.WatcherCount).Should(Equal(1))
   493  				})
   494  			})
   495  
   496  			It("unblocks Errored if chain is halted", func() {
   497  				errorC := chain.Errored()
   498  				Expect(errorC).NotTo(BeClosed())
   499  				chain.Halt()
   500  				Eventually(errorC, LongEventualTimeout).Should(BeClosed())
   501  			})
   502  
   503  			It("does not call the halt callback function when halting externally", func() {
   504  				chain.Halt()
   505  				Consistently(fakeHaltCallbacker.HaltCallbackCallCount).Should(Equal(0))
   506  			})
   507  
   508  			Describe("Config updates", func() {
   509  				var (
   510  					configEnv *common.Envelope
   511  					configSeq uint64
   512  				)
   513  
   514  				Context("when a type A config update comes", func() {
   515  					Context("for existing channel", func() {
    516  						// used to prepare the Orderer Values
   517  						BeforeEach(func() {
   518  							newValues := map[string]*common.ConfigValue{
   519  								"BatchTimeout": {
   520  									Version: 1,
   521  									Value: marshalOrPanic(&orderer.BatchTimeout{
   522  										Timeout: "3ms",
   523  									}),
   524  								},
   525  								"ConsensusType": {
   526  									Version: 4,
   527  								},
   528  							}
   529  							oldValues := map[string]*common.ConfigValue{
   530  								"ConsensusType": {
   531  									Version: 4,
   532  								},
   533  							}
   534  							configEnv = newConfigEnv(channelID,
   535  								common.HeaderType_CONFIG,
   536  								newConfigUpdateEnv(channelID, oldValues, newValues),
   537  							)
   538  							configSeq = 0
   539  						}) // BeforeEach block
   540  
   541  						Context("without revalidation (i.e. correct config sequence)", func() {
   542  							Context("without pending normal envelope", func() {
   543  								It("should create a config block and no normal block", func() {
   544  									err := chain.Configure(configEnv, configSeq)
   545  									Expect(err).NotTo(HaveOccurred())
   546  									Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1))
   547  									Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   548  									Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   549  									Consistently(support.WriteBlockCallCount).Should(Equal(0))
   550  									Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(2)) // incl. initial call
   551  									Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(1)).Should(Equal(float64(1)))
   552  								})
   553  							})
   554  
   555  							Context("with pending normal envelope", func() {
   556  								It("should create a normal block and a config block", func() {
   557  									// We do not need to block the cutter from ordering in our test case and therefore close this channel.
   558  									close(cutter.Block)
   559  
   560  									By("adding a normal envelope")
   561  									err := chain.Order(env, 0)
   562  									Expect(err).NotTo(HaveOccurred())
   563  									Expect(fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
   564  									Expect(fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   565  									Eventually(cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
   566  
   567  									By("adding a config envelope")
   568  									err = chain.Configure(configEnv, configSeq)
   569  									Expect(err).NotTo(HaveOccurred())
   570  									Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1))
   571  									Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   572  
   573  									Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   574  									Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   575  									Expect(fakeFields.fakeCommittedBlockNumber.SetCallCount()).Should(Equal(3)) // incl. initial call
   576  									Expect(fakeFields.fakeCommittedBlockNumber.SetArgsForCall(2)).Should(Equal(float64(2)))
   577  								})
   578  							})
   579  						})
   580  
   581  						Context("with revalidation (i.e. incorrect config sequence)", func() {
   582  							BeforeEach(func() {
   583  								close(cutter.Block)
   584  								support.SequenceReturns(1) // this causes the revalidation
   585  							})
   586  
   587  							It("should create config block upon correct revalidation", func() {
   588  								support.ProcessConfigMsgReturns(configEnv, 1, nil) // nil implies correct revalidation
   589  
   590  								Expect(chain.Configure(configEnv, configSeq)).To(Succeed())
   591  								Consistently(clock.WatcherCount).Should(Equal(1))
   592  								Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   593  							})
   594  
   595  							It("should not create config block upon incorrect revalidation", func() {
   596  								support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence"))
   597  
   598  								Expect(chain.Configure(configEnv, configSeq)).To(Succeed())
   599  								Consistently(clock.WatcherCount).Should(Equal(1))
   600  								Consistently(support.WriteConfigBlockCallCount).Should(Equal(0)) // no call to WriteConfigBlock
   601  							})
   602  
   603  							It("should not disturb current running timer upon incorrect revalidation", func() {
   604  								support.ProcessNormalMsgReturns(1, nil)
   605  								support.ProcessConfigMsgReturns(configEnv, 1, errors.Errorf("Invalid config envelope at changed config sequence"))
   606  
   607  								Expect(chain.Order(env, configSeq)).To(Succeed())
   608  								Eventually(clock.WatcherCount, LongEventualTimeout).Should(Equal(2))
   609  
   610  								clock.Increment(30 * time.Minute)
   611  								Consistently(support.WriteBlockCallCount).Should(Equal(0))
   612  
   613  								Expect(chain.Configure(configEnv, configSeq)).To(Succeed())
   614  								Consistently(clock.WatcherCount).Should(Equal(2))
   615  
   616  								Consistently(support.WriteBlockCallCount).Should(Equal(0))
   617  								Consistently(support.WriteConfigBlockCallCount).Should(Equal(0))
   618  
   619  								clock.Increment(30 * time.Minute)
   620  								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   621  							})
   622  						})
   623  					})
   624  
   625  					Context("for creating a new channel", func() {
    626  						// used to prepare the Orderer Values
   627  						BeforeEach(func() {
   628  							chainID := "mychannel"
   629  							values := make(map[string]*common.ConfigValue)
   630  							configEnv = newConfigEnv(chainID,
   631  								common.HeaderType_CONFIG,
   632  								newConfigUpdateEnv(chainID, nil, values),
   633  							)
   634  							configSeq = 0
   635  						}) // BeforeEach block
   636  
   637  						It("should be able to create a channel", func() {
   638  							err := chain.Configure(configEnv, configSeq)
   639  							Expect(err).NotTo(HaveOccurred())
   640  							Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   641  						})
   642  					})
   643  				}) // Context block for type A config
   644  
   645  				Context("when a type B config update comes", func() {
   646  					Context("updating protocol values", func() {
    647  						// used to prepare the Orderer Values
   648  						BeforeEach(func() {
   649  							values := map[string]*common.ConfigValue{
   650  								"ConsensusType": {
   651  									Version: 1,
   652  									Value: marshalOrPanic(&orderer.ConsensusType{
   653  										Metadata: marshalOrPanic(consenterMetadata),
   654  									}),
   655  								},
   656  							}
   657  							configEnv = newConfigEnv(channelID,
   658  								common.HeaderType_CONFIG,
   659  								newConfigUpdateEnv(channelID, nil, values))
   660  							configSeq = 0
   661  						}) // BeforeEach block
   662  
   663  						It("should be able to process config update of type B", func() {
   664  							err := chain.Configure(configEnv, configSeq)
   665  							Expect(err).NotTo(HaveOccurred())
   666  							Expect(fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1))
   667  							Expect(fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
   668  							Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   669  						})
   670  					})
   671  
   672  					Context("updating consenters set by exactly one node", func() {
   673  						It("should be able to process config update adding single node", func() {
   674  							metadata := proto.Clone(consenterMetadata).(*raftprotos.ConfigMetadata)
   675  							metadata.Consenters = append(metadata.Consenters, &raftprotos.Consenter{
   676  								Host:          "localhost",
   677  								Port:          7050,
   678  								ServerTlsCert: serverTLSCert(tlsCA),
   679  								ClientTlsCert: clientTLSCert(tlsCA),
   680  							})
   681  
   682  							values := map[string]*common.ConfigValue{
   683  								"ConsensusType": {
   684  									Version: 1,
   685  									Value: marshalOrPanic(&orderer.ConsensusType{
   686  										Metadata: marshalOrPanic(metadata),
   687  									}),
   688  								},
   689  							}
   690  							configEnv = newConfigEnv(channelID,
   691  								common.HeaderType_CONFIG,
   692  								newConfigUpdateEnv(channelID, nil, values))
   693  							configSeq = 0
   694  
   695  							err := chain.Configure(configEnv, configSeq)
   696  							Expect(err).NotTo(HaveOccurred())
   697  							Eventually(support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
   698  						})
   699  					})
   700  				})
   701  			})
   702  
   703  			Describe("Crash Fault Tolerance", func() {
   704  				var raftMetadata *raftprotos.BlockMetadata
   705  
   706  				BeforeEach(func() {
   707  					raftMetadata = &raftprotos.BlockMetadata{
   708  						ConsenterIds:    []uint64{1},
   709  						NextConsenterId: 2,
   710  					}
   711  				})
   712  
   713  				Describe("when a chain is started with existing WAL", func() {
   714  					var (
   715  						m1 *raftprotos.BlockMetadata
   716  						m2 *raftprotos.BlockMetadata
   717  					)
   718  					JustBeforeEach(func() {
   719  						// to generate WAL data, we start a chain,
   720  						// order several envelopes and then halt the chain.
   721  						close(cutter.Block)
   722  						cutter.CutNext = true
   723  
    724  						// enqueue some data to be persisted on disk by raft
   725  						err := chain.Order(env, uint64(0))
   726  						Expect(err).NotTo(HaveOccurred())
   727  						Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   728  
   729  						_, metadata := support.WriteBlockArgsForCall(0)
   730  						m1 = &raftprotos.BlockMetadata{}
   731  						proto.Unmarshal(metadata, m1)
   732  
   733  						err = chain.Order(env, uint64(0))
   734  						Expect(err).NotTo(HaveOccurred())
   735  						Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   736  
   737  						_, metadata = support.WriteBlockArgsForCall(1)
   738  						m2 = &raftprotos.BlockMetadata{}
   739  						proto.Unmarshal(metadata, m2)
   740  
   741  						chain.Halt()
   742  					})
   743  
   744  					It("replays blocks from committed entries", func() {
   745  						c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
   746  						c.init()
   747  						c.Start()
   748  						defer c.Halt()
   749  
   750  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   751  
   752  						_, metadata := c.support.WriteBlockArgsForCall(0)
   753  						m := &raftprotos.BlockMetadata{}
   754  						proto.Unmarshal(metadata, m)
   755  						Expect(m.RaftIndex).To(Equal(m1.RaftIndex))
   756  
   757  						_, metadata = c.support.WriteBlockArgsForCall(1)
   758  						m = &raftprotos.BlockMetadata{}
   759  						proto.Unmarshal(metadata, m)
   760  						Expect(m.RaftIndex).To(Equal(m2.RaftIndex))
   761  
   762  						// chain should keep functioning
   763  						campaign(c.Chain, c.observe)
   764  
   765  						c.cutter.CutNext = true
   766  
   767  						err := c.Order(env, uint64(0))
   768  						Expect(err).NotTo(HaveOccurred())
   769  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
   770  					})
   771  
   772  					It("only replays blocks after Applied index", func() {
   773  						raftMetadata.RaftIndex = m1.RaftIndex
   774  						c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
   775  						c.support.WriteBlock(support.WriteBlockArgsForCall(0))
   776  
   777  						c.init()
   778  						c.Start()
   779  						defer c.Halt()
   780  
   781  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   782  
   783  						_, metadata := c.support.WriteBlockArgsForCall(1)
   784  						m := &raftprotos.BlockMetadata{}
   785  						proto.Unmarshal(metadata, m)
   786  						Expect(m.RaftIndex).To(Equal(m2.RaftIndex))
   787  
   788  						// chain should keep functioning
   789  						campaign(c.Chain, c.observe)
   790  
   791  						c.cutter.CutNext = true
   792  
   793  						err := c.Order(env, uint64(0))
   794  						Expect(err).NotTo(HaveOccurred())
   795  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
   796  					})
   797  
   798  					It("does not replay any block if already in sync", func() {
   799  						raftMetadata.RaftIndex = m2.RaftIndex
   800  						c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
   801  						c.init()
   802  						c.Start()
   803  						defer c.Halt()
   804  
   805  						Consistently(c.support.WriteBlockCallCount).Should(Equal(0))
   806  
   807  						// chain should keep functioning
   808  						campaign(c.Chain, c.observe)
   809  
   810  						c.cutter.CutNext = true
   811  
   812  						err := c.Order(env, uint64(0))
   813  						Expect(err).NotTo(HaveOccurred())
   814  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   815  					})
   816  
   817  					Context("WAL file is not readable", func() {
   818  						It("fails to load wal", func() {
   819  							skipIfRoot()
   820  
   821  							files, err := ioutil.ReadDir(walDir)
   822  							Expect(err).NotTo(HaveOccurred())
   823  							for _, f := range files {
   824  								os.Chmod(path.Join(walDir, f.Name()), 0o300)
   825  							}
   826  
   827  							c, err := etcdraft.NewChain(support, opts, configurator, nil, cryptoProvider, noOpBlockPuller, nil, observeC)
   828  							Expect(c).To(BeNil())
   829  							Expect(err).To(MatchError(ContainSubstring("permission denied")))
   830  						})
   831  					})
   832  				})
   833  
   834  				Describe("when snapshotting is enabled (snapshot interval is not zero)", func() {
   835  					var (
   836  						ledgerLock sync.Mutex
   837  						ledger     map[uint64]*common.Block
   838  					)
   839  
   840  					countFiles := func() int {
   841  						files, err := ioutil.ReadDir(snapDir)
   842  						Expect(err).NotTo(HaveOccurred())
   843  						return len(files)
   844  					}
   845  
   846  					BeforeEach(func() {
   847  						opts.SnapshotCatchUpEntries = 2
   848  
   849  						close(cutter.Block)
   850  						cutter.CutNext = true
   851  
   852  						ledgerLock.Lock()
   853  						ledger = map[uint64]*common.Block{
   854  							0: getSeedBlock(), // genesis block
   855  						}
   856  						ledgerLock.Unlock()
   857  
   858  						support.WriteBlockStub = func(block *common.Block, meta []byte) {
   859  							b := proto.Clone(block).(*common.Block)
   860  
   861  							bytes, err := proto.Marshal(&common.Metadata{Value: meta})
   862  							Expect(err).NotTo(HaveOccurred())
   863  							b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
   864  
   865  							ledgerLock.Lock()
   866  							defer ledgerLock.Unlock()
   867  							ledger[b.Header.Number] = b
   868  						}
   869  
   870  						support.HeightStub = func() uint64 {
   871  							ledgerLock.Lock()
   872  							defer ledgerLock.Unlock()
   873  							return uint64(len(ledger))
   874  						}
   875  					})
   876  
   877  					Context("Small SnapshotInterval", func() {
   878  						BeforeEach(func() {
   879  							opts.SnapshotIntervalSize = 1
   880  						})
   881  
   882  						It("writes snapshot file to snapDir", func() {
    883  							// Scenario: start a chain with SnapshotIntervalSize = 1 byte and expect it to take
    884  							// one snapshot for each block
   885  
   886  							i, _ := opts.MemoryStorage.FirstIndex()
   887  
   888  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   889  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   890  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
   891  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
   892  							Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(2)) // incl. initial call
   893  							s, _ := opts.MemoryStorage.Snapshot()
   894  							b := protoutil.UnmarshalBlockOrPanic(s.Data)
   895  							Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(1)).To(Equal(float64(b.Header.Number)))
   896  
   897  							i, _ = opts.MemoryStorage.FirstIndex()
   898  
   899  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   900  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   901  
   902  							Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
   903  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
   904  							Expect(fakeFields.fakeSnapshotBlockNumber.SetCallCount()).To(Equal(3)) // incl. initial call
   905  							s, _ = opts.MemoryStorage.Snapshot()
   906  							b = protoutil.UnmarshalBlockOrPanic(s.Data)
   907  							Expect(fakeFields.fakeSnapshotBlockNumber.SetArgsForCall(2)).To(Equal(float64(b.Header.Number)))
   908  						})
   909  
   910  						It("pauses chain if sync is in progress", func() {
    911  							// Scenario:
    912  							// after a snapshot is taken, reboot the chain with raftIndex = 0.
    913  							// The chain should attempt to sync upon reboot, and block on the
    914  							// `WaitReady` API.
   915  
   916  							i, _ := opts.MemoryStorage.FirstIndex()
   917  
   918  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   919  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   920  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
   921  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
   922  
   923  							i, _ = opts.MemoryStorage.FirstIndex()
   924  
   925  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   926  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   927  							Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
   928  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
   929  
   930  							chain.Halt()
   931  
   932  							c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
   933  							c.init()
   934  
   935  							signal := make(chan struct{})
   936  
   937  							c.puller.PullBlockStub = func(i uint64) *common.Block {
   938  								<-signal // blocking for assertions
   939  								ledgerLock.Lock()
   940  								defer ledgerLock.Unlock()
   941  								if i >= uint64(len(ledger)) {
   942  									return nil
   943  								}
   944  
   945  								// This is a false assumption - single node shouldn't be able to pull block from anywhere.
   946  								// However, this test is mainly to assert that chain should attempt catchup upon start,
   947  								// so we could live with it.
   948  								return ledger[i]
   949  							}
   950  
   951  							err := c.WaitReady()
   952  							Expect(err).To(MatchError("chain is not started"))
   953  
   954  							c.Start()
   955  							defer c.Halt()
   956  
   957  							// pull block is called, so chain should be catching up now, WaitReady should block
   958  							signal <- struct{}{}
   959  
   960  							done := make(chan error)
   961  							go func() {
   962  								done <- c.WaitReady()
   963  							}()
   964  
   965  							Consistently(done).ShouldNot(Receive())
   966  							close(signal)                         // unblock block puller
   967  							Eventually(done).Should(Receive(nil)) // WaitReady should be unblocked
   968  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   969  						})
   970  
   971  						It("commits block from snapshot if it's missing from ledger", func() {
   972  							// Scenario:
    973  							// Single node exits right after a snapshot is taken, while the block
   974  							// in it hasn't been successfully persisted into ledger (there can be one
   975  							// async block write in-flight). Then the node is restarted, and catches
   976  							// up using the block in snapshot.
   977  
   978  							Expect(chain.Order(env, uint64(0))).To(Succeed())
   979  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   980  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
   981  
   982  							chain.Halt()
   983  
   984  							c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
   985  							c.init()
   986  							c.Start()
   987  							defer c.Halt()
   988  
   989  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
   990  						})
   991  
   992  						It("restores snapshot w/o extra entries", func() {
   993  							// Scenario:
   994  							// after a snapshot is taken, no more entries are appended.
    995  							// Then the node is restarted; it loads the snapshot and finds its term
    996  							// and index. While replaying the WAL to memory storage, it should
    997  							// not append any entry because no extra entry was appended
    998  							// after the snapshot was taken.
   999  
  1000  							Expect(chain.Order(env, uint64(0))).To(Succeed())
  1001  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1002  							_, metadata := support.WriteBlockArgsForCall(0)
  1003  							m := &raftprotos.BlockMetadata{}
  1004  							proto.Unmarshal(metadata, m)
  1005  
  1006  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
  1007  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1))
  1008  							snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created
  1009  							Expect(err).NotTo(HaveOccurred())
  1010  							i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory
  1011  							Expect(err).NotTo(HaveOccurred())
  1012  
  1013  							// expect storage to preserve SnapshotCatchUpEntries entries before snapshot
  1014  							Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1))
  1015  
  1016  							chain.Halt()
  1017  
  1018  							raftMetadata.RaftIndex = m.RaftIndex
  1019  							c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
  1020  							c.opts.SnapshotIntervalSize = 1
  1021  
  1022  							c.init()
  1023  							c.Start()
  1024  
  1025  							// following arithmetic reflects how etcdraft MemoryStorage is implemented
   1026  							// when no entry is appended after the snapshot is loaded.
  1027  							Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1))
  1028  							Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index))
  1029  
  1030  							// chain keeps functioning
  1031  							Eventually(func() <-chan raft.SoftState {
  1032  								c.clock.Increment(interval)
  1033  								return c.observe
  1034  							}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
  1035  
  1036  							c.cutter.CutNext = true
  1037  							err = c.Order(env, uint64(0))
  1038  							Expect(err).NotTo(HaveOccurred())
  1039  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1040  
  1041  							Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
  1042  							c.Halt()
  1043  
  1044  							_, metadata = c.support.WriteBlockArgsForCall(0)
  1045  							m = &raftprotos.BlockMetadata{}
  1046  							proto.Unmarshal(metadata, m)
  1047  							raftMetadata.RaftIndex = m.RaftIndex
  1048  							cx := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
  1049  
  1050  							cx.init()
  1051  							cx.Start()
  1052  							defer cx.Halt()
  1053  
  1054  							// chain keeps functioning
  1055  							Eventually(func() <-chan raft.SoftState {
  1056  								cx.clock.Increment(interval)
  1057  								return cx.observe
  1058  							}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateLeader)))
  1059  						})
  1060  					})
  1061  
  1062  					Context("Large SnapshotInterval", func() {
  1063  						BeforeEach(func() {
  1064  							opts.SnapshotIntervalSize = 1024
  1065  						})
  1066  
  1067  						It("restores snapshot w/ extra entries", func() {
  1068  							// Scenario:
  1069  							// after a snapshot is taken, more entries are appended.
   1070  							// Then the node is restarted; it loads the snapshot and finds its term
   1071  							// and index. While replaying the WAL to memory storage, it should
  1072  							// append some entries.
  1073  
  1074  							largeEnv := &common.Envelope{
  1075  								Payload: marshalOrPanic(&common.Payload{
  1076  									Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
  1077  									Data:   make([]byte, 500),
  1078  								}),
  1079  							}
  1080  
  1081  							By("Ordering two large envelopes to trigger snapshot")
  1082  							Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1083  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1084  
  1085  							Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1086  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  1087  
  1088  							_, metadata := support.WriteBlockArgsForCall(1)
  1089  							m := &raftprotos.BlockMetadata{}
  1090  							proto.Unmarshal(metadata, m)
  1091  
   1092  							// check that the snapshot does exist
  1093  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
  1094  							Eventually(opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", 1))
  1095  							snapshot, err := opts.MemoryStorage.Snapshot() // get the snapshot just created
  1096  							Expect(err).NotTo(HaveOccurred())
  1097  							i, err := opts.MemoryStorage.FirstIndex() // get the first index in memory
  1098  							Expect(err).NotTo(HaveOccurred())
  1099  
  1100  							// expect storage to preserve SnapshotCatchUpEntries entries before snapshot
  1101  							Expect(i).To(Equal(snapshot.Metadata.Index - opts.SnapshotCatchUpEntries + 1))
  1102  
   1103  							By("Ordering another envelope to append new data to memory after snapshot")
  1104  							Expect(chain.Order(env, uint64(0))).To(Succeed())
  1105  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
  1106  
  1107  							lasti, _ := opts.MemoryStorage.LastIndex()
  1108  
  1109  							chain.Halt()
  1110  
  1111  							raftMetadata.RaftIndex = m.RaftIndex
  1112  							c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
  1113  							cnt := support.WriteBlockCallCount()
  1114  							for i := 0; i < cnt; i++ {
  1115  								c.support.WriteBlock(support.WriteBlockArgsForCall(i))
  1116  							}
  1117  
  1118  							By("Restarting the node")
  1119  							c.init()
  1120  							c.Start()
  1121  							defer c.Halt()
  1122  
  1123  							By("Checking latest index is larger than index in snapshot")
  1124  							Eventually(c.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(Equal(snapshot.Metadata.Index + 1))
  1125  							Eventually(c.opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(lasti))
  1126  						})
  1127  
  1128  						When("local ledger is in sync with snapshot", func() {
  1129  							It("does not pull blocks and still respects snapshot interval", func() {
  1130  								// Scenario:
  1131  								// - snapshot is taken at block 2
  1132  								// - order one more envelope (block 3)
  1133  								// - reboot chain at block 2
  1134  								// - block 3 should be replayed from wal
  1135  								// - order another envelope to trigger snapshot, containing block 3 & 4
  1136  								// Assertions:
  1137  								// - block puller should NOT be called
  1138  								// - chain should keep functioning after reboot
  1139  								// - chain should respect snapshot interval to trigger next snapshot
  1140  
  1141  								largeEnv := &common.Envelope{
  1142  									Payload: marshalOrPanic(&common.Payload{
  1143  										Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
  1144  										Data:   make([]byte, 500),
  1145  									}),
  1146  								}
  1147  
  1148  								By("Ordering two large envelopes to trigger snapshot")
  1149  								Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1150  								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1151  
  1152  								Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1153  								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  1154  
  1155  								Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
  1156  
  1157  								_, metadata := support.WriteBlockArgsForCall(1)
  1158  								m := &raftprotos.BlockMetadata{}
  1159  								proto.Unmarshal(metadata, m)
  1160  
  1161  								By("Cutting block [3]")
   1162  								// order another envelope; this should not trigger a snapshot
  1163  								err = chain.Order(largeEnv, uint64(0))
  1164  								Expect(err).NotTo(HaveOccurred())
  1165  								Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
  1166  
  1167  								chain.Halt()
  1168  
  1169  								raftMetadata.RaftIndex = m.RaftIndex
  1170  								c := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
   1171  								// replay blocks 1 & 2
  1172  								c.support.WriteBlock(support.WriteBlockArgsForCall(0))
  1173  								c.support.WriteBlock(support.WriteBlockArgsForCall(1))
  1174  
  1175  								c.opts.SnapshotIntervalSize = 1024
  1176  
  1177  								By("Restarting node at block [2]")
  1178  								c.init()
  1179  								c.Start()
  1180  								defer c.Halt()
  1181  
  1182  								// elect leader
  1183  								campaign(c.Chain, c.observe)
  1184  
  1185  								By("Ordering one more block to trigger snapshot")
  1186  								c.cutter.CutNext = true
  1187  								err = c.Order(largeEnv, uint64(0))
  1188  								Expect(err).NotTo(HaveOccurred())
  1189  
  1190  								Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(4))
  1191  								Expect(c.puller.PullBlockCallCount()).Should(BeZero())
  1192  								// old snapshot file is retained
  1193  								Eventually(countFiles, LongEventualTimeout).Should(Equal(2))
  1194  							})
  1195  						})
  1196  
  1197  						It("respects snapshot interval after reboot", func() {
  1198  							largeEnv := &common.Envelope{
  1199  								Payload: marshalOrPanic(&common.Payload{
  1200  									Header: &common.Header{ChannelHeader: marshalOrPanic(&common.ChannelHeader{Type: int32(common.HeaderType_MESSAGE), ChannelId: channelID})},
  1201  									Data:   make([]byte, 500),
  1202  								}),
  1203  							}
  1204  
  1205  							Expect(chain.Order(largeEnv, uint64(0))).To(Succeed())
  1206  							Eventually(support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1207  							// check no snapshot is taken
  1208  							Consistently(countFiles).Should(Equal(0))
  1209  
  1210  							_, metadata := support.WriteBlockArgsForCall(0)
  1211  							m := &raftprotos.BlockMetadata{}
  1212  							proto.Unmarshal(metadata, m)
  1213  
  1214  							chain.Halt()
  1215  
  1216  							raftMetadata.RaftIndex = m.RaftIndex
  1217  							c1 := newChain(10*time.Second, channelID, dataDir, 1, raftMetadata, consenters, cryptoProvider, nil, nil)
  1218  							cnt := support.WriteBlockCallCount()
  1219  							for i := 0; i < cnt; i++ {
  1220  								c1.support.WriteBlock(support.WriteBlockArgsForCall(i))
  1221  							}
  1222  							c1.cutter.CutNext = true
  1223  							c1.opts.SnapshotIntervalSize = 1024
  1224  
  1225  							By("Restarting chain")
  1226  							c1.init()
  1227  							c1.Start()
  1228  							// chain keeps functioning
  1229  							campaign(c1.Chain, c1.observe)
  1230  
  1231  							Expect(c1.Order(largeEnv, uint64(0))).To(Succeed())
  1232  							Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
   1233  							// check that the snapshot does exist
  1234  							Eventually(countFiles, LongEventualTimeout).Should(Equal(1))
  1235  						})
  1236  					})
  1237  				})
  1238  			})
  1239  
  1240  			Context("Invalid WAL dir", func() {
  1241  				support := &consensusmocks.FakeConsenterSupport{}
  1242  				BeforeEach(func() {
  1243  					// for block creator initialization
  1244  					support.HeightReturns(1)
  1245  					support.BlockReturns(getSeedBlock())
  1246  				})
  1247  
  1248  				When("WAL dir is a file", func() {
  1249  					It("replaces file with fresh WAL dir", func() {
  1250  						f, err := ioutil.TempFile("", "wal-")
  1251  						Expect(err).NotTo(HaveOccurred())
  1252  						defer os.RemoveAll(f.Name())
  1253  
  1254  						chain, err := etcdraft.NewChain(
  1255  							support,
  1256  							etcdraft.Options{
  1257  								WALDir:        f.Name(),
  1258  								SnapDir:       snapDir,
  1259  								Logger:        logger,
  1260  								MemoryStorage: storage,
  1261  								BlockMetadata: &raftprotos.BlockMetadata{},
  1262  								Metrics:       newFakeMetrics(newFakeMetricsFields()),
  1263  							},
  1264  							configurator,
  1265  							nil,
  1266  							cryptoProvider,
  1267  							nil,
  1268  							nil,
  1269  							observeC)
  1270  						Expect(chain).NotTo(BeNil())
  1271  						Expect(err).NotTo(HaveOccurred())
  1272  
  1273  						info, err := os.Stat(f.Name())
  1274  						Expect(err).NotTo(HaveOccurred())
  1275  						Expect(info.IsDir()).To(BeTrue())
  1276  					})
  1277  				})
  1278  
  1279  				When("WAL dir is not writeable", func() {
  1280  					It("replace it with fresh WAL dir", func() {
  1281  						d, err := ioutil.TempDir("", "wal-")
  1282  						Expect(err).NotTo(HaveOccurred())
  1283  						defer os.RemoveAll(d)
  1284  
  1285  						err = os.Chmod(d, 0o500)
  1286  						Expect(err).NotTo(HaveOccurred())
  1287  
  1288  						chain, err := etcdraft.NewChain(
  1289  							support,
  1290  							etcdraft.Options{
  1291  								WALDir:        d,
  1292  								SnapDir:       snapDir,
  1293  								Logger:        logger,
  1294  								MemoryStorage: storage,
  1295  								BlockMetadata: &raftprotos.BlockMetadata{},
  1296  								Metrics:       newFakeMetrics(newFakeMetricsFields()),
  1297  							},
  1298  							nil,
  1299  							nil,
  1300  							cryptoProvider,
  1301  							noOpBlockPuller,
  1302  							nil,
  1303  							nil)
  1304  						Expect(chain).NotTo(BeNil())
  1305  						Expect(err).NotTo(HaveOccurred())
  1306  					})
  1307  				})
  1308  
  1309  				When("WAL parent dir is not writeable", func() {
  1310  					It("fails to bootstrap fresh raft node", func() {
  1311  						skipIfRoot()
  1312  
  1313  						d, err := ioutil.TempDir("", "wal-")
  1314  						Expect(err).NotTo(HaveOccurred())
  1315  						defer os.RemoveAll(d)
  1316  
  1317  						err = os.Chmod(d, 0o500)
  1318  						Expect(err).NotTo(HaveOccurred())
  1319  
  1320  						chain, err := etcdraft.NewChain(
  1321  							support,
  1322  							etcdraft.Options{
  1323  								WALDir:        path.Join(d, "wal-dir"),
  1324  								SnapDir:       snapDir,
  1325  								Logger:        logger,
  1326  								BlockMetadata: &raftprotos.BlockMetadata{},
  1327  							},
  1328  							nil,
  1329  							nil,
  1330  							cryptoProvider,
  1331  							noOpBlockPuller,
  1332  							nil,
  1333  							nil)
  1334  						Expect(chain).To(BeNil())
  1335  						Expect(err).To(MatchError(ContainSubstring("failed to initialize WAL: mkdir")))
  1336  					})
  1337  				})
  1338  			})
  1339  		})
  1340  	})
  1341  
  1342  	Describe("2-node Raft cluster", func() {
  1343  		var (
  1344  			network            *network
  1345  			channelID          string
  1346  			timeout            time.Duration
  1347  			dataDir            string
  1348  			c1, c2             *chain
  1349  			raftMetadata       *raftprotos.BlockMetadata
  1350  			consenters         map[uint64]*raftprotos.Consenter
  1351  			configEnv          *common.Envelope
  1352  			cryptoProvider     bccsp.BCCSP
  1353  			fakeHaltCallbacker *mocks.HaltCallbacker
  1354  		)
  1355  		BeforeEach(func() {
  1356  			var err error
  1357  
  1358  			channelID = "multi-node-channel"
  1359  			timeout = 10 * time.Second
  1360  
  1361  			dataDir, err = ioutil.TempDir("", "raft-test-")
  1362  			Expect(err).NotTo(HaveOccurred())
  1363  
  1364  			cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore())
  1365  			Expect(err).NotTo(HaveOccurred())
  1366  
  1367  			raftMetadata = &raftprotos.BlockMetadata{
  1368  				ConsenterIds:    []uint64{1, 2},
  1369  				NextConsenterId: 3,
  1370  			}
  1371  
  1372  			consenters = map[uint64]*raftprotos.Consenter{
  1373  				1: {
  1374  					Host:          "localhost",
  1375  					Port:          7051,
  1376  					ClientTlsCert: clientTLSCert(tlsCA),
  1377  					ServerTlsCert: serverTLSCert(tlsCA),
  1378  				},
  1379  				2: {
  1380  					Host:          "localhost",
  1381  					Port:          7051,
  1382  					ClientTlsCert: clientTLSCert(tlsCA),
  1383  					ServerTlsCert: serverTLSCert(tlsCA),
  1384  				},
  1385  			}
  1386  
  1387  			metadata := &raftprotos.ConfigMetadata{
  1388  				Options: &raftprotos.Options{
  1389  					TickInterval:         "500ms",
  1390  					ElectionTick:         10,
  1391  					HeartbeatTick:        1,
  1392  					MaxInflightBlocks:    5,
  1393  					SnapshotIntervalSize: 200,
  1394  				},
  1395  				Consenters: []*raftprotos.Consenter{consenters[2]},
  1396  			}
  1397  			value := map[string]*common.ConfigValue{
  1398  				"ConsensusType": {
  1399  					Version: 1,
  1400  					Value: marshalOrPanic(&orderer.ConsensusType{
  1401  						Metadata: marshalOrPanic(metadata),
  1402  					}),
  1403  				},
  1404  			}
  1405  			// prepare config update to remove 1
  1406  			configEnv = newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
  1407  
  1408  			fakeHaltCallbacker = &mocks.HaltCallbacker{}
  1409  			network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider, tlsCA, fakeHaltCallbacker.HaltCallback)
  1410  			c1, c2 = network.chains[1], network.chains[2]
  1411  			c1.cutter.CutNext = true
  1412  			network.init()
  1413  			network.start()
  1414  		})
  1415  
  1416  		AfterEach(func() {
  1417  			network.stop()
  1418  			network.exec(func(c *chain) {
  1419  				Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero())
  1420  			})
  1421  
  1422  			os.RemoveAll(dataDir)
  1423  		})
  1424  
  1425  		It("can remove leader by reconfiguring cluster", func() {
  1426  			network.elect(1)
  1427  
  1428  			// trigger status dissemination
  1429  			Eventually(func() int {
  1430  				c1.clock.Increment(interval)
  1431  				return c2.fakeFields.fakeActiveNodes.SetCallCount()
  1432  			}, LongEventualTimeout).Should(Equal(2))
  1433  			Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2)))
  1434  
  1435  			By("Configuring cluster to remove node")
  1436  			Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1437  			Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
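        			// advance c1's fake clock by a full election timeout so that leadership can move to c2
        			// and c1's eviction can be detected (see the assertions below)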
  1438  			c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2)
  1439  
  1440  			Eventually(func() <-chan raft.SoftState {
  1441  				c2.clock.Increment(interval)
  1442  				return c2.observe
  1443  			}, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateLeader)))
  1444  
  1445  			By("Asserting the haltCallback is called when the node is removed from the replica set")
  1446  			Eventually(fakeHaltCallbacker.HaltCallbackCallCount).Should(Equal(1))
  1447  			By("Asserting the StatusReport responds correctly after eviction")
  1448  			Eventually(
  1449  				func() orderer_types.ConsensusRelation {
  1450  					cRel, _ := c1.StatusReport()
  1451  					return cRel
  1452  				},
  1453  			).Should(Equal(orderer_types.ConsensusRelationConfigTracker))
  1454  			_, status := c1.StatusReport()
  1455  			Expect(status).To(Equal(orderer_types.StatusInactive))
  1456  
  1457  			By("Asserting leader can still serve requests as single-node cluster")
  1458  			c2.cutter.CutNext = true
  1459  			Expect(c2.Order(env, 0)).To(Succeed())
  1460  			Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1461  		})
  1462  
  1463  		It("remove leader by reconfiguring cluster, but Halt before eviction", func() {
  1464  			network.elect(1)
  1465  
  1466  			// trigger status dissemination
  1467  			Eventually(func() int {
  1468  				c1.clock.Increment(interval)
  1469  				return c2.fakeFields.fakeActiveNodes.SetCallCount()
  1470  			}, LongEventualTimeout).Should(Equal(2))
  1471  			Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2)))
  1472  
  1473  			By("Configuring cluster to remove node")
  1474  			Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1475  			Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
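        			// tick c1 just short of the election timeout and Halt it, so its eviction is never
        			// detected and the haltCallback is expected not to fire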
  1476  			c1.clock.WaitForNWatchersAndIncrement((ELECTION_TICK-1)*interval, 2)
  1477  			c1.Halt()
  1478  
  1479  			Eventually(func() <-chan raft.SoftState {
  1480  				c2.clock.Increment(interval)
  1481  				return c2.observe
  1482  			}, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateLeader)))
  1483  
  1484  			By("Asserting leader can still serve requests as single-node cluster")
  1485  			c2.cutter.CutNext = true
  1486  			Expect(c2.Order(env, 0)).To(Succeed())
  1487  			Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1488  
  1489  			By("Asserting the haltCallback is not called when Halt is called before eviction")
  1490  			c1.clock.Increment(interval)
  1491  			Eventually(fakeHaltCallbacker.HaltCallbackCallCount).Should(Equal(0))
  1492  			By("Asserting the StatusReport responds correctly if the haltCallback is not called")
  1493  			Eventually(
  1494  				func() orderer_types.Status {
  1495  					_, status := c1.StatusReport()
  1496  					return status
  1497  				},
  1498  			).Should(Equal(orderer_types.StatusInactive))
  1499  			cRel, _ := c1.StatusReport()
  1500  			Expect(cRel).To(Equal(orderer_types.ConsensusRelationConsenter))
  1501  		})
  1502  
  1503  		It("can remove leader by reconfiguring cluster even if leadership transfer fails", func() {
  1504  			network.elect(1)
  1505  
  1506  			step1 := c1.getStepFunc()
  1507  			c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  1508  				stepMsg := &raftpb.Message{}
  1509  				if err := proto.Unmarshal(msg.Payload, stepMsg); err != nil {
  1510  					return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err)
  1511  				}
  1512  
  1513  				if stepMsg.Type == raftpb.MsgTimeoutNow {
  1514  					return nil
  1515  				}
  1516  
  1517  				return step1(dest, msg)
  1518  			})
  1519  
  1520  			By("Configuring cluster to remove node")
  1521  			Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1522  			Eventually(c2.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1523  			c2.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2)
  1524  			Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1525  
  1526  			c1.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2)
  1527  			Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed())
  1528  			close(c1.stopped) // mark c1 stopped in network
  1529  
  1530  			network.elect(2)
  1531  
  1532  			By("Asserting leader can still serve requests as single-node cluster")
  1533  			c2.cutter.CutNext = true
  1534  			Expect(c2.Order(env, 0)).To(Succeed())
  1535  			Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1536  		})
  1537  
  1538  		It("can remove follower by reconfiguring cluster", func() {
  1539  			network.elect(2)
  1540  
  1541  			Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1542  			network.exec(func(c *chain) {
  1543  				Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1544  			})
  1545  
  1546  			Eventually(c2.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1547  			Eventually(c1.Chain.Errored, LongEventualTimeout).Should(BeClosed())
  1548  
  1549  			By("Asserting leader can still serve requests as single-node cluster")
  1550  			c2.cutter.CutNext = true
  1551  			Expect(c2.Order(env, 0)).To(Succeed())
  1552  			Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1553  		})
  1554  	})
  1555  
  1556  	Describe("3-node Raft cluster", func() {
  1557  		var (
  1558  			network        *network
  1559  			channelID      string
  1560  			timeout        time.Duration
  1561  			dataDir        string
  1562  			c1, c2, c3     *chain
  1563  			raftMetadata   *raftprotos.BlockMetadata
  1564  			consenters     map[uint64]*raftprotos.Consenter
  1565  			cryptoProvider bccsp.BCCSP
  1566  		)
  1567  
  1568  		BeforeEach(func() {
  1569  			var err error
  1570  
  1571  			channelID = "multi-node-channel"
  1572  			timeout = 10 * time.Second
  1573  
  1574  			dataDir, err = ioutil.TempDir("", "raft-test-")
  1575  			Expect(err).NotTo(HaveOccurred())
  1576  
  1577  			raftMetadata = &raftprotos.BlockMetadata{
  1578  				ConsenterIds:    []uint64{1, 2, 3},
  1579  				NextConsenterId: 4,
  1580  			}
  1581  
  1582  			cryptoProvider, err = sw.NewDefaultSecurityLevelWithKeystore(sw.NewDummyKeyStore())
  1583  			Expect(err).NotTo(HaveOccurred())
  1584  
  1585  			consenters = map[uint64]*raftprotos.Consenter{
  1586  				1: {
  1587  					Host:          "localhost",
  1588  					Port:          7051,
  1589  					ClientTlsCert: clientTLSCert(tlsCA),
  1590  					ServerTlsCert: serverTLSCert(tlsCA),
  1591  				},
  1592  				2: {
  1593  					Host:          "localhost",
  1594  					Port:          7051,
  1595  					ClientTlsCert: clientTLSCert(tlsCA),
  1596  					ServerTlsCert: serverTLSCert(tlsCA),
  1597  				},
  1598  				3: {
  1599  					Host:          "localhost",
  1600  					Port:          7051,
  1601  					ClientTlsCert: clientTLSCert(tlsCA),
  1602  					ServerTlsCert: serverTLSCert(tlsCA),
  1603  				},
  1604  			}
  1605  
  1606  			network = createNetwork(timeout, channelID, dataDir, raftMetadata, consenters, cryptoProvider, tlsCA, nil)
  1607  			c1 = network.chains[1]
  1608  			c2 = network.chains[2]
  1609  			c3 = network.chains[3]
  1610  		})
  1611  
  1612  		AfterEach(func() {
  1613  			network.stop()
  1614  			network.exec(func(c *chain) {
  1615  				Eventually(c.clock.WatcherCount, LongEventualTimeout).Should(BeZero())
  1616  			})
  1617  
  1618  			os.RemoveAll(dataDir)
  1619  		})
  1620  
  1621  		When("2/3 nodes are running", func() {
  1622  			It("late node can catch up", func() {
  1623  				network.init()
  1624  				network.start(1, 2)
  1625  				network.elect(1)
  1626  
  1627  				// trigger status dissemination
  1628  				Eventually(func() int {
  1629  					c1.clock.Increment(interval)
  1630  					return c2.fakeFields.fakeActiveNodes.SetCallCount()
  1631  				}, LongEventualTimeout).Should(Equal(2))
  1632  				Expect(c2.fakeFields.fakeActiveNodes.SetArgsForCall(1)).To(Equal(float64(2)))
  1633  
  1634  				c1.cutter.CutNext = true
  1635  				err := c1.Order(env, 0)
  1636  				Expect(err).NotTo(HaveOccurred())
  1637  
  1638  				Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1639  				Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1640  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  1641  
  1642  				network.start(3)
  1643  
  1644  				c1.clock.Increment(interval)
  1645  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1646  
  1647  				network.stop()
  1648  			})
  1649  
  1650  			It("late node receives snapshot from leader", func() {
  1651  				c1.opts.SnapshotIntervalSize = 1
  1652  				c1.opts.SnapshotCatchUpEntries = 1
  1653  
  1654  				c1.cutter.CutNext = true
  1655  
  1656  				var blocksLock sync.Mutex
  1657  				blocks := make(map[uint64]*common.Block) // storing written blocks for block puller
  1658  
  1659  				c1.support.WriteBlockStub = func(b *common.Block, meta []byte) {
  1660  					blocksLock.Lock()
  1661  					defer blocksLock.Unlock()
  1662  					bytes, err := proto.Marshal(&common.Metadata{Value: meta})
  1663  					Expect(err).NotTo(HaveOccurred())
  1664  					b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
  1665  					blocks[b.Header.Number] = b
  1666  				}
  1667  
  1668  				c3.puller.PullBlockStub = func(i uint64) *common.Block {
  1669  					blocksLock.Lock()
  1670  					defer blocksLock.Unlock()
  1671  					b, exist := blocks[i]
  1672  					if !exist {
  1673  						return nil
  1674  					}
  1675  
  1676  					return b
  1677  				}
  1678  
  1679  				network.init()
  1680  				network.start(1, 2)
  1681  				network.elect(1)
  1682  
  1683  				err := c1.Order(env, 0)
  1684  				Expect(err).NotTo(HaveOccurred())
  1685  
  1686  				Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1687  				Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  1688  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  1689  
  1690  				err = c1.Order(env, 0)
  1691  				Expect(err).NotTo(HaveOccurred())
  1692  
  1693  				Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2))
  1694  				Eventually(func() int { return c2.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2))
  1695  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  1696  
  1697  				network.start(3)
  1698  
  1699  				c1.clock.Increment(interval)
  1700  				Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(2))
  1701  
  1702  				network.stop()
  1703  			})
  1704  		})
  1705  
  1706  		When("reconfiguring raft cluster", func() {
  1707  			const (
  1708  				defaultTimeout = 5 * time.Second
  1709  			)
  1710  			var (
  1711  				options = &raftprotos.Options{
  1712  					TickInterval:         "500ms",
  1713  					ElectionTick:         10,
  1714  					HeartbeatTick:        1,
  1715  					MaxInflightBlocks:    5,
  1716  					SnapshotIntervalSize: 200,
  1717  				}
  1718  				updateRaftConfigValue = func(metadata *raftprotos.ConfigMetadata) map[string]*common.ConfigValue {
  1719  					return map[string]*common.ConfigValue{
  1720  						"ConsensusType": {
  1721  							Version: 1,
  1722  							Value: marshalOrPanic(&orderer.ConsensusType{
  1723  								Metadata: marshalOrPanic(metadata),
  1724  							}),
  1725  						},
  1726  					}
  1727  				}
  1728  				addConsenterConfigValue = func() map[string]*common.ConfigValue {
  1729  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1730  					for _, consenter := range consenters {
  1731  						metadata.Consenters = append(metadata.Consenters, consenter)
  1732  					}
  1733  
  1734  					newConsenter := &raftprotos.Consenter{
  1735  						Host:          "localhost",
  1736  						Port:          7050,
  1737  						ServerTlsCert: serverTLSCert(tlsCA),
  1738  						ClientTlsCert: clientTLSCert(tlsCA),
  1739  					}
  1740  					metadata.Consenters = append(metadata.Consenters, newConsenter)
  1741  					return updateRaftConfigValue(metadata)
  1742  				}
  1743  				removeConsenterConfigValue = func(id uint64) map[string]*common.ConfigValue {
  1744  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1745  					for nodeID, consenter := range consenters {
  1746  						if nodeID == id {
  1747  							continue
  1748  						}
  1749  						metadata.Consenters = append(metadata.Consenters, consenter)
  1750  					}
  1751  					return updateRaftConfigValue(metadata)
  1752  				}
  1753  				createChannelEnv = func(metadata *raftprotos.ConfigMetadata) *common.Envelope {
  1754  					configEnv := newConfigEnv("another-channel",
  1755  						common.HeaderType_CONFIG,
  1756  						newConfigUpdateEnv(channelID, nil, updateRaftConfigValue(metadata)))
  1757  
  1758  					// Wrap config env in Orderer transaction
  1759  					return &common.Envelope{
  1760  						Payload: marshalOrPanic(&common.Payload{
  1761  							Header: &common.Header{
  1762  								ChannelHeader: marshalOrPanic(&common.ChannelHeader{
  1763  									Type:      int32(common.HeaderType_ORDERER_TRANSACTION),
  1764  									ChannelId: channelID,
  1765  								}),
  1766  							},
  1767  							Data: marshalOrPanic(configEnv),
  1768  						}),
  1769  					}
  1770  				}
  1771  			)
  1772  
  1773  			BeforeEach(func() {
  1774  				network.exec(func(c *chain) {
  1775  					c.opts.EvictionSuspicion = time.Millisecond * 100
  1776  					c.opts.LeaderCheckInterval = time.Millisecond * 100
  1777  				})
  1778  
  1779  				network.init()
  1780  				network.start()
  1781  				network.elect(1)
  1782  
  1783  				By("Submitting first tx to cut the block")
  1784  				c1.cutter.CutNext = true
  1785  				err := c1.Order(env, 0)
  1786  				Expect(err).NotTo(HaveOccurred())
  1787  
  1788  				c1.clock.Increment(interval)
  1789  
  1790  				network.exec(
  1791  					func(c *chain) {
  1792  						Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1))
  1793  					})
  1794  			})
  1795  
  1796  			AfterEach(func() {
  1797  				network.stop()
  1798  			})
  1799  
  1800  			Context("channel creation", func() {
  1801  				It("succeeds with valid config metadata", func() {
  1802  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1803  					for _, consenter := range consenters {
  1804  						metadata.Consenters = append(metadata.Consenters, consenter)
  1805  					}
  1806  
  1807  					Expect(c1.Configure(createChannelEnv(metadata), 0)).To(Succeed())
  1808  					network.exec(func(c *chain) {
  1809  						Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  1810  					})
  1811  				})
  1812  			})
  1813  
  1814  			Context("reconfiguration", func() {
  1815  				It("can rotate certificate by adding and removing 1 node in one config update", func() {
  1816  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1817  					for id, consenter := range consenters {
  1818  						if id == 2 {
  1819  							// remove second consenter
  1820  							continue
  1821  						}
  1822  						metadata.Consenters = append(metadata.Consenters, consenter)
  1823  					}
  1824  
  1825  					// add new consenter
  1826  					newConsenter := &raftprotos.Consenter{
  1827  						Host:          "localhost",
  1828  						Port:          7050,
  1829  						ServerTlsCert: serverTLSCert(tlsCA),
  1830  						ClientTlsCert: clientTLSCert(tlsCA),
  1831  					}
  1832  					metadata.Consenters = append(metadata.Consenters, newConsenter)
  1833  
  1834  					value := map[string]*common.ConfigValue{
  1835  						"ConsensusType": {
  1836  							Version: 1,
  1837  							Value: marshalOrPanic(&orderer.ConsensusType{
  1838  								Metadata: marshalOrPanic(metadata),
  1839  							}),
  1840  						},
  1841  					}
  1842  
  1843  					By("creating new configuration with removed node and new one")
  1844  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
  1845  					c1.cutter.CutNext = true
  1846  
  1847  					By("sending config transaction")
  1848  					Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1849  
  1850  					network.exec(func(c *chain) {
  1851  						Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1852  					})
  1853  				})
  1854  
  1855  				It("rotates leader certificate and triggers leadership transfer", func() {
  1856  					metadata := &raftprotos.ConfigMetadata{Options: options}
  1857  					for id, consenter := range consenters {
  1858  						if id == 1 {
  1859  							// remove first consenter
  1860  							continue
  1861  						}
  1862  						metadata.Consenters = append(metadata.Consenters, consenter)
  1863  					}
  1864  
  1865  					// add new consenter
  1866  					newConsenter := &raftprotos.Consenter{
  1867  						Host:          "localhost",
  1868  						Port:          7050,
  1869  						ServerTlsCert: serverTLSCert(tlsCA),
  1870  						ClientTlsCert: clientTLSCert(tlsCA),
  1871  					}
  1872  					metadata.Consenters = append(metadata.Consenters, newConsenter)
  1873  
  1874  					value := map[string]*common.ConfigValue{
  1875  						"ConsensusType": {
  1876  							Version: 1,
  1877  							Value: marshalOrPanic(&orderer.ConsensusType{
  1878  								Metadata: marshalOrPanic(metadata),
  1879  							}),
  1880  						},
  1881  					}
  1882  
  1883  					By("creating new configuration with removed node and new one")
  1884  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
  1885  					c1.cutter.CutNext = true
  1886  
  1887  					By("sending config transaction")
  1888  					Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1889  
  1890  					Eventually(c1.observe, LongEventualTimeout).Should(Receive(BeFollower()))
  1891  					network.exec(func(c *chain) {
  1892  						Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1893  					})
  1894  				})
  1895  
  1896  				When("Leader is disconnected after cert rotation", func() {
  1897  					It("still configures communication after failed leader transfer attempt", func() {
  1898  						metadata := &raftprotos.ConfigMetadata{Options: options}
  1899  						for id, consenter := range consenters {
  1900  							if id == 1 {
  1901  								// remove first consenter
  1902  								continue
  1903  							}
  1904  							metadata.Consenters = append(metadata.Consenters, consenter)
  1905  						}
  1906  
  1907  						// add new consenter
  1908  						newConsenter := &raftprotos.Consenter{
  1909  							Host:          "localhost",
  1910  							Port:          7050,
  1911  							ServerTlsCert: serverTLSCert(tlsCA),
  1912  							ClientTlsCert: clientTLSCert(tlsCA),
  1913  						}
  1914  						metadata.Consenters = append(metadata.Consenters, newConsenter)
  1915  
  1916  						value := map[string]*common.ConfigValue{
  1917  							"ConsensusType": {
  1918  								Version: 1,
  1919  								Value: marshalOrPanic(&orderer.ConsensusType{
  1920  									Metadata: marshalOrPanic(metadata),
  1921  								}),
  1922  							},
  1923  						}
  1924  
  1925  						By("creating new configuration with removed node and new one")
  1926  						configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
  1927  						c1.cutter.CutNext = true
  1928  
  1929  						step1 := c1.getStepFunc()
  1930  						count := c1.rpc.SendConsensusCallCount() // record current step call count
  1931  						c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  1932  							// disconnect network after 4 MsgApp are sent by c1:
  1933  							// - 2 MsgApp to c2 & c3 that replicate data to raft followers
  1934  							// - 2 MsgApp to c2 & c3 that instructs followers to commit data
  1935  							if c1.rpc.SendConsensusCallCount() == count+4 {
  1936  								defer network.disconnect(1)
  1937  							}
  1938  
  1939  							return step1(dest, msg)
  1940  						})
  1941  
  1942  						network.exec(func(c *chain) {
  1943  							Consistently(c.clock.WatcherCount).Should(Equal(1))
  1944  						})
  1945  
  1946  						By("sending config transaction")
  1947  						Expect(c1.Configure(configEnv, 0)).To(Succeed())
  1948  
  1949  						Consistently(c1.observe).ShouldNot(Receive())
  1950  						network.exec(func(c *chain) {
  1951  							// wait for timeout timer to start
  1952  							c.clock.WaitForNWatchersAndIncrement(time.Duration(ELECTION_TICK)*interval, 2)
  1953  							Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  1954  						})
  1955  					})
  1956  				})
  1957  
  1958  				When("Follower is disconnected while leader cert is being rotated", func() {
  1959  					It("still configures communication and transfer leader", func() {
  1960  						metadata := &raftprotos.ConfigMetadata{Options: options}
  1961  						for id, consenter := range consenters {
  1962  							if id == 1 {
  1963  								// remove first consenter
  1964  								continue
  1965  							}
  1966  							metadata.Consenters = append(metadata.Consenters, consenter)
  1967  						}
  1968  
  1969  						// add new consenter
  1970  						newConsenter := &raftprotos.Consenter{
  1971  							Host:          "localhost",
  1972  							Port:          7050,
  1973  							ServerTlsCert: serverTLSCert(tlsCA),
  1974  							ClientTlsCert: clientTLSCert(tlsCA),
  1975  						}
  1976  						metadata.Consenters = append(metadata.Consenters, newConsenter)
  1977  
  1978  						value := map[string]*common.ConfigValue{
  1979  							"ConsensusType": {
  1980  								Version: 1,
  1981  								Value: marshalOrPanic(&orderer.ConsensusType{
  1982  									Metadata: marshalOrPanic(metadata),
  1983  								}),
  1984  							},
  1985  						}
  1986  
  1987  						cnt := c1.rpc.SendConsensusCallCount()
  1988  						network.disconnect(3)
  1989  
  1990  						// Trigger some heartbeats to be sent so that leader notices
  1991  						// failed message delivery to 3, and marks it as Paused.
  1992  						// This is to ensure leadership is transferred to 2.
  1993  						Eventually(func() int {
  1994  							c1.clock.Increment(interval)
  1995  							return c1.rpc.SendConsensusCallCount()
  1996  						}, LongEventualTimeout).Should(BeNumerically(">=", cnt+5))
  1997  
  1998  						By("creating new configuration with removed node and new one")
  1999  						configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, value))
  2000  						c1.cutter.CutNext = true
  2001  
  2002  						By("sending config transaction")
  2003  						Expect(c1.Configure(configEnv, 0)).To(Succeed())
  2004  
  2005  						Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(2, raft.StateFollower)))
  2006  						network.Lock()
  2007  						network.leader = 2 // manually set network leader
  2008  						network.Unlock()
  2009  						network.disconnect(1)
  2010  
  2011  						network.exec(func(c *chain) {
  2012  							Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2013  							Eventually(c.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  2014  						}, 1, 2)
  2015  
  2016  						network.join(3, true)
  2017  						Eventually(c3.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2018  						Eventually(c3.configurator.ConfigureCallCount, LongEventualTimeout).Should(Equal(2))
  2019  
  2020  						By("Ordering normal transaction")
  2021  						c2.cutter.CutNext = true
  2022  						Expect(c3.Order(env, 0)).To(Succeed())
  2023  						network.exec(func(c *chain) {
  2024  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2025  						}, 2, 3)
  2026  					})
  2027  				})
  2028  
  2029  				It("adding node to the cluster", func() {
  2030  					addConsenterUpdate := addConsenterConfigValue()
  2031  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterUpdate))
  2032  					c1.cutter.CutNext = true
  2033  
  2034  					By("sending config transaction")
  2035  					err := c1.Configure(configEnv, 0)
  2036  					Expect(err).NotTo(HaveOccurred())
  2037  					Expect(c1.fakeFields.fakeConfigProposalsReceived.AddCallCount()).To(Equal(1))
  2038  					Expect(c1.fakeFields.fakeConfigProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  2039  
  2040  					network.exec(func(c *chain) {
  2041  						Eventually(c.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1))
  2042  						Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2))
  2043  						Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(4)))
  2044  					})
  2045  
  2046  					_, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0)
  2047  					meta := &common.Metadata{Value: raftmetabytes}
  2048  					raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil)
  2049  					Expect(err).NotTo(HaveOccurred())
  2050  
  2051  					c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil, nil)
  2052  					// if we join a node to an existing network, it MUST have already obtained blocks
  2053  					// up to the config block that adds this node to the cluster.
  2054  					c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0))
  2055  					c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0))
  2056  					c4.init()
  2057  
  2058  					network.addChain(c4)
  2059  					c4.Start()
  2060  
  2061  					// ConfChange is applied to etcd/raft asynchronously, meaning node 4 is not added
  2062  					// to leader's node list right away. An immediate tick does not trigger a heartbeat
  2063  					// being sent to node 4. Therefore, we repeatedly tick the leader until node 4 joins
  2064  					// the cluster successfully.
  2065  					Eventually(func() <-chan raft.SoftState {
  2066  						c1.clock.Increment(interval)
  2067  						return c4.observe
  2068  					}, defaultTimeout).Should(Receive(Equal(raft.SoftState{Lead: 1, RaftState: raft.StateFollower})))
  2069  
  2070  					Eventually(c4.support.WriteBlockCallCount, defaultTimeout).Should(Equal(1))
  2071  					Eventually(c4.support.WriteConfigBlockCallCount, defaultTimeout).Should(Equal(1))
  2072  
  2073  					By("submitting new transaction to follower")
  2074  					c1.cutter.CutNext = true
  2075  					err = c4.Order(env, 0)
  2076  					Expect(err).NotTo(HaveOccurred())
  2077  					Expect(c4.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
  2078  					Expect(c4.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  2079  
  2080  					network.exec(func(c *chain) {
  2081  						Eventually(c.support.WriteBlockCallCount, defaultTimeout).Should(Equal(2))
  2082  					})
  2083  				})
  2084  
  2085  				It("does not reconfigure raft cluster if it's a channel creation tx", func() {
  2086  					configEnv := newConfigEnv("another-channel",
  2087  						common.HeaderType_CONFIG,
  2088  						newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(2)))
  2089  
  2090  					// Wrap config env in Orderer transaction
  2091  					channelCreationEnv := &common.Envelope{
  2092  						Payload: marshalOrPanic(&common.Payload{
  2093  							Header: &common.Header{
  2094  								ChannelHeader: marshalOrPanic(&common.ChannelHeader{
  2095  									Type:      int32(common.HeaderType_ORDERER_TRANSACTION),
  2096  									ChannelId: channelID,
  2097  								}),
  2098  							},
  2099  							Data: marshalOrPanic(configEnv),
  2100  						}),
  2101  					}
  2102  
  2103  					c1.cutter.CutNext = true
  2104  
  2105  					Expect(c1.Configure(channelCreationEnv, 0)).To(Succeed())
  2106  					network.exec(func(c *chain) {
  2107  						Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2108  					})
  2109  
  2110  					// assert c2 is not evicted
  2111  					Consistently(c2.Errored).ShouldNot(BeClosed())
  2112  					Expect(c2.Order(env, 0)).To(Succeed())
  2113  
  2114  					network.exec(func(c *chain) {
  2115  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2116  					})
  2117  				})
  2118  
  2119  				It("stop leader and continue reconfiguration failing over to new leader", func() {
  2120  					// Scenario: Starting replica set of 3 Raft nodes, electing node c1 to be a leader
  2121  					// configure chain support mock to disconnect c1 right after it writes configuration block
  2122  					// into the ledger, this to simulate failover.
  2123  					// Next boostraping a new node c4 to join a cluster and creating config transaction, submitting
  2124  					// it to the leader. Once leader writes configuration block it fails and leadership transferred to
  2125  					// c2.
  2126  					// Test asserts that new node c4, will join the cluster and c2 will handle failover of
  2127  					// re-configuration. Later we connecting c1 back and making sure it capable of catching up with
  2128  					// new configuration and successfully rejoins replica set.
  2129  
  2130  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue()))
  2131  					c1.cutter.CutNext = true
  2132  
  2133  					step1 := c1.getStepFunc()
  2134  					count := c1.rpc.SendConsensusCallCount() // record current step call count
  2135  					c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2136  						// disconnect network after 4 MsgApp are sent by c1:
  2137  						// - 2 MsgApp to c2 & c3 that replicate data to raft followers
  2138  						// - 2 MsgApp to c2 & c3 that instructs followers to commit data
  2139  						if c1.rpc.SendConsensusCallCount() == count+4 {
  2140  							defer network.disconnect(1)
  2141  						}
  2142  
  2143  						return step1(dest, msg)
  2144  					})
  2145  
  2146  					By("sending config transaction")
  2147  					err := c1.Configure(configEnv, 0)
  2148  					Expect(err).NotTo(HaveOccurred())
  2149  
  2150  					// every node has written config block to the OSN ledger
  2151  					network.exec(
  2152  						func(c *chain) {
  2153  							Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2154  						})
  2155  
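        					// wait until c1 has sent the remaining consensus messages before restoring the original step function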
  2156  					Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6))
  2157  					c1.setStepFunc(step1)
  2158  
  2159  					// elect node with higher index
  2160  					i2, _ := c2.storage.LastIndex() // err is always nil
  2161  					i3, _ := c3.storage.LastIndex()
  2162  					candidate := uint64(2)
  2163  					if i3 > i2 {
  2164  						candidate = 3
  2165  					}
  2166  					network.chains[candidate].cutter.CutNext = true
  2167  					network.elect(candidate)
  2168  
  2169  					_, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0)
  2170  					meta := &common.Metadata{Value: raftmetabytes}
  2171  					raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil)
  2172  					Expect(err).NotTo(HaveOccurred())
  2173  
  2174  					c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil, nil)
  2175  					// if we join a node to an existing network, it MUST have already obtained blocks
  2176  					// up to the config block that adds this node to the cluster.
  2177  					c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0))
  2178  					c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0))
  2179  					c4.init()
  2180  
  2181  					network.addChain(c4)
  2182  					c4.start()
  2183  					Expect(c4.WaitReady()).To(Succeed())
  2184  					network.join(4, true)
  2185  
  2186  					Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2187  					Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2188  
  2189  					By("submitting new transaction to follower")
  2190  					err = c4.Order(env, 0)
  2191  					Expect(err).NotTo(HaveOccurred())
  2192  
  2193  					// the remaining nodes, including the newly added one, are alive, hence each should write 2 blocks
  2194  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2195  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2196  					Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2197  
  2198  					// node 1 has been stopped, so it should not write any more blocks
  2199  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(1))
  2200  
  2201  					network.join(1, true)
  2202  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2203  				})
  2204  
  2205  				It("stop cluster quorum and continue reconfiguration after the restart", func() {
  2206  					// Scenario: Starting replica set of 3 Raft nodes, electing node c1 to be a leader
  2207  					// configure chain support mock to stop cluster after config block is committed.
  2208  					// Restart the cluster and ensure it picks up updates and capable to finish reconfiguration.
  2209  
  2210  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue()))
  2211  					c1.cutter.CutNext = true
  2212  
  2213  					step1 := c1.getStepFunc()
  2214  					count := c1.rpc.SendConsensusCallCount() // record current step call count
  2215  					c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2216  						// disconnect network after 4 MsgApp are sent by c1:
  2217  						// - 2 MsgApp to c2 & c3 that replicate data to raft followers
  2218  						// - 2 MsgApp to c2 & c3 that instructs followers to commit data
  2219  						if c1.rpc.SendConsensusCallCount() == count+4 {
  2220  							defer func() {
  2221  								network.disconnect(1)
  2222  								network.disconnect(2)
  2223  								network.disconnect(3)
  2224  							}()
  2225  						}
  2226  
  2227  						return step1(dest, msg)
  2228  					})
  2229  
  2230  					By("sending config transaction")
  2231  					err := c1.Configure(configEnv, 0)
  2232  					Expect(err).NotTo(HaveOccurred())
  2233  
  2234  					// every node has written config block to the OSN ledger
  2235  					network.exec(
  2236  						func(c *chain) {
  2237  							Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2238  						})
  2239  
  2240  					// assert conf change proposals have been dropped before proceeding to reconnect the network
  2241  					Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6))
  2242  					c1.setStepFunc(step1)
  2243  
  2244  					_, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0)
  2245  					meta := &common.Metadata{Value: raftmetabytes}
  2246  					raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil)
  2247  					Expect(err).NotTo(HaveOccurred())
  2248  
  2249  					c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil, nil)
  2250  					// if we join a node to an existing network, it MUST have already obtained blocks
  2251  					// up to the config block that adds this node to the cluster.
  2252  					c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0))
  2253  					c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0))
  2254  					c4.init()
  2255  
  2256  					network.addChain(c4)
  2257  
  2258  					By("reconnecting nodes back")
  2259  					for i := uint64(1); i < 4; i++ {
  2260  						network.connect(i)
  2261  					}
  2262  
  2263  					// elect node with higher index
  2264  					i2, _ := c2.storage.LastIndex() // err is always nil
  2265  					i3, _ := c3.storage.LastIndex()
  2266  					candidate := uint64(2)
  2267  					if i3 > i2 {
  2268  						candidate = 3
  2269  					}
  2270  					network.chains[candidate].cutter.CutNext = true
  2271  					network.elect(candidate)
  2272  
  2273  					c4.start()
  2274  					Expect(c4.WaitReady()).To(Succeed())
  2275  					network.join(4, false)
  2276  
  2277  					Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2278  					Eventually(c4.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2279  
  2280  					By("submitting new transaction to follower")
  2281  					err = c4.Order(env, 0)
  2282  					Expect(err).NotTo(HaveOccurred())
  2283  
  2284  					// the remaining nodes, including the newly added one, are alive, hence each should write 2 blocks
  2285  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2286  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2287  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2288  					Eventually(c4.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2289  				})
  2290  
  2291  				It("ensures that despite leader failure cluster continue to process configuration to remove the leader", func() {
  2292  					// Scenario: Starting replica set of 3 nodes, electing nodeID = 1 to be the leader.
  2293  					// Prepare config update transaction which removes leader (nodeID = 1), then leader
  2294  					// fails right after it commits configuration block.
  2295  
  2296  					configEnv := newConfigEnv(channelID,
  2297  						common.HeaderType_CONFIG,
  2298  						newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1
  2299  
  2300  					c1.cutter.CutNext = true
  2301  
  2302  					step1 := c1.getStepFunc()
  2303  					count := c1.rpc.SendConsensusCallCount() // record current step call count
  2304  					c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2305  						// disconnect network after 4 MsgApp are sent by c1:
  2306  						// - 2 MsgApp to c2 & c3 that replicate data to raft followers
  2307  						// - 2 MsgApp to c2 & c3 that instructs followers to commit data
  2308  						if c1.rpc.SendConsensusCallCount() == count+4 {
  2309  							defer network.disconnect(1)
  2310  						}
  2311  
  2312  						return step1(dest, msg)
  2313  					})
  2314  
  2315  					By("sending config transaction")
  2316  					err := c1.Configure(configEnv, 0)
  2317  					Expect(err).NotTo(HaveOccurred())
  2318  
  2319  					network.exec(func(c *chain) {
  2320  						Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2321  					})
  2322  
  2323  					Eventually(c1.rpc.SendConsensusCallCount, LongEventualTimeout).Should(Equal(count + 6))
  2324  					c1.setStepFunc(step1)
  2325  
  2326  					// elect node with higher index
  2327  					i2, _ := c2.storage.LastIndex() // err is always nil
  2328  					i3, _ := c3.storage.LastIndex()
  2329  					candidate := uint64(2)
  2330  					if i3 > i2 {
  2331  						candidate = 3
  2332  					}
  2333  					network.chains[candidate].cutter.CutNext = true
  2334  					network.elect(candidate)
  2335  
  2336  					By("submitting new transaction to follower")
  2337  					err = c3.Order(env, 0)
  2338  					Expect(err).NotTo(HaveOccurred())
  2339  
  2340  					// the remaining nodes are alive, hence each should write 2 blocks
  2341  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2342  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2343  				})
  2344  
  2345  				It("removes leader from replica set", func() {
  2346  					// Scenario: Starting a replica set of 3 nodes and electing nodeID = 1 as the leader.
  2347  					// A config update transaction which removes the leader (nodeID = 1) is prepared, to
  2348  					// ensure we handle re-configuration on node removal correctly, that the remaining two
  2349  					// nodes can still form a functional quorum, and that Raft can make further progress.
  2350  					// Moreover, the test asserts that the removed node stops Rafting with the rest of the cluster, i.e.
  2351  					// it should not be able to get updates or forward transactions.
  2352  
  2353  					configEnv := newConfigEnv(channelID,
  2354  						common.HeaderType_CONFIG,
  2355  						newConfigUpdateEnv(channelID, nil, removeConsenterConfigValue(1))) // remove nodeID == 1
  2356  
  2357  					c1.cutter.CutNext = true
  2358  
  2359  					By("sending config transaction")
  2360  					err := c1.Configure(configEnv, 0)
  2361  					Expect(err).NotTo(HaveOccurred())
  2362  
  2363  					// every node has written config block to the OSN ledger
  2364  					network.exec(
  2365  						func(c *chain) {
  2366  							Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2367  							Eventually(c.fakeFields.fakeClusterSize.SetCallCount, LongEventualTimeout).Should(Equal(2))
  2368  							Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(1)).To(Equal(float64(2)))
  2369  						})
  2370  
  2371  					// Assert c1 has exited
  2372  					c1.clock.WaitForNWatchersAndIncrement(ELECTION_TICK*interval, 2)
  2373  					Eventually(c1.Errored, LongEventualTimeout).Should(BeClosed())
  2374  					close(c1.stopped)
  2375  
  2376  					var newLeader, remainingFollower *chain
  2377  					for newLeader == nil || remainingFollower == nil {
  2378  						var state raft.SoftState
  2379  						select {
  2380  						case state = <-c2.observe:
  2381  						case state = <-c3.observe:
  2382  						case <-time.After(LongEventualTimeout):
  2383  							Fail("Expected a new leader to present")
  2384  						}
  2385  
  2386  						if state.RaftState == raft.StateLeader && state.Lead != raft.None {
  2387  							newLeader = network.chains[state.Lead]
  2388  						}
  2389  
  2390  						if state.RaftState == raft.StateFollower && state.Lead != raft.None {
  2391  							remainingFollower = network.chains[state.Lead]
  2392  						}
  2393  					}
  2394  
  2395  					By("submitting transaction to new leader")
  2396  					newLeader.cutter.CutNext = true
  2397  					err = newLeader.Order(env, 0)
  2398  					Expect(err).NotTo(HaveOccurred())
  2399  
  2400  					Eventually(newLeader.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2401  					Eventually(remainingFollower.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2402  					// node 1 has been stopped, so it should not write any more blocks
  2403  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(1))
  2404  
  2405  					By("trying to submit to new node, expected to fail")
  2406  					c1.cutter.CutNext = true
  2407  					err = c1.Order(env, 0)
  2408  					Expect(err).To(HaveOccurred())
  2409  
  2410  					// number of block writes should remain the same
  2411  					Consistently(newLeader.support.WriteBlockCallCount).Should(Equal(2))
  2412  					Consistently(remainingFollower.support.WriteBlockCallCount).Should(Equal(2))
  2413  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(1))
  2414  				})
  2415  
  2416  				It("does not deadlock if leader steps down while config block is in-flight", func() {
  2417  					configEnv := newConfigEnv(channelID, common.HeaderType_CONFIG, newConfigUpdateEnv(channelID, nil, addConsenterConfigValue()))
  2418  					c1.cutter.CutNext = true
  2419  
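        					// make WriteConfigBlock block on a signal channel so the config block stays in-flight
        					// while the leader is forced to step down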
  2420  					signal := make(chan struct{})
  2421  					stub := c1.support.WriteConfigBlockStub
  2422  					c1.support.WriteConfigBlockStub = func(b *common.Block, meta []byte) {
  2423  						signal <- struct{}{}
  2424  						<-signal
  2425  						stub(b, meta)
  2426  					}
  2427  
  2428  					By("Sending config transaction")
  2429  					Expect(c1.Configure(configEnv, 0)).To(Succeed())
  2430  
  2431  					Eventually(signal, LongEventualTimeout).Should(Receive())
  2432  					network.disconnect(1)
  2433  
  2434  					By("Ticking leader till it steps down")
  2435  					Eventually(func() raft.SoftState {
  2436  						c1.clock.Increment(interval)
  2437  						return c1.Node.Status().SoftState
  2438  					}, LongEventualTimeout).Should(StateEqual(0, raft.StateFollower))
  2439  
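        					// unblock the stubbed WriteConfigBlock now that c1 has stepped down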
  2440  					close(signal)
  2441  
  2442  					Eventually(c1.observe, LongEventualTimeout).Should(Receive(StateEqual(0, raft.StateFollower)))
  2443  
  2444  					By("Re-electing 1 as leader")
  2445  					network.connect(1)
  2446  					network.elect(1)
  2447  
  2448  					_, raftmetabytes := c1.support.WriteConfigBlockArgsForCall(0)
  2449  					meta := &common.Metadata{Value: raftmetabytes}
  2450  					raftmeta, err := etcdraft.ReadBlockMetadata(meta, nil)
  2451  					Expect(err).NotTo(HaveOccurred())
  2452  
  2453  					c4 := newChain(timeout, channelID, dataDir, 4, raftmeta, consenters, cryptoProvider, nil, nil)
  2454  					// if we join a node to an existing network, it MUST have already obtained blocks
  2455  					// up to the config block that adds this node to the cluster.
  2456  					c4.support.WriteBlock(c1.support.WriteBlockArgsForCall(0))
  2457  					c4.support.WriteConfigBlock(c1.support.WriteConfigBlockArgsForCall(0))
  2458  					c4.init()
  2459  
  2460  					network.addChain(c4)
  2461  					c4.Start()
  2462  
  2463  					Eventually(func() <-chan raft.SoftState {
  2464  						c1.clock.Increment(interval)
  2465  						return c4.observe
  2466  					}, LongEventualTimeout).Should(Receive(StateEqual(1, raft.StateFollower)))
  2467  
  2468  					By("Submitting tx to confirm network is still working")
  2469  					Expect(c1.Order(env, 0)).To(Succeed())
  2470  
  2471  					network.exec(func(c *chain) {
  2472  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2473  						Eventually(c.support.WriteConfigBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2474  					})
  2475  				})
  2476  			})
  2477  		})
  2478  
  2479  		When("3/3 nodes are running", func() {
  2480  			JustBeforeEach(func() {
  2481  				network.init()
  2482  				network.start()
  2483  				network.elect(1)
  2484  			})
  2485  
  2486  			AfterEach(func() {
  2487  				network.stop()
  2488  			})
  2489  
  2490  			It("correctly sets the cluster size and leadership metrics", func() {
  2491  				// the network should see only one leadership change
  2492  				network.exec(func(c *chain) {
  2493  					Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(1))
  2494  					Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(0)).Should(Equal(float64(1)))
  2495  					Expect(c.fakeFields.fakeClusterSize.SetCallCount()).Should(Equal(1))
  2496  					Expect(c.fakeFields.fakeClusterSize.SetArgsForCall(0)).To(Equal(float64(3)))
  2497  				})
  2498  				// c1 should be the leader
  2499  				Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2))
  2500  				Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1)))
  2501  				// c2 and c3 should remain followers
  2502  				Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1))
  2503  				Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0)))
  2504  				Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1))
  2505  				Expect(c3.fakeFields.fakeIsLeader.SetArgsForCall(0)).Should(Equal(float64(0)))
  2506  			})
  2507  
  2508  			It("orders envelope on leader", func() {
  2509  				By("instructed to cut next block")
  2510  				c1.cutter.CutNext = true
  2511  				err := c1.Order(env, 0)
  2512  				Expect(err).NotTo(HaveOccurred())
  2513  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
  2514  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  2515  
  2516  				network.exec(
  2517  					func(c *chain) {
  2518  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2519  					})
  2520  
  2521  				By("respect batch timeout")
  2522  				c1.cutter.CutNext = false
  2523  
  2524  				err = c1.Order(env, 0)
  2525  				Expect(err).NotTo(HaveOccurred())
  2526  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2))
  2527  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1)))
  2528  				Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
  2529  
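        				// advance the fake clock by the batch timeout so the pending batch is cut into the next block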
  2530  				c1.clock.WaitForNWatchersAndIncrement(timeout, 2)
  2531  				network.exec(
  2532  					func(c *chain) {
  2533  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2534  					})
  2535  			})
  2536  
  2537  			It("orders envelope on follower", func() {
  2538  				By("instructed to cut next block")
  2539  				c1.cutter.CutNext = true
  2540  				err := c2.Order(env, 0)
  2541  				Expect(err).NotTo(HaveOccurred())
  2542  				Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
  2543  				Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  2544  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0))
  2545  
  2546  				network.exec(
  2547  					func(c *chain) {
  2548  						Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2549  					})
  2550  
  2551  				By("respect batch timeout")
  2552  				c1.cutter.CutNext = false
  2553  
  2554  				err = c2.Order(env, 0)
  2555  				Expect(err).NotTo(HaveOccurred())
  2556  				Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(2))
  2557  				Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(1)).To(Equal(float64(1)))
  2558  				Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0))
  2559  				Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
  2560  
  2561  				c1.clock.WaitForNWatchersAndIncrement(timeout, 2)
  2562  				network.exec(
  2563  					func(c *chain) {
  2564  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2565  					})
  2566  			})
  2567  
  2568  			When("MaxInflightBlocks is reached", func() {
  2569  				BeforeEach(func() {
  2570  					network.exec(func(c *chain) { c.opts.MaxInflightBlocks = 1 })
  2571  				})
  2572  
  2573  				It("waits for in flight blocks to be committed", func() {
  2574  					c1.cutter.CutNext = true
  2575  					// disconnect c1 to disrupt consensus
  2576  					network.disconnect(1)
  2577  
  2578  					Expect(c1.Order(env, 0)).To(Succeed())
  2579  
  2580  					doneProp := make(chan struct{})
  2581  					go func() {
  2582  						defer GinkgoRecover()
  2583  						Expect(c1.Order(env, 0)).To(Succeed())
  2584  						close(doneProp)
  2585  					}()
  2586  					// expect second `Order` to block
  2587  					Consistently(doneProp).ShouldNot(BeClosed())
  2588  					network.exec(func(c *chain) {
  2589  						Consistently(c.support.WriteBlockCallCount).Should(BeZero())
  2590  					})
  2591  
  2592  					network.connect(1)
  2593  					c1.clock.Increment(interval)
  2594  
  2595  					Eventually(doneProp, LongEventualTimeout).Should(BeClosed())
  2596  					network.exec(func(c *chain) {
  2597  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2598  					})
  2599  				})
  2600  
  2601  				It("resets block in flight when steps down from leader", func() {
  2602  					c1.cutter.CutNext = true
  2603  					c2.cutter.CutNext = true
  2604  					// disconnect c1 to disrupt consensus
  2605  					network.disconnect(1)
  2606  
  2607  					Expect(c1.Order(env, 0)).To(Succeed())
  2608  
  2609  					doneProp := make(chan struct{})
  2610  					go func() {
  2611  						defer GinkgoRecover()
  2612  
  2613  						Expect(c1.Order(env, 0)).To(Succeed())
  2614  						close(doneProp)
  2615  					}()
  2616  					// expect second `Order` to block
  2617  					Consistently(doneProp).ShouldNot(BeClosed())
  2618  					network.exec(func(c *chain) {
  2619  						Consistently(c.support.WriteBlockCallCount).Should(BeZero())
  2620  					})
  2621  
  2622  					network.elect(2)
  2623  					Expect(c3.Order(env, 0)).To(Succeed())
  2624  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  2625  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2626  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2627  
  2628  					network.connect(1)
  2629  					c2.clock.Increment(interval)
  2630  
  2631  					Eventually(doneProp, LongEventualTimeout).Should(BeClosed())
  2632  					network.exec(func(c *chain) {
  2633  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2634  					})
  2635  				})
  2636  			})
  2637  
  2638  			When("gRPC stream to leader is stuck", func() {
  2639  				BeforeEach(func() {
  2640  					c2.opts.RPCTimeout = time.Second
  2641  					network.Lock()
  2642  					network.delayWG.Add(1)
  2643  					network.Unlock()
  2644  				})
  2645  				It("correctly times out", func() {
  2646  					err := c2.Order(env, 0)
  2647  					Expect(err).To(MatchError("timed out (1s) waiting on forwarding to 1"))
  2648  					network.delayWG.Done()
  2649  				})
  2650  			})
  2651  
  2652  			When("leader is disconnected", func() {
  2653  				It("correctly returns a failure to the client when forwarding from a follower", func() {
  2654  					network.disconnect(1)
  2655  
  2656  					err := c2.Order(env, 0)
  2657  					Expect(err).To(MatchError("connection lost"))
  2658  				})
  2659  
  2660  				It("proactively steps down to follower", func() {
  2661  					network.disconnect(1)
  2662  
  2663  					By("Ticking leader until it steps down")
  2664  					Eventually(func() <-chan raft.SoftState {
  2665  						c1.clock.Increment(interval)
  2666  						return c1.observe
  2667  					}, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StateFollower})))
  2668  
  2669  					By("Ensuring it does not accept message due to the cluster being leaderless")
  2670  					err := c1.Order(env, 0)
  2671  					Expect(err).To(MatchError("no Raft leader"))
  2672  
  2673  					network.elect(2)
  2674  
  2675  					// c1 should have lost leadership
  2676  					Expect(c1.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(3))
  2677  					Expect(c1.fakeFields.fakeIsLeader.SetArgsForCall(2)).Should(Equal(float64(0)))
  2678  					// c2 should become the leader
  2679  					Expect(c2.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(2))
  2680  					Expect(c2.fakeFields.fakeIsLeader.SetArgsForCall(1)).Should(Equal(float64(1)))
  2681  					// c3 should continue to remain a follower
  2682  					Expect(c3.fakeFields.fakeIsLeader.SetCallCount()).Should(Equal(1))
  2683  
  2684  					network.join(1, true)
  2685  					network.exec(func(c *chain) {
  2686  						Expect(c.fakeFields.fakeLeaderChanges.AddCallCount()).Should(Equal(3))
  2687  						Expect(c.fakeFields.fakeLeaderChanges.AddArgsForCall(2)).Should(Equal(float64(1)))
  2688  					})
  2689  
  2690  					err = c1.Order(env, 0)
  2691  					Expect(err).NotTo(HaveOccurred())
  2692  				})
  2693  
  2694  				It("does not deadlock if propose is blocked", func() {
  2695  					signal := make(chan struct{})
  2696  					c1.cutter.CutNext = true
  2697  					c1.support.SequenceStub = func() uint64 {
  2698  						signal <- struct{}{}
  2699  						<-signal
  2700  						return 0
  2701  					}
  2702  
  2703  					By("Sending a normal transaction")
  2704  					Expect(c1.Order(env, 0)).To(Succeed())
  2705  
  2706  					Eventually(signal).Should(Receive())
  2707  					network.disconnect(1)
  2708  
  2709  					By("Ticking leader till it steps down")
  2710  					Eventually(func() raft.SoftState {
  2711  						c1.clock.Increment(interval)
  2712  						return c1.Node.Status().SoftState
  2713  					}).Should(StateEqual(0, raft.StateFollower))
  2714  
  2715  					close(signal)
  2716  
  2717  					Eventually(c1.observe).Should(Receive(StateEqual(0, raft.StateFollower)))
  2718  					c1.support.SequenceStub = nil
  2719  					network.exec(func(c *chain) {
  2720  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  2721  					})
  2722  
  2723  					By("Re-electing 1 as leader")
  2724  					network.connect(1)
  2725  					network.elect(1)
  2726  
  2727  					By("Sending another normal transaction")
  2728  					Expect(c1.Order(env, 0)).To(Succeed())
  2729  
  2730  					network.exec(func(c *chain) {
  2731  						Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2732  					})
  2733  				})
  2734  			})
  2735  
  2736  			When("follower is disconnected", func() {
  2737  				It("should return error when receiving an env", func() {
  2738  					network.disconnect(2)
  2739  
  2740  					errorC := c2.Errored()
  2741  					Consistently(errorC).ShouldNot(BeClosed()) // assert that errorC is not closed
  2742  
  2743  					By("Ticking node 2 until it becomes pre-candidate")
  2744  					Eventually(func() <-chan raft.SoftState {
  2745  						c2.clock.Increment(interval)
  2746  						return c2.observe
  2747  					}, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 0, RaftState: raft.StatePreCandidate})))
  2748  
  2749  					Eventually(errorC).Should(BeClosed())
  2750  					err := c2.Order(env, 0)
  2751  					Expect(err).To(HaveOccurred())
  2752  					Expect(c2.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(1))
  2753  					Expect(c2.fakeFields.fakeNormalProposalsReceived.AddArgsForCall(0)).To(Equal(float64(1)))
  2754  					Expect(c1.fakeFields.fakeNormalProposalsReceived.AddCallCount()).To(Equal(0))
  2755  
  2756  					network.connect(2)
  2757  					c1.clock.Increment(interval)
  2758  					Expect(errorC).To(BeClosed())
  2759  
  2760  					Eventually(c2.Errored).ShouldNot(BeClosed())
  2761  				})
  2762  			})
  2763  
  2764  			It("leader retransmits lost messages", func() {
  2765  				// This tests that heartbeats trigger the leader to retransmit lost MsgApp messages
  2766  
  2767  				c1.cutter.CutNext = true
  2768  
  2769  				network.disconnect(1) // drop MsgApp
  2770  
  2771  				err := c1.Order(env, 0)
  2772  				Expect(err).NotTo(HaveOccurred())
  2773  
  2774  				network.exec(
  2775  					func(c *chain) {
  2776  						Consistently(func() int { return c.support.WriteBlockCallCount() }).Should(Equal(0))
  2777  					})
  2778  
  2779  				network.connect(1) // reconnect leader
  2780  
  2781  				c1.clock.Increment(interval) // trigger a heartbeat
  2782  				network.exec(
  2783  					func(c *chain) {
  2784  						Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2785  					})
  2786  			})
  2787  
  2788  			It("allows the leader to create multiple normal blocks without having to wait for them to be written out", func() {
  2789  				// this ensures that the created blocks are not written out
  2790  				network.disconnect(1)
  2791  
  2792  				c1.cutter.CutNext = true
  2793  				for i := 0; i < 3; i++ {
  2794  					Expect(c1.Order(env, 0)).To(Succeed())
  2795  				}
  2796  
  2797  				Consistently(c1.support.WriteBlockCallCount).Should(Equal(0))
  2798  
  2799  				network.connect(1)
  2800  
  2801  				// After FAB-13722, the leader pauses replication if it is notified that message
  2802  				// delivery to a certain node has failed, i.e. connection refused. Replication to that
  2803  				// follower is resumed once the leader receives a MsgHeartbeatResp from it.
  2804  				// We could certainly tick the leader repeatedly to trigger heartbeat broadcasts, but we
  2805  				// would also risk a slow leader stepping down due to excessive ticks.
  2806  				//
  2807  				// Instead, we simply send artificial MsgHeartbeatResp messages to the leader to resume replication.
  2808  				m2 := &raftpb.Message{To: c1.id, From: c2.id, Type: raftpb.MsgHeartbeatResp}
  2809  				c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m2)}, c2.id)
  2810  				m3 := &raftpb.Message{To: c1.id, From: c3.id, Type: raftpb.MsgHeartbeatResp}
  2811  				c1.Consensus(&orderer.ConsensusRequest{Channel: channelID, Payload: protoutil.MarshalOrPanic(m3)}, c3.id)
  2812  
  2813  				network.exec(func(c *chain) {
  2814  					Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
  2815  				})
  2816  			})
  2817  
  2818  			It("new leader should wait for in-flight blocks to commit before accepting new env", func() {
  2819  				// Scenario: when a node is elected as the new leader and there are still in-flight blocks,
  2820  				// it should not immediately start accepting new envelopes; instead it should wait for
  2821  				// those in-flight blocks to be committed, otherwise we may create an uncle block which
  2822  				// forks and panics the chain.
  2823  				//
  2824  				// Steps:
  2825  				// - start raft cluster with three nodes and genesis block0
  2826  				// - order env1 on c1, which creates block1
  2827  				// - drop MsgApp from 1 to 3
  2828  				// - drop second round of MsgApp sent from 1 to 2, so that block1 is only committed on c1
  2829  				// - disconnect c1 and elect c2
  2830  				// - order env2 on c2. This env must NOT be immediately accepted, otherwise c2 would create
  2831  				//   an uncle block1 based on block0.
  2832  				// - c2 commits block1
  2833  				// - c2 accepts env2, and creates block2
  2834  				// - c2 commits block2
  2835  				c1.cutter.CutNext = true
  2836  				c2.cutter.CutNext = true
  2837  
  2838  				step1 := c1.getStepFunc()
  2839  				c1.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2840  					stepMsg := &raftpb.Message{}
  2841  					Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred())
  2842  
  2843  					if dest == 3 {
  2844  						return nil
  2845  					}
  2846  
  2847  					if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) == 0 {
  2848  						return nil
  2849  					}
  2850  
  2851  					return step1(dest, msg)
  2852  				})
  2853  
  2854  				Expect(c1.Order(env, 0)).NotTo(HaveOccurred())
  2855  
  2856  				Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  2857  				Consistently(c2.support.WriteBlockCallCount).Should(Equal(0))
  2858  				Consistently(c3.support.WriteBlockCallCount).Should(Equal(0))
  2859  
  2860  				network.disconnect(1)
  2861  
  2862  				step2 := c2.getStepFunc()
  2863  				c2.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  2864  					stepMsg := &raftpb.Message{}
  2865  					Expect(proto.Unmarshal(msg.Payload, stepMsg)).NotTo(HaveOccurred())
  2866  
  2867  					if stepMsg.Type == raftpb.MsgApp && len(stepMsg.Entries) != 0 && dest == 3 {
  2868  						for _, ent := range stepMsg.Entries {
  2869  							if len(ent.Data) != 0 {
  2870  								return nil
  2871  							}
  2872  						}
  2873  					}
  2874  					return step2(dest, msg)
  2875  				})
  2876  
  2877  				network.elect(2)
  2878  
  2879  				go func() {
  2880  					defer GinkgoRecover()
  2881  					Expect(c2.Order(env, 0)).NotTo(HaveOccurred())
  2882  				}()
  2883  
  2884  				Consistently(c2.support.WriteBlockCallCount).Should(Equal(0))
  2885  				Consistently(c3.support.WriteBlockCallCount).Should(Equal(0))
  2886  
  2887  				c2.setStepFunc(step2)
  2888  				c2.clock.Increment(interval)
  2889  
  2890  				Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2891  				Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  2892  
  2893  				b, _ := c2.support.WriteBlockArgsForCall(0)
  2894  				Expect(b.Header.Number).To(Equal(uint64(1)))
  2895  				b, _ = c2.support.WriteBlockArgsForCall(1)
  2896  				Expect(b.Header.Number).To(Equal(uint64(2)))
  2897  			})
  2898  
  2899  			Context("handling config blocks", func() {
  2900  				var configEnv *common.Envelope
  2901  				BeforeEach(func() {
  2902  					values := map[string]*common.ConfigValue{
  2903  						"BatchTimeout": {
  2904  							Version: 1,
  2905  							Value: marshalOrPanic(&orderer.BatchTimeout{
  2906  								Timeout: "3ms",
  2907  							}),
  2908  						},
  2909  					}
  2910  					configEnv = newConfigEnv(channelID,
  2911  						common.HeaderType_CONFIG,
  2912  						newConfigUpdateEnv(channelID, nil, values),
  2913  					)
  2914  				})
  2915  
  2916  				It("holds up block creation on leader once a config block has been created and not written out", func() {
  2917  					// this ensures that the created blocks are not written out
  2918  					network.disconnect(1)
  2919  
  2920  					c1.cutter.CutNext = true
  2921  					// config block
  2922  					err := c1.Order(configEnv, 0)
  2923  					Expect(err).NotTo(HaveOccurred())
  2924  
  2925  					// to avoid data races since we are accessing these within a goroutine
  2926  					tempEnv := env
  2927  					tempC1 := c1
  2928  
  2929  					done := make(chan struct{})
  2930  
  2931  					// normal block
  2932  					go func() {
  2933  						defer GinkgoRecover()
  2934  
  2935  						// This should be blocked if config block is not committed
  2936  						err := tempC1.Order(tempEnv, 0)
  2937  						Expect(err).NotTo(HaveOccurred())
  2938  
  2939  						close(done)
  2940  					}()
  2941  
  2942  					Consistently(done).ShouldNot(BeClosed())
  2943  
  2944  					network.connect(1)
  2945  					c1.clock.Increment(interval)
  2946  
  2947  					network.exec(
  2948  						func(c *chain) {
  2949  							Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2950  						})
  2951  
  2952  					network.exec(
  2953  						func(c *chain) {
  2954  							Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2955  						})
  2956  				})
  2957  
  2958  				It("continues creating blocks on leader after a config block has been successfully written out", func() {
  2959  					c1.cutter.CutNext = true
  2960  					// config block
  2961  					err := c1.Configure(configEnv, 0)
  2962  					Expect(err).NotTo(HaveOccurred())
  2963  					network.exec(
  2964  						func(c *chain) {
  2965  							Eventually(func() int { return c.support.WriteConfigBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2966  						})
  2967  
  2968  					// normal block following config block
  2969  					err = c1.Order(env, 0)
  2970  					Expect(err).NotTo(HaveOccurred())
  2971  					network.exec(
  2972  						func(c *chain) {
  2973  							Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  2974  						})
  2975  				})
  2976  			})
  2977  
  2978  			When("Snapshotting is enabled", func() {
  2979  				BeforeEach(func() {
  2980  					c1.opts.SnapshotIntervalSize = 1
  2981  					c1.opts.SnapshotCatchUpEntries = 1
  2982  				})
  2983  
  2984  				It("keeps running if some entries in memory are purged", func() {
  2985  					// Scenario: snapshotting is enabled on node 1 and it purges its memory storage
  2986  					// with every snapshot. The cluster should keep functioning correctly.
  2987  
  2988  					i, err := c1.opts.MemoryStorage.FirstIndex()
  2989  					Expect(err).NotTo(HaveOccurred())
  2990  					Expect(i).To(Equal(uint64(1)))
  2991  
  2992  					c1.cutter.CutNext = true
  2993  
  2994  					err = c1.Order(env, 0)
  2995  					Expect(err).NotTo(HaveOccurred())
  2996  
  2997  					network.exec(
  2998  						func(c *chain) {
  2999  							Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  3000  						})
  3001  
  3002  					Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
  3003  					i, err = c1.opts.MemoryStorage.FirstIndex()
  3004  					Expect(err).NotTo(HaveOccurred())
  3005  
  3006  					err = c1.Order(env, 0)
  3007  					Expect(err).NotTo(HaveOccurred())
  3008  
  3009  					network.exec(
  3010  						func(c *chain) {
  3011  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  3012  						})
  3013  
  3014  					Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
  3015  					i, err = c1.opts.MemoryStorage.FirstIndex()
  3016  					Expect(err).NotTo(HaveOccurred())
  3017  
  3018  					err = c1.Order(env, 0)
  3019  					Expect(err).NotTo(HaveOccurred())
  3020  
  3021  					network.exec(
  3022  						func(c *chain) {
  3023  							Eventually(c.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(3))
  3024  						})
  3025  
  3026  					Eventually(c1.opts.MemoryStorage.FirstIndex, LongEventualTimeout).Should(BeNumerically(">", i))
  3027  				})
  3028  
  3029  				It("lagged node can catch up using snapshot", func() {
  3030  					network.disconnect(2)
  3031  					c1.cutter.CutNext = true
  3032  
  3033  					c2Lasti, _ := c2.opts.MemoryStorage.LastIndex()
  3034  					var blockCnt int
  3035  					// Order blocks until the first index of c1's memory storage is greater than the last index of c2,
  3036  					// so that a snapshot will be sent to c2 when it rejoins the network
  3037  					Eventually(func() bool {
  3038  						c1Firsti, _ := c1.opts.MemoryStorage.FirstIndex()
  3039  						if c1Firsti > c2Lasti+1 {
  3040  							return true
  3041  						}
  3042  
  3043  						Expect(c1.Order(env, 0)).To(Succeed())
  3044  						blockCnt++
  3045  						Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt))
  3046  						Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt))
  3047  						return false
  3048  					}, LongEventualTimeout).Should(BeTrue())
  3049  
  3050  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3051  
  3052  					network.join(2, false)
  3053  
  3054  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(blockCnt))
  3055  					indices := etcdraft.ListSnapshots(logger, c2.opts.SnapDir)
  3056  					Expect(indices).To(HaveLen(1))
  3057  					gap := indices[0] - c2Lasti
  3058  
  3059  					// TODO In theory, "equal" is the accurate behavior we expect. However, the eviction suspector,
  3060  					// which calls the block puller, still relies on the real clock and sometimes increments the puller
  3061  					// call count. Therefore we are being more lenient here until the suspector starts using a fake clock,
  3062  					// so we have more deterministic control over it.
  3063  					Expect(c2.puller.PullBlockCallCount()).To(BeNumerically(">=", int(gap)))
  3064  
  3065  					// chain should keep functioning
  3066  					Expect(c2.Order(env, 0)).To(Succeed())
  3067  
  3068  					network.exec(
  3069  						func(c *chain) {
  3070  							Eventually(func() int { return c.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(blockCnt + 1))
  3071  						})
  3072  				})
  3073  			})
  3074  
  3075  			Context("failover", func() {
  3076  				It("follower should step up as leader upon failover", func() {
  3077  					network.stop(1)
  3078  					network.elect(2)
  3079  
  3080  					By("order envelope on new leader")
  3081  					c2.cutter.CutNext = true
  3082  					err := c2.Order(env, 0)
  3083  					Expect(err).NotTo(HaveOccurred())
  3084  
  3085  					// block should not be produced on chain 1
  3086  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3087  
  3088  					// block should be produced on chain 2 & 3
  3089  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3090  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3091  
  3092  					By("order envelope on follower")
  3093  					err = c3.Order(env, 0)
  3094  					Expect(err).NotTo(HaveOccurred())
  3095  
  3096  					// block should not be produced on chain 1
  3097  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3098  
  3099  					// block should be produced on chain 2 & 3
  3100  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  3101  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(2))
  3102  				})
  3103  
  3104  				It("follower cannot be elected if its log is not up-to-date", func() {
  3105  					network.disconnect(2)
  3106  
  3107  					c1.cutter.CutNext = true
  3108  					err := c1.Order(env, 0)
  3109  					Expect(err).NotTo(HaveOccurred())
  3110  
  3111  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3112  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3113  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3114  
  3115  					network.disconnect(1)
  3116  					network.connect(2)
  3117  
  3118  					// node 2 has not caught up with other nodes
  3119  					for tick := 0; tick < 2*ELECTION_TICK-1; tick++ {
  3120  						c2.clock.Increment(interval)
  3121  						Consistently(c2.observe).ShouldNot(Receive(Equal(2)))
  3122  					}
  3123  
  3124  					// When PreVote is enabled, node 2 would fail to collect enough
  3125  					// PreVote because its index is not up-to-date. Therefore, it
  3126  					// does not cause leader change on other nodes.
  3127  					Consistently(c3.observe).ShouldNot(Receive())
  3128  					network.elect(3) // node 3 has newest logs among 2&3, so it can be elected
  3129  				})
  3130  
  3131  				It("PreVote prevents reconnected node from disturbing network", func() {
  3132  					network.disconnect(2)
  3133  
  3134  					c1.cutter.CutNext = true
  3135  					err := c1.Order(env, 0)
  3136  					Expect(err).NotTo(HaveOccurred())
  3137  
  3138  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3139  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3140  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(1))
  3141  
  3142  					network.connect(2)
  3143  
  3144  					for tick := 0; tick < 2*ELECTION_TICK-1; tick++ {
  3145  						c2.clock.Increment(interval)
  3146  						Consistently(c2.observe).ShouldNot(Receive(Equal(2)))
  3147  					}
  3148  
  3149  					Consistently(c1.observe).ShouldNot(Receive())
  3150  					Consistently(c3.observe).ShouldNot(Receive())
  3151  				})
  3152  
  3153  				It("follower can catch up and then campaign with success", func() {
  3154  					network.disconnect(2)
  3155  
  3156  					c1.cutter.CutNext = true
  3157  					for i := 0; i < 10; i++ {
  3158  						err := c1.Order(env, 0)
  3159  						Expect(err).NotTo(HaveOccurred())
  3160  					}
  3161  
  3162  					Eventually(c1.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10))
  3163  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(0))
  3164  					Eventually(c3.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10))
  3165  
  3166  					network.join(2, false)
  3167  					Eventually(c2.support.WriteBlockCallCount, LongEventualTimeout).Should(Equal(10))
  3168  
  3169  					network.disconnect(1)
  3170  					network.elect(2)
  3171  				})
  3172  
  3173  				It("purges blockcutter, stops timer and discards created blocks if leadership is lost", func() {
  3174  					// enqueue one transaction into 1's blockcutter to test for purging of block cutter
  3175  					c1.cutter.CutNext = false
  3176  					err := c1.Order(env, 0)
  3177  					Expect(err).NotTo(HaveOccurred())
  3178  					Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(1))
  3179  
  3180  					// no block should be written because env is not cut into block yet
  3181  					c1.clock.WaitForNWatchersAndIncrement(interval, 2)
  3182  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(0))
  3183  
  3184  					network.disconnect(1)
  3185  					network.elect(2)
  3186  					network.join(1, true)
  3187  
  3188  					Eventually(c1.clock.WatcherCount, LongEventualTimeout).Should(Equal(1)) // blockcutter timer is stopped
  3189  					Eventually(c1.cutter.CurBatch, LongEventualTimeout).Should(HaveLen(0))
  3190  					// the created block should be discarded since there is a leadership change
  3191  					Consistently(c1.support.WriteBlockCallCount).Should(Equal(0))
  3192  
  3193  					network.disconnect(2)
  3194  					network.elect(1)
  3195  
  3196  					err = c1.Order(env, 0)
  3197  					Expect(err).NotTo(HaveOccurred())
  3198  
  3199  					// The following group of assertions is redundant - it's here for completeness.
  3200  					// If the blockcutter has not been reset, fast-forwarding 1's clock to 'timeout' should result in the blockcutter firing.
  3201  					// If the blockcutter has been reset, fast-forwarding won't do anything.
  3202  					//
  3203  					// Put differently:
  3204  					//
  3205  					// correct:
  3206  					// stop         start                      fire
  3207  					// |--------------|---------------------------|
  3208  					//    n*intervals              timeout
  3209  					// (advanced in election)
  3210  					//
  3211  					// wrong:
  3212  					// unstop                   fire
  3213  					// |---------------------------|
  3214  					//          timeout
  3215  					//
  3216  					//              timeout-n*interval   n*interval
  3217  					//                 |-----------|----------------|
  3218  					//                             ^                ^
  3219  					//                at this point of time     it should fire
  3220  					//                timer should not fire     at this point
  3221  
  3222  					c1.clock.WaitForNWatchersAndIncrement(timeout-interval, 2)
  3223  					Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  3224  					Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(0))
  3225  
  3226  					c1.clock.Increment(interval)
  3227  					Eventually(func() int { return c1.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  3228  					Eventually(func() int { return c3.support.WriteBlockCallCount() }, LongEventualTimeout).Should(Equal(1))
  3229  				})
  3230  
  3231  				It("stale leader should not be able to propose block because of lagged term", func() {
  3232  					network.disconnect(1)
  3233  					network.elect(2)
  3234  					network.connect(1)
  3235  
  3236  					c1.cutter.CutNext = true
  3237  					err := c1.Order(env, 0)
  3238  					Expect(err).NotTo(HaveOccurred())
  3239  
  3240  					network.exec(
  3241  						func(c *chain) {
  3242  							Consistently(c.support.WriteBlockCallCount).Should(Equal(0))
  3243  						})
  3244  				})
  3245  
  3246  				It("aborts waiting for block to be committed upon leadership lost", func() {
  3247  					network.disconnect(1)
  3248  
  3249  					c1.cutter.CutNext = true
  3250  					err := c1.Order(env, 0)
  3251  					Expect(err).NotTo(HaveOccurred())
  3252  
  3253  					network.exec(
  3254  						func(c *chain) {
  3255  							Consistently(c.support.WriteBlockCallCount).Should(Equal(0))
  3256  						})
  3257  
  3258  					network.elect(2)
  3259  					network.connect(1)
  3260  
  3261  					c2.clock.Increment(interval)
  3262  					// this check guarantees that signal on resignC is consumed in commitBatches method.
  3263  					Eventually(c1.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: 2, RaftState: raft.StateFollower})))
  3264  				})
  3265  			})
  3266  		})
  3267  	})
  3268  })
  3269  
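// nodeConfigFromMetadata converts consenter metadata into cluster.RemoteNode entries,
// skipping the first consenter, which represents the local node.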
  3270  func nodeConfigFromMetadata(consenterMetadata *raftprotos.ConfigMetadata) []cluster.RemoteNode {
  3271  	var nodes []cluster.RemoteNode
  3272  	for i, consenter := range consenterMetadata.Consenters {
  3273  		// For now, skip ourselves
  3274  		if i == 0 {
  3275  			continue
  3276  		}
  3277  		serverDER, _ := pem.Decode(consenter.ServerTlsCert)
  3278  		clientDER, _ := pem.Decode(consenter.ClientTlsCert)
  3279  		node := cluster.RemoteNode{
  3280  			ID:            uint64(i + 1),
  3281  			Endpoint:      "localhost:7050",
  3282  			ServerTLSCert: serverDER.Bytes,
  3283  			ClientTLSCert: clientDER.Bytes,
  3284  		}
  3285  		nodes = append(nodes, node)
  3286  	}
  3287  	return nodes
  3288  }
  3289  
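// createMetadata builds raft ConfigMetadata with nodeCount consenters whose
// TLS certificates are issued by the given test CA.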
  3290  func createMetadata(nodeCount int, tlsCA tlsgen.CA) *raftprotos.ConfigMetadata {
  3291  	md := &raftprotos.ConfigMetadata{Options: &raftprotos.Options{
  3292  		TickInterval:      time.Duration(interval).String(),
  3293  		ElectionTick:      ELECTION_TICK,
  3294  		HeartbeatTick:     HEARTBEAT_TICK,
  3295  		MaxInflightBlocks: 5,
  3296  	}}
  3297  	for i := 0; i < nodeCount; i++ {
  3298  		md.Consenters = append(md.Consenters, &raftprotos.Consenter{
  3299  			Host:          "localhost",
  3300  			Port:          7050,
  3301  			ServerTlsCert: serverTLSCert(tlsCA),
  3302  			ClientTlsCert: clientTLSCert(tlsCA),
  3303  		})
  3304  	}
  3305  	return md
  3306  }
  3307  
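// serverTLSCert issues a server TLS certificate for localhost from the given test CA,
// panicking on failure since it is only used from tests.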
  3308  func serverTLSCert(tlsCA tlsgen.CA) []byte {
  3309  	cert, err := tlsCA.NewServerCertKeyPair("localhost")
  3310  	if err != nil {
  3311  		panic(err)
  3312  	}
  3313  	return cert.Cert
  3314  }
  3315  
  3316  func clientTLSCert(tlsCA tlsgen.CA) []byte {
  3317  	cert, err := tlsCA.NewClientCertKeyPair()
  3318  	if err != nil {
  3319  		panic(err)
  3320  	}
  3321  	return cert.Cert
  3322  }
  3323  
  3324  // marshalOrPanic serializes a protobuf message and panics if this
  3325  // operation fails
  3326  func marshalOrPanic(pb proto.Message) []byte {
  3327  	data, err := proto.Marshal(pb)
  3328  	if err != nil {
  3329  		panic(err)
  3330  	}
  3331  	return data
  3332  }
  3333  
  3334  // helpers to facilitate tests
  3335  type stepFunc func(dest uint64, msg *orderer.ConsensusRequest) error
  3336  
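// chain bundles an etcdraft.Chain with the mocks, fake clock and in-memory ledger
// that tests use to drive and observe it.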
  3337  type chain struct {
  3338  	id uint64
  3339  
  3340  	stepLock sync.Mutex
  3341  	step     stepFunc
  3342  
  3343  	// msgBuffer serializes ingress messages for a chain
  3344  	// so they are delivered in the same order
  3345  	msgBuffer chan *msg
  3346  
  3347  	support      *consensusmocks.FakeConsenterSupport
  3348  	cutter       *mockblockcutter.Receiver
  3349  	configurator *mocks.FakeConfigurator
  3350  	rpc          *mocks.FakeRPC
  3351  	storage      *raft.MemoryStorage
  3352  	clock        *fakeclock.FakeClock
  3353  	opts         etcdraft.Options
  3354  	puller       *mocks.FakeBlockPuller
  3355  
  3356  	// store written blocks to be returned by mock block puller
  3357  	ledgerLock            sync.RWMutex
  3358  	ledger                map[uint64]*common.Block
  3359  	ledgerHeight          uint64
  3360  	lastConfigBlockNumber uint64
  3361  
  3362  	observe      chan raft.SoftState
  3363  	unstarted    chan struct{}
  3364  	stopped      chan struct{}
  3365  	haltCallback func()
  3366  
  3367  	fakeFields *fakeMetricsFields
  3368  
  3369  	*etcdraft.Chain
  3370  
  3371  	cryptoProvider bccsp.BCCSP
  3372  }
  3373  
  3374  type msg struct {
  3375  	req    *orderer.ConsensusRequest
  3376  	sender uint64
  3377  }
  3378  
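// newChain constructs a test chain backed by fake metrics, a fake clock and
// WAL/snapshot directories under dataDir; if support is nil, a default
// FakeConsenterSupport is created.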
  3379  func newChain(
  3380  	timeout time.Duration,
  3381  	channel, dataDir string,
  3382  	id uint64,
  3383  	raftMetadata *raftprotos.BlockMetadata,
  3384  	consenters map[uint64]*raftprotos.Consenter,
  3385  	cryptoProvider bccsp.BCCSP,
  3386  	support *consensusmocks.FakeConsenterSupport,
  3387  	haltCallback func(),
  3388  ) *chain {
  3389  	rpc := &mocks.FakeRPC{}
  3390  	clock := fakeclock.NewFakeClock(time.Now())
  3391  	storage := raft.NewMemoryStorage()
  3392  
  3393  	fakeFields := newFakeMetricsFields()
  3394  
  3395  	opts := etcdraft.Options{
  3396  		RPCTimeout:          timeout,
  3397  		RaftID:              uint64(id),
  3398  		Clock:               clock,
  3399  		TickInterval:        interval,
  3400  		ElectionTick:        ELECTION_TICK,
  3401  		HeartbeatTick:       HEARTBEAT_TICK,
  3402  		MaxSizePerMsg:       1024 * 1024,
  3403  		MaxInflightBlocks:   256,
  3404  		BlockMetadata:       raftMetadata,
  3405  		LeaderCheckInterval: 500 * time.Millisecond,
  3406  		Consenters:          consenters,
  3407  		Logger:              flogging.NewFabricLogger(zap.NewExample()),
  3408  		MemoryStorage:       storage,
  3409  		WALDir:              path.Join(dataDir, "wal"),
  3410  		SnapDir:             path.Join(dataDir, "snapshot"),
  3411  		Metrics:             newFakeMetrics(fakeFields),
  3412  	}
  3413  
  3414  	if support == nil {
  3415  		support = &consensusmocks.FakeConsenterSupport{}
  3416  		support.ChannelIDReturns(channel)
  3417  		support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
  3418  	}
  3419  	cutter := mockblockcutter.NewReceiver()
  3420  	close(cutter.Block)
  3421  	support.BlockCutterReturns(cutter)
  3422  
  3423  	// upon leader change, lead is reset to 0 before being set to the actual
  3424  	// new leader, i.e. 1 -> 0 -> 2. Therefore 2 values will be
  3425  	// sent on this chan, so we need its size to be 2
  3426  	observe := make(chan raft.SoftState, 2)
  3427  
  3428  	configurator := &mocks.FakeConfigurator{}
  3429  	puller := &mocks.FakeBlockPuller{}
  3430  
  3431  	ch := make(chan struct{})
  3432  	close(ch)
  3433  
  3434  	c := &chain{
  3435  		id:           id,
  3436  		support:      support,
  3437  		cutter:       cutter,
  3438  		rpc:          rpc,
  3439  		storage:      storage,
  3440  		observe:      observe,
  3441  		clock:        clock,
  3442  		opts:         opts,
  3443  		unstarted:    ch,
  3444  		stopped:      make(chan struct{}),
  3445  		configurator: configurator,
  3446  		puller:       puller,
  3447  		ledger: map[uint64]*common.Block{
  3448  			0: getSeedBlock(), // Very first block
  3449  		},
  3450  		ledgerHeight:   1,
  3451  		fakeFields:     fakeFields,
  3452  		cryptoProvider: cryptoProvider,
  3453  		msgBuffer:      make(chan *msg, 500),
  3454  		haltCallback:   haltCallback,
  3455  	}
  3456  
  3457  	// receives normal blocks and metadata and appends them to
  3458  	// the in-memory ledger to simulate write behaviour
  3459  	appendNormalBlockToLedger := func(b *common.Block, meta []byte) {
  3460  		c.ledgerLock.Lock()
  3461  		defer c.ledgerLock.Unlock()
  3462  
  3463  		b = proto.Clone(b).(*common.Block)
  3464  		bytes, err := proto.Marshal(&common.Metadata{Value: meta})
  3465  		Expect(err).NotTo(HaveOccurred())
  3466  		b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
  3467  
  3468  		lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber})
  3469  		b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{
  3470  			Value: lastConfigValue,
  3471  		})
  3472  
  3473  		c.ledger[b.Header.Number] = b
  3474  		if c.ledgerHeight < b.Header.Number+1 {
  3475  			c.ledgerHeight = b.Header.Number + 1
  3476  		}
  3477  	}
  3478  
  3479  	// receives config blocks and metadata and appends them to
  3480  	// the in-memory ledger to simulate write behaviour
  3481  	appendConfigBlockToLedger := func(b *common.Block, meta []byte) {
  3482  		c.ledgerLock.Lock()
  3483  		defer c.ledgerLock.Unlock()
  3484  
  3485  		b = proto.Clone(b).(*common.Block)
  3486  		bytes, err := proto.Marshal(&common.Metadata{Value: meta})
  3487  		Expect(err).NotTo(HaveOccurred())
  3488  		b.Metadata.Metadata[common.BlockMetadataIndex_ORDERER] = bytes
  3489  
  3490  		c.lastConfigBlockNumber = b.Header.Number
  3491  
  3492  		lastConfigValue := protoutil.MarshalOrPanic(&common.LastConfig{Index: c.lastConfigBlockNumber})
  3493  		b.Metadata.Metadata[common.BlockMetadataIndex_LAST_CONFIG] = protoutil.MarshalOrPanic(&common.Metadata{
  3494  			Value: lastConfigValue,
  3495  		})
  3496  
  3497  		c.ledger[b.Header.Number] = b
  3498  		if c.ledgerHeight < b.Header.Number+1 {
  3499  			c.ledgerHeight = b.Header.Number + 1
  3500  		}
  3501  	}
  3502  
  3503  	c.support.WriteBlockStub = appendNormalBlockToLedger
  3504  	c.support.WriteConfigBlockStub = appendConfigBlockToLedger
  3505  
  3506  	// returns current ledger height
  3507  	c.support.HeightStub = func() uint64 {
  3508  		c.ledgerLock.RLock()
  3509  		defer c.ledgerLock.RUnlock()
  3510  		return c.ledgerHeight
  3511  	}
  3512  
  3513  	// reads block from the ledger
  3514  	c.support.BlockStub = func(number uint64) *common.Block {
  3515  		c.ledgerLock.RLock()
  3516  		defer c.ledgerLock.RUnlock()
  3517  		return c.ledger[number]
  3518  	}
  3519  
  3520  	// consume ingress messages for chain
  3521  	go func() {
  3522  		for msg := range c.msgBuffer {
  3523  			c.Consensus(msg.req, msg.sender)
  3524  		}
  3525  	}()
  3526  
  3527  	return c
  3528  }
  3529  
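// init instantiates the underlying etcdraft.Chain from the chain's options and mocks.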
  3530  func (c *chain) init() {
  3531  	ch, err := etcdraft.NewChain(
  3532  		c.support,
  3533  		c.opts,
  3534  		c.configurator,
  3535  		c.rpc,
  3536  		c.cryptoProvider,
  3537  		func() (etcdraft.BlockPuller, error) { return c.puller, nil },
  3538  		c.haltCallback,
  3539  		c.observe,
  3540  	)
  3541  	Expect(err).NotTo(HaveOccurred())
  3542  	c.Chain = ch
  3543  }
  3544  
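// start clears the unstarted marker and starts the underlying raft chain.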
  3545  func (c *chain) start() {
  3546  	c.unstarted = nil
  3547  	c.Start()
  3548  }
  3549  
  3550  func (c *chain) setStepFunc(f stepFunc) {
  3551  	c.stepLock.Lock()
  3552  	c.step = f
  3553  	c.stepLock.Unlock()
  3554  }
  3555  
  3556  func (c *chain) getStepFunc() stepFunc {
  3557  	c.stepLock.Lock()
  3558  	defer c.stepLock.Unlock()
  3559  	return c.step
  3560  }
  3561  
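// network simulates a cluster of chains wired together through an in-memory
// comm layer; links and connectivity control which messages are delivered.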
  3562  type network struct {
  3563  	delayWG sync.WaitGroup
  3564  	sync.RWMutex
  3565  
  3566  	leader uint64
  3567  	chains map[uint64]*chain
  3568  
  3569  	// links simulates the configuration of the comm layer (a link is bi-directional).
  3570  	// if links[left][right] == true, right can send msg to left.
  3571  	links map[uint64]map[uint64]bool
  3572  	// connectivity determines if a node is connected to the network. This is used in tests
  3573  	// to simulate network partitions.
  3574  	connectivity map[uint64]bool
  3575  }
  3576  
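// link allows every node listed in from to send messages to node to.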
  3577  func (n *network) link(from []uint64, to uint64) {
  3578  	links := make(map[uint64]bool)
  3579  	for _, id := range from {
  3580  		links[id] = true
  3581  	}
  3582  
  3583  	n.Lock()
  3584  	defer n.Unlock()
  3585  
  3586  	n.links[to] = links
  3587  }
  3588  
  3589  func (n *network) linked(from, to uint64) bool {
  3590  	n.RLock()
  3591  	defer n.RUnlock()
  3592  
  3593  	return n.links[to][from]
  3594  }
  3595  
  3596  func (n *network) connect(id uint64) {
  3597  	n.Lock()
  3598  	defer n.Unlock()
  3599  
  3600  	n.connectivity[id] = true
  3601  }
  3602  
  3603  func (n *network) disconnect(id uint64) {
  3604  	n.Lock()
  3605  	defer n.Unlock()
  3606  
  3607  	n.connectivity[id] = false
  3608  }
  3609  
  3610  func (n *network) connected(id uint64) bool {
  3611  	n.RLock()
  3612  	defer n.RUnlock()
  3613  
  3614  	return n.connectivity[id]
  3615  }
  3616  
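// addChain registers a chain with the network and stubs its step function, RPC
// and block puller so that they route through the in-memory network state.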
  3617  func (n *network) addChain(c *chain) {
  3618  	n.connect(c.id) // chain is connected by default
  3619  
  3620  	c.step = func(dest uint64, req *orderer.ConsensusRequest) error {
  3621  		if !n.linked(c.id, dest) {
  3622  			return errors.Errorf("connection refused")
  3623  		}
  3624  
  3625  		if !n.connected(c.id) || !n.connected(dest) {
  3626  			return errors.Errorf("connection lost")
  3627  		}
  3628  
  3629  		n.RLock()
  3630  		target := n.chains[dest]
  3631  		n.RUnlock()
  3632  		target.msgBuffer <- &msg{req: req, sender: c.id}
  3633  		return nil
  3634  	}
  3635  
  3636  	c.rpc.SendConsensusStub = func(dest uint64, msg *orderer.ConsensusRequest) error {
  3637  		c.stepLock.Lock()
  3638  		defer c.stepLock.Unlock()
  3639  		return c.step(dest, msg)
  3640  	}
  3641  
  3642  	c.rpc.SendSubmitStub = func(dest uint64, msg *orderer.SubmitRequest, f func(error)) error {
  3643  		if !n.linked(c.id, dest) {
  3644  			err := errors.Errorf("connection refused")
  3645  			f(err)
  3646  			return err
  3647  		}
  3648  
  3649  		if !n.connected(c.id) || !n.connected(dest) {
  3650  			err := errors.Errorf("connection lost")
  3651  			f(err)
  3652  			return err
  3653  		}
  3654  
  3655  		n.RLock()
  3656  		target := n.chains[dest]
  3657  		n.RUnlock()
  3658  		go func() {
  3659  			n.Lock()
  3660  			n.delayWG.Wait()
  3661  			n.Unlock()
  3662  
  3663  			defer GinkgoRecover()
  3664  			target.Submit(msg, c.id)
  3665  			f(nil)
  3666  		}()
  3667  		return nil
  3668  	}
  3669  
  3670  	c.puller.PullBlockStub = func(i uint64) *common.Block {
  3671  		n.RLock()
  3672  		leaderChain := n.chains[n.leader]
  3673  		n.RUnlock()
  3674  
  3675  		leaderChain.ledgerLock.RLock()
  3676  		defer leaderChain.ledgerLock.RUnlock()
  3677  		block := leaderChain.ledger[i]
  3678  		return block
  3679  	}
  3680  
  3681  	c.puller.HeightsByEndpointsStub = func() (map[string]uint64, error) {
  3682  		n.RLock()
  3683  		leader := n.chains[n.leader]
  3684  		n.RUnlock()
  3685  
  3686  		if leader == nil {
  3687  			return nil, errors.Errorf("ledger not available")
  3688  		}
  3689  
  3690  		leader.ledgerLock.RLock()
  3691  		defer leader.ledgerLock.RUnlock()
  3692  		return map[string]uint64{"leader": leader.ledgerHeight}, nil
  3693  	}
  3694  
  3695  	c.configurator.ConfigureCalls(func(channel string, nodes []cluster.RemoteNode) {
  3696  		var ids []uint64
  3697  		for _, node := range nodes {
  3698  			ids = append(ids, node.ID)
  3699  		}
  3700  		n.link(ids, c.id)
  3701  	})
  3702  
  3703  	n.Lock()
  3704  	defer n.Unlock()
  3705  	n.chains[c.id] = c
  3706  }
  3707  
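// createNetwork creates one chain per consenter id in raftMetadata, each with its
// own temporary data directory, and registers them all on a new network.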
  3708  func createNetwork(
  3709  	timeout time.Duration,
  3710  	channel, dataDir string,
  3711  	raftMetadata *raftprotos.BlockMetadata,
  3712  	consenters map[uint64]*raftprotos.Consenter,
  3713  	cryptoProvider bccsp.BCCSP,
  3714  	tlsCA tlsgen.CA,
  3715  	haltCallback func(),
  3716  ) *network {
  3717  	n := &network{
  3718  		chains:       make(map[uint64]*chain),
  3719  		connectivity: make(map[uint64]bool),
  3720  		links:        make(map[uint64]map[uint64]bool),
  3721  	}
  3722  
  3723  	for _, nodeID := range raftMetadata.ConsenterIds {
  3724  		dir, err := ioutil.TempDir(dataDir, fmt.Sprintf("node-%d-", nodeID))
  3725  		Expect(err).NotTo(HaveOccurred())
  3726  
  3727  		m := proto.Clone(raftMetadata).(*raftprotos.BlockMetadata)
  3728  		support := &consensusmocks.FakeConsenterSupport{}
  3729  		support.ChannelIDReturns(channel)
  3730  		support.SharedConfigReturns(mockOrdererWithBatchTimeout(timeout, nil))
  3731  		mockOrdererConfig := mockOrdererWithTLSRootCert(timeout, nil, tlsCA)
  3732  		support.SharedConfigReturns(mockOrdererConfig)
  3733  		n.addChain(newChain(timeout, channel, dir, nodeID, m, consenters, cryptoProvider, support, haltCallback))
  3734  	}
  3735  
  3736  	return n
  3737  }
  3738  
  3739  // tests may alter the configuration of a chain before creating it
  3740  func (n *network) init() {
  3741  	n.exec(func(c *chain) { c.init() })
  3742  }
  3743  
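// start boots the given nodes, or every node when no ids are supplied, and waits
// until each raft node has consumed its bootstrap ConfChange.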
  3744  func (n *network) start(ids ...uint64) {
  3745  	nodes := ids
  3746  	if len(nodes) == 0 {
  3747  		for i := range n.chains {
  3748  			nodes = append(nodes, i)
  3749  		}
  3750  	}
  3751  
  3752  	for _, id := range nodes {
  3753  		n.chains[id].start()
  3754  
  3755  		// When the Raft node bootstraps, it produces a ConfChange
  3756  		// to add itself, which needs to be consumed with Ready().
  3757  		// If there are pending configuration changes in raft,
  3758  		// it refuses to campaign, no matter how many ticks are supplied.
  3759  		// This is not a problem in production code because eventually
  3760  		// raft.Ready will be consumed as real time goes by.
  3761  		//
  3762  		// However, this is problematic when using a fake clock and artificial
  3763  		// ticks. Instead of ticking raft indefinitely until raft.Ready is
  3764  		// consumed, this check is added to indirectly guarantee
  3765  		// that the first ConfChange is actually consumed and we can safely
  3766  		// proceed to tick raft.
  3767  		Eventually(func() error {
  3768  			_, err := n.chains[id].storage.Entries(1, 1, 1)
  3769  			return err
  3770  		}, LongEventualTimeout).ShouldNot(HaveOccurred())
  3771  		Eventually(n.chains[id].WaitReady, LongEventualTimeout).ShouldNot(HaveOccurred())
  3772  	}
  3773  }
  3774  
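// stop halts the given nodes, or every node when no ids are supplied, and marks them as stopped.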
  3775  func (n *network) stop(ids ...uint64) {
  3776  	nodes := ids
  3777  	if len(nodes) == 0 {
  3778  		for i := range n.chains {
  3779  			nodes = append(nodes, i)
  3780  		}
  3781  	}
  3782  
  3783  	for _, id := range nodes {
  3784  		c := n.chains[id]
  3785  		c.Halt()
  3786  		Eventually(c.Errored).Should(BeClosed())
  3787  		select {
  3788  		case <-c.stopped:
  3789  		default:
  3790  			close(c.stopped)
  3791  		}
  3792  	}
  3793  }
  3794  
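// exec runs f on the chains with the given ids, or on every chain when no ids are supplied.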
  3795  func (n *network) exec(f func(c *chain), ids ...uint64) {
  3796  	if len(ids) == 0 {
  3797  		for _, c := range n.chains {
  3798  			f(c)
  3799  		}
  3800  
  3801  		return
  3802  	}
  3803  
  3804  	for _, i := range ids {
  3805  		f(n.chains[i])
  3806  	}
  3807  }
  3808  
  3809  // join connects a node to the network and ticks the leader to trigger
  3810  // a heartbeat so the newly joined node can detect the leader.
  3811  //
  3812  // expectLeaderChange controls whether a leader change should
  3813  // be observed on the newly joined node:
  3814  // - it should be true if the newly joined node was the leader
  3815  // - it should be false if the newly joined node was a follower and
  3816  //   already knows the leader.
  3817  func (n *network) join(id uint64, expectLeaderChange bool) {
  3818  	n.connect(id)
  3819  
  3820  	n.RLock()
  3821  	leader, follower := n.chains[n.leader], n.chains[id]
  3822  	n.RUnlock()
  3823  
  3824  	step := leader.getStepFunc()
  3825  	signal := make(chan struct{})
  3826  	leader.setStepFunc(func(dest uint64, msg *orderer.ConsensusRequest) error {
  3827  		if dest == id {
  3828  			// close signal channel when a message targeting newly
  3829  			// joined node is observed on wire.
  3830  			select {
  3831  			case <-signal:
  3832  			default:
  3833  				close(signal)
  3834  			}
  3835  		}
  3836  
  3837  		return step(dest, msg)
  3838  	})
  3839  
  3840  	// Tick leader so it sends out a heartbeat to new node.
  3841  	// One tick _may_ not be enough because leader might be busy
  3842  	// and this tick is dropped on the floor.
  3843  	Eventually(func() <-chan struct{} {
  3844  		leader.clock.Increment(interval)
  3845  		return signal
  3846  	}, LongEventualTimeout, 100*time.Millisecond).Should(BeClosed())
  3847  
  3848  	leader.setStepFunc(step)
  3849  
  3850  	if expectLeaderChange {
  3851  		Eventually(follower.observe, LongEventualTimeout).Should(Receive(Equal(raft.SoftState{Lead: n.leader, RaftState: raft.StateFollower})))
  3852  	}
  3853  
  3854  	// wait for newly joined node to catch up with leader
  3855  	i, err := n.chains[n.leader].opts.MemoryStorage.LastIndex()
  3856  	Expect(err).NotTo(HaveOccurred())
  3857  	Eventually(n.chains[id].opts.MemoryStorage.LastIndex, LongEventualTimeout).Should(Equal(i))
  3858  }
  3859  
  3860  // elect deterministically elects a node as leader
  3861  func (n *network) elect(id uint64) {
  3862  	n.RLock()
  3863  	// skip observing leader change on followers if the same leader is elected as the previous one,
  3864  	// because this may happen too quickly from a slow follower's point of view, and the 0 -> X transition
  3865  	// may not be emitted at all.
  3866  	observeFollowers := id != n.leader
  3867  	candidate := n.chains[id]
  3868  	var followers []*chain
  3869  	for _, c := range n.chains {
  3870  		if c.id != id {
  3871  			followers = append(followers, c)
  3872  		}
  3873  	}
  3874  	n.RUnlock()
  3875  
  3876  	// Send node an artificial MsgTimeoutNow to emulate leadership transfer.
  3877  	fmt.Fprintf(GinkgoWriter, "Send artificial MsgTimeoutNow to elect node %d\n", id)
  3878  	candidate.Consensus(&orderer.ConsensusRequest{Payload: protoutil.MarshalOrPanic(&raftpb.Message{Type: raftpb.MsgTimeoutNow, To: id})}, 0)
  3879  	Eventually(candidate.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateLeader)))
  3880  
  3881  	n.Lock()
  3882  	n.leader = id
  3883  	n.Unlock()
  3884  
  3885  	if !observeFollowers {
  3886  		return
  3887  	}
  3888  
  3889  	// now observe leader change on other nodes
  3890  	for _, c := range followers {
  3891  		if c.id == id {
  3892  			continue
  3893  		}
  3894  
  3895  		select {
  3896  		case <-c.stopped: // skip check if node n is stopped
  3897  		case <-c.unstarted: // skip check if node is not started yet
  3898  		default:
  3899  			if n.linked(c.id, id) && n.connected(c.id) {
  3900  				Eventually(c.observe, LongEventualTimeout).Should(Receive(StateEqual(id, raft.StateFollower)))
  3901  			}
  3902  		}
  3903  	}
  3904  }
  3905  
  3906  // newConfigEnv builds the config envelope that tests assign to the configEnv var declared above
  3907  func newConfigEnv(chainID string, headerType common.HeaderType, configUpdateEnv *common.ConfigUpdateEnvelope) *common.Envelope {
  3908  	return &common.Envelope{
  3909  		Payload: marshalOrPanic(&common.Payload{
  3910  			Header: &common.Header{
  3911  				ChannelHeader: marshalOrPanic(&common.ChannelHeader{
  3912  					Type:      int32(headerType),
  3913  					ChannelId: chainID,
  3914  				}),
  3915  			},
  3916  			Data: marshalOrPanic(&common.ConfigEnvelope{
  3917  				LastUpdate: &common.Envelope{
  3918  					Payload: marshalOrPanic(&common.Payload{
  3919  						Header: &common.Header{
  3920  							ChannelHeader: marshalOrPanic(&common.ChannelHeader{
  3921  								Type:      int32(common.HeaderType_CONFIG_UPDATE),
  3922  								ChannelId: chainID,
  3923  							}),
  3924  						},
  3925  						Data: marshalOrPanic(configUpdateEnv),
  3926  					}), // common.Payload
  3927  				}, // LastUpdate
  3928  			}),
  3929  		}),
  3930  	}
  3931  }
  3932  
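// newConfigUpdateEnv builds a config update whose read set carries oldValues and
// whose write set carries newValues for the Orderer group.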
  3933  func newConfigUpdateEnv(chainID string, oldValues, newValues map[string]*common.ConfigValue) *common.ConfigUpdateEnvelope {
  3934  	return &common.ConfigUpdateEnvelope{
  3935  		ConfigUpdate: marshalOrPanic(&common.ConfigUpdate{
  3936  			ChannelId: chainID,
  3937  			ReadSet: &common.ConfigGroup{
  3938  				Groups: map[string]*common.ConfigGroup{
  3939  					"Orderer": {
  3940  						Values: oldValues,
  3941  					},
  3942  				},
  3943  			},
  3944  			WriteSet: &common.ConfigGroup{
  3945  				Groups: map[string]*common.ConfigGroup{
  3946  					"Orderer": {
  3947  						Values: newValues,
  3948  					},
  3949  				},
  3950  			}, // WriteSet
  3951  		}),
  3952  	}
  3953  }
  3954  
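// getSeedBlock returns the genesis block used to seed each chain's mock ledger.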
  3955  func getSeedBlock() *common.Block {
  3956  	return &common.Block{
  3957  		Header:   &common.BlockHeader{},
  3958  		Data:     &common.BlockData{Data: [][]byte{[]byte("foo")}},
  3959  		Metadata: &common.BlockMetadata{Metadata: make([][]byte, 4)},
  3960  	}
  3961  }
  3962  
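// StateEqual returns a matcher for a raft SoftState with the given leader id and raft state.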
  3963  func StateEqual(lead uint64, state raft.StateType) types.GomegaMatcher {
  3964  	return Equal(raft.SoftState{Lead: lead, RaftState: state})
  3965  }
  3966  
  3967  func BeFollower() types.GomegaMatcher {
  3968  	return &StateMatcher{expect: raft.StateFollower}
  3969  }
  3970  
  3971  type StateMatcher struct {
  3972  	expect raft.StateType
  3973  }
  3974  
  3975  func (stmatcher *StateMatcher) Match(actual interface{}) (success bool, err error) {
  3976  	state, ok := actual.(raft.SoftState)
  3977  	if !ok {
  3978  		return false, errors.Errorf("StateMatcher expects a raft SoftState")
  3979  	}
  3980  
  3981  	return state.RaftState == stmatcher.expect, nil
  3982  }
  3983  
  3984  func (stmatcher *StateMatcher) FailureMessage(actual interface{}) (message string) {
  3985  	state, ok := actual.(raft.SoftState)
  3986  	if !ok {
  3987  		return "StateMatcher expects a raft SoftState"
  3988  	}
  3989  
  3990  	return fmt.Sprintf("Expected %s to be %s", state.RaftState, stmatcher.expect)
  3991  }
  3992  
  3993  func (stmatcher *StateMatcher) NegatedFailureMessage(actual interface{}) (message string) {
  3994  	state, ok := actual.(raft.SoftState)
  3995  	if !ok {
  3996  		return "StateMatcher expects a raft SoftState"
  3997  	}
  3998  
  3999  	return fmt.Sprintf("Expected %s not to be %s", state.RaftState, stmatcher.expect)
  4000  }
  4001  
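// noOpBlockPuller returns a do-nothing fake block puller, matching the
// BlockPuller factory signature expected by etcdraft.NewChain.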
  4002  func noOpBlockPuller() (etcdraft.BlockPuller, error) {
  4003  	bp := &mocks.FakeBlockPuller{}
  4004  	return bp, nil
  4005  }