github.com/Hnampk/my-fabric@v0.0.0-20201028083322-75069da399c0/orderer/consensus/etcdraft/chain.go

     1  /*
     2  Copyright IBM Corp. All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package etcdraft
     8  
     9  import (
    10  	"context"
    11  	"encoding/pem"
    12  	"fmt"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	"code.cloudfoundry.org/clock"
    18  	"github.com/golang/protobuf/proto"
    19  	"github.com/hyperledger/fabric-protos-go/common"
    20  	"github.com/hyperledger/fabric-protos-go/orderer"
    21  	"github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
    22  	"github.com/hyperledger/fabric/bccsp"
    23  	"github.com/hyperledger/fabric/common/flogging"
    24  	"github.com/hyperledger/fabric/orderer/common/cluster"
    25  	"github.com/hyperledger/fabric/orderer/consensus"
    26  	"github.com/hyperledger/fabric/protoutil"
    27  	"github.com/pkg/errors"
    28  	"go.etcd.io/etcd/raft"
    29  	"go.etcd.io/etcd/raft/raftpb"
    30  	"go.etcd.io/etcd/wal"
    31  )
    32  
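        // Size constants in bytes, derived with iota: BYTE is 1, KILOBYTE is
        // 1<<10, MEGABYTE is 1<<20, and so on. DefaultSnapshotIntervalSize below
        // is expressed in these units.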
    33  const (
    34  	BYTE = 1 << (10 * iota)
    35  	KILOBYTE
    36  	MEGABYTE
    37  	GIGABYTE
    38  	TERABYTE
    39  )
    40  
    41  const (
    42  	// DefaultSnapshotCatchUpEntries is the default number of entries
    43  	// to preserve in memory when a snapshot is taken. This is for
    44  	// slow followers to catch up.
    45  	DefaultSnapshotCatchUpEntries = uint64(4)
    46  
    47  	// DefaultSnapshotIntervalSize is the default snapshot interval. It is
    48  	// used if SnapshotIntervalSize is not provided in channel config options.
    49  	// It is needed to ensure that snapshotting is always enabled.
    50  	DefaultSnapshotIntervalSize = 16 * MEGABYTE
    51  
    52  	// DefaultEvictionSuspicion is how long a node must have been
    53  	// leaderless before it starts suspecting that it has been
    54  	// evicted from the channel.
    55  	DefaultEvictionSuspicion = time.Minute * 10
    56  
    57  	// DefaultLeaderlessCheckInterval is the interval at which a chain checks
    58  	// its own leadership status.
    59  	DefaultLeaderlessCheckInterval = time.Second * 10
    60  )
    61  
    62  //go:generate counterfeiter -o mocks/configurator.go . Configurator
    63  
    64  // Configurator is used to configure the communication layer
    65  // when the chain starts.
    66  type Configurator interface {
    67  	Configure(channel string, newNodes []cluster.RemoteNode)
    68  }
    69  
    70  //go:generate counterfeiter -o mocks/mock_rpc.go . RPC
    71  
    72  // RPC is used to mock the transport layer in tests.
    73  type RPC interface {
    74  	SendConsensus(dest uint64, msg *orderer.ConsensusRequest) error
    75  	SendSubmit(dest uint64, request *orderer.SubmitRequest) error
    76  }
    77  
    78  //go:generate counterfeiter -o mocks/mock_blockpuller.go . BlockPuller
    79  
    80  // BlockPuller is used to pull blocks from other OSNs.
    81  type BlockPuller interface {
    82  	PullBlock(seq uint64) *common.Block
    83  	HeightsByEndpoints() (map[string]uint64, error)
    84  	Close()
    85  }
    86  
    87  // CreateBlockPuller is a function to create BlockPuller on demand.
    88  // It is passed into the chain initializer so that tests can mock it.
    89  type CreateBlockPuller func() (BlockPuller, error)
    90  
    91  // Options contains all the configurations relevant to the chain.
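        // When SnapshotCatchUpEntries, SnapshotIntervalSize or LeaderCheckInterval
        // are left at their zero values, the chain falls back to the corresponding
        // Default* constants above (see NewChain and Start).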
    92  type Options struct {
    93  	RaftID uint64
    94  
    95  	Clock clock.Clock
    96  
    97  	WALDir               string
    98  	SnapDir              string
    99  	SnapshotIntervalSize uint32
   100  
   101  	// This is configurable mainly for testing purposes. Users are not
   102  	// expected to alter this; when left unset, DefaultSnapshotCatchUpEntries is used.
   103  	SnapshotCatchUpEntries uint64
   104  
   105  	MemoryStorage MemoryStorage
   106  	Logger        *flogging.FabricLogger
   107  
   108  	TickInterval      time.Duration
   109  	ElectionTick      int
   110  	HeartbeatTick     int
   111  	MaxSizePerMsg     uint64
   112  	MaxInflightBlocks int
   113  
   114  	// BlockMetadata and Consenters should only be modified while under lock
   115  	// of raftMetadataLock
   116  	BlockMetadata *etcdraft.BlockMetadata
   117  	Consenters    map[uint64]*etcdraft.Consenter
   118  
   119  	// MigrationInit is set when the node starts right after consensus-type migration
   120  	MigrationInit bool
   121  
   122  	Metrics *Metrics
   123  	Cert    []byte
   124  
   125  	EvictionSuspicion   time.Duration
   126  	LeaderCheckInterval time.Duration
   127  }
   128  
   129  type submit struct {
   130  	req    *orderer.SubmitRequest
   131  	leader chan uint64
   132  }
   133  
   134  type gc struct {
   135  	index uint64
   136  	state raftpb.ConfState
   137  	data  []byte
   138  }
   139  
   140  // Chain implements consensus.Chain interface.
   141  type Chain struct {
   142  	configurator Configurator
   143  
   144  	rpc RPC
   145  
   146  	raftID    uint64
   147  	channelID string
   148  
   149  	lastKnownLeader uint64
   150  	ActiveNodes     atomic.Value
   151  
   152  	submitC  chan *submit
   153  	applyC   chan apply
   154  	observeC chan<- raft.SoftState // Notifies external observer on leader change (passed in optionally as an argument for tests)
   155  	haltC    chan struct{}         // Signals to goroutines that the chain is halting
   156  	doneC    chan struct{}         // Closes when the chain halts
   157  	startC   chan struct{}         // Closes when the node is started
   158  	snapC    chan *raftpb.Snapshot // Signal to catch up with snapshot
   159  	gcC      chan *gc              // Signal to take snapshot
   160  
   161  	errorCLock sync.RWMutex
   162  	errorC     chan struct{} // returned by Errored()
   163  
   164  	raftMetadataLock     sync.RWMutex
   165  	confChangeInProgress *raftpb.ConfChange
   166  	justElected          bool // this is true when the node has just been elected
   167  	configInflight       bool // this is true when there is a config block or ConfChange in flight
   168  	blockInflight        int  // number of in flight blocks
   169  
   170  	clock clock.Clock // Tests can inject a fake clock
   171  
   172  	support consensus.ConsenterSupport
   173  
   174  	lastBlock    *common.Block
   175  	appliedIndex uint64
   176  
   177  	// needed by snapshotting
   178  	sizeLimit        uint32 // SnapshotIntervalSize in bytes
   179  	accDataSize      uint32 // accumulative data size since last snapshot
   180  	lastSnapBlockNum uint64
   181  	confState        raftpb.ConfState // Etcdraft requires ConfState to be persisted within snapshot
   182  
   183  	createPuller CreateBlockPuller // func used to create BlockPuller on demand
   184  
   185  	fresh bool // indicate if this is a fresh raft node
   186  
   187  	// this is exported so that tests can use `Node.Status()` to get raft node status.
   188  	Node *node
   189  	opts Options
   190  
   191  	Metrics *Metrics
   192  	logger  *flogging.FabricLogger
   193  
   194  	periodicChecker *PeriodicCheck
   195  
   196  	haltCallback func()
   197  	// BCCSP instance
   198  	CryptoProvider bccsp.BCCSP
   199  }
   200  
   201  // NewChain constructs a chain object.
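        //
        // A rough construction sketch (illustrative only, with hypothetical
        // variable names; in Fabric the etcdraft Consenter normally wires these
        // dependencies together):
        //
        //	c, err := NewChain(support, opts, configurator, rpc, cryptoProvider, puller, nil, nil)
        //	if err != nil {
        //		return nil, err
        //	}
        //	c.Start()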
   202  func NewChain(
   203  	support consensus.ConsenterSupport,
   204  	opts Options,
   205  	conf Configurator,
   206  	rpc RPC,
   207  	cryptoProvider bccsp.BCCSP,
   208  	f CreateBlockPuller,
   209  	haltCallback func(),
   210  	observeC chan<- raft.SoftState,
   211  ) (*Chain, error) {
   212  
   213  	lg := opts.Logger.With("channel", support.ChannelID(), "node", opts.RaftID)
   214  
   215  	fresh := !wal.Exist(opts.WALDir)
   216  	storage, err := CreateStorage(lg, opts.WALDir, opts.SnapDir, opts.MemoryStorage)
   217  	if err != nil {
   218  		return nil, errors.Errorf("failed to restore persisted raft data: %s", err)
   219  	}
   220  
   221  	if opts.SnapshotCatchUpEntries == 0 {
   222  		storage.SnapshotCatchUpEntries = DefaultSnapshotCatchUpEntries
   223  	} else {
   224  		storage.SnapshotCatchUpEntries = opts.SnapshotCatchUpEntries
   225  	}
   226  
   227  	sizeLimit := opts.SnapshotIntervalSize
   228  	if sizeLimit == 0 {
   229  		sizeLimit = DefaultSnapshotIntervalSize
   230  	}
   231  
   232  	// get block number in last snapshot, if it exists
   233  	var snapBlkNum uint64
   234  	var cc raftpb.ConfState
   235  	if s := storage.Snapshot(); !raft.IsEmptySnap(s) {
   236  		b := protoutil.UnmarshalBlockOrPanic(s.Data)
   237  		snapBlkNum = b.Header.Number
   238  		cc = s.Metadata.ConfState
   239  	}
   240  
   241  	b := support.Block(support.Height() - 1)
   242  	if b == nil {
   243  		return nil, errors.Errorf("failed to get last block")
   244  	}
   245  
   246  	c := &Chain{
   247  		configurator:     conf,
   248  		rpc:              rpc,
   249  		channelID:        support.ChannelID(),
   250  		raftID:           opts.RaftID,
   251  		submitC:          make(chan *submit),
   252  		applyC:           make(chan apply),
   253  		haltC:            make(chan struct{}),
   254  		doneC:            make(chan struct{}),
   255  		startC:           make(chan struct{}),
   256  		snapC:            make(chan *raftpb.Snapshot),
   257  		errorC:           make(chan struct{}),
   258  		gcC:              make(chan *gc),
   259  		observeC:         observeC,
   260  		support:          support,
   261  		fresh:            fresh,
   262  		appliedIndex:     opts.BlockMetadata.RaftIndex,
   263  		lastBlock:        b,
   264  		sizeLimit:        sizeLimit,
   265  		lastSnapBlockNum: snapBlkNum,
   266  		confState:        cc,
   267  		createPuller:     f,
   268  		clock:            opts.Clock,
   269  		haltCallback:     haltCallback,
   270  		Metrics: &Metrics{
   271  			ClusterSize:             opts.Metrics.ClusterSize.With("channel", support.ChannelID()),
   272  			IsLeader:                opts.Metrics.IsLeader.With("channel", support.ChannelID()),
   273  			ActiveNodes:             opts.Metrics.ActiveNodes.With("channel", support.ChannelID()),
   274  			CommittedBlockNumber:    opts.Metrics.CommittedBlockNumber.With("channel", support.ChannelID()),
   275  			SnapshotBlockNumber:     opts.Metrics.SnapshotBlockNumber.With("channel", support.ChannelID()),
   276  			LeaderChanges:           opts.Metrics.LeaderChanges.With("channel", support.ChannelID()),
   277  			ProposalFailures:        opts.Metrics.ProposalFailures.With("channel", support.ChannelID()),
   278  			DataPersistDuration:     opts.Metrics.DataPersistDuration.With("channel", support.ChannelID()),
   279  			NormalProposalsReceived: opts.Metrics.NormalProposalsReceived.With("channel", support.ChannelID()),
   280  			ConfigProposalsReceived: opts.Metrics.ConfigProposalsReceived.With("channel", support.ChannelID()),
   281  		},
   282  		logger:         lg,
   283  		opts:           opts,
   284  		CryptoProvider: cryptoProvider,
   285  	}
   286  
   287  	// Sets initial values for metrics
   288  	c.Metrics.ClusterSize.Set(float64(len(c.opts.BlockMetadata.ConsenterIds)))
   289  	c.Metrics.IsLeader.Set(float64(0)) // all nodes start out as followers
   290  	c.Metrics.ActiveNodes.Set(float64(0))
   291  	c.Metrics.CommittedBlockNumber.Set(float64(c.lastBlock.Header.Number))
   292  	c.Metrics.SnapshotBlockNumber.Set(float64(c.lastSnapBlockNum))
   293  
   294  	// DO NOT use Applied option in config, see https://github.com/etcd-io/etcd/issues/10217
   295  	// We guard against replay of written blocks with `appliedIndex` instead.
   296  	config := &raft.Config{
   297  		ID:              c.raftID,
   298  		ElectionTick:    c.opts.ElectionTick,
   299  		HeartbeatTick:   c.opts.HeartbeatTick,
   300  		MaxSizePerMsg:   c.opts.MaxSizePerMsg,
   301  		MaxInflightMsgs: c.opts.MaxInflightBlocks,
   302  		Logger:          c.logger,
   303  		Storage:         c.opts.MemoryStorage,
   304  		// PreVote prevents a reconnected node from disturbing the network.
   305  		// See etcd/raft doc for more details.
   306  		PreVote:                   true,
   307  		CheckQuorum:               true,
   308  		DisableProposalForwarding: true, // This prevents blocks from being accidentally proposed by followers
   309  	}
   310  
   311  	disseminator := &Disseminator{RPC: c.rpc}
   312  	disseminator.UpdateMetadata(nil) // initialize
   313  	c.ActiveNodes.Store([]uint64{})
   314  
   315  	c.Node = &node{
   316  		chainID:      c.channelID,
   317  		chain:        c,
   318  		logger:       c.logger,
   319  		metrics:      c.Metrics,
   320  		storage:      storage,
   321  		rpc:          disseminator,
   322  		config:       config,
   323  		tickInterval: c.opts.TickInterval,
   324  		clock:        c.clock,
   325  		metadata:     c.opts.BlockMetadata,
   326  		tracker: &Tracker{
   327  			id:     c.raftID,
   328  			sender: disseminator,
   329  			gauge:  c.Metrics.ActiveNodes,
   330  			active: &c.ActiveNodes,
   331  			logger: c.logger,
   332  		},
   333  	}
   334  
   335  	return c, nil
   336  }
   337  
   338  // Start instructs the orderer to begin serving the chain and keep it current.
   339  func (c *Chain) Start() {
   340  	c.logger.Infof("Starting Raft node")
   341  
   342  	if err := c.configureComm(); err != nil {
   343  		c.logger.Errorf("Failed to start chain, aborting: %+v", err)
   344  		close(c.doneC)
   345  		return
   346  	}
   347  
   348  	isJoin := c.support.Height() > 1
   349  	if isJoin && c.opts.MigrationInit {
   350  		isJoin = false
   351  		c.logger.Infof("Consensus-type migration detected, starting new raft node on an existing channel; height=%d", c.support.Height())
   352  	}
   353  	c.Node.start(c.fresh, isJoin)
   354  
   355  	close(c.startC)
   356  	close(c.errorC)
   357  
   358  	go c.gc()
   359  	go c.run()
   360  
   361  	es := c.newEvictionSuspector()
   362  
   363  	interval := DefaultLeaderlessCheckInterval
   364  	if c.opts.LeaderCheckInterval != 0 {
   365  		interval = c.opts.LeaderCheckInterval
   366  	}
   367  
   368  	c.periodicChecker = &PeriodicCheck{
   369  		Logger:        c.logger,
   370  		Report:        es.confirmSuspicion,
   371  		ReportCleared: es.clearSuspicion,
   372  		CheckInterval: interval,
   373  		Condition:     c.suspectEviction,
   374  	}
   375  	c.periodicChecker.Run()
   376  }
   377  
   378  // Order submits normal type transactions for ordering.
   379  func (c *Chain) Order(env *common.Envelope, configSeq uint64) error {
   380  	c.Metrics.NormalProposalsReceived.Add(1)
   381  	return c.Submit(&orderer.SubmitRequest{LastValidationSeq: configSeq, Payload: env, Channel: c.channelID}, 0)
   382  }
   383  
   384  // Configure submits config type transactions for ordering.
   385  func (c *Chain) Configure(env *common.Envelope, configSeq uint64) error {
   386  	c.Metrics.ConfigProposalsReceived.Add(1)
   387  	return c.Submit(&orderer.SubmitRequest{LastValidationSeq: configSeq, Payload: env, Channel: c.channelID}, 0)
   388  }
   389  
   390  // WaitReady blocks when the chain:
   391  // - is catching up with other nodes using a snapshot
   392  //
   393  // In any other case, it returns right away.
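        //
        // The check works by sending a nil submit onto submitC: the run goroutine
        // receives and discards it. While run is busy applying a snapshot (or has
        // temporarily stopped draining submitC), the send blocks, which is what
        // makes callers of WaitReady wait.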
   394  func (c *Chain) WaitReady() error {
   395  	if err := c.isRunning(); err != nil {
   396  		return err
   397  	}
   398  
   399  	select {
   400  	case c.submitC <- nil:
   401  	case <-c.doneC:
   402  		return errors.Errorf("chain is stopped")
   403  	}
   404  
   405  	return nil
   406  }
   407  
   408  // Errored returns a channel that closes when the chain stops.
   409  func (c *Chain) Errored() <-chan struct{} {
   410  	c.errorCLock.RLock()
   411  	defer c.errorCLock.RUnlock()
   412  	return c.errorC
   413  }
   414  
   415  // Halt stops the chain.
   416  func (c *Chain) Halt() {
   417  	select {
   418  	case <-c.startC:
   419  	default:
   420  		c.logger.Warnf("Attempted to halt a chain that has not started")
   421  		return
   422  	}
   423  
   424  	select {
   425  	case c.haltC <- struct{}{}:
   426  	case <-c.doneC:
   427  		return
   428  	}
   429  	<-c.doneC
   430  
   431  	if c.haltCallback != nil {
   432  		c.haltCallback()
   433  	}
   434  }
   435  
   436  func (c *Chain) isRunning() error {
   437  	select {
   438  	case <-c.startC:
   439  	default:
   440  		return errors.Errorf("chain is not started")
   441  	}
   442  
   443  	select {
   444  	case <-c.doneC:
   445  		return errors.Errorf("chain is stopped")
   446  	default:
   447  	}
   448  
   449  	return nil
   450  }
   451  
   452  // Consensus passes the given ConsensusRequest message to the raft.Node instance
   453  func (c *Chain) Consensus(req *orderer.ConsensusRequest, sender uint64) error {
   454  	if err := c.isRunning(); err != nil {
   455  		return err
   456  	}
   457  
   458  	stepMsg := &raftpb.Message{}
   459  	if err := proto.Unmarshal(req.Payload, stepMsg); err != nil {
   460  		return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err)
   461  	}
   462  
   463  	if err := c.Node.Step(context.TODO(), *stepMsg); err != nil {
   464  		return fmt.Errorf("failed to process Raft Step message: %s", err)
   465  	}
   466  
   467  	if len(req.Metadata) == 0 || atomic.LoadUint64(&c.lastKnownLeader) != sender { // ignore metadata from non-leader
   468  		return nil
   469  	}
   470  
   471  	clusterMetadata := &etcdraft.ClusterMetadata{}
   472  	if err := proto.Unmarshal(req.Metadata, clusterMetadata); err != nil {
   473  		return errors.Errorf("failed to unmarshal ClusterMetadata: %s", err)
   474  	}
   475  
   476  	c.Metrics.ActiveNodes.Set(float64(len(clusterMetadata.ActiveNodes)))
   477  	c.ActiveNodes.Store(clusterMetadata.ActiveNodes)
   478  
   479  	return nil
   480  }
   481  
   482  // Submit forwards the incoming request to:
   483  // - the local run goroutine if this node is the leader
   484  // - the actual leader via the transport mechanism
   485  // The call fails if there's no leader elected yet.
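        //
        // The request is handed to the run goroutine together with a one-shot
        // channel on which run reports the current Raft leader; if that leader is
        // another node, the request is relayed to it via rpc.SendSubmit.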
   486  func (c *Chain) Submit(req *orderer.SubmitRequest, sender uint64) error {
   487  	if err := c.isRunning(); err != nil {
   488  		c.Metrics.ProposalFailures.Add(1)
   489  		return err
   490  	}
   491  
   492  	leadC := make(chan uint64, 1)
   493  	select {
   494  	case c.submitC <- &submit{req, leadC}:
   495  		lead := <-leadC
   496  		if lead == raft.None {
   497  			c.Metrics.ProposalFailures.Add(1)
   498  			return errors.Errorf("no Raft leader")
   499  		}
   500  
   501  		if lead != c.raftID {
   502  			if err := c.rpc.SendSubmit(lead, req); err != nil {
   503  				c.Metrics.ProposalFailures.Add(1)
   504  				return err
   505  			}
   506  		}
   507  
   508  	case <-c.doneC:
   509  		c.Metrics.ProposalFailures.Add(1)
   510  		return errors.Errorf("chain is stopped")
   511  	}
   512  
   513  	return nil
   514  }
   515  
   516  type apply struct {
   517  	entries []raftpb.Entry
   518  	soft    *raft.SoftState
   519  }
   520  
   521  func isCandidate(state raft.StateType) bool {
   522  	return state == raft.StatePreCandidate || state == raft.StateCandidate
   523  }
   524  
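        // run is the main event loop of the chain. It consumes submitted requests
        // (ordering them into blocks when this node is the leader), applies
        // committed Raft entries, cuts a block when the batch timer fires, catches
        // up from snapshots, and exits when the chain halts.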
   525  func (c *Chain) run() {
   526  	ticking := false
   527  	timer := c.clock.NewTimer(time.Second)
   528  	// we need a stopped timer rather than nil,
   529  	// because we will be select waiting on timer.C()
   530  	if !timer.Stop() {
   531  		<-timer.C()
   532  	}
   533  
   534  	// if timer is already started, this is a no-op
   535  	startTimer := func() {
   536  		if !ticking {
   537  			ticking = true
   538  			timer.Reset(c.support.SharedConfig().BatchTimeout())
   539  		}
   540  	}
   541  
   542  	stopTimer := func() {
   543  		if !timer.Stop() && ticking {
   544  			// we only need to drain the channel if the timer expired (not explicitly stopped)
   545  			<-timer.C()
   546  		}
   547  		ticking = false
   548  	}
   549  
   550  	var soft raft.SoftState
   551  	submitC := c.submitC
   552  	var bc *blockCreator
   553  
   554  	var propC chan<- *common.Block
   555  	var cancelProp context.CancelFunc
   556  	cancelProp = func() {} // no-op as initial value
   557  
   558  	becomeLeader := func() (chan<- *common.Block, context.CancelFunc) {
   559  		c.Metrics.IsLeader.Set(1)
   560  
   561  		c.blockInflight = 0
   562  		c.justElected = true
   563  		submitC = nil
   564  		ch := make(chan *common.Block, c.opts.MaxInflightBlocks)
   565  
   566  		// if there is an unfinished ConfChange, we should resume the effort to propose it as
   567  		// the new leader, and wait for it to be committed before we start serving new requests.
   568  		if cc := c.getInFlightConfChange(); cc != nil {
   569  			// The reason `ProposeConfChange` should be called in a goroutine is documented in the `writeConfigBlock` method.
   570  			go func() {
   571  				if err := c.Node.ProposeConfChange(context.TODO(), *cc); err != nil {
   572  					c.logger.Warnf("Failed to propose configuration update to Raft node: %s", err)
   573  				}
   574  			}()
   575  
   576  			c.confChangeInProgress = cc
   577  			c.configInflight = true
   578  		}
   579  
   580  		// The leader should call Propose in a goroutine, because this method may block
   581  		// if the node is leaderless (this can happen when the leader steps down in a heavily
   582  		// loaded network). We need to make sure applyC can still be consumed properly.
   583  		ctx, cancel := context.WithCancel(context.Background())
   584  		go func(ctx context.Context, ch <-chan *common.Block) {
   585  			for {
   586  				select {
   587  				case b := <-ch:
   588  					data := protoutil.MarshalOrPanic(b)
   589  					if err := c.Node.Propose(ctx, data); err != nil {
   590  						c.logger.Errorf("Failed to propose block [%d] to raft and discard %d blocks in queue: %s", b.Header.Number, len(ch), err)
   591  						return
   592  					}
   593  					c.logger.Debugf("Proposed block [%d] to raft consensus", b.Header.Number)
   594  
   595  				case <-ctx.Done():
   596  					c.logger.Debugf("Quit proposing blocks, discarded %d blocks in the queue", len(ch))
   597  					return
   598  				}
   599  			}
   600  		}(ctx, ch)
   601  
   602  		return ch, cancel
   603  	}
   604  
   605  	becomeFollower := func() {
   606  		cancelProp()
   607  		c.blockInflight = 0
   608  		_ = c.support.BlockCutter().Cut()
   609  		stopTimer()
   610  		submitC = c.submitC
   611  		bc = nil
   612  		c.Metrics.IsLeader.Set(0)
   613  	}
   614  
   615  	for {
   616  		select {
   617  		case s := <-submitC:
   618  			if s == nil {
   619  				// polled by `WaitReady`
   620  				continue
   621  			}
   622  
   623  			if soft.RaftState == raft.StatePreCandidate || soft.RaftState == raft.StateCandidate {
   624  				s.leader <- raft.None
   625  				continue
   626  			}
   627  
   628  			s.leader <- soft.Lead
   629  			if soft.Lead != c.raftID {
   630  				continue
   631  			}
   632  
   633  			batches, pending, err := c.ordered(s.req)
   634  			if err != nil {
   635  				c.logger.Errorf("Failed to order message: %s", err)
   636  				continue
   637  			}
   638  			if pending {
   639  				startTimer() // no-op if timer is already started
   640  			} else {
   641  				stopTimer()
   642  			}
   643  
   644  			c.propose(propC, bc, batches...)
   645  
   646  			if c.configInflight {
   647  				c.logger.Info("Received config transaction, pause accepting transaction till it is committed")
   648  				submitC = nil
   649  			} else if c.blockInflight >= c.opts.MaxInflightBlocks {
   650  				c.logger.Debugf("Number of in-flight blocks (%d) reaches limit (%d), pause accepting transaction",
   651  					c.blockInflight, c.opts.MaxInflightBlocks)
   652  				submitC = nil
   653  			}
   654  
   655  		case app := <-c.applyC:
   656  			if app.soft != nil {
   657  				newLeader := atomic.LoadUint64(&app.soft.Lead) // etcdraft requires atomic access
   658  				if newLeader != soft.Lead {
   659  					c.logger.Infof("Raft leader changed: %d -> %d", soft.Lead, newLeader)
   660  					c.Metrics.LeaderChanges.Add(1)
   661  
   662  					atomic.StoreUint64(&c.lastKnownLeader, newLeader)
   663  
   664  					if newLeader == c.raftID {
   665  						propC, cancelProp = becomeLeader()
   666  					}
   667  
   668  					if soft.Lead == c.raftID {
   669  						becomeFollower()
   670  					}
   671  				}
   672  
   673  				foundLeader := soft.Lead == raft.None && newLeader != raft.None
   674  				quitCandidate := isCandidate(soft.RaftState) && !isCandidate(app.soft.RaftState)
   675  
   676  				if foundLeader || quitCandidate {
   677  					c.errorCLock.Lock()
   678  					c.errorC = make(chan struct{})
   679  					c.errorCLock.Unlock()
   680  				}
   681  
   682  				if isCandidate(app.soft.RaftState) || newLeader == raft.None {
   683  					atomic.StoreUint64(&c.lastKnownLeader, raft.None)
   684  					select {
   685  					case <-c.errorC:
   686  					default:
   687  						nodeCount := len(c.opts.BlockMetadata.ConsenterIds)
   688  						// Only close the error channel (to signal the broadcast/deliver front-end a consensus backend error)
   689  						// if we are a cluster of size 3 or more; otherwise we cannot expand a cluster of size 1 to 2 nodes.
   690  						if nodeCount > 2 {
   691  							close(c.errorC)
   692  						} else {
   693  							c.logger.Warningf("No leader is present, cluster size is %d", nodeCount)
   694  						}
   695  					}
   696  				}
   697  
   698  				soft = raft.SoftState{Lead: newLeader, RaftState: app.soft.RaftState}
   699  
   700  				// notify external observer
   701  				select {
   702  				case c.observeC <- soft:
   703  				default:
   704  				}
   705  			}
   706  
   707  			c.apply(app.entries)
   708  
   709  			if c.justElected {
   710  				msgInflight := c.Node.lastIndex() > c.appliedIndex
   711  				if msgInflight {
   712  					c.logger.Debugf("There are in flight blocks, new leader should not serve requests")
   713  					continue
   714  				}
   715  
   716  				if c.configInflight {
   717  					c.logger.Debugf("There is config block in flight, new leader should not serve requests")
   718  					continue
   719  				}
   720  
   721  				c.logger.Infof("Start accepting requests as Raft leader at block [%d]", c.lastBlock.Header.Number)
   722  				bc = &blockCreator{
   723  					hash:   protoutil.BlockHeaderHash(c.lastBlock.Header),
   724  					number: c.lastBlock.Header.Number,
   725  					logger: c.logger,
   726  				}
   727  				submitC = c.submitC
   728  				c.justElected = false
   729  			} else if c.configInflight {
   730  				c.logger.Info("Config block or ConfChange in flight, pause accepting transaction")
   731  				submitC = nil
   732  			} else if c.blockInflight < c.opts.MaxInflightBlocks {
   733  				submitC = c.submitC
   734  			}
   735  
   736  		case <-timer.C():
   737  			ticking = false
   738  
   739  			batch := c.support.BlockCutter().Cut()
   740  			if len(batch) == 0 {
   741  				c.logger.Warningf("Batch timer expired with no pending requests, this might indicate a bug")
   742  				continue
   743  			}
   744  
   745  			c.logger.Debugf("Batch timer expired, creating block")
   746  			c.propose(propC, bc, batch) // we are certain this is normal block, no need to block
   747  
   748  		case sn := <-c.snapC:
   749  			if sn.Metadata.Index != 0 {
   750  				if sn.Metadata.Index <= c.appliedIndex {
   751  					c.logger.Debugf("Skip snapshot taken at index %d, because it is behind current applied index %d", sn.Metadata.Index, c.appliedIndex)
   752  					break
   753  				}
   754  
   755  				c.confState = sn.Metadata.ConfState
   756  				c.appliedIndex = sn.Metadata.Index
   757  			} else {
   758  				c.logger.Infof("Received artificial snapshot to trigger catchup")
   759  			}
   760  
   761  			if err := c.catchUp(sn); err != nil {
   762  				c.logger.Panicf("Failed to recover from snapshot taken at Term %d and Index %d: %s",
   763  					sn.Metadata.Term, sn.Metadata.Index, err)
   764  			}
   765  
   766  		case <-c.doneC:
   767  			stopTimer()
   768  			cancelProp()
   769  
   770  			select {
   771  			case <-c.errorC: // avoid closing closed channel
   772  			default:
   773  				close(c.errorC)
   774  			}
   775  
   776  			c.logger.Infof("Stop serving requests")
   777  			c.periodicChecker.Stop()
   778  			return
   779  		}
   780  	}
   781  }
   782  
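        // writeBlock commits a block agreed on by Raft to the ledger, recording
        // the Raft index at which it was committed in the block metadata. Blocks
        // this node already wrote during a forced catch-up are skipped; config
        // blocks are delegated to writeConfigBlock.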
   783  func (c *Chain) writeBlock(block *common.Block, index uint64) {
   784  	if block.Header.Number > c.lastBlock.Header.Number+1 {
   785  		c.logger.Panicf("Got block [%d], expect block [%d]", block.Header.Number, c.lastBlock.Header.Number+1)
   786  	} else if block.Header.Number < c.lastBlock.Header.Number+1 {
   787  		c.logger.Infof("Got block [%d], expect block [%d], this node was forced to catch up", block.Header.Number, c.lastBlock.Header.Number+1)
   788  		return
   789  	}
   790  
   791  	if c.blockInflight > 0 {
   792  		c.blockInflight-- // only reduce on leader
   793  	}
   794  	c.lastBlock = block
   795  
   796  	c.logger.Infof("Writing block [%d] (Raft index: %d) to ledger", block.Header.Number, index)
   797  
   798  	if protoutil.IsConfigBlock(block) {
   799  		c.writeConfigBlock(block, index)
   800  		return
   801  	}
   802  
   803  	c.raftMetadataLock.Lock()
   804  	c.opts.BlockMetadata.RaftIndex = index
   805  	m := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
   806  	c.raftMetadataLock.Unlock()
   807  
   808  	c.support.WriteBlock(block, m)
   809  }
   810  
   811  // ordered orders the envelope contained in the `msg` SubmitRequest.
   812  // Returns
   813  //   -- batches [][]*common.Envelope; the batches cut,
   814  //   -- pending bool; if there are envelopes pending to be ordered,
   815  //   -- err error; the error encountered, if any.
   816  // It takes care of config messages as well as the revalidation of messages if the config sequence has advanced.
   817  func (c *Chain) ordered(msg *orderer.SubmitRequest) (batches [][]*common.Envelope, pending bool, err error) {
   818  	seq := c.support.Sequence()
   819  
   820  	if c.isConfig(msg.Payload) {
   821  		// ConfigMsg
   822  		if msg.LastValidationSeq < seq {
   823  			c.logger.Warnf("Config message was validated against %d, although current config seq has advanced (%d)", msg.LastValidationSeq, seq)
   824  			msg.Payload, _, err = c.support.ProcessConfigMsg(msg.Payload)
   825  			if err != nil {
   826  				c.Metrics.ProposalFailures.Add(1)
   827  				return nil, true, errors.Errorf("bad config message: %s", err)
   828  			}
   829  		}
   830  
   831  		batch := c.support.BlockCutter().Cut()
   832  		batches = [][]*common.Envelope{}
   833  		if len(batch) != 0 {
   834  			batches = append(batches, batch)
   835  		}
   836  		batches = append(batches, []*common.Envelope{msg.Payload})
   837  		return batches, false, nil
   838  	}
   839  	// it is a normal message
   840  	if msg.LastValidationSeq < seq {
   841  		c.logger.Warnf("Normal message was validated against %d, although current config seq has advanced (%d)", msg.LastValidationSeq, seq)
   842  		if _, err := c.support.ProcessNormalMsg(msg.Payload); err != nil {
   843  			c.Metrics.ProposalFailures.Add(1)
   844  			return nil, true, errors.Errorf("bad normal message: %s", err)
   845  		}
   846  	}
   847  	batches, pending = c.support.BlockCutter().Ordered(msg.Payload)
   848  	return batches, pending, nil
   849  
   850  }
   851  
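        // propose cuts the given batches into blocks and pushes them onto the
        // proposal channel consumed by the leader's proposing goroutine. The send
        // never blocks: the channel is sized to MaxInflightBlocks, so a full
        // channel indicates a programming error and triggers a panic.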
   852  func (c *Chain) propose(ch chan<- *common.Block, bc *blockCreator, batches ...[]*common.Envelope) {
   853  	for _, batch := range batches {
   854  		b := bc.createNextBlock(batch)
   855  		c.logger.Infof("Created block [%d], there are %d blocks in flight", b.Header.Number, c.blockInflight)
   856  
   857  		select {
   858  		case ch <- b:
   859  		default:
   860  			c.logger.Panic("Programming error: limit of in-flight blocks does not properly take effect or block is proposed by follower")
   861  		}
   862  
   863  		// if it is a config block, we should wait for it to be committed
   864  		if protoutil.IsConfigBlock(b) {
   865  			c.configInflight = true
   866  		}
   867  
   868  		c.blockInflight++
   869  	}
   870  
   871  	return
   872  }
   873  
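        // catchUp pulls blocks from other orderers up to and including the block
        // recorded in the snapshot and writes them to the local ledger,
        // reconfiguring communication whenever a pulled config block changes the
        // consenter set.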
   874  func (c *Chain) catchUp(snap *raftpb.Snapshot) error {
   875  	b, err := protoutil.UnmarshalBlock(snap.Data)
   876  	if err != nil {
   877  		return errors.Errorf("failed to unmarshal snapshot data to block: %s", err)
   878  	}
   879  
   880  	if c.lastBlock.Header.Number >= b.Header.Number {
   881  		c.logger.Warnf("Snapshot is at block [%d], local block number is %d, no sync needed", b.Header.Number, c.lastBlock.Header.Number)
   882  		return nil
   883  	}
   884  
   885  	puller, err := c.createPuller()
   886  	if err != nil {
   887  		return errors.Errorf("failed to create block puller: %s", err)
   888  	}
   889  	defer puller.Close()
   890  
   891  	next := c.lastBlock.Header.Number + 1
   892  
   893  	c.logger.Infof("Catching up with snapshot taken at block [%d], starting from block [%d]", b.Header.Number, next)
   894  
   895  	for next <= b.Header.Number {
   896  		block := puller.PullBlock(next)
   897  		if block == nil {
   898  			return errors.Errorf("failed to fetch block [%d] from cluster", next)
   899  		}
   900  		if protoutil.IsConfigBlock(block) {
   901  			c.support.WriteConfigBlock(block, nil)
   902  
   903  			configMembership := c.detectConfChange(block)
   904  
   905  			if configMembership != nil && configMembership.Changed() {
   906  				c.logger.Infof("Config block [%d] changes consenter set, communication should be reconfigured", block.Header.Number)
   907  
   908  				c.raftMetadataLock.Lock()
   909  				c.opts.BlockMetadata = configMembership.NewBlockMetadata
   910  				c.opts.Consenters = configMembership.NewConsenters
   911  				c.raftMetadataLock.Unlock()
   912  
   913  				if err := c.configureComm(); err != nil {
   914  					c.logger.Panicf("Failed to configure communication: %s", err)
   915  				}
   916  			}
   917  		} else {
   918  			c.support.WriteBlock(block, nil)
   919  		}
   920  
   921  		c.lastBlock = block
   922  		next++
   923  	}
   924  
   925  	c.logger.Infof("Finished syncing with cluster up to and including block [%d]", b.Header.Number)
   926  	return nil
   927  }
   928  
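        // detectConfChange inspects a config block for snapshot-interval and
        // consenter-set changes and returns the computed membership changes; it
        // returns nil if the block carries no etcdraft metadata.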
   929  func (c *Chain) detectConfChange(block *common.Block) *MembershipChanges {
   930  	// If config is targeting THIS channel, inspect consenter set and
   931  	// propose a raft ConfChange if it adds or removes a node.
   932  	configMetadata := c.newConfigMetadata(block)
   933  
   934  	if configMetadata == nil {
   935  		return nil
   936  	}
   937  
   938  	if configMetadata.Options != nil &&
   939  		configMetadata.Options.SnapshotIntervalSize != 0 &&
   940  		configMetadata.Options.SnapshotIntervalSize != c.sizeLimit {
   941  		c.logger.Infof("Update snapshot interval size to %d bytes (was %d)",
   942  			configMetadata.Options.SnapshotIntervalSize, c.sizeLimit)
   943  		c.sizeLimit = configMetadata.Options.SnapshotIntervalSize
   944  	}
   945  
   946  	changes, err := ComputeMembershipChanges(c.opts.BlockMetadata, c.opts.Consenters, configMetadata.Consenters, c.support.SharedConfig())
   947  	if err != nil {
   948  		c.logger.Panicf("illegal configuration change detected: %s", err)
   949  	}
   950  
   951  	if changes.Rotated() {
   952  		c.logger.Infof("Config block [%d] rotates TLS certificate of node %d", block.Header.Number, changes.RotatedNode)
   953  	}
   954  
   955  	return changes
   956  }
   957  
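        // apply processes committed Raft entries. Normal entries carry marshaled
        // blocks and are written to the ledger exactly once (entries at or below
        // appliedIndex are skipped); conf-change entries update Raft membership
        // and may reconfigure communication or halt this node if it was removed.
        // Once the payload accumulated since the last snapshot exceeds sizeLimit,
        // a snapshot request is handed to the gc goroutine.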
   958  func (c *Chain) apply(ents []raftpb.Entry) {
   959  	if len(ents) == 0 {
   960  		return
   961  	}
   962  
   963  	if ents[0].Index > c.appliedIndex+1 {
   964  		c.logger.Panicf("first index of committed entry[%d] should be <= appliedIndex[%d]+1", ents[0].Index, c.appliedIndex)
   965  	}
   966  
   967  	var position int
   968  	for i := range ents {
   969  		switch ents[i].Type {
   970  		case raftpb.EntryNormal:
   971  			if len(ents[i].Data) == 0 {
   972  				break
   973  			}
   974  
   975  			position = i
   976  			c.accDataSize += uint32(len(ents[i].Data))
   977  
   978  			// We need to strictly avoid re-applying normal entries,
   979  			// otherwise we are writing the same block twice.
   980  			if ents[i].Index <= c.appliedIndex {
   981  				c.logger.Debugf("Received block with raft index (%d) <= applied index (%d), skip", ents[i].Index, c.appliedIndex)
   982  				break
   983  			}
   984  
   985  			block := protoutil.UnmarshalBlockOrPanic(ents[i].Data)
   986  			c.writeBlock(block, ents[i].Index)
   987  			c.Metrics.CommittedBlockNumber.Set(float64(block.Header.Number))
   988  
   989  		case raftpb.EntryConfChange:
   990  			var cc raftpb.ConfChange
   991  			if err := cc.Unmarshal(ents[i].Data); err != nil {
   992  				c.logger.Warnf("Failed to unmarshal ConfChange data: %s", err)
   993  				continue
   994  			}
   995  
   996  			c.confState = *c.Node.ApplyConfChange(cc)
   997  
   998  			switch cc.Type {
   999  			case raftpb.ConfChangeAddNode:
  1000  				c.logger.Infof("Applied config change to add node %d, current nodes in channel: %+v", cc.NodeID, c.confState.Nodes)
  1001  			case raftpb.ConfChangeRemoveNode:
  1002  				c.logger.Infof("Applied config change to remove node %d, current nodes in channel: %+v", cc.NodeID, c.confState.Nodes)
  1003  			default:
  1004  				c.logger.Panic("Programming error, encountered unsupported raft config change")
  1005  			}
  1006  
  1007  			// This ConfChange was introduced by a previously committed config block;
  1008  			// we can now unblock submitC to accept envelopes.
  1009  			var configureComm bool
  1010  			if c.confChangeInProgress != nil &&
  1011  				c.confChangeInProgress.NodeID == cc.NodeID &&
  1012  				c.confChangeInProgress.Type == cc.Type {
  1013  
  1014  				configureComm = true
  1015  				c.confChangeInProgress = nil
  1016  				c.configInflight = false
  1017  				// report the new cluster size
  1018  				c.Metrics.ClusterSize.Set(float64(len(c.opts.BlockMetadata.ConsenterIds)))
  1019  			}
  1020  
  1021  			lead := atomic.LoadUint64(&c.lastKnownLeader)
  1022  			removeLeader := cc.Type == raftpb.ConfChangeRemoveNode && cc.NodeID == lead
  1023  			shouldHalt := cc.Type == raftpb.ConfChangeRemoveNode && cc.NodeID == c.raftID
  1024  
  1025  			// unblock the `run` goroutine so it can still consume Raft messages
  1026  			go func() {
  1027  				if removeLeader {
  1028  					c.logger.Infof("Current leader is being removed from channel, attempt leadership transfer")
  1029  					c.Node.abdicateLeader(lead)
  1030  				}
  1031  
  1032  				if configureComm && !shouldHalt { // no need to configure comm if this node is going to halt
  1033  					if err := c.configureComm(); err != nil {
  1034  						c.logger.Panicf("Failed to configure communication: %s", err)
  1035  					}
  1036  				}
  1037  
  1038  				if shouldHalt {
  1039  					c.logger.Infof("This node is being removed from replica set")
  1040  					c.Halt()
  1041  					return
  1042  				}
  1043  			}()
  1044  		}
  1045  
  1046  		if ents[i].Index > c.appliedIndex {
  1047  			c.appliedIndex = ents[i].Index
  1048  		}
  1049  	}
  1050  
  1051  	if c.accDataSize >= c.sizeLimit {
  1052  		b := protoutil.UnmarshalBlockOrPanic(ents[position].Data)
  1053  
  1054  		select {
  1055  		case c.gcC <- &gc{index: c.appliedIndex, state: c.confState, data: ents[position].Data}:
  1056  			c.logger.Infof("Accumulated %d bytes since last snapshot, exceeding size limit (%d bytes), "+
  1057  				"taking snapshot at block [%d] (index: %d), last snapshotted block number is %d, current nodes: %+v",
  1058  				c.accDataSize, c.sizeLimit, b.Header.Number, c.appliedIndex, c.lastSnapBlockNum, c.confState.Nodes)
  1059  			c.accDataSize = 0
  1060  			c.lastSnapBlockNum = b.Header.Number
  1061  			c.Metrics.SnapshotBlockNumber.Set(float64(b.Header.Number))
  1062  		default:
  1063  			c.logger.Warnf("Snapshotting is in progress, it is very likely that SnapshotIntervalSize is too small")
  1064  		}
  1065  	}
  1066  
  1067  	return
  1068  }
  1069  
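        // gc runs in its own goroutine and takes the snapshots requested by apply
        // via gcC; it returns when the chain halts.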
  1070  func (c *Chain) gc() {
  1071  	for {
  1072  		select {
  1073  		case g := <-c.gcC:
  1074  			c.Node.takeSnapshot(g.index, g.state, g.data)
  1075  		case <-c.doneC:
  1076  			c.logger.Infof("Stop garbage collecting")
  1077  			return
  1078  		}
  1079  	}
  1080  }
  1081  
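        // isConfig returns true if the envelope carries a channel config
        // transaction or an orderer (channel creation) transaction.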
  1082  func (c *Chain) isConfig(env *common.Envelope) bool {
  1083  	h, err := protoutil.ChannelHeader(env)
  1084  	if err != nil {
  1085  		c.logger.Panicf("failed to extract channel header from envelope")
  1086  	}
  1087  
  1088  	return h.Type == int32(common.HeaderType_CONFIG) || h.Type == int32(common.HeaderType_ORDERER_TRANSACTION)
  1089  }
  1090  
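        // configureComm resets the node's unreachable-peer tracking and
        // reconfigures the cluster communication layer with the current set of
        // remote consenters.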
  1091  func (c *Chain) configureComm() error {
  1092  	// Reset unreachable map when communication is reconfigured
  1093  	c.Node.unreachableLock.Lock()
  1094  	c.Node.unreachable = make(map[uint64]struct{})
  1095  	c.Node.unreachableLock.Unlock()
  1096  
  1097  	nodes, err := c.remotePeers()
  1098  	if err != nil {
  1099  		return err
  1100  	}
  1101  
  1102  	c.configurator.Configure(c.channelID, nodes)
  1103  	return nil
  1104  }
  1105  
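        // remotePeers converts the consenter map, excluding this node, into
        // cluster.RemoteNode entries with DER-encoded TLS certificates.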
  1106  func (c *Chain) remotePeers() ([]cluster.RemoteNode, error) {
  1107  	c.raftMetadataLock.RLock()
  1108  	defer c.raftMetadataLock.RUnlock()
  1109  
  1110  	var nodes []cluster.RemoteNode
  1111  	for raftID, consenter := range c.opts.Consenters {
  1112  		// No need to know yourself
  1113  		if raftID == c.raftID {
  1114  			continue
  1115  		}
  1116  		serverCertAsDER, err := pemToDER(consenter.ServerTlsCert, raftID, "server", c.logger)
  1117  		if err != nil {
  1118  			return nil, errors.WithStack(err)
  1119  		}
  1120  		clientCertAsDER, err := pemToDER(consenter.ClientTlsCert, raftID, "client", c.logger)
  1121  		if err != nil {
  1122  			return nil, errors.WithStack(err)
  1123  		}
  1124  		nodes = append(nodes, cluster.RemoteNode{
  1125  			ID:            raftID,
  1126  			Endpoint:      fmt.Sprintf("%s:%d", consenter.Host, consenter.Port),
  1127  			ServerTLSCert: serverCertAsDER,
  1128  			ClientTLSCert: clientCertAsDER,
  1129  		})
  1130  	}
  1131  	return nodes, nil
  1132  }
  1133  
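        // pemToDER decodes the first PEM block of the given certificate and
        // returns its DER bytes.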
  1134  func pemToDER(pemBytes []byte, id uint64, certType string, logger *flogging.FabricLogger) ([]byte, error) {
  1135  	bl, _ := pem.Decode(pemBytes)
  1136  	if bl == nil {
  1137  		logger.Errorf("Rejecting PEM block of %s TLS cert for node %d, offending PEM is: %s", certType, id, string(pemBytes))
  1138  		return nil, errors.Errorf("invalid PEM block")
  1139  	}
  1140  	return bl.Bytes, nil
  1141  }
  1142  
  1143  // writeConfigBlock writes configuration blocks into the ledger. In
  1144  // addition, it extracts updates to the raft replica set and, if there
  1145  // are changes, updates the cluster membership as well.
  1146  func (c *Chain) writeConfigBlock(block *common.Block, index uint64) {
  1147  	hdr, err := ConfigChannelHeader(block)
  1148  	if err != nil {
  1149  		c.logger.Panicf("Failed to get config header type from config block: %s", err)
  1150  	}
  1151  
  1152  	c.configInflight = false
  1153  
  1154  	switch common.HeaderType(hdr.Type) {
  1155  	case common.HeaderType_CONFIG:
  1156  		configMembership := c.detectConfChange(block)
  1157  
  1158  		c.raftMetadataLock.Lock()
  1159  		c.opts.BlockMetadata.RaftIndex = index
  1160  		if configMembership != nil {
  1161  			c.opts.BlockMetadata = configMembership.NewBlockMetadata
  1162  			c.opts.Consenters = configMembership.NewConsenters
  1163  		}
  1164  		c.raftMetadataLock.Unlock()
  1165  
  1166  		blockMetadataBytes := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
  1167  
  1168  		// write block with metadata
  1169  		c.support.WriteConfigBlock(block, blockMetadataBytes)
  1170  
  1171  		if configMembership == nil {
  1172  			return
  1173  		}
  1174  
  1175  		// update membership
  1176  		if configMembership.ConfChange != nil {
  1177  			// We need to propose the conf change in a goroutine, because it may block if the raft node
  1178  			// becomes leaderless, and we should not block `run`, so that it can keep consuming applyC;
  1179  			// otherwise we would have a deadlock.
  1180  			go func() {
  1181  				// ProposeConfChange returns an error only if the node is being stopped.
  1182  				// This proposal is dropped by followers because DisableProposalForwarding is enabled.
  1183  				if err := c.Node.ProposeConfChange(context.TODO(), *configMembership.ConfChange); err != nil {
  1184  					c.logger.Warnf("Failed to propose configuration update to Raft node: %s", err)
  1185  				}
  1186  			}()
  1187  
  1188  			c.confChangeInProgress = configMembership.ConfChange
  1189  
  1190  			switch configMembership.ConfChange.Type {
  1191  			case raftpb.ConfChangeAddNode:
  1192  				c.logger.Infof("Config block just committed adds node %d, pause accepting transactions till config change is applied", configMembership.ConfChange.NodeID)
  1193  			case raftpb.ConfChangeRemoveNode:
  1194  				c.logger.Infof("Config block just committed removes node %d, pause accepting transactions till config change is applied", configMembership.ConfChange.NodeID)
  1195  			default:
  1196  				c.logger.Panic("Programming error, encountered unsupported raft config change")
  1197  			}
  1198  
  1199  			c.configInflight = true
  1200  		} else if configMembership.Rotated() {
  1201  			lead := atomic.LoadUint64(&c.lastKnownLeader)
  1202  			if configMembership.RotatedNode == lead {
  1203  				c.logger.Infof("Certificate of Raft leader is being rotated, attempt leader transfer before reconfiguring communication")
  1204  				go func() {
  1205  					c.Node.abdicateLeader(lead)
  1206  					if err := c.configureComm(); err != nil {
  1207  						c.logger.Panicf("Failed to configure communication: %s", err)
  1208  					}
  1209  				}()
  1210  			} else {
  1211  				if err := c.configureComm(); err != nil {
  1212  					c.logger.Panicf("Failed to configure communication: %s", err)
  1213  				}
  1214  			}
  1215  		}
  1216  
  1217  	case common.HeaderType_ORDERER_TRANSACTION:
  1218  		// If this config is channel creation, no extra inspection is needed
  1219  		c.raftMetadataLock.Lock()
  1220  		c.opts.BlockMetadata.RaftIndex = index
  1221  		m := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
  1222  		c.raftMetadataLock.Unlock()
  1223  
  1224  		c.support.WriteConfigBlock(block, m)
  1225  
  1226  	default:
  1227  		c.logger.Panicf("Programming error: unexpected config type: %s", common.HeaderType(hdr.Type))
  1228  	}
  1229  }
  1230  
  1231  // getInFlightConfChange returns ConfChange in-flight if any.
  1232  // It returns confChangeInProgress if it is not nil. Otherwise
  1233  // it returns ConfChange from the last committed block (might be nil).
  1234  func (c *Chain) getInFlightConfChange() *raftpb.ConfChange {
  1235  	if c.confChangeInProgress != nil {
  1236  		return c.confChangeInProgress
  1237  	}
  1238  
  1239  	if c.lastBlock.Header.Number == 0 {
  1240  		return nil // nothing to fail over, the chain has just started
  1241  	}
  1242  
  1243  	if !protoutil.IsConfigBlock(c.lastBlock) {
  1244  		return nil
  1245  	}
  1246  
  1247  	// extracting current Raft configuration state
  1248  	confState := c.Node.ApplyConfChange(raftpb.ConfChange{})
  1249  
  1250  	if len(confState.Nodes) == len(c.opts.BlockMetadata.ConsenterIds) {
  1251  		// A Raft configuration change can only add or remove one
  1252  		// node at a time. If the raft conf state size is equal to
  1253  		// the membership stored in the block metadata field, then
  1254  		// everything is in sync and there is no need to propose a
  1255  		// config update.
  1256  		return nil
  1257  	}
  1258  
  1259  	return ConfChange(c.opts.BlockMetadata, confState)
  1260  }
  1261  
  1262  // newConfigMetadata extracts the config metadata from the configuration block
  1263  func (c *Chain) newConfigMetadata(block *common.Block) *etcdraft.ConfigMetadata {
  1264  	metadata, err := ConsensusMetadataFromConfigBlock(block)
  1265  	if err != nil {
  1266  		c.logger.Panicf("error reading consensus metadata: %s", err)
  1267  	}
  1268  	return metadata
  1269  }
  1270  
  1271  // ValidateConsensusMetadata determines the validity of a
  1272  // ConsensusMetadata update during config updates on the channel.
  1273  func (c *Chain) ValidateConsensusMetadata(oldMetadataBytes, newMetadataBytes []byte, newChannel bool) error {
  1274  	// metadata was not updated
  1275  	if newMetadataBytes == nil {
  1276  		return nil
  1277  	}
  1278  	if oldMetadataBytes == nil {
  1279  		c.logger.Panic("Programming Error: ValidateConsensusMetadata called with nil old metadata")
  1280  	}
  1281  
  1282  	oldMetadata := &etcdraft.ConfigMetadata{}
  1283  	if err := proto.Unmarshal(oldMetadataBytes, oldMetadata); err != nil {
  1284  		c.logger.Panicf("Programming Error: Failed to unmarshal old etcdraft consensus metadata: %v", err)
  1285  	}
  1286  	newMetadata := &etcdraft.ConfigMetadata{}
  1287  	if err := proto.Unmarshal(newMetadataBytes, newMetadata); err != nil {
  1288  		return errors.Wrap(err, "failed to unmarshal new etcdraft metadata configuration")
  1289  	}
  1290  
  1291  	err := CheckConfigMetadata(newMetadata)
  1292  	if err != nil {
  1293  		return errors.Wrap(err, "invalid new config metadata")
  1294  	}
  1295  
  1296  	if newChannel {
  1297  		// check if the consenters are a subset of the existing consenters (system channel consenters)
  1298  		set := ConsentersToMap(oldMetadata.Consenters)
  1299  		for _, c := range newMetadata.Consenters {
  1300  			if _, exists := set[string(c.ClientTlsCert)]; !exists {
  1301  				return errors.New("new channel has consenter that is not part of system consenter set")
  1302  			}
  1303  		}
  1304  		return nil
  1305  	}
  1306  
  1307  	// create the dummy parameters for ComputeMembershipChanges
  1308  	dummyOldBlockMetadata, _ := ReadBlockMetadata(nil, oldMetadata)
  1309  	dummyOldConsentersMap := CreateConsentersMap(dummyOldBlockMetadata, oldMetadata)
  1310  	changes, err := ComputeMembershipChanges(dummyOldBlockMetadata, dummyOldConsentersMap, newMetadata.Consenters, c.support.SharedConfig())
  1311  	if err != nil {
  1312  		return err
  1313  	}
  1314  
  1315  	active := c.ActiveNodes.Load().([]uint64)
  1316  	if changes.UnacceptableQuorumLoss(active) {
  1317  		return errors.Errorf("%d out of %d nodes are alive, configuration will result in quorum loss", len(active), len(dummyOldConsentersMap))
  1318  	}
  1319  
  1320  	return nil
  1321  }
  1322  
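        // suspectEviction reports whether this node should suspect its own
        // eviction: it is running but knows of no Raft leader.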
  1323  func (c *Chain) suspectEviction() bool {
  1324  	if c.isRunning() != nil {
  1325  		return false
  1326  	}
  1327  
  1328  	return atomic.LoadUint64(&c.lastKnownLeader) == uint64(0)
  1329  }
  1330  
  1331  func (c *Chain) newEvictionSuspector() *evictionSuspector {
  1332  	consenterCertificate := &ConsenterCertificate{
  1333  		ConsenterCertificate: c.opts.Cert,
  1334  		CryptoProvider:       c.CryptoProvider,
  1335  	}
  1336  
  1337  	return &evictionSuspector{
  1338  		amIInChannel:               consenterCertificate.IsConsenterOfChannel,
  1339  		evictionSuspicionThreshold: c.opts.EvictionSuspicion,
  1340  		writeBlock:                 c.support.Append,
  1341  		createPuller:               c.createPuller,
  1342  		height:                     c.support.Height,
  1343  		triggerCatchUp:             c.triggerCatchup,
  1344  		logger:                     c.logger,
  1345  		halt: func() {
  1346  			c.Halt()
  1347  		},
  1348  	}
  1349  }
  1350  
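        // triggerCatchup hands a snapshot to the run goroutine via snapC so that
        // it catches up with the rest of the cluster.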
  1351  func (c *Chain) triggerCatchup(sn *raftpb.Snapshot) {
  1352  	select {
  1353  	case c.snapC <- sn:
  1354  	case <-c.doneC:
  1355  	}
  1356  }