github.com/true-sqn/fabric@v2.1.1+incompatible/orderer/consensus/etcdraft/chain.go

     1  /*
     2  Copyright IBM Corp. All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package etcdraft
     8  
     9  import (
    10  	"context"
    11  	"encoding/pem"
    12  	"fmt"
    13  	"sync"
    14  	"sync/atomic"
    15  	"time"
    16  
    17  	"code.cloudfoundry.org/clock"
    18  	"github.com/golang/protobuf/proto"
    19  	"github.com/hyperledger/fabric-protos-go/common"
    20  	"github.com/hyperledger/fabric-protos-go/orderer"
    21  	"github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
    22  	"github.com/hyperledger/fabric/bccsp"
    23  	"github.com/hyperledger/fabric/common/flogging"
    24  	"github.com/hyperledger/fabric/orderer/common/cluster"
    25  	"github.com/hyperledger/fabric/orderer/consensus"
    26  	"github.com/hyperledger/fabric/protoutil"
    27  	"github.com/pkg/errors"
    28  	"go.etcd.io/etcd/raft"
    29  	"go.etcd.io/etcd/raft/raftpb"
    30  	"go.etcd.io/etcd/wal"
    31  )
    32  
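         // These size constants are built with iota bit shifts: BYTE is 1,
         // KILOBYTE is 1<<10, MEGABYTE is 1<<20, and so on; for example,
         // 16*MEGABYTE equals 16777216 bytes.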
    33  const (
    34  	BYTE = 1 << (10 * iota)
    35  	KILOBYTE
    36  	MEGABYTE
    37  	GIGABYTE
    38  	TERABYTE
    39  )
    40  
    41  const (
    42  	// DefaultSnapshotCatchUpEntries is the default number of entries
    43  	// to preserve in memory when a snapshot is taken. This is for
    44  	// slow followers to catch up.
    45  	DefaultSnapshotCatchUpEntries = uint64(4)
    46  
    47  	// DefaultSnapshotIntervalSize is the default snapshot interval. It is
    48  	// used if SnapshotIntervalSize is not provided in channel config options.
    49  	// It is needed to enforce snapshot being set.
    50  	DefaultSnapshotIntervalSize = 16 * MEGABYTE
    51  
    52  	// DefaultEvictionSuspicion is the threshold that a node will start
    53  	// suspecting its own eviction if it has been leaderless for this
    54  	// period of time.
    55  	DefaultEvictionSuspicion = time.Minute * 10
    56  
    57  	// DefaultLeaderlessCheckInterval is the interval that a chain checks
    58  	// its own leadership status.
    59  	DefaultLeaderlessCheckInterval = time.Second * 10
    60  )
    61  
    62  //go:generate counterfeiter -o mocks/configurator.go . Configurator
    63  
    64  // Configurator is used to configure the communication layer
    65  // when the chain starts.
    66  type Configurator interface {
    67  	Configure(channel string, newNodes []cluster.RemoteNode)
    68  }
    69  
    70  //go:generate counterfeiter -o mocks/mock_rpc.go . RPC
    71  
     72  // RPC abstracts the transport layer used to send consensus and submit requests to other nodes; it is mocked in tests.
    73  type RPC interface {
    74  	SendConsensus(dest uint64, msg *orderer.ConsensusRequest) error
    75  	SendSubmit(dest uint64, request *orderer.SubmitRequest) error
    76  }
    77  
    78  //go:generate counterfeiter -o mocks/mock_blockpuller.go . BlockPuller
    79  
     80  // BlockPuller is used to pull blocks from other OSNs (ordering service nodes).
    81  type BlockPuller interface {
    82  	PullBlock(seq uint64) *common.Block
    83  	HeightsByEndpoints() (map[string]uint64, error)
    84  	Close()
    85  }
    86  
     87  // CreateBlockPuller is a function to create a BlockPuller on demand.
     88  // It is passed into the chain initializer so that tests can mock it.
    89  type CreateBlockPuller func() (BlockPuller, error)
    90  
    91  // Options contains all the configurations relevant to the chain.
    92  type Options struct {
    93  	RaftID uint64
    94  
    95  	Clock clock.Clock
    96  
    97  	WALDir               string
    98  	SnapDir              string
    99  	SnapshotIntervalSize uint32
   100  
    101  	// This is configurable mainly for testing purposes. Users are not
    102  	// expected to alter this. If left unset, DefaultSnapshotCatchUpEntries is used.
   103  	SnapshotCatchUpEntries uint64
   104  
   105  	MemoryStorage MemoryStorage
   106  	Logger        *flogging.FabricLogger
   107  
   108  	TickInterval      time.Duration
   109  	ElectionTick      int
   110  	HeartbeatTick     int
   111  	MaxSizePerMsg     uint64
   112  	MaxInflightBlocks int
   113  
    114  	// BlockMetadata and Consenters should only be modified while holding
    115  	// raftMetadataLock
   116  	BlockMetadata *etcdraft.BlockMetadata
   117  	Consenters    map[uint64]*etcdraft.Consenter
   118  
   119  	// MigrationInit is set when the node starts right after consensus-type migration
   120  	MigrationInit bool
   121  
   122  	Metrics *Metrics
   123  	Cert    []byte
   124  
   125  	EvictionSuspicion   time.Duration
   126  	LeaderCheckInterval time.Duration
   127  }
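         // The following is an illustrative, hypothetical Options value, shown only
         // to indicate how the fields above fit together; real values are derived
         // from the channel configuration and the local orderer configuration:
         //
         //	opts := Options{
         //		RaftID:            1,
         //		Clock:             clock.NewClock(),
         //		WALDir:            "/var/hyperledger/etcdraft/wal",
         //		SnapDir:           "/var/hyperledger/etcdraft/snapshot",
         //		MemoryStorage:     raft.NewMemoryStorage(), // assumes *raft.MemoryStorage satisfies MemoryStorage
         //		Logger:            flogging.MustGetLogger("orderer.consensus.etcdraft"),
         //		TickInterval:      500 * time.Millisecond,
         //		ElectionTick:      10,
         //		HeartbeatTick:     1,
         //		MaxSizePerMsg:     1024 * 1024,
         //		MaxInflightBlocks: 5,
         //	}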
   128  
   129  type submit struct {
   130  	req    *orderer.SubmitRequest
   131  	leader chan uint64
   132  }
   133  
   134  type gc struct {
   135  	index uint64
   136  	state raftpb.ConfState
   137  	data  []byte
   138  }
   139  
   140  // Chain implements consensus.Chain interface.
   141  type Chain struct {
   142  	configurator Configurator
   143  
   144  	rpc RPC
   145  
   146  	raftID    uint64
   147  	channelID string
   148  
   149  	lastKnownLeader uint64
   150  	ActiveNodes     atomic.Value
   151  
   152  	submitC  chan *submit
   153  	applyC   chan apply
   154  	observeC chan<- raft.SoftState // Notifies external observer on leader change (passed in optionally as an argument for tests)
   155  	haltC    chan struct{}         // Signals to goroutines that the chain is halting
   156  	doneC    chan struct{}         // Closes when the chain halts
   157  	startC   chan struct{}         // Closes when the node is started
   158  	snapC    chan *raftpb.Snapshot // Signal to catch up with snapshot
   159  	gcC      chan *gc              // Signal to take snapshot
   160  
   161  	errorCLock sync.RWMutex
   162  	errorC     chan struct{} // returned by Errored()
   163  
   164  	raftMetadataLock     sync.RWMutex
   165  	confChangeInProgress *raftpb.ConfChange
    166  	justElected          bool // true when this node has just been elected leader
    167  	configInflight       bool // true when there is a config block or ConfChange in flight
    168  	blockInflight        int  // number of in-flight blocks
   169  
   170  	clock clock.Clock // Tests can inject a fake clock
   171  
   172  	support consensus.ConsenterSupport
   173  
   174  	lastBlock    *common.Block
   175  	appliedIndex uint64
   176  
   177  	// needed by snapshotting
   178  	sizeLimit        uint32 // SnapshotIntervalSize in bytes
    179  	accDataSize      uint32 // accumulated data size since last snapshot
   180  	lastSnapBlockNum uint64
   181  	confState        raftpb.ConfState // Etcdraft requires ConfState to be persisted within snapshot
   182  
   183  	createPuller CreateBlockPuller // func used to create BlockPuller on demand
   184  
    185  	fresh bool // indicates whether this is a fresh raft node
   186  
    187  	// this is exported so that tests can use `Node.Status()` to get raft node status.
   188  	Node *node
   189  	opts Options
   190  
   191  	Metrics *Metrics
   192  	logger  *flogging.FabricLogger
   193  
   194  	periodicChecker *PeriodicCheck
   195  
   196  	haltCallback func()
    197  	// BCCSP instance
   198  	CryptoProvider bccsp.BCCSP
   199  }
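         // Most of the mutable state above is owned by the run() goroutine and is
         // exchanged with other goroutines through the channels (submitC, applyC,
         // snapC, gcC), under the dedicated locks (errorCLock, raftMetadataLock),
         // or via atomics (lastKnownLeader, ActiveNodes).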
   200  
   201  // NewChain constructs a chain object.
   202  func NewChain(
   203  	support consensus.ConsenterSupport,
   204  	opts Options,
   205  	conf Configurator,
   206  	rpc RPC,
   207  	cryptoProvider bccsp.BCCSP,
   208  	f CreateBlockPuller,
   209  	haltCallback func(),
   210  	observeC chan<- raft.SoftState,
   211  ) (*Chain, error) {
   212  
   213  	lg := opts.Logger.With("channel", support.ChannelID(), "node", opts.RaftID)
   214  
   215  	fresh := !wal.Exist(opts.WALDir)
   216  	storage, err := CreateStorage(lg, opts.WALDir, opts.SnapDir, opts.MemoryStorage)
   217  	if err != nil {
   218  		return nil, errors.Errorf("failed to restore persisted raft data: %s", err)
   219  	}
   220  
   221  	if opts.SnapshotCatchUpEntries == 0 {
   222  		storage.SnapshotCatchUpEntries = DefaultSnapshotCatchUpEntries
   223  	} else {
   224  		storage.SnapshotCatchUpEntries = opts.SnapshotCatchUpEntries
   225  	}
   226  
   227  	sizeLimit := opts.SnapshotIntervalSize
   228  	if sizeLimit == 0 {
   229  		sizeLimit = DefaultSnapshotIntervalSize
   230  	}
   231  
    232  	// get the block number from the last snapshot, if one exists
   233  	var snapBlkNum uint64
   234  	var cc raftpb.ConfState
   235  	if s := storage.Snapshot(); !raft.IsEmptySnap(s) {
   236  		b := protoutil.UnmarshalBlockOrPanic(s.Data)
   237  		snapBlkNum = b.Header.Number
   238  		cc = s.Metadata.ConfState
   239  	}
   240  
   241  	b := support.Block(support.Height() - 1)
   242  	if b == nil {
   243  		return nil, errors.Errorf("failed to get last block")
   244  	}
   245  
   246  	c := &Chain{
   247  		configurator:     conf,
   248  		rpc:              rpc,
   249  		channelID:        support.ChannelID(),
   250  		raftID:           opts.RaftID,
   251  		submitC:          make(chan *submit),
   252  		applyC:           make(chan apply),
   253  		haltC:            make(chan struct{}),
   254  		doneC:            make(chan struct{}),
   255  		startC:           make(chan struct{}),
   256  		snapC:            make(chan *raftpb.Snapshot),
   257  		errorC:           make(chan struct{}),
   258  		gcC:              make(chan *gc),
   259  		observeC:         observeC,
   260  		support:          support,
   261  		fresh:            fresh,
   262  		appliedIndex:     opts.BlockMetadata.RaftIndex,
   263  		lastBlock:        b,
   264  		sizeLimit:        sizeLimit,
   265  		lastSnapBlockNum: snapBlkNum,
   266  		confState:        cc,
   267  		createPuller:     f,
   268  		clock:            opts.Clock,
   269  		haltCallback:     haltCallback,
   270  		Metrics: &Metrics{
   271  			ClusterSize:             opts.Metrics.ClusterSize.With("channel", support.ChannelID()),
   272  			IsLeader:                opts.Metrics.IsLeader.With("channel", support.ChannelID()),
   273  			ActiveNodes:             opts.Metrics.ActiveNodes.With("channel", support.ChannelID()),
   274  			CommittedBlockNumber:    opts.Metrics.CommittedBlockNumber.With("channel", support.ChannelID()),
   275  			SnapshotBlockNumber:     opts.Metrics.SnapshotBlockNumber.With("channel", support.ChannelID()),
   276  			LeaderChanges:           opts.Metrics.LeaderChanges.With("channel", support.ChannelID()),
   277  			ProposalFailures:        opts.Metrics.ProposalFailures.With("channel", support.ChannelID()),
   278  			DataPersistDuration:     opts.Metrics.DataPersistDuration.With("channel", support.ChannelID()),
   279  			NormalProposalsReceived: opts.Metrics.NormalProposalsReceived.With("channel", support.ChannelID()),
   280  			ConfigProposalsReceived: opts.Metrics.ConfigProposalsReceived.With("channel", support.ChannelID()),
   281  		},
   282  		logger:         lg,
   283  		opts:           opts,
   284  		CryptoProvider: cryptoProvider,
   285  	}
   286  
   287  	// Sets initial values for metrics
   288  	c.Metrics.ClusterSize.Set(float64(len(c.opts.BlockMetadata.ConsenterIds)))
   289  	c.Metrics.IsLeader.Set(float64(0)) // all nodes start out as followers
   290  	c.Metrics.ActiveNodes.Set(float64(0))
   291  	c.Metrics.CommittedBlockNumber.Set(float64(c.lastBlock.Header.Number))
   292  	c.Metrics.SnapshotBlockNumber.Set(float64(c.lastSnapBlockNum))
   293  
   294  	// DO NOT use Applied option in config, see https://github.com/etcd-io/etcd/issues/10217
   295  	// We guard against replay of written blocks with `appliedIndex` instead.
   296  	config := &raft.Config{
   297  		ID:              c.raftID,
   298  		ElectionTick:    c.opts.ElectionTick,
   299  		HeartbeatTick:   c.opts.HeartbeatTick,
   300  		MaxSizePerMsg:   c.opts.MaxSizePerMsg,
   301  		MaxInflightMsgs: c.opts.MaxInflightBlocks,
   302  		Logger:          c.logger,
   303  		Storage:         c.opts.MemoryStorage,
    304  		// PreVote prevents a reconnected node from disturbing the network.
   305  		// See etcd/raft doc for more details.
   306  		PreVote:                   true,
   307  		CheckQuorum:               true,
   308  		DisableProposalForwarding: true, // This prevents blocks from being accidentally proposed by followers
   309  	}
   310  
   311  	disseminator := &Disseminator{RPC: c.rpc}
   312  	disseminator.UpdateMetadata(nil) // initialize
   313  	c.ActiveNodes.Store([]uint64{})
   314  
   315  	c.Node = &node{
   316  		chainID:      c.channelID,
   317  		chain:        c,
   318  		logger:       c.logger,
   319  		metrics:      c.Metrics,
   320  		storage:      storage,
   321  		rpc:          disseminator,
   322  		config:       config,
   323  		tickInterval: c.opts.TickInterval,
   324  		clock:        c.clock,
   325  		metadata:     c.opts.BlockMetadata,
   326  		tracker: &Tracker{
   327  			id:     c.raftID,
   328  			sender: disseminator,
   329  			gauge:  c.Metrics.ActiveNodes,
   330  			active: &c.ActiveNodes,
   331  			logger: c.logger,
   332  		},
   333  	}
   334  
   335  	return c, nil
   336  }
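         // Note that NewChain only restores persisted WAL/snapshot state and wires
         // the object together; no goroutine runs and no Raft traffic is generated
         // until Start is called. A minimal, purely hypothetical call site:
         //
         //	chain, err := NewChain(support, opts, conf, rpc, cryptoProvider, createPuller, nil, nil)
         //	if err != nil {
         //		return err
         //	}
         //	chain.Start()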
   337  
   338  // Start instructs the orderer to begin serving the chain and keep it current.
   339  func (c *Chain) Start() {
   340  	c.logger.Infof("Starting Raft node")
   341  
   342  	if err := c.configureComm(); err != nil {
    343  		c.logger.Errorf("Failed to start chain, aborting: %+v", err)
   344  		close(c.doneC)
   345  		return
   346  	}
   347  
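         	// A height greater than 1 normally means this node joins an existing
         	// channel and must catch up with its peers. Right after a consensus-type
         	// migration, the existing blocks were produced by the previous consensus
         	// type, so the Raft node still starts as a brand-new member rather than
         	// as a joiner.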
   348  	isJoin := c.support.Height() > 1
   349  	if isJoin && c.opts.MigrationInit {
   350  		isJoin = false
   351  		c.logger.Infof("Consensus-type migration detected, starting new raft node on an existing channel; height=%d", c.support.Height())
   352  	}
   353  	c.Node.start(c.fresh, isJoin)
   354  
   355  	close(c.startC)
   356  	close(c.errorC)
   357  
   358  	go c.gc()
   359  	go c.run()
   360  
   361  	es := c.newEvictionSuspector()
   362  
   363  	interval := DefaultLeaderlessCheckInterval
   364  	if c.opts.LeaderCheckInterval != 0 {
   365  		interval = c.opts.LeaderCheckInterval
   366  	}
   367  
   368  	c.periodicChecker = &PeriodicCheck{
   369  		Logger:        c.logger,
   370  		Report:        es.confirmSuspicion,
   371  		CheckInterval: interval,
   372  		Condition:     c.suspectEviction,
   373  	}
   374  	c.periodicChecker.Run()
   375  }
   376  
   377  // Order submits normal type transactions for ordering.
   378  func (c *Chain) Order(env *common.Envelope, configSeq uint64) error {
   379  	c.Metrics.NormalProposalsReceived.Add(1)
   380  	return c.Submit(&orderer.SubmitRequest{LastValidationSeq: configSeq, Payload: env, Channel: c.channelID}, 0)
   381  }
   382  
   383  // Configure submits config type transactions for ordering.
   384  func (c *Chain) Configure(env *common.Envelope, configSeq uint64) error {
   385  	c.Metrics.ConfigProposalsReceived.Add(1)
   386  	return c.Submit(&orderer.SubmitRequest{LastValidationSeq: configSeq, Payload: env, Channel: c.channelID}, 0)
   387  }
   388  
   389  // WaitReady blocks when the chain:
    390  // - is catching up with other nodes using a snapshot
   391  //
   392  // In any other case, it returns right away.
   393  func (c *Chain) WaitReady() error {
   394  	if err := c.isRunning(); err != nil {
   395  		return err
   396  	}
   397  
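         	// A nil submit is consumed by the run() goroutine as a pure readiness
         	// probe: it orders nothing, so this send blocks only while run() is
         	// unable to accept new envelopes (e.g. while applying a snapshot).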
   398  	select {
   399  	case c.submitC <- nil:
   400  	case <-c.doneC:
   401  		return errors.Errorf("chain is stopped")
   402  	}
   403  
   404  	return nil
   405  }
   406  
   407  // Errored returns a channel that closes when the chain stops.
   408  func (c *Chain) Errored() <-chan struct{} {
   409  	c.errorCLock.RLock()
   410  	defer c.errorCLock.RUnlock()
   411  	return c.errorC
   412  }
   413  
   414  // Halt stops the chain.
   415  func (c *Chain) Halt() {
   416  	select {
   417  	case <-c.startC:
   418  	default:
   419  		c.logger.Warnf("Attempted to halt a chain that has not started")
   420  		return
   421  	}
   422  
   423  	select {
   424  	case c.haltC <- struct{}{}:
   425  	case <-c.doneC:
   426  		return
   427  	}
   428  	<-c.doneC
   429  
   430  	if c.haltCallback != nil {
   431  		c.haltCallback()
   432  	}
   433  }
   434  
   435  func (c *Chain) isRunning() error {
   436  	select {
   437  	case <-c.startC:
   438  	default:
   439  		return errors.Errorf("chain is not started")
   440  	}
   441  
   442  	select {
   443  	case <-c.doneC:
   444  		return errors.Errorf("chain is stopped")
   445  	default:
   446  	}
   447  
   448  	return nil
   449  }
   450  
   451  // Consensus passes the given ConsensusRequest message to the raft.Node instance
   452  func (c *Chain) Consensus(req *orderer.ConsensusRequest, sender uint64) error {
   453  	if err := c.isRunning(); err != nil {
   454  		return err
   455  	}
   456  
   457  	stepMsg := &raftpb.Message{}
   458  	if err := proto.Unmarshal(req.Payload, stepMsg); err != nil {
   459  		return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err)
   460  	}
   461  
   462  	if err := c.Node.Step(context.TODO(), *stepMsg); err != nil {
   463  		return fmt.Errorf("failed to process Raft Step message: %s", err)
   464  	}
   465  
   466  	if len(req.Metadata) == 0 || atomic.LoadUint64(&c.lastKnownLeader) != sender { // ignore metadata from non-leader
   467  		return nil
   468  	}
   469  
   470  	clusterMetadata := &etcdraft.ClusterMetadata{}
   471  	if err := proto.Unmarshal(req.Metadata, clusterMetadata); err != nil {
   472  		return errors.Errorf("failed to unmarshal ClusterMetadata: %s", err)
   473  	}
   474  
   475  	c.Metrics.ActiveNodes.Set(float64(len(clusterMetadata.ActiveNodes)))
   476  	c.ActiveNodes.Store(clusterMetadata.ActiveNodes)
   477  
   478  	return nil
   479  }
   480  
   481  // Submit forwards the incoming request to:
    482  // - the local run goroutine if this node is the leader
   483  // - the actual leader via the transport mechanism
   484  // The call fails if there's no leader elected yet.
   485  func (c *Chain) Submit(req *orderer.SubmitRequest, sender uint64) error {
   486  	if err := c.isRunning(); err != nil {
   487  		c.Metrics.ProposalFailures.Add(1)
   488  		return err
   489  	}
   490  
   491  	leadC := make(chan uint64, 1)
   492  	select {
   493  	case c.submitC <- &submit{req, leadC}:
   494  		lead := <-leadC
   495  		if lead == raft.None {
   496  			c.Metrics.ProposalFailures.Add(1)
   497  			return errors.Errorf("no Raft leader")
   498  		}
   499  
   500  		if lead != c.raftID {
   501  			if err := c.rpc.SendSubmit(lead, req); err != nil {
   502  				c.Metrics.ProposalFailures.Add(1)
   503  				return err
   504  			}
   505  		}
   506  
   507  	case <-c.doneC:
   508  		c.Metrics.ProposalFailures.Add(1)
   509  		return errors.Errorf("chain is stopped")
   510  	}
   511  
   512  	return nil
   513  }
   514  
   515  type apply struct {
   516  	entries []raftpb.Entry
   517  	soft    *raft.SoftState
   518  }
   519  
   520  func isCandidate(state raft.StateType) bool {
   521  	return state == raft.StatePreCandidate || state == raft.StateCandidate
   522  }
   523  
   524  func (c *Chain) run() {
   525  	ticking := false
   526  	timer := c.clock.NewTimer(time.Second)
    527  	// we need a stopped timer rather than a nil one,
    528  	// because we will be selecting on timer.C()
   529  	if !timer.Stop() {
   530  		<-timer.C()
   531  	}
   532  
   533  	// if timer is already started, this is a no-op
   534  	startTimer := func() {
   535  		if !ticking {
   536  			ticking = true
   537  			timer.Reset(c.support.SharedConfig().BatchTimeout())
   538  		}
   539  	}
   540  
   541  	stopTimer := func() {
   542  		if !timer.Stop() && ticking {
   543  			// we only need to drain the channel if the timer expired (not explicitly stopped)
   544  			<-timer.C()
   545  		}
   546  		ticking = false
   547  	}
   548  
   549  	var soft raft.SoftState
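         	// submitC is the backpressure knob of this loop: it points at c.submitC
         	// while the node can accept envelopes, and is set to nil (disabling that
         	// select case) while a config change or the maximum number of blocks is
         	// in flight, or right after this node is elected leader until it has
         	// caught up.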
   550  	submitC := c.submitC
   551  	var bc *blockCreator
   552  
   553  	var propC chan<- *common.Block
   554  	var cancelProp context.CancelFunc
   555  	cancelProp = func() {} // no-op as initial value
   556  
   557  	becomeLeader := func() (chan<- *common.Block, context.CancelFunc) {
   558  		c.Metrics.IsLeader.Set(1)
   559  
   560  		c.blockInflight = 0
   561  		c.justElected = true
   562  		submitC = nil
   563  		ch := make(chan *common.Block, c.opts.MaxInflightBlocks)
   564  
    565  		// if there is an unfinished ConfChange, we should resume the effort to propose it as
    566  		// the new leader, and wait for it to be committed before starting to serve new requests.
   567  		if cc := c.getInFlightConfChange(); cc != nil {
    568  			// The reason `ProposeConfChange` should be called in a goroutine is documented in the `writeConfigBlock` method.
   569  			go func() {
   570  				if err := c.Node.ProposeConfChange(context.TODO(), *cc); err != nil {
   571  					c.logger.Warnf("Failed to propose configuration update to Raft node: %s", err)
   572  				}
   573  			}()
   574  
   575  			c.confChangeInProgress = cc
   576  			c.configInflight = true
   577  		}
   578  
    579  		// The leader should call Propose in a goroutine, because this method may block
    580  		// if the node is leaderless (this can happen when the leader steps down in a heavily
    581  		// loaded network). We need to make sure applyC can still be consumed properly.
   582  		ctx, cancel := context.WithCancel(context.Background())
   583  		go func(ctx context.Context, ch <-chan *common.Block) {
   584  			for {
   585  				select {
   586  				case b := <-ch:
   587  					data := protoutil.MarshalOrPanic(b)
   588  					if err := c.Node.Propose(ctx, data); err != nil {
   589  						c.logger.Errorf("Failed to propose block [%d] to raft and discard %d blocks in queue: %s", b.Header.Number, len(ch), err)
   590  						return
   591  					}
   592  					c.logger.Debugf("Proposed block [%d] to raft consensus", b.Header.Number)
   593  
   594  				case <-ctx.Done():
   595  					c.logger.Debugf("Quit proposing blocks, discarded %d blocks in the queue", len(ch))
   596  					return
   597  				}
   598  			}
   599  		}(ctx, ch)
   600  
   601  		return ch, cancel
   602  	}
   603  
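         	// On stepping down, any half-filled batch in the block cutter is
         	// discarded, in-flight accounting is reset, and submitC is re-enabled so
         	// that new requests are forwarded to the next leader.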
   604  	becomeFollower := func() {
   605  		cancelProp()
   606  		c.blockInflight = 0
   607  		_ = c.support.BlockCutter().Cut()
   608  		stopTimer()
   609  		submitC = c.submitC
   610  		bc = nil
   611  		c.Metrics.IsLeader.Set(0)
   612  	}
   613  
   614  	for {
   615  		select {
   616  		case s := <-submitC:
   617  			if s == nil {
   618  				// polled by `WaitReady`
   619  				continue
   620  			}
   621  
   622  			if soft.RaftState == raft.StatePreCandidate || soft.RaftState == raft.StateCandidate {
   623  				s.leader <- raft.None
   624  				continue
   625  			}
   626  
   627  			s.leader <- soft.Lead
   628  			if soft.Lead != c.raftID {
   629  				continue
   630  			}
   631  
   632  			batches, pending, err := c.ordered(s.req)
   633  			if err != nil {
   634  				c.logger.Errorf("Failed to order message: %s", err)
   635  				continue
   636  			}
   637  			if pending {
   638  				startTimer() // no-op if timer is already started
   639  			} else {
   640  				stopTimer()
   641  			}
   642  
   643  			c.propose(propC, bc, batches...)
   644  
   645  			if c.configInflight {
   646  				c.logger.Info("Received config transaction, pause accepting transaction till it is committed")
   647  				submitC = nil
   648  			} else if c.blockInflight >= c.opts.MaxInflightBlocks {
   649  				c.logger.Debugf("Number of in-flight blocks (%d) reaches limit (%d), pause accepting transaction",
   650  					c.blockInflight, c.opts.MaxInflightBlocks)
   651  				submitC = nil
   652  			}
   653  
   654  		case app := <-c.applyC:
   655  			if app.soft != nil {
   656  				newLeader := atomic.LoadUint64(&app.soft.Lead) // etcdraft requires atomic access
   657  				if newLeader != soft.Lead {
   658  					c.logger.Infof("Raft leader changed: %d -> %d", soft.Lead, newLeader)
   659  					c.Metrics.LeaderChanges.Add(1)
   660  
   661  					atomic.StoreUint64(&c.lastKnownLeader, newLeader)
   662  
   663  					if newLeader == c.raftID {
   664  						propC, cancelProp = becomeLeader()
   665  					}
   666  
   667  					if soft.Lead == c.raftID {
   668  						becomeFollower()
   669  					}
   670  				}
   671  
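         				// A leader is known again, or we stopped being a candidate:
         				// re-create errorC so that Errored() subscribers see the
         				// consensus backend as healthy again.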
   672  				foundLeader := soft.Lead == raft.None && newLeader != raft.None
   673  				quitCandidate := isCandidate(soft.RaftState) && !isCandidate(app.soft.RaftState)
   674  
   675  				if foundLeader || quitCandidate {
   676  					c.errorCLock.Lock()
   677  					c.errorC = make(chan struct{})
   678  					c.errorCLock.Unlock()
   679  				}
   680  
   681  				if isCandidate(app.soft.RaftState) || newLeader == raft.None {
   682  					atomic.StoreUint64(&c.lastKnownLeader, raft.None)
   683  					select {
   684  					case <-c.errorC:
   685  					default:
   686  						nodeCount := len(c.opts.BlockMetadata.ConsenterIds)
    687  						// Only close the error channel (to signal the broadcast/deliver front-end of a consensus backend error)
    688  						// if we are a cluster of size 3 or more; otherwise we could not expand a cluster of size 1 to 2 nodes.
   689  						if nodeCount > 2 {
   690  							close(c.errorC)
   691  						} else {
   692  							c.logger.Warningf("No leader is present, cluster size is %d", nodeCount)
   693  						}
   694  					}
   695  				}
   696  
   697  				soft = raft.SoftState{Lead: newLeader, RaftState: app.soft.RaftState}
   698  
   699  				// notify external observer
   700  				select {
   701  				case c.observeC <- soft:
   702  				default:
   703  				}
   704  			}
   705  
   706  			c.apply(app.entries)
   707  
   708  			if c.justElected {
   709  				msgInflight := c.Node.lastIndex() > c.appliedIndex
   710  				if msgInflight {
   711  					c.logger.Debugf("There are in flight blocks, new leader should not serve requests")
   712  					continue
   713  				}
   714  
   715  				if c.configInflight {
   716  					c.logger.Debugf("There is config block in flight, new leader should not serve requests")
   717  					continue
   718  				}
   719  
   720  				c.logger.Infof("Start accepting requests as Raft leader at block [%d]", c.lastBlock.Header.Number)
   721  				bc = &blockCreator{
   722  					hash:   protoutil.BlockHeaderHash(c.lastBlock.Header),
   723  					number: c.lastBlock.Header.Number,
   724  					logger: c.logger,
   725  				}
   726  				submitC = c.submitC
   727  				c.justElected = false
   728  			} else if c.configInflight {
   729  				c.logger.Info("Config block or ConfChange in flight, pause accepting transaction")
   730  				submitC = nil
   731  			} else if c.blockInflight < c.opts.MaxInflightBlocks {
   732  				submitC = c.submitC
   733  			}
   734  
   735  		case <-timer.C():
   736  			ticking = false
   737  
   738  			batch := c.support.BlockCutter().Cut()
   739  			if len(batch) == 0 {
   740  				c.logger.Warningf("Batch timer expired with no pending requests, this might indicate a bug")
   741  				continue
   742  			}
   743  
   744  			c.logger.Debugf("Batch timer expired, creating block")
   745  			c.propose(propC, bc, batch) // we are certain this is normal block, no need to block
   746  
   747  		case sn := <-c.snapC:
   748  			if sn.Metadata.Index != 0 {
   749  				if sn.Metadata.Index <= c.appliedIndex {
   750  					c.logger.Debugf("Skip snapshot taken at index %d, because it is behind current applied index %d", sn.Metadata.Index, c.appliedIndex)
   751  					break
   752  				}
   753  
   754  				c.confState = sn.Metadata.ConfState
   755  				c.appliedIndex = sn.Metadata.Index
   756  			} else {
   757  				c.logger.Infof("Received artificial snapshot to trigger catchup")
   758  			}
   759  
   760  			if err := c.catchUp(sn); err != nil {
   761  				c.logger.Panicf("Failed to recover from snapshot taken at Term %d and Index %d: %s",
   762  					sn.Metadata.Term, sn.Metadata.Index, err)
   763  			}
   764  
   765  		case <-c.doneC:
   766  			stopTimer()
   767  			cancelProp()
   768  
   769  			select {
   770  			case <-c.errorC: // avoid closing closed channel
   771  			default:
   772  				close(c.errorC)
   773  			}
   774  
   775  			c.logger.Infof("Stop serving requests")
   776  			c.periodicChecker.Stop()
   777  			return
   778  		}
   779  	}
   780  }
   781  
   782  func (c *Chain) writeBlock(block *common.Block, index uint64) {
   783  	if block.Header.Number > c.lastBlock.Header.Number+1 {
   784  		c.logger.Panicf("Got block [%d], expect block [%d]", block.Header.Number, c.lastBlock.Header.Number+1)
   785  	} else if block.Header.Number < c.lastBlock.Header.Number+1 {
   786  		c.logger.Infof("Got block [%d], expect block [%d], this node was forced to catch up", block.Header.Number, c.lastBlock.Header.Number+1)
   787  		return
   788  	}
   789  
   790  	if c.blockInflight > 0 {
   791  		c.blockInflight-- // only reduce on leader
   792  	}
   793  	c.lastBlock = block
   794  
   795  	c.logger.Infof("Writing block [%d] (Raft index: %d) to ledger", block.Header.Number, index)
   796  
   797  	if protoutil.IsConfigBlock(block) {
   798  		c.writeConfigBlock(block, index)
   799  		return
   800  	}
   801  
   802  	c.raftMetadataLock.Lock()
   803  	c.opts.BlockMetadata.RaftIndex = index
   804  	m := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
   805  	c.raftMetadataLock.Unlock()
   806  
   807  	c.support.WriteBlock(block, m)
   808  }
   809  
    810  // ordered orders the envelope carried in the `msg` SubmitRequest.
    811  // It returns:
    812  //   -- batches [][]*common.Envelope; the batches cut,
    813  //   -- pending bool; whether there are envelopes pending to be ordered,
    814  //   -- err error; the error encountered, if any.
    815  // It takes care of config messages as well as revalidation of messages if the config sequence has advanced.
   816  func (c *Chain) ordered(msg *orderer.SubmitRequest) (batches [][]*common.Envelope, pending bool, err error) {
   817  	seq := c.support.Sequence()
   818  
   819  	if c.isConfig(msg.Payload) {
   820  		// ConfigMsg
   821  		if msg.LastValidationSeq < seq {
   822  			c.logger.Warnf("Config message was validated against %d, although current config seq has advanced (%d)", msg.LastValidationSeq, seq)
   823  			msg.Payload, _, err = c.support.ProcessConfigMsg(msg.Payload)
   824  			if err != nil {
   825  				c.Metrics.ProposalFailures.Add(1)
   826  				return nil, true, errors.Errorf("bad config message: %s", err)
   827  			}
   828  		}
   829  
   830  		batch := c.support.BlockCutter().Cut()
   831  		batches = [][]*common.Envelope{}
   832  		if len(batch) != 0 {
   833  			batches = append(batches, batch)
   834  		}
   835  		batches = append(batches, []*common.Envelope{msg.Payload})
   836  		return batches, false, nil
   837  	}
   838  	// it is a normal message
   839  	if msg.LastValidationSeq < seq {
   840  		c.logger.Warnf("Normal message was validated against %d, although current config seq has advanced (%d)", msg.LastValidationSeq, seq)
   841  		if _, err := c.support.ProcessNormalMsg(msg.Payload); err != nil {
   842  			c.Metrics.ProposalFailures.Add(1)
   843  			return nil, true, errors.Errorf("bad normal message: %s", err)
   844  		}
   845  	}
   846  	batches, pending = c.support.BlockCutter().Ordered(msg.Payload)
   847  	return batches, pending, nil
   848  
   849  }
   850  
   851  func (c *Chain) propose(ch chan<- *common.Block, bc *blockCreator, batches ...[]*common.Envelope) {
   852  	for _, batch := range batches {
   853  		b := bc.createNextBlock(batch)
   854  		c.logger.Infof("Created block [%d], there are %d blocks in flight", b.Header.Number, c.blockInflight)
   855  
   856  		select {
   857  		case ch <- b:
   858  		default:
   859  			c.logger.Panic("Programming error: limit of in-flight blocks does not properly take effect or block is proposed by follower")
   860  		}
   861  
    862  		// if it is a config block, then we should wait for it to be committed
   863  		if protoutil.IsConfigBlock(b) {
   864  			c.configInflight = true
   865  		}
   866  
   867  		c.blockInflight++
   868  	}
   869  
   870  	return
   871  }
   872  
   873  func (c *Chain) catchUp(snap *raftpb.Snapshot) error {
   874  	b, err := protoutil.UnmarshalBlock(snap.Data)
   875  	if err != nil {
   876  		return errors.Errorf("failed to unmarshal snapshot data to block: %s", err)
   877  	}
   878  
   879  	if c.lastBlock.Header.Number >= b.Header.Number {
   880  		c.logger.Warnf("Snapshot is at block [%d], local block number is %d, no sync needed", b.Header.Number, c.lastBlock.Header.Number)
   881  		return nil
   882  	}
   883  
   884  	puller, err := c.createPuller()
   885  	if err != nil {
   886  		return errors.Errorf("failed to create block puller: %s", err)
   887  	}
   888  	defer puller.Close()
   889  
   890  	next := c.lastBlock.Header.Number + 1
   891  
   892  	c.logger.Infof("Catching up with snapshot taken at block [%d], starting from block [%d]", b.Header.Number, next)
   893  
   894  	for next <= b.Header.Number {
   895  		block := puller.PullBlock(next)
   896  		if block == nil {
   897  			return errors.Errorf("failed to fetch block [%d] from cluster", next)
   898  		}
   899  		if protoutil.IsConfigBlock(block) {
   900  			c.support.WriteConfigBlock(block, nil)
   901  
   902  			configMembership := c.detectConfChange(block)
   903  
   904  			if configMembership != nil && configMembership.Changed() {
   905  				c.logger.Infof("Config block [%d] changes consenter set, communication should be reconfigured", block.Header.Number)
   906  
   907  				c.raftMetadataLock.Lock()
   908  				c.opts.BlockMetadata = configMembership.NewBlockMetadata
   909  				c.opts.Consenters = configMembership.NewConsenters
   910  				c.raftMetadataLock.Unlock()
   911  
   912  				if err := c.configureComm(); err != nil {
   913  					c.logger.Panicf("Failed to configure communication: %s", err)
   914  				}
   915  			}
   916  		} else {
   917  			c.support.WriteBlock(block, nil)
   918  		}
   919  
   920  		c.lastBlock = block
   921  		next++
   922  	}
   923  
   924  	c.logger.Infof("Finished syncing with cluster up to and including block [%d]", b.Header.Number)
   925  	return nil
   926  }
   927  
   928  func (c *Chain) detectConfChange(block *common.Block) *MembershipChanges {
   929  	// If config is targeting THIS channel, inspect consenter set and
    930  	// propose a raft ConfChange if it adds/removes a node.
   931  	configMetadata := c.newConfigMetadata(block)
   932  
   933  	if configMetadata == nil {
   934  		return nil
   935  	}
   936  
   937  	if configMetadata.Options != nil &&
   938  		configMetadata.Options.SnapshotIntervalSize != 0 &&
   939  		configMetadata.Options.SnapshotIntervalSize != c.sizeLimit {
   940  		c.logger.Infof("Update snapshot interval size to %d bytes (was %d)",
   941  			configMetadata.Options.SnapshotIntervalSize, c.sizeLimit)
   942  		c.sizeLimit = configMetadata.Options.SnapshotIntervalSize
   943  	}
   944  
   945  	changes, err := ComputeMembershipChanges(c.opts.BlockMetadata, c.opts.Consenters, configMetadata.Consenters, c.support.SharedConfig())
   946  	if err != nil {
   947  		c.logger.Panicf("illegal configuration change detected: %s", err)
   948  	}
   949  
   950  	if changes.Rotated() {
   951  		c.logger.Infof("Config block [%d] rotates TLS certificate of node %d", block.Header.Number, changes.RotatedNode)
   952  	}
   953  
   954  	return changes
   955  }
   956  
   957  func (c *Chain) apply(ents []raftpb.Entry) {
   958  	if len(ents) == 0 {
   959  		return
   960  	}
   961  
   962  	if ents[0].Index > c.appliedIndex+1 {
   963  		c.logger.Panicf("first index of committed entry[%d] should <= appliedIndex[%d]+1", ents[0].Index, c.appliedIndex)
   964  	}
   965  
   966  	var position int
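         	// position remembers the index (within ents) of the last normal entry
         	// carrying data; its payload is the block handed to the snapshotting
         	// goroutine below if accDataSize crosses sizeLimit.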
   967  	for i := range ents {
   968  		switch ents[i].Type {
   969  		case raftpb.EntryNormal:
   970  			if len(ents[i].Data) == 0 {
   971  				break
   972  			}
   973  
   974  			position = i
   975  			c.accDataSize += uint32(len(ents[i].Data))
   976  
   977  			// We need to strictly avoid re-applying normal entries,
   978  			// otherwise we are writing the same block twice.
   979  			if ents[i].Index <= c.appliedIndex {
   980  				c.logger.Debugf("Received block with raft index (%d) <= applied index (%d), skip", ents[i].Index, c.appliedIndex)
   981  				break
   982  			}
   983  
   984  			block := protoutil.UnmarshalBlockOrPanic(ents[i].Data)
   985  			c.writeBlock(block, ents[i].Index)
   986  			c.Metrics.CommittedBlockNumber.Set(float64(block.Header.Number))
   987  
   988  		case raftpb.EntryConfChange:
   989  			var cc raftpb.ConfChange
   990  			if err := cc.Unmarshal(ents[i].Data); err != nil {
   991  				c.logger.Warnf("Failed to unmarshal ConfChange data: %s", err)
   992  				continue
   993  			}
   994  
   995  			c.confState = *c.Node.ApplyConfChange(cc)
   996  
   997  			switch cc.Type {
   998  			case raftpb.ConfChangeAddNode:
   999  				c.logger.Infof("Applied config change to add node %d, current nodes in channel: %+v", cc.NodeID, c.confState.Nodes)
  1000  			case raftpb.ConfChangeRemoveNode:
  1001  				c.logger.Infof("Applied config change to remove node %d, current nodes in channel: %+v", cc.NodeID, c.confState.Nodes)
  1002  			default:
  1003  				c.logger.Panic("Programming error, encountered unsupported raft config change")
  1004  			}
  1005  
  1006  			// This ConfChange was introduced by a previously committed config block,
  1007  			// we can now unblock submitC to accept envelopes.
  1008  			var configureComm bool
  1009  			if c.confChangeInProgress != nil &&
  1010  				c.confChangeInProgress.NodeID == cc.NodeID &&
  1011  				c.confChangeInProgress.Type == cc.Type {
  1012  
  1013  				configureComm = true
  1014  				c.confChangeInProgress = nil
  1015  				c.configInflight = false
  1016  				// report the new cluster size
  1017  				c.Metrics.ClusterSize.Set(float64(len(c.opts.BlockMetadata.ConsenterIds)))
  1018  			}
  1019  
  1020  			lead := atomic.LoadUint64(&c.lastKnownLeader)
  1021  			removeLeader := cc.Type == raftpb.ConfChangeRemoveNode && cc.NodeID == lead
  1022  			shouldHalt := cc.Type == raftpb.ConfChangeRemoveNode && cc.NodeID == c.raftID
  1023  
   1024  			// unblock the `run` goroutine so it can still consume Raft messages
  1025  			go func() {
  1026  				if removeLeader {
  1027  					c.logger.Infof("Current leader is being removed from channel, attempt leadership transfer")
  1028  					c.Node.abdicateLeader(lead)
  1029  				}
  1030  
  1031  				if configureComm && !shouldHalt { // no need to configure comm if this node is going to halt
  1032  					if err := c.configureComm(); err != nil {
  1033  						c.logger.Panicf("Failed to configure communication: %s", err)
  1034  					}
  1035  				}
  1036  
  1037  				if shouldHalt {
  1038  					c.logger.Infof("This node is being removed from replica set")
  1039  					c.Halt()
  1040  					return
  1041  				}
  1042  			}()
  1043  		}
  1044  
  1045  		if ents[i].Index > c.appliedIndex {
  1046  			c.appliedIndex = ents[i].Index
  1047  		}
  1048  	}
  1049  
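         	// Once enough block data has accumulated, ask the gc goroutine to take a
         	// snapshot. The send is non-blocking: if a snapshot is already being
         	// taken, we only log a warning and try again after the next batch of
         	// entries.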
  1050  	if c.accDataSize >= c.sizeLimit {
  1051  		b := protoutil.UnmarshalBlockOrPanic(ents[position].Data)
  1052  
  1053  		select {
  1054  		case c.gcC <- &gc{index: c.appliedIndex, state: c.confState, data: ents[position].Data}:
  1055  			c.logger.Infof("Accumulated %d bytes since last snapshot, exceeding size limit (%d bytes), "+
  1056  				"taking snapshot at block [%d] (index: %d), last snapshotted block number is %d, current nodes: %+v",
  1057  				c.accDataSize, c.sizeLimit, b.Header.Number, c.appliedIndex, c.lastSnapBlockNum, c.confState.Nodes)
  1058  			c.accDataSize = 0
  1059  			c.lastSnapBlockNum = b.Header.Number
  1060  			c.Metrics.SnapshotBlockNumber.Set(float64(b.Header.Number))
  1061  		default:
  1062  			c.logger.Warnf("Snapshotting is in progress, it is very likely that SnapshotIntervalSize is too small")
  1063  		}
  1064  	}
  1065  
  1066  	return
  1067  }
  1068  
  1069  func (c *Chain) gc() {
  1070  	for {
  1071  		select {
  1072  		case g := <-c.gcC:
  1073  			c.Node.takeSnapshot(g.index, g.state, g.data)
  1074  		case <-c.doneC:
  1075  			c.logger.Infof("Stop garbage collecting")
  1076  			return
  1077  		}
  1078  	}
  1079  }
  1080  
  1081  func (c *Chain) isConfig(env *common.Envelope) bool {
  1082  	h, err := protoutil.ChannelHeader(env)
  1083  	if err != nil {
  1084  		c.logger.Panicf("failed to extract channel header from envelope")
  1085  	}
  1086  
  1087  	return h.Type == int32(common.HeaderType_CONFIG) || h.Type == int32(common.HeaderType_ORDERER_TRANSACTION)
  1088  }
  1089  
  1090  func (c *Chain) configureComm() error {
  1091  	// Reset unreachable map when communication is reconfigured
  1092  	c.Node.unreachableLock.Lock()
  1093  	c.Node.unreachable = make(map[uint64]struct{})
  1094  	c.Node.unreachableLock.Unlock()
  1095  
  1096  	nodes, err := c.remotePeers()
  1097  	if err != nil {
  1098  		return err
  1099  	}
  1100  
  1101  	c.configurator.Configure(c.channelID, nodes)
  1102  	return nil
  1103  }
  1104  
  1105  func (c *Chain) remotePeers() ([]cluster.RemoteNode, error) {
  1106  	c.raftMetadataLock.RLock()
  1107  	defer c.raftMetadataLock.RUnlock()
  1108  
  1109  	var nodes []cluster.RemoteNode
  1110  	for raftID, consenter := range c.opts.Consenters {
  1111  		// No need to know yourself
  1112  		if raftID == c.raftID {
  1113  			continue
  1114  		}
  1115  		serverCertAsDER, err := pemToDER(consenter.ServerTlsCert, raftID, "server", c.logger)
  1116  		if err != nil {
  1117  			return nil, errors.WithStack(err)
  1118  		}
  1119  		clientCertAsDER, err := pemToDER(consenter.ClientTlsCert, raftID, "client", c.logger)
  1120  		if err != nil {
  1121  			return nil, errors.WithStack(err)
  1122  		}
  1123  		nodes = append(nodes, cluster.RemoteNode{
  1124  			ID:            raftID,
  1125  			Endpoint:      fmt.Sprintf("%s:%d", consenter.Host, consenter.Port),
  1126  			ServerTLSCert: serverCertAsDER,
  1127  			ClientTLSCert: clientCertAsDER,
  1128  		})
  1129  	}
  1130  	return nodes, nil
  1131  }
  1132  
  1133  func pemToDER(pemBytes []byte, id uint64, certType string, logger *flogging.FabricLogger) ([]byte, error) {
  1134  	bl, _ := pem.Decode(pemBytes)
  1135  	if bl == nil {
  1136  		logger.Errorf("Rejecting PEM block of %s TLS cert for node %d, offending PEM is: %s", certType, id, string(pemBytes))
  1137  		return nil, errors.Errorf("invalid PEM block")
  1138  	}
  1139  	return bl.Bytes, nil
  1140  }
  1141  
   1142  // writeConfigBlock writes a configuration block into the ledger. In
   1143  // addition, it extracts updates to the raft replica set and, if there
   1144  // are changes, updates the cluster membership as well.
  1145  func (c *Chain) writeConfigBlock(block *common.Block, index uint64) {
  1146  	hdr, err := ConfigChannelHeader(block)
  1147  	if err != nil {
  1148  		c.logger.Panicf("Failed to get config header type from config block: %s", err)
  1149  	}
  1150  
  1151  	c.configInflight = false
  1152  
  1153  	switch common.HeaderType(hdr.Type) {
  1154  	case common.HeaderType_CONFIG:
  1155  		configMembership := c.detectConfChange(block)
  1156  
  1157  		c.raftMetadataLock.Lock()
  1158  		c.opts.BlockMetadata.RaftIndex = index
  1159  		if configMembership != nil {
  1160  			c.opts.BlockMetadata = configMembership.NewBlockMetadata
  1161  			c.opts.Consenters = configMembership.NewConsenters
  1162  		}
  1163  		c.raftMetadataLock.Unlock()
  1164  
  1165  		blockMetadataBytes := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
  1166  
  1167  		// write block with metadata
  1168  		c.support.WriteConfigBlock(block, blockMetadataBytes)
  1169  
  1170  		if configMembership == nil {
  1171  			return
  1172  		}
  1173  
  1174  		// update membership
  1175  		if configMembership.ConfChange != nil {
   1176  			// We need to propose the conf change in a goroutine, because it may block if the raft node
   1177  			// becomes leaderless, and we should not block `run`, so it can keep consuming applyC;
   1178  			// otherwise we have a deadlock.
  1179  			go func() {
   1180  				// ProposeConfChange returns an error only if the node is being stopped.
  1181  				// This proposal is dropped by followers because DisableProposalForwarding is enabled.
  1182  				if err := c.Node.ProposeConfChange(context.TODO(), *configMembership.ConfChange); err != nil {
  1183  					c.logger.Warnf("Failed to propose configuration update to Raft node: %s", err)
  1184  				}
  1185  			}()
  1186  
  1187  			c.confChangeInProgress = configMembership.ConfChange
  1188  
  1189  			switch configMembership.ConfChange.Type {
  1190  			case raftpb.ConfChangeAddNode:
  1191  				c.logger.Infof("Config block just committed adds node %d, pause accepting transactions till config change is applied", configMembership.ConfChange.NodeID)
  1192  			case raftpb.ConfChangeRemoveNode:
  1193  				c.logger.Infof("Config block just committed removes node %d, pause accepting transactions till config change is applied", configMembership.ConfChange.NodeID)
  1194  			default:
  1195  				c.logger.Panic("Programming error, encountered unsupported raft config change")
  1196  			}
  1197  
  1198  			c.configInflight = true
  1199  		} else if configMembership.Rotated() {
  1200  			lead := atomic.LoadUint64(&c.lastKnownLeader)
  1201  			if configMembership.RotatedNode == lead {
  1202  				c.logger.Infof("Certificate of Raft leader is being rotated, attempt leader transfer before reconfiguring communication")
  1203  				go func() {
  1204  					c.Node.abdicateLeader(lead)
  1205  					if err := c.configureComm(); err != nil {
  1206  						c.logger.Panicf("Failed to configure communication: %s", err)
  1207  					}
  1208  				}()
  1209  			} else {
  1210  				if err := c.configureComm(); err != nil {
  1211  					c.logger.Panicf("Failed to configure communication: %s", err)
  1212  				}
  1213  			}
  1214  		}
  1215  
  1216  	case common.HeaderType_ORDERER_TRANSACTION:
  1217  		// If this config is channel creation, no extra inspection is needed
  1218  		c.raftMetadataLock.Lock()
  1219  		c.opts.BlockMetadata.RaftIndex = index
  1220  		m := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
  1221  		c.raftMetadataLock.Unlock()
  1222  
  1223  		c.support.WriteConfigBlock(block, m)
  1224  
  1225  	default:
  1226  		c.logger.Panicf("Programming error: unexpected config type: %s", common.HeaderType(hdr.Type))
  1227  	}
  1228  }
  1229  
  1230  // getInFlightConfChange returns ConfChange in-flight if any.
  1231  // It returns confChangeInProgress if it is not nil. Otherwise
  1232  // it returns ConfChange from the last committed block (might be nil).
  1233  func (c *Chain) getInFlightConfChange() *raftpb.ConfChange {
  1234  	if c.confChangeInProgress != nil {
  1235  		return c.confChangeInProgress
  1236  	}
  1237  
  1238  	if c.lastBlock.Header.Number == 0 {
   1239  		return nil // nothing to fail over, the chain has just started
  1240  	}
  1241  
  1242  	if !protoutil.IsConfigBlock(c.lastBlock) {
  1243  		return nil
  1244  	}
  1245  
  1246  	// extracting current Raft configuration state
  1247  	confState := c.Node.ApplyConfChange(raftpb.ConfChange{})
  1248  
  1249  	if len(confState.Nodes) == len(c.opts.BlockMetadata.ConsenterIds) {
   1250  		// A Raft configuration change can only add one node or
   1251  		// remove one node at a time. If the raft conf state size is
   1252  		// equal to the membership stored in the block metadata field,
   1253  		// everything is in sync and there is no need to propose a
   1254  		// config update.
  1255  		return nil
  1256  	}
  1257  
  1258  	return ConfChange(c.opts.BlockMetadata, confState)
  1259  }
  1260  
   1261  // newConfigMetadata extracts consensus metadata from the configuration block
  1262  func (c *Chain) newConfigMetadata(block *common.Block) *etcdraft.ConfigMetadata {
  1263  	metadata, err := ConsensusMetadataFromConfigBlock(block)
  1264  	if err != nil {
  1265  		c.logger.Panicf("error reading consensus metadata: %s", err)
  1266  	}
  1267  	return metadata
  1268  }
  1269  
  1270  // ValidateConsensusMetadata determines the validity of a
  1271  // ConsensusMetadata update during config updates on the channel.
  1272  func (c *Chain) ValidateConsensusMetadata(oldMetadataBytes, newMetadataBytes []byte, newChannel bool) error {
  1273  	// metadata was not updated
  1274  	if newMetadataBytes == nil {
  1275  		return nil
  1276  	}
  1277  	if oldMetadataBytes == nil {
  1278  		c.logger.Panic("Programming Error: ValidateConsensusMetadata called with nil old metadata")
  1279  	}
  1280  
  1281  	oldMetadata := &etcdraft.ConfigMetadata{}
  1282  	if err := proto.Unmarshal(oldMetadataBytes, oldMetadata); err != nil {
  1283  		c.logger.Panicf("Programming Error: Failed to unmarshal old etcdraft consensus metadata: %v", err)
  1284  	}
  1285  	newMetadata := &etcdraft.ConfigMetadata{}
  1286  	if err := proto.Unmarshal(newMetadataBytes, newMetadata); err != nil {
  1287  		return errors.Wrap(err, "failed to unmarshal new etcdraft metadata configuration")
  1288  	}
  1289  
  1290  	err := CheckConfigMetadata(newMetadata)
  1291  	if err != nil {
   1292  		return errors.Wrap(err, "invalid new config metadata")
  1293  	}
  1294  
  1295  	if newChannel {
  1296  		// check if the consenters are a subset of the existing consenters (system channel consenters)
  1297  		set := ConsentersToMap(oldMetadata.Consenters)
  1298  		for _, c := range newMetadata.Consenters {
   1299  			if _, exists := set[string(c.ClientTlsCert)]; !exists {
  1300  				return errors.New("new channel has consenter that is not part of system consenter set")
  1301  			}
  1302  		}
  1303  		return nil
  1304  	}
  1305  
  1306  	// create the dummy parameters for ComputeMembershipChanges
  1307  	dummyOldBlockMetadata, _ := ReadBlockMetadata(nil, oldMetadata)
  1308  	dummyOldConsentersMap := CreateConsentersMap(dummyOldBlockMetadata, oldMetadata)
  1309  	changes, err := ComputeMembershipChanges(dummyOldBlockMetadata, dummyOldConsentersMap, newMetadata.Consenters, c.support.SharedConfig())
  1310  	if err != nil {
  1311  		return err
  1312  	}
  1313  
  1314  	active := c.ActiveNodes.Load().([]uint64)
  1315  	if changes.UnacceptableQuorumLoss(active) {
  1316  		return errors.Errorf("%d out of %d nodes are alive, configuration will result in quorum loss", len(active), len(dummyOldConsentersMap))
  1317  	}
  1318  
  1319  	return nil
  1320  }
  1321  
  1322  func (c *Chain) suspectEviction() bool {
  1323  	if c.isRunning() != nil {
  1324  		return false
  1325  	}
  1326  
  1327  	return atomic.LoadUint64(&c.lastKnownLeader) == uint64(0)
  1328  }
  1329  
  1330  func (c *Chain) newEvictionSuspector() *evictionSuspector {
  1331  	consenterCertificate := &ConsenterCertificate{
  1332  		ConsenterCertificate: c.opts.Cert,
  1333  		CryptoProvider:       c.CryptoProvider,
  1334  	}
  1335  
  1336  	return &evictionSuspector{
  1337  		amIInChannel:               consenterCertificate.IsConsenterOfChannel,
  1338  		evictionSuspicionThreshold: c.opts.EvictionSuspicion,
  1339  		writeBlock:                 c.support.Append,
  1340  		createPuller:               c.createPuller,
  1341  		height:                     c.support.Height,
  1342  		triggerCatchUp:             c.triggerCatchup,
  1343  		logger:                     c.logger,
  1344  		halt: func() {
  1345  			c.Halt()
  1346  		},
  1347  	}
  1348  }
  1349  
  1350  func (c *Chain) triggerCatchup(sn *raftpb.Snapshot) {
  1351  	select {
  1352  	case c.snapC <- sn:
  1353  	case <-c.doneC:
  1354  	}
  1355  }