github.com/osdi23p228/fabric@v0.0.0-20221218062954-77808885f5db/orderer/consensus/etcdraft/chain.go

     1  /*
     2  Copyright IBM Corp. All Rights Reserved.
     3  
     4  SPDX-License-Identifier: Apache-2.0
     5  */
     6  
     7  package etcdraft
     8  
     9  import (
    10  	"context"
    11  	"encoding/pem"
    12  	"fmt"
    14  	"sync"
    15  	"sync/atomic"
    16  	"time"
    17  
    18  	"github.com/osdi23p228/fabric/orderer/common/types"
    19  
    20  	"code.cloudfoundry.org/clock"
    21  	"github.com/golang/protobuf/proto"
    22  	"github.com/hyperledger/fabric-protos-go/common"
    23  	"github.com/hyperledger/fabric-protos-go/orderer"
    24  	"github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
    25  	"github.com/osdi23p228/fabric/bccsp"
        	"github.com/osdi23p228/fabric/common/channelconfig"
    26  	"github.com/osdi23p228/fabric/common/flogging"
    27  	"github.com/osdi23p228/fabric/orderer/common/cluster"
    28  	"github.com/osdi23p228/fabric/orderer/consensus"
    29  	"github.com/osdi23p228/fabric/protoutil"
    30  	"github.com/pkg/errors"
    31  	"go.etcd.io/etcd/raft"
    32  	"go.etcd.io/etcd/raft/raftpb"
    33  	"go.etcd.io/etcd/wal"
    34  )
    35  
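// BYTE through TERABYTE are successive powers of 1024 derived with iota;
// they are used to express size-based settings such as
// DefaultSnapshotIntervalSize in readable units.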
    36  const (
    37  	BYTE = 1 << (10 * iota)
    38  	KILOBYTE
    39  	MEGABYTE
    40  	GIGABYTE
    41  	TERABYTE
    42  )
    43  
    44  const (
    45  	// DefaultSnapshotCatchUpEntries is the default number of entries
    46  	// to preserve in memory when a snapshot is taken. This is for
    47  	// slow followers to catch up.
    48  	DefaultSnapshotCatchUpEntries = uint64(4)
    49  
    50  	// DefaultSnapshotIntervalSize is the default snapshot interval. It is
    51  	// used if SnapshotIntervalSize is not provided in channel config options.
    52  	// It is needed to ensure that a snapshot interval is always set.
    53  	DefaultSnapshotIntervalSize = 16 * MEGABYTE
    54  
    55  	// DefaultEvictionSuspicion is the period of time a node must be
    56  	// leaderless before it starts suspecting its own eviction.
    58  	DefaultEvictionSuspicion = time.Minute * 10
    59  
    60  	// DefaultLeaderlessCheckInterval is the interval at which a chain
    61  	// checks its own leadership status.
    62  	DefaultLeaderlessCheckInterval = time.Second * 10
    63  )
    64  
    65  //go:generate counterfeiter -o mocks/configurator.go . Configurator
    66  
    67  // Configurator is used to configure the communication layer
    68  // when the chain starts.
    69  type Configurator interface {
    70  	Configure(channel string, newNodes []cluster.RemoteNode)
    71  }
    72  
    73  //go:generate counterfeiter -o mocks/mock_rpc.go . RPC
    74  
    75  // RPC is used to mock the transport layer in tests.
    76  type RPC interface {
    77  	SendConsensus(dest uint64, msg *orderer.ConsensusRequest) error
    78  	SendSubmit(dest uint64, request *orderer.SubmitRequest) error
    79  }
    80  
    81  //go:generate counterfeiter -o mocks/mock_blockpuller.go . BlockPuller
    82  
    83  // BlockPuller is used to pull blocks from other OSNs.
    84  type BlockPuller interface {
    85  	PullBlock(seq uint64) *common.Block
    86  	HeightsByEndpoints() (map[string]uint64, error)
    87  	Close()
    88  }
    89  
    90  // CreateBlockPuller is a function to create BlockPuller on demand.
    91  // It is passed into the chain initializer so that tests can mock it.
    92  type CreateBlockPuller func() (BlockPuller, error)
    93  
    94  // Options contains all the configurations relevant to the chain.
    95  type Options struct {
    96  	RaftID uint64
    97  
    98  	Clock clock.Clock
    99  
   100  	WALDir               string
   101  	SnapDir              string
   102  	SnapshotIntervalSize uint32
   103  
   104  	// This is configurable mainly for testing purposes. Users are not
   105  	// expected to alter this. Instead, DefaultSnapshotCatchUpEntries is used.
   106  	SnapshotCatchUpEntries uint64
   107  
   108  	MemoryStorage MemoryStorage
   109  	Logger        *flogging.FabricLogger
   110  
   111  	TickInterval      time.Duration
   112  	ElectionTick      int
   113  	HeartbeatTick     int
   114  	MaxSizePerMsg     uint64
   115  	MaxInflightBlocks int
   116  
   117  	// BlockMetadata and Consenters should only be modified while under lock
   118  	// of raftMetadataLock
   119  	BlockMetadata *etcdraft.BlockMetadata
   120  	Consenters    map[uint64]*etcdraft.Consenter
   121  
   122  	// MigrationInit is set when the node starts right after consensus-type migration
   123  	MigrationInit bool
   124  
   125  	Metrics *Metrics
   126  	Cert    []byte
   127  
   128  	EvictionSuspicion   time.Duration
   129  	LeaderCheckInterval time.Duration
   130  }
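
// Illustrative sketch (not authoritative defaults): a consenter would
// typically populate Options from the orderer and channel configuration,
// roughly along these lines:
//
//	opts := Options{
//		RaftID:            1,
//		Clock:             clock.NewClock(),
//		TickInterval:      500 * time.Millisecond,
//		ElectionTick:      10,
//		HeartbeatTick:     1,
//		MaxInflightBlocks: 5,
//		MaxSizePerMsg:     1 * MEGABYTE,
//	}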
   131  
   132  type submit struct {
   133  	req    *orderer.SubmitRequest
   134  	leader chan uint64
   135  }
   136  
   137  type gc struct {
   138  	index uint64
   139  	state raftpb.ConfState
   140  	data  []byte
   141  }
   142  
   143  // Chain implements consensus.Chain interface.
   144  type Chain struct {
   145  	configurator Configurator
   146  
   147  	rpc RPC
   148  
   149  	raftID    uint64
   150  	channelID string
   151  
   152  	lastKnownLeader uint64
   153  	ActiveNodes     atomic.Value
   154  
   155  	submitC  chan *submit
   156  	applyC   chan apply
   157  	observeC chan<- raft.SoftState // Notifies external observer on leader change (passed in optionally as an argument for tests)
   158  	haltC    chan struct{}         // Signals to goroutines that the chain is halting
   159  	doneC    chan struct{}         // Closes when the chain halts
   160  	startC   chan struct{}         // Closes when the node is started
   161  	snapC    chan *raftpb.Snapshot // Signal to catch up with snapshot
   162  	gcC      chan *gc              // Signal to take snapshot
   163  
   164  	errorCLock sync.RWMutex
   165  	errorC     chan struct{} // returned by Errored()
   166  
   167  	raftMetadataLock     sync.RWMutex
   168  	confChangeInProgress *raftpb.ConfChange
   169  	justElected          bool // this is true when node has just been elected
   170  	configInflight       bool // this is true when there is config block or ConfChange in flight
   171  	blockInflight        int  // number of in flight blocks
   172  
   173  	clock clock.Clock // Tests can inject a fake clock
   174  
   175  	support consensus.ConsenterSupport
   176  
   177  	lastBlock    *common.Block
   178  	appliedIndex uint64
   179  
   180  	// needed by snapshotting
   181  	sizeLimit        uint32 // SnapshotIntervalSize in bytes
   182  	accDataSize      uint32 // accumulative data size since last snapshot
   183  	lastSnapBlockNum uint64
   184  	confState        raftpb.ConfState // Etcdraft requires ConfState to be persisted within snapshot
   185  
   186  	createPuller CreateBlockPuller // func used to create BlockPuller on demand
   187  
   188  	fresh bool // indicate if this is a fresh raft node
   189  
   190  	// this is exported so that tests can use `Node.Status()` to get raft node status.
   191  	Node *node
   192  	opts Options
   193  
   194  	Metrics *Metrics
   195  	logger  *flogging.FabricLogger
   196  
   197  	periodicChecker *PeriodicCheck
   198  
   199  	haltCallback func()
   200  	// BCCSP instance
   201  	CryptoProvider bccsp.BCCSP
   202  }
   203  
   204  // NewChain constructs a chain object.
   205  func NewChain(
   206  	support consensus.ConsenterSupport,
   207  	opts Options,
   208  	conf Configurator,
   209  	rpc RPC,
   210  	cryptoProvider bccsp.BCCSP,
   211  	f CreateBlockPuller,
   212  	haltCallback func(),
   213  	observeC chan<- raft.SoftState,
   214  ) (*Chain, error) {
   215  
   216  	lg := opts.Logger.With("channel", support.ChannelID(), "node", opts.RaftID)
   217  
   218  	fresh := !wal.Exist(opts.WALDir)
   219  	storage, err := CreateStorage(lg, opts.WALDir, opts.SnapDir, opts.MemoryStorage)
   220  	if err != nil {
   221  		return nil, errors.Errorf("failed to restore persisted raft data: %s", err)
   222  	}
   223  
   224  	if opts.SnapshotCatchUpEntries == 0 {
   225  		storage.SnapshotCatchUpEntries = DefaultSnapshotCatchUpEntries
   226  	} else {
   227  		storage.SnapshotCatchUpEntries = opts.SnapshotCatchUpEntries
   228  	}
   229  
   230  	sizeLimit := opts.SnapshotIntervalSize
   231  	if sizeLimit == 0 {
   232  		sizeLimit = DefaultSnapshotIntervalSize
   233  	}
   234  
   235  	// get the block number in the last snapshot, if one exists
   236  	var snapBlkNum uint64
   237  	var cc raftpb.ConfState
   238  	if s := storage.Snapshot(); !raft.IsEmptySnap(s) {
   239  		b := protoutil.UnmarshalBlockOrPanic(s.Data)
   240  		snapBlkNum = b.Header.Number
   241  		cc = s.Metadata.ConfState
   242  	}
   243  
   244  	b := support.Block(support.Height() - 1)
   245  	if b == nil {
   246  		return nil, errors.Errorf("failed to get last block")
   247  	}
   248  
   249  	c := &Chain{
   250  		configurator:     conf,
   251  		rpc:              rpc,
   252  		channelID:        support.ChannelID(),
   253  		raftID:           opts.RaftID,
   254  		submitC:          make(chan *submit),
   255  		applyC:           make(chan apply),
   256  		haltC:            make(chan struct{}),
   257  		doneC:            make(chan struct{}),
   258  		startC:           make(chan struct{}),
   259  		snapC:            make(chan *raftpb.Snapshot),
   260  		errorC:           make(chan struct{}),
   261  		gcC:              make(chan *gc),
   262  		observeC:         observeC,
   263  		support:          support,
   264  		fresh:            fresh,
   265  		appliedIndex:     opts.BlockMetadata.RaftIndex,
   266  		lastBlock:        b,
   267  		sizeLimit:        sizeLimit,
   268  		lastSnapBlockNum: snapBlkNum,
   269  		confState:        cc,
   270  		createPuller:     f,
   271  		clock:            opts.Clock,
   272  		haltCallback:     haltCallback,
   273  		Metrics: &Metrics{
   274  			ClusterSize:             opts.Metrics.ClusterSize.With("channel", support.ChannelID()),
   275  			IsLeader:                opts.Metrics.IsLeader.With("channel", support.ChannelID()),
   276  			ActiveNodes:             opts.Metrics.ActiveNodes.With("channel", support.ChannelID()),
   277  			CommittedBlockNumber:    opts.Metrics.CommittedBlockNumber.With("channel", support.ChannelID()),
   278  			SnapshotBlockNumber:     opts.Metrics.SnapshotBlockNumber.With("channel", support.ChannelID()),
   279  			LeaderChanges:           opts.Metrics.LeaderChanges.With("channel", support.ChannelID()),
   280  			ProposalFailures:        opts.Metrics.ProposalFailures.With("channel", support.ChannelID()),
   281  			DataPersistDuration:     opts.Metrics.DataPersistDuration.With("channel", support.ChannelID()),
   282  			NormalProposalsReceived: opts.Metrics.NormalProposalsReceived.With("channel", support.ChannelID()),
   283  			ConfigProposalsReceived: opts.Metrics.ConfigProposalsReceived.With("channel", support.ChannelID()),
   284  		},
   285  		logger:         lg,
   286  		opts:           opts,
   287  		CryptoProvider: cryptoProvider,
   288  	}
   289  
   290  	// Sets initial values for metrics
   291  	c.Metrics.ClusterSize.Set(float64(len(c.opts.BlockMetadata.ConsenterIds)))
   292  	c.Metrics.IsLeader.Set(float64(0)) // all nodes start out as followers
   293  	c.Metrics.ActiveNodes.Set(float64(0))
   294  	c.Metrics.CommittedBlockNumber.Set(float64(c.lastBlock.Header.Number))
   295  	c.Metrics.SnapshotBlockNumber.Set(float64(c.lastSnapBlockNum))
   296  
   297  	// DO NOT use Applied option in config, see https://github.com/etcd-io/etcd/issues/10217
   298  	// We guard against replay of written blocks with `appliedIndex` instead.
   299  	config := &raft.Config{
   300  		ID:              c.raftID,
   301  		ElectionTick:    c.opts.ElectionTick,
   302  		HeartbeatTick:   c.opts.HeartbeatTick,
   303  		MaxSizePerMsg:   c.opts.MaxSizePerMsg,
   304  		MaxInflightMsgs: c.opts.MaxInflightBlocks,
   305  		Logger:          c.logger,
   306  		Storage:         c.opts.MemoryStorage,
   307  		// PreVote prevents a reconnected node from disturbing the network.
   308  		// See etcd/raft doc for more details.
   309  		PreVote:                   true,
   310  		CheckQuorum:               true,
   311  		DisableProposalForwarding: true, // This prevents blocks from being accidentally proposed by followers
   312  	}
   313  
   314  	disseminator := &Disseminator{RPC: c.rpc}
   315  	disseminator.UpdateMetadata(nil) // initialize
   316  	c.ActiveNodes.Store([]uint64{})
   317  
   318  	c.Node = &node{
   319  		chainID:      c.channelID,
   320  		chain:        c,
   321  		logger:       c.logger,
   322  		metrics:      c.Metrics,
   323  		storage:      storage,
   324  		rpc:          disseminator,
   325  		config:       config,
   326  		tickInterval: c.opts.TickInterval,
   327  		clock:        c.clock,
   328  		metadata:     c.opts.BlockMetadata,
   329  		tracker: &Tracker{
   330  			id:     c.raftID,
   331  			sender: disseminator,
   332  			gauge:  c.Metrics.ActiveNodes,
   333  			active: &c.ActiveNodes,
   334  			logger: c.logger,
   335  		},
   336  	}
   337  
   338  	return c, nil
   339  }
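
// A minimal usage sketch (illustrative; in Fabric this wiring is normally
// done by the etcdraft Consenter when it handles a chain):
//
//	chain, err := NewChain(support, opts, conf, rpc, cryptoProvider, puller, nil, nil)
//	if err != nil {
//		return err
//	}
//	chain.Start()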
   340  
   341  // Start instructs the orderer to begin serving the chain and keep it current.
   342  func (c *Chain) Start() {
   343  	c.logger.Infof("Starting Raft node")
   344  
   345  	if err := c.configureComm(); err != nil {
   346  		c.logger.Errorf("Failed to start chain, aborting: %+v", err)
   347  		close(c.doneC)
   348  		return
   349  	}
   350  
   351  	isJoin := c.support.Height() > 1
   352  	if isJoin && c.opts.MigrationInit {
   353  		isJoin = false
   354  		c.logger.Infof("Consensus-type migration detected, starting new raft node on an existing channel; height=%d", c.support.Height())
   355  	}
   356  	c.Node.start(c.fresh, isJoin)
   357  
   358  	close(c.startC)
   359  	close(c.errorC)
   360  
   361  	go c.gc()
   362  	go c.run()
   363  
   364  	es := c.newEvictionSuspector()
   365  
   366  	interval := DefaultLeaderlessCheckInterval
   367  	if c.opts.LeaderCheckInterval != 0 {
   368  		interval = c.opts.LeaderCheckInterval
   369  	}
   370  
   371  	c.periodicChecker = &PeriodicCheck{
   372  		Logger:        c.logger,
   373  		Report:        es.confirmSuspicion,
   374  		ReportCleared: es.clearSuspicion,
   375  		CheckInterval: interval,
   376  		Condition:     c.suspectEviction,
   377  	}
   378  	c.periodicChecker.Run()
   379  }
   380  
   381  // Order submits normal type transactions for ordering.
   382  func (c *Chain) Order(env *common.Envelope, configSeq uint64) error {
   383  	c.Metrics.NormalProposalsReceived.Add(1)
   384  	return c.Submit(&orderer.SubmitRequest{LastValidationSeq: configSeq, Payload: env, Channel: c.channelID}, 0)
   385  }
   386  
   387  // Configure submits config type transactions for ordering.
   388  func (c *Chain) Configure(env *common.Envelope, configSeq uint64) error {
   389  	c.Metrics.ConfigProposalsReceived.Add(1)
   390  	return c.Submit(&orderer.SubmitRequest{LastValidationSeq: configSeq, Payload: env, Channel: c.channelID}, 0)
   391  }
   392  
   393  // WaitReady blocks when the chain:
   394  // - is catching up with other nodes using snapshot
   395  //
   396  // In any other case, it returns right away.
   397  func (c *Chain) WaitReady() error {
   398  	if err := c.isRunning(); err != nil {
   399  		return err
   400  	}
   401  
   402  	select {
   403  	case c.submitC <- nil:
   404  	case <-c.doneC:
   405  		return errors.Errorf("chain is stopped")
   406  	}
   407  
   408  	return nil
   409  }
   410  
   411  // Errored returns a channel that closes when the chain stops.
   412  func (c *Chain) Errored() <-chan struct{} {
   413  	c.errorCLock.RLock()
   414  	defer c.errorCLock.RUnlock()
   415  	return c.errorC
   416  }
   417  
   418  // Halt stops the chain.
   419  func (c *Chain) Halt() {
   420  	select {
   421  	case <-c.startC:
   422  	default:
   423  		c.logger.Warnf("Attempted to halt a chain that has not started")
   424  		return
   425  	}
   426  
   427  	select {
   428  	case c.haltC <- struct{}{}:
   429  	case <-c.doneC:
   430  		return
   431  	}
   432  	<-c.doneC
   433  
   434  	if c.haltCallback != nil {
   435  		c.haltCallback()
   436  	}
   437  }
   438  
   439  func (c *Chain) isRunning() error {
   440  	select {
   441  	case <-c.startC:
   442  	default:
   443  		return errors.Errorf("chain is not started")
   444  	}
   445  
   446  	select {
   447  	case <-c.doneC:
   448  		return errors.Errorf("chain is stopped")
   449  	default:
   450  	}
   451  
   452  	return nil
   453  }
   454  
   455  // Consensus passes the given ConsensusRequest message to the raft.Node instance
   456  func (c *Chain) Consensus(req *orderer.ConsensusRequest, sender uint64) error {
   457  	if err := c.isRunning(); err != nil {
   458  		return err
   459  	}
   460  
   461  	stepMsg := &raftpb.Message{}
   462  	if err := proto.Unmarshal(req.Payload, stepMsg); err != nil {
   463  		return fmt.Errorf("failed to unmarshal StepRequest payload to Raft Message: %s", err)
   464  	}
   465  
   466  	if stepMsg.To != c.raftID {
   467  		c.logger.Warnf("Received msg to %d, my ID is probably out of date, cowardly halting", stepMsg.To)
   468  		c.Halt()
   469  		return nil
   470  	}
   471  
   472  	if err := c.Node.Step(context.TODO(), *stepMsg); err != nil {
   473  		return fmt.Errorf("failed to process Raft Step message: %s", err)
   474  	}
   475  
   476  	if len(req.Metadata) == 0 || atomic.LoadUint64(&c.lastKnownLeader) != sender { // ignore metadata from non-leader
   477  		return nil
   478  	}
   479  
   480  	clusterMetadata := &etcdraft.ClusterMetadata{}
   481  	if err := proto.Unmarshal(req.Metadata, clusterMetadata); err != nil {
   482  		return errors.Errorf("failed to unmarshal ClusterMetadata: %s", err)
   483  	}
   484  
   485  	c.Metrics.ActiveNodes.Set(float64(len(clusterMetadata.ActiveNodes)))
   486  	c.ActiveNodes.Store(clusterMetadata.ActiveNodes)
   487  
   488  	return nil
   489  }
   490  
   491  // Submit forwards the incoming request to:
   492  // - the local run goroutine if this node is the leader
   493  // - the actual leader via the transport mechanism
   494  // The call fails if there's no leader elected yet.
   495  func (c *Chain) Submit(req *orderer.SubmitRequest, sender uint64) error {
   496  	if err := c.isRunning(); err != nil {
   497  		c.Metrics.ProposalFailures.Add(1)
   498  		return err
   499  	}
   500  
   501  	leadC := make(chan uint64, 1)
   502  	select {
   503  	case c.submitC <- &submit{req, leadC}:
   504  		lead := <-leadC
   505  		if lead == raft.None {
   506  			c.Metrics.ProposalFailures.Add(1)
   507  			return errors.Errorf("no Raft leader")
   508  		}
   509  
   510  		if lead != c.raftID {
   511  			if err := c.rpc.SendSubmit(lead, req); err != nil {
   512  				c.Metrics.ProposalFailures.Add(1)
   513  				return err
   514  			}
   515  		}
   516  
   517  	case <-c.doneC:
   518  		c.Metrics.ProposalFailures.Add(1)
   519  		return errors.Errorf("chain is stopped")
   520  	}
   521  
   522  	return nil
   523  }
   524  
   525  type apply struct {
   526  	entries []raftpb.Entry
   527  	soft    *raft.SoftState
   528  }
   529  
   530  func isCandidate(state raft.StateType) bool {
   531  	return state == raft.StatePreCandidate || state == raft.StateCandidate
   532  }
   533  
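// run is the main event loop of the chain. It is started as a goroutine by
// Start and returns when the chain halts (doneC is closed).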
   534  func (c *Chain) run() {
   535  	ticking := false
   536  	timer := c.clock.NewTimer(time.Second)
   537  	// we need a stopped timer rather than nil,
   538  	// because we will be select waiting on timer.C()
   539  	if !timer.Stop() {
   540  		<-timer.C()
   541  	}
   542  
   543  	// if timer is already started, this is a no-op
   544  	startTimer := func() {
   545  		if !ticking {
   546  			ticking = true
   547  			timer.Reset(c.support.SharedConfig().BatchTimeout())
   548  		}
   549  	}
   550  
   551  	stopTimer := func() {
   552  		if !timer.Stop() && ticking {
   553  			// we only need to drain the channel if the timer expired (not explicitly stopped)
   554  			<-timer.C()
   555  		}
   556  		ticking = false
   557  	}
   558  
   559  	var soft raft.SoftState
   560  	submitC := c.submitC
   561  	var bc *blockCreator
   562  
   563  	var propC chan<- *common.Block
   564  	var cancelProp context.CancelFunc
   565  	cancelProp = func() {} // no-op as initial value
   566  
   567  	becomeLeader := func() (chan<- *common.Block, context.CancelFunc) {
   568  		c.Metrics.IsLeader.Set(1)
   569  
   570  		c.blockInflight = 0
   571  		c.justElected = true
   572  		submitC = nil
   573  		ch := make(chan *common.Block, c.opts.MaxInflightBlocks)
   574  
   575  		// if there is an unfinished ConfChange, we should resume the effort to propose it as
   576  		// the new leader, and wait for it to be committed before we start serving new requests.
   577  		if cc := c.getInFlightConfChange(); cc != nil {
   578  			// The reason `ProposeConfChange` should be called in a goroutine is documented in the `writeConfigBlock` method.
   579  			go func() {
   580  				if err := c.Node.ProposeConfChange(context.TODO(), *cc); err != nil {
   581  					c.logger.Warnf("Failed to propose configuration update to Raft node: %s", err)
   582  				}
   583  			}()
   584  
   585  			c.confChangeInProgress = cc
   586  			c.configInflight = true
   587  		}
   588  
   589  		// The leader should call Propose in a goroutine, because this method may block
   590  		// if the node becomes leaderless (this can happen when the leader steps down in a
   591  		// heavily loaded network). We need to make sure applyC can still be consumed properly.
   592  		ctx, cancel := context.WithCancel(context.Background())
   593  		go func(ctx context.Context, ch <-chan *common.Block) {
   594  			for {
   595  				select {
   596  				case b := <-ch:
   597  					data := protoutil.MarshalOrPanic(b)
   598  					if err := c.Node.Propose(ctx, data); err != nil {
   599  						c.logger.Errorf("Failed to propose block [%d] to raft, discarding %d blocks in queue: %s", b.Header.Number, len(ch), err)
   600  						return
   601  					}
   602  					c.logger.Debugf("Proposed block [%d] to raft consensus", b.Header.Number)
   603  
   604  				case <-ctx.Done():
   605  					c.logger.Debugf("Quit proposing blocks, discarded %d blocks in the queue", len(ch))
   606  					return
   607  				}
   608  			}
   609  		}(ctx, ch)
   610  
   611  		return ch, cancel
   612  	}
   613  
   614  	becomeFollower := func() {
   615  		cancelProp()
   616  		c.blockInflight = 0
   617  		_ = c.support.BlockCutter().Cut()
   618  		stopTimer()
   619  		submitC = c.submitC
   620  		bc = nil
   621  		c.Metrics.IsLeader.Set(0)
   622  	}
   623  
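	// Event loop: submitC carries client envelopes, applyC carries committed
	// Raft entries and leadership changes, timer.C() fires on batch timeout,
	// snapC signals catch-up via snapshot, and doneC ends the loop when the
	// chain halts.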
   624  	for {
   625  		select {
   626  		case s := <-submitC:
   627  			if s == nil {
   628  				// polled by `WaitReady`
   629  				continue
   630  			}
   631  
   632  			if soft.RaftState == raft.StatePreCandidate || soft.RaftState == raft.StateCandidate {
   633  				s.leader <- raft.None
   634  				continue
   635  			}
   636  
   637  			s.leader <- soft.Lead
   638  			if soft.Lead != c.raftID {
   639  				continue
   640  			}
   641  
   642  			batches, pending, err := c.ordered(s.req)
   643  			if err != nil {
   644  				c.logger.Errorf("Failed to order message: %s", err)
   645  				continue
   646  			}
   647  			if pending {
   648  				startTimer() // no-op if timer is already started
   649  			} else {
   650  				stopTimer()
   651  			}
   652  
   653  			c.propose(propC, bc, batches...)
   654  
   655  			if c.configInflight {
   656  				c.logger.Info("Received config transaction, pause accepting transactions until it is committed")
   657  				submitC = nil
   658  			} else if c.blockInflight >= c.opts.MaxInflightBlocks {
   659  				c.logger.Debugf("Number of in-flight blocks (%d) reaches limit (%d), pause accepting transactions",
   660  					c.blockInflight, c.opts.MaxInflightBlocks)
   661  				submitC = nil
   662  			}
   663  
   664  		case app := <-c.applyC:
   665  			if app.soft != nil {
   666  				newLeader := atomic.LoadUint64(&app.soft.Lead) // etcdraft requires atomic access
   667  				if newLeader != soft.Lead {
   668  					c.logger.Infof("Raft leader changed: %d -> %d", soft.Lead, newLeader)
   669  					c.Metrics.LeaderChanges.Add(1)
   670  
   671  					atomic.StoreUint64(&c.lastKnownLeader, newLeader)
   672  
   673  					if newLeader == c.raftID {
   674  						propC, cancelProp = becomeLeader()
   675  					}
   676  
   677  					if soft.Lead == c.raftID {
   678  						becomeFollower()
   679  					}
   680  				}
   681  
   682  				foundLeader := soft.Lead == raft.None && newLeader != raft.None
   683  				quitCandidate := isCandidate(soft.RaftState) && !isCandidate(app.soft.RaftState)
   684  
   685  				if foundLeader || quitCandidate {
   686  					c.errorCLock.Lock()
   687  					c.errorC = make(chan struct{})
   688  					c.errorCLock.Unlock()
   689  				}
   690  
   691  				if isCandidate(app.soft.RaftState) || newLeader == raft.None {
   692  					atomic.StoreUint64(&c.lastKnownLeader, raft.None)
   693  					select {
   694  					case <-c.errorC:
   695  					default:
   696  						nodeCount := len(c.opts.BlockMetadata.ConsenterIds)
   697  						// Only close the error channel (to signal the broadcast/deliver front-end of a consensus backend error)
   698  						// if we are a cluster of size 3 or more; otherwise we could not expand a cluster of size 1 to 2 nodes.
   699  						if nodeCount > 2 {
   700  							close(c.errorC)
   701  						} else {
   702  							c.logger.Warningf("No leader is present, cluster size is %d", nodeCount)
   703  						}
   704  					}
   705  				}
   706  
   707  				soft = raft.SoftState{Lead: newLeader, RaftState: app.soft.RaftState}
   708  
   709  				// notify external observer
   710  				select {
   711  				case c.observeC <- soft:
   712  				default:
   713  				}
   714  			}
   715  
   716  			c.apply(app.entries)
   717  
   718  			if c.justElected {
   719  				msgInflight := c.Node.lastIndex() > c.appliedIndex
   720  				if msgInflight {
   721  					c.logger.Debugf("There are in-flight blocks, new leader should not serve requests")
   722  					continue
   723  				}
   724  
   725  				if c.configInflight {
   726  					c.logger.Debugf("There is a config block in flight, new leader should not serve requests")
   727  					continue
   728  				}
   729  
   730  				c.logger.Infof("Start accepting requests as Raft leader at block [%d]", c.lastBlock.Header.Number)
   731  				bc = &blockCreator{
   732  					hash:   protoutil.BlockHeaderHash(c.lastBlock.Header),
   733  					number: c.lastBlock.Header.Number,
   734  					logger: c.logger,
   735  				}
   736  				submitC = c.submitC
   737  				c.justElected = false
   738  			} else if c.configInflight {
   739  				c.logger.Info("Config block or ConfChange in flight, pause accepting transactions")
   740  				submitC = nil
   741  			} else if c.blockInflight < c.opts.MaxInflightBlocks {
   742  				submitC = c.submitC
   743  			}
   744  
   745  		case <-timer.C():
   746  			ticking = false
   747  
   748  			batch := c.support.BlockCutter().Cut()
   749  			if len(batch) == 0 {
   750  				c.logger.Warningf("Batch timer expired with no pending requests, this might indicate a bug")
   751  				continue
   752  			}
   753  
   754  			c.logger.Debugf("Batch timer expired, creating block")
   755  			c.propose(propC, bc, batch) // we are certain this is normal block, no need to block
   756  
   757  		case sn := <-c.snapC:
   758  			if sn.Metadata.Index != 0 {
   759  				if sn.Metadata.Index <= c.appliedIndex {
   760  					c.logger.Debugf("Skip snapshot taken at index %d, because it is behind current applied index %d", sn.Metadata.Index, c.appliedIndex)
   761  					break
   762  				}
   763  
   764  				c.confState = sn.Metadata.ConfState
   765  				c.appliedIndex = sn.Metadata.Index
   766  			} else {
   767  				c.logger.Infof("Received artificial snapshot to trigger catchup")
   768  			}
   769  
   770  			if err := c.catchUp(sn); err != nil {
   771  				c.logger.Panicf("Failed to recover from snapshot taken at Term %d and Index %d: %s",
   772  					sn.Metadata.Term, sn.Metadata.Index, err)
   773  			}
   774  
   775  		case <-c.doneC:
   776  			stopTimer()
   777  			cancelProp()
   778  
   779  			select {
   780  			case <-c.errorC: // avoid closing closed channel
   781  			default:
   782  				close(c.errorC)
   783  			}
   784  
   785  			c.logger.Infof("Stop serving requests")
   786  			c.periodicChecker.Stop()
   787  			return
   788  		}
   789  	}
   790  }
   791  
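// writeBlock writes a block committed by Raft at the given index to the
// ledger. Config blocks are delegated to writeConfigBlock; for normal blocks
// the Raft index is persisted in the block metadata alongside the block.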
   792  func (c *Chain) writeBlock(block *common.Block, index uint64) {
   793  	if block.Header.Number > c.lastBlock.Header.Number+1 {
   794  		c.logger.Panicf("Got block [%d], expect block [%d]", block.Header.Number, c.lastBlock.Header.Number+1)
   795  	} else if block.Header.Number < c.lastBlock.Header.Number+1 {
   796  		c.logger.Infof("Got block [%d], expect block [%d], this node was forced to catch up", block.Header.Number, c.lastBlock.Header.Number+1)
   797  		return
   798  	}
   799  
   800  	if c.blockInflight > 0 {
   801  		c.blockInflight-- // only reduce on leader
   802  	}
   803  	c.lastBlock = block
   804  
   805  	c.logger.Infof("Writing block [%d] (Raft index: %d) to ledger", block.Header.Number, index)
   806  
   807  	if protoutil.IsConfigBlock(block) {
   808  		c.writeConfigBlock(block, index)
   809  		return
   810  	}
   811  
   812  	c.raftMetadataLock.Lock()
   813  	c.opts.BlockMetadata.RaftIndex = index
   814  	m := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
   815  	c.raftMetadataLock.Unlock()
   816  
   817  	c.support.WriteBlock(block, m)
   818  }
   819  
   820  // ordered orders the envelope contained in the SubmitRequest `msg`.
   821  // Returns
   822  //   -- batches [][]*common.Envelope; the batches cut,
   823  //   -- pending bool; if there are envelopes pending to be ordered,
   824  //   -- err error; the error encountered, if any.
   825  // It takes care of config messages as well as the revalidation of messages if the config sequence has advanced.
   826  func (c *Chain) ordered(msg *orderer.SubmitRequest) (batches [][]*common.Envelope, pending bool, err error) {
   827  	seq := c.support.Sequence()
   828  
   829  	if c.isConfig(msg.Payload) {
   830  		// ConfigMsg
   831  		if msg.LastValidationSeq < seq {
   832  			c.logger.Warnf("Config message was validated against %d, although current config seq has advanced (%d)", msg.LastValidationSeq, seq)
   833  			msg.Payload, _, err = c.support.ProcessConfigMsg(msg.Payload)
   834  			if err != nil {
   835  				c.Metrics.ProposalFailures.Add(1)
   836  				return nil, true, errors.Errorf("bad config message: %s", err)
   837  			}
   838  		}
   839  
   840  		batch := c.support.BlockCutter().Cut()
   841  		batches = [][]*common.Envelope{}
   842  		if len(batch) != 0 {
   843  			batches = append(batches, batch)
   844  		}
   845  		batches = append(batches, []*common.Envelope{msg.Payload})
   846  		return batches, false, nil
   847  	}
   848  	// it is a normal message
   849  	if msg.LastValidationSeq < seq {
   850  		c.logger.Warnf("Normal message was validated against %d, although current config seq has advanced (%d)", msg.LastValidationSeq, seq)
   851  		if _, err := c.support.ProcessNormalMsg(msg.Payload); err != nil {
   852  			c.Metrics.ProposalFailures.Add(1)
   853  			return nil, true, errors.Errorf("bad normal message: %s", err)
   854  		}
   855  	}
   856  	batches, pending = c.support.BlockCutter().Ordered(msg.Payload)
   857  	return batches, pending, nil
   858  
   859  }
   860  
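// propose creates a block for each batch and hands it to the proposing
// goroutine started in becomeLeader. Config blocks mark the chain as having a
// config in flight; every proposed block increments blockInflight.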
   861  func (c *Chain) propose(ch chan<- *common.Block, bc *blockCreator, batches ...[]*common.Envelope) {
   862  	for _, batch := range batches {
   863  		b := bc.createNextBlock(batch)
   864  		c.logger.Infof("Created block [%d], there are %d blocks in flight", b.Header.Number, c.blockInflight)
   865  
   866  		select {
   867  		case ch <- b:
   868  		default:
   869  			c.logger.Panic("Programming error: limit of in-flight blocks does not properly take effect or block is proposed by follower")
   870  		}
   871  
   872  		// if it is a config block, we should wait for it to be committed
   873  		if protoutil.IsConfigBlock(b) {
   874  			c.configInflight = true
   875  		}
   876  
   877  		c.blockInflight++
   878  	}
   879  }
   880  
   881  func (c *Chain) catchUp(snap *raftpb.Snapshot) error {
   882  	b, err := protoutil.UnmarshalBlock(snap.Data)
   883  	if err != nil {
   884  		return errors.Errorf("failed to unmarshal snapshot data to block: %s", err)
   885  	}
   886  
   887  	if c.lastBlock.Header.Number >= b.Header.Number {
   888  		c.logger.Warnf("Snapshot is at block [%d], local block number is %d, no sync needed", b.Header.Number, c.lastBlock.Header.Number)
   889  		return nil
   890  	} else if b.Header.Number == c.lastBlock.Header.Number+1 {
   891  		c.logger.Infof("The only missing block [%d] is encapsulated in snapshot, committing it to shortcut catchup process", b.Header.Number)
   892  		c.commitBlock(b)
   893  		c.lastBlock = b
   894  		return nil
   895  	}
   896  
   897  	puller, err := c.createPuller()
   898  	if err != nil {
   899  		return errors.Errorf("failed to create block puller: %s", err)
   900  	}
   901  	defer puller.Close()
   902  
   903  	next := c.lastBlock.Header.Number + 1
   904  
   905  	c.logger.Infof("Catching up with snapshot taken at block [%d], starting from block [%d]", b.Header.Number, next)
   906  
   907  	for next <= b.Header.Number {
   908  		block := puller.PullBlock(next)
   909  		if block == nil {
   910  			return errors.Errorf("failed to fetch block [%d] from cluster", next)
   911  		}
   912  		c.commitBlock(block)
   913  		c.lastBlock = block
   914  		next++
   915  	}
   916  
   917  	c.logger.Infof("Finished syncing with cluster up to and including block [%d]", b.Header.Number)
   918  	return nil
   919  }
   920  
   921  func (c *Chain) commitBlock(block *common.Block) {
   922  	if !protoutil.IsConfigBlock(block) {
   923  		c.support.WriteBlock(block, nil)
   924  		return
   925  	}
   926  
   927  	c.support.WriteConfigBlock(block, nil)
   928  
   929  	configMembership := c.detectConfChange(block)
   930  
   931  	if configMembership != nil && configMembership.Changed() {
   932  		c.logger.Infof("Config block [%d] changes consenter set, communication should be reconfigured", block.Header.Number)
   933  
   934  		c.raftMetadataLock.Lock()
   935  		c.opts.BlockMetadata = configMembership.NewBlockMetadata
   936  		c.opts.Consenters = configMembership.NewConsenters
   937  		c.raftMetadataLock.Unlock()
   938  
   939  		if err := c.configureComm(); err != nil {
   940  			c.logger.Panicf("Failed to configure communication: %s", err)
   941  		}
   942  	}
   943  }
   944  
   945  func (c *Chain) detectConfChange(block *common.Block) *MembershipChanges {
   946  	// If config is targeting THIS channel, inspect consenter set and
   947  	// propose a raft ConfChange if it adds/removes a node.
   948  	configMetadata := c.newConfigMetadata(block)
   949  
   950  	if configMetadata == nil {
   951  		return nil
   952  	}
   953  
   954  	if configMetadata.Options != nil &&
   955  		configMetadata.Options.SnapshotIntervalSize != 0 &&
   956  		configMetadata.Options.SnapshotIntervalSize != c.sizeLimit {
   957  		c.logger.Infof("Update snapshot interval size to %d bytes (was %d)",
   958  			configMetadata.Options.SnapshotIntervalSize, c.sizeLimit)
   959  		c.sizeLimit = configMetadata.Options.SnapshotIntervalSize
   960  	}
   961  
   962  	changes, err := ComputeMembershipChanges(c.opts.BlockMetadata, c.opts.Consenters, configMetadata.Consenters)
   963  	if err != nil {
   964  		c.logger.Panicf("illegal configuration change detected: %s", err)
   965  	}
   966  
   967  	if changes.Rotated() {
   968  		c.logger.Infof("Config block [%d] rotates TLS certificate of node %d", block.Header.Number, changes.RotatedNode)
   969  	}
   970  
   971  	return changes
   972  }
   973  
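// apply processes committed Raft entries: normal entries are unmarshalled
// into blocks and written to the ledger, while config-change entries are
// applied to the Raft node and may trigger communication reconfiguration or
// halting. Once the data accumulated since the last snapshot exceeds
// sizeLimit, a snapshot is requested via gcC.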
   974  func (c *Chain) apply(ents []raftpb.Entry) {
   975  	if len(ents) == 0 {
   976  		return
   977  	}
   978  
   979  	if ents[0].Index > c.appliedIndex+1 {
   980  		c.logger.Panicf("first index of committed entry[%d] should be <= appliedIndex[%d]+1", ents[0].Index, c.appliedIndex)
   981  	}
   982  
   983  	var position int
   984  	for i := range ents {
   985  		switch ents[i].Type {
   986  		case raftpb.EntryNormal:
   987  			if len(ents[i].Data) == 0 {
   988  				break
   989  			}
   990  
   991  			position = i
   992  			c.accDataSize += uint32(len(ents[i].Data))
   993  
   994  			// We need to strictly avoid re-applying normal entries,
   995  			// otherwise we are writing the same block twice.
   996  			if ents[i].Index <= c.appliedIndex {
   997  				c.logger.Debugf("Received block with raft index (%d) <= applied index (%d), skip", ents[i].Index, c.appliedIndex)
   998  				break
   999  			}
  1000  
  1001  			block := protoutil.UnmarshalBlockOrPanic(ents[i].Data)
  1002  			c.writeBlock(block, ents[i].Index)
  1003  			c.Metrics.CommittedBlockNumber.Set(float64(block.Header.Number))
  1004  
  1005  		case raftpb.EntryConfChange:
  1006  			var cc raftpb.ConfChange
  1007  			if err := cc.Unmarshal(ents[i].Data); err != nil {
  1008  				c.logger.Warnf("Failed to unmarshal ConfChange data: %s", err)
  1009  				continue
  1010  			}
  1011  
  1012  			c.confState = *c.Node.ApplyConfChange(cc)
  1013  
  1014  			switch cc.Type {
  1015  			case raftpb.ConfChangeAddNode:
  1016  				c.logger.Infof("Applied config change to add node %d, current nodes in channel: %+v", cc.NodeID, c.confState.Nodes)
  1017  			case raftpb.ConfChangeRemoveNode:
  1018  				c.logger.Infof("Applied config change to remove node %d, current nodes in channel: %+v", cc.NodeID, c.confState.Nodes)
  1019  			default:
  1020  				c.logger.Panic("Programming error, encountered unsupported raft config change")
  1021  			}
  1022  
  1023  			// This ConfChange was introduced by a previously committed config block;
  1024  			// we can now unblock submitC to accept envelopes.
  1025  			var configureComm bool
  1026  			if c.confChangeInProgress != nil &&
  1027  				c.confChangeInProgress.NodeID == cc.NodeID &&
  1028  				c.confChangeInProgress.Type == cc.Type {
  1029  
  1030  				configureComm = true
  1031  				c.confChangeInProgress = nil
  1032  				c.configInflight = false
  1033  				// report the new cluster size
  1034  				c.Metrics.ClusterSize.Set(float64(len(c.opts.BlockMetadata.ConsenterIds)))
  1035  			}
  1036  
  1037  			lead := atomic.LoadUint64(&c.lastKnownLeader)
  1038  			removeLeader := cc.Type == raftpb.ConfChangeRemoveNode && cc.NodeID == lead
  1039  			shouldHalt := cc.Type == raftpb.ConfChangeRemoveNode && cc.NodeID == c.raftID
  1040  
  1041  			// unblock the `run` goroutine so it can still consume Raft messages
  1042  			go func() {
  1043  				if removeLeader {
  1044  					c.logger.Infof("Current leader is being removed from channel, attempt leadership transfer")
  1045  					c.Node.abdicateLeader(lead)
  1046  				}
  1047  
  1048  				if configureComm && !shouldHalt { // no need to configure comm if this node is going to halt
  1049  					if err := c.configureComm(); err != nil {
  1050  						c.logger.Panicf("Failed to configure communication: %s", err)
  1051  					}
  1052  				}
  1053  
  1054  				if shouldHalt {
  1055  					c.logger.Infof("This node is being removed from replica set")
  1056  					c.Halt()
  1057  					return
  1058  				}
  1059  			}()
  1060  		}
  1061  
  1062  		if ents[i].Index > c.appliedIndex {
  1063  			c.appliedIndex = ents[i].Index
  1064  		}
  1065  	}
  1066  
  1067  	if c.accDataSize >= c.sizeLimit {
  1068  		b := protoutil.UnmarshalBlockOrPanic(ents[position].Data)
  1069  
  1070  		select {
  1071  		case c.gcC <- &gc{index: c.appliedIndex, state: c.confState, data: ents[position].Data}:
  1072  			c.logger.Infof("Accumulated %d bytes since last snapshot, exceeding size limit (%d bytes), "+
  1073  				"taking snapshot at block [%d] (index: %d), last snapshotted block number is %d, current nodes: %+v",
  1074  				c.accDataSize, c.sizeLimit, b.Header.Number, c.appliedIndex, c.lastSnapBlockNum, c.confState.Nodes)
  1075  			c.accDataSize = 0
  1076  			c.lastSnapBlockNum = b.Header.Number
  1077  			c.Metrics.SnapshotBlockNumber.Set(float64(b.Header.Number))
  1078  		default:
  1079  			c.logger.Warnf("Snapshotting is in progress, it is very likely that SnapshotIntervalSize is too small")
  1080  		}
  1081  	}
  1082  }
  1083  
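// gc runs as a goroutine started by Start; it takes the Raft snapshots
// requested on gcC and stops when the chain halts.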
  1084  func (c *Chain) gc() {
  1085  	for {
  1086  		select {
  1087  		case g := <-c.gcC:
  1088  			c.Node.takeSnapshot(g.index, g.state, g.data)
  1089  		case <-c.doneC:
  1090  			c.logger.Infof("Stop garbage collecting")
  1091  			return
  1092  		}
  1093  	}
  1094  }
  1095  
  1096  func (c *Chain) isConfig(env *common.Envelope) bool {
  1097  	h, err := protoutil.ChannelHeader(env)
  1098  	if err != nil {
  1099  		c.logger.Panicf("failed to extract channel header from envelope")
  1100  	}
  1101  
  1102  	return h.Type == int32(common.HeaderType_CONFIG) || h.Type == int32(common.HeaderType_ORDERER_TRANSACTION)
  1103  }
  1104  
  1105  func (c *Chain) configureComm() error {
  1106  	// Reset unreachable map when communication is reconfigured
  1107  	c.Node.unreachableLock.Lock()
  1108  	c.Node.unreachable = make(map[uint64]struct{})
  1109  	c.Node.unreachableLock.Unlock()
  1110  
  1111  	nodes, err := c.remotePeers()
  1112  	if err != nil {
  1113  		return err
  1114  	}
  1115  
  1116  	c.configurator.Configure(c.channelID, nodes)
  1117  	return nil
  1118  }
  1119  
  1120  func (c *Chain) remotePeers() ([]cluster.RemoteNode, error) {
  1121  	c.raftMetadataLock.RLock()
  1122  	defer c.raftMetadataLock.RUnlock()
  1123  
  1124  	var nodes []cluster.RemoteNode
  1125  	for raftID, consenter := range c.opts.Consenters {
  1126  		// No need to know yourself
  1127  		if raftID == c.raftID {
  1128  			continue
  1129  		}
  1130  		serverCertAsDER, err := pemToDER(consenter.ServerTlsCert, raftID, "server", c.logger)
  1131  		if err != nil {
  1132  			return nil, errors.WithStack(err)
  1133  		}
  1134  		clientCertAsDER, err := pemToDER(consenter.ClientTlsCert, raftID, "client", c.logger)
  1135  		if err != nil {
  1136  			return nil, errors.WithStack(err)
  1137  		}
  1138  		nodes = append(nodes, cluster.RemoteNode{
  1139  			ID:            raftID,
  1140  			Endpoint:      fmt.Sprintf("%s:%d", consenter.Host, consenter.Port),
  1141  			ServerTLSCert: serverCertAsDER,
  1142  			ClientTLSCert: clientCertAsDER,
  1143  		})
  1144  	}
  1145  	return nodes, nil
  1146  }
  1147  
  1148  func pemToDER(pemBytes []byte, id uint64, certType string, logger *flogging.FabricLogger) ([]byte, error) {
  1149  	bl, _ := pem.Decode(pemBytes)
  1150  	if bl == nil {
  1151  		logger.Errorf("Rejecting PEM block of %s TLS cert for node %d, offending PEM is: %s", certType, id, string(pemBytes))
  1152  		return nil, errors.Errorf("invalid PEM block")
  1153  	}
  1154  	return bl.Bytes, nil
  1155  }
  1156  
  1157  // writeConfigBlock writes configuration blocks into the ledger. In
  1158  // addition, it extracts updates to the raft replica set and, if there
  1159  // are changes, updates the cluster membership as well.
  1160  func (c *Chain) writeConfigBlock(block *common.Block, index uint64) {
  1161  	hdr, err := ConfigChannelHeader(block)
  1162  	if err != nil {
  1163  		c.logger.Panicf("Failed to get config header type from config block: %s", err)
  1164  	}
  1165  
  1166  	c.configInflight = false
  1167  
  1168  	switch common.HeaderType(hdr.Type) {
  1169  	case common.HeaderType_CONFIG:
  1170  		configMembership := c.detectConfChange(block)
  1171  
  1172  		c.raftMetadataLock.Lock()
  1173  		c.opts.BlockMetadata.RaftIndex = index
  1174  		if configMembership != nil {
  1175  			c.opts.BlockMetadata = configMembership.NewBlockMetadata
  1176  			c.opts.Consenters = configMembership.NewConsenters
  1177  		}
  1178  		c.raftMetadataLock.Unlock()
  1179  
  1180  		blockMetadataBytes := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
  1181  
  1182  		// write block with metadata
  1183  		c.support.WriteConfigBlock(block, blockMetadataBytes)
  1184  
  1185  		if configMembership == nil {
  1186  			return
  1187  		}
  1188  
  1189  		// update membership
  1190  		if configMembership.ConfChange != nil {
  1191  			// We need to propose the conf change in a goroutine, because it may block if the raft node
  1192  			// becomes leaderless, and we should not block `run`, so that it can keep consuming applyC;
  1193  			// otherwise we have a deadlock.
  1194  			go func() {
  1195  				// ProposeConfChange returns an error only if the node is being stopped.
  1196  				// This proposal is dropped by followers because DisableProposalForwarding is enabled.
  1197  				if err := c.Node.ProposeConfChange(context.TODO(), *configMembership.ConfChange); err != nil {
  1198  					c.logger.Warnf("Failed to propose configuration update to Raft node: %s", err)
  1199  				}
  1200  			}()
  1201  
  1202  			c.confChangeInProgress = configMembership.ConfChange
  1203  
  1204  			switch configMembership.ConfChange.Type {
  1205  			case raftpb.ConfChangeAddNode:
  1206  				c.logger.Infof("Config block just committed adds node %d, pause accepting transactions till config change is applied", configMembership.ConfChange.NodeID)
  1207  			case raftpb.ConfChangeRemoveNode:
  1208  				c.logger.Infof("Config block just committed removes node %d, pause accepting transactions till config change is applied", configMembership.ConfChange.NodeID)
  1209  			default:
  1210  				c.logger.Panic("Programming error, encountered unsupported raft config change")
  1211  			}
  1212  
  1213  			c.configInflight = true
  1214  		} else if configMembership.Rotated() {
  1215  			lead := atomic.LoadUint64(&c.lastKnownLeader)
  1216  			if configMembership.RotatedNode == lead {
  1217  				c.logger.Infof("Certificate of Raft leader is being rotated, attempt leader transfer before reconfiguring communication")
  1218  				go func() {
  1219  					c.Node.abdicateLeader(lead)
  1220  					if err := c.configureComm(); err != nil {
  1221  						c.logger.Panicf("Failed to configure communication: %s", err)
  1222  					}
  1223  				}()
  1224  			} else {
  1225  				if err := c.configureComm(); err != nil {
  1226  					c.logger.Panicf("Failed to configure communication: %s", err)
  1227  				}
  1228  			}
  1229  		}
  1230  
  1231  	case common.HeaderType_ORDERER_TRANSACTION:
  1232  		// If this config is channel creation, no extra inspection is needed
  1233  		c.raftMetadataLock.Lock()
  1234  		c.opts.BlockMetadata.RaftIndex = index
  1235  		m := protoutil.MarshalOrPanic(c.opts.BlockMetadata)
  1236  		c.raftMetadataLock.Unlock()
  1237  
  1238  		c.support.WriteConfigBlock(block, m)
  1239  
  1240  	default:
  1241  		c.logger.Panicf("Programming error: unexpected config type: %s", common.HeaderType(hdr.Type))
  1242  	}
  1243  }
  1244  
  1245  // getInFlightConfChange returns ConfChange in-flight if any.
  1246  // It returns confChangeInProgress if it is not nil. Otherwise
  1247  // it returns ConfChange from the last committed block (might be nil).
  1248  func (c *Chain) getInFlightConfChange() *raftpb.ConfChange {
  1249  	if c.confChangeInProgress != nil {
  1250  		return c.confChangeInProgress
  1251  	}
  1252  
  1253  	if c.lastBlock.Header.Number == 0 {
  1254  		return nil // nothing to fail over, the chain has just started
  1255  	}
  1256  
  1257  	if !protoutil.IsConfigBlock(c.lastBlock) {
  1258  		return nil
  1259  	}
  1260  
  1261  	// extracting current Raft configuration state
  1262  	confState := c.Node.ApplyConfChange(raftpb.ConfChange{})
  1263  
  1264  	if len(confState.Nodes) == len(c.opts.BlockMetadata.ConsenterIds) {
  1265  		// A Raft configuration change can only add or remove one node
  1266  		// at a time. If the raft conf state size is equal to the
  1267  		// membership stored in the block metadata field, everything is
  1268  		// in sync and there is no need to propose a config update.
  1270  		return nil
  1271  	}
  1272  
  1273  	return ConfChange(c.opts.BlockMetadata, confState)
  1274  }
  1275  
  1276  // newConfigMetadata extracts consensus metadata from the configuration block
  1277  func (c *Chain) newConfigMetadata(block *common.Block) *etcdraft.ConfigMetadata {
  1278  	metadata, err := ConsensusMetadataFromConfigBlock(block)
  1279  	if err != nil {
  1280  		c.logger.Panicf("error reading consensus metadata: %s", err)
  1281  	}
  1282  	return metadata
  1283  }
  1284  
  1285  // ValidateConsensusMetadata determines the validity of a
  1286  // ConsensusMetadata update during config updates on the channel.
  1287  func (c *Chain) ValidateConsensusMetadata(oldOrdererConfig, newOrdererConfig channelconfig.Orderer, newChannel bool) error {
  1288  	if newOrdererConfig == nil {
  1289  		c.logger.Panic("Programming Error: ValidateConsensusMetadata called with nil new channel config")
  1290  		return nil
  1291  	}
  1292  
  1293  	// metadata was not updated
  1294  	if newOrdererConfig.ConsensusMetadata() == nil {
  1295  		return nil
  1296  	}
  1297  
  1298  	if oldOrdererConfig == nil {
  1299  		c.logger.Panic("Programming Error: ValidateConsensusMetadata called with nil old channel config")
  1300  		return nil
  1301  	}
  1302  
  1303  	if oldOrdererConfig.ConsensusMetadata() == nil {
  1304  		c.logger.Panic("Programming Error: ValidateConsensusMetadata called with nil old metadata")
  1305  		return nil
  1306  	}
  1307  
  1308  	oldMetadata := &etcdraft.ConfigMetadata{}
  1309  	if err := proto.Unmarshal(oldOrdererConfig.ConsensusMetadata(), oldMetadata); err != nil {
  1310  		c.logger.Panicf("Programming Error: Failed to unmarshal old etcdraft consensus metadata: %v", err)
  1311  	}
  1312  
  1313  	newMetadata := &etcdraft.ConfigMetadata{}
  1314  	if err := proto.Unmarshal(newOrdererConfig.ConsensusMetadata(), newMetadata); err != nil {
  1315  		return errors.Wrap(err, "failed to unmarshal new etcdraft metadata configuration")
  1316  	}
  1317  
  1318  	verifyOpts, err := createX509VerifyOptions(newOrdererConfig)
  1319  	if err != nil {
  1320  		return errors.Wrapf(err, "failed to create x509 verify options from old and new orderer config")
  1321  	}
  1322  
  1323  	if err := VerifyConfigMetadata(newMetadata, verifyOpts); err != nil {
  1324  		return errors.Wrap(err, "invalid new config metadata")
  1325  	}
  1326  
  1327  	if newChannel {
  1328  		// check if the consenters are a subset of the existing consenters (system channel consenters)
  1329  		set := ConsentersToMap(oldMetadata.Consenters)
  1330  		for _, c := range newMetadata.Consenters {
  1331  			if !set.Exists(c) {
  1332  				return errors.New("new channel has consenter that is not part of system consenter set")
  1333  			}
  1334  		}
  1335  		return nil
  1336  	}
  1337  
  1338  	// create the dummy parameters for ComputeMembershipChanges
  1339  	c.raftMetadataLock.RLock()
  1340  	dummyOldBlockMetadata := proto.Clone(c.opts.BlockMetadata).(*etcdraft.BlockMetadata)
  1341  	c.raftMetadataLock.RUnlock()
  1342  
  1343  	dummyOldConsentersMap := CreateConsentersMap(dummyOldBlockMetadata, oldMetadata)
  1344  	changes, err := ComputeMembershipChanges(dummyOldBlockMetadata, dummyOldConsentersMap, newMetadata.Consenters)
  1345  	if err != nil {
  1346  		return err
  1347  	}
  1348  
  1349  	// The new config metadata was verified above. Additionally, we need to check the new consenters' certificates for expiration.
  1350  	for _, c := range changes.AddedNodes {
  1351  		if err := validateConsenterTLSCerts(c, verifyOpts, false); err != nil {
  1352  			return errors.Wrapf(err, "consenter %s:%d has invalid certificates", c.Host, c.Port)
  1353  		}
  1354  	}
  1355  
  1356  	active := c.ActiveNodes.Load().([]uint64)
  1357  	if changes.UnacceptableQuorumLoss(active) {
  1358  		return errors.Errorf("%d out of %d nodes are alive, configuration will result in quorum loss", len(active), len(dummyOldConsentersMap))
  1359  	}
  1360  
  1361  	return nil
  1362  }
  1363  
  1364  // StatusReport returns the ClusterRelation & Status
  1365  func (c *Chain) StatusReport() (types.ClusterRelation, types.Status) {
  1366  	return types.ClusterRelationMember, types.StatusActive
  1367  }
  1368  
  1369  func (c *Chain) suspectEviction() bool {
  1370  	if c.isRunning() != nil {
  1371  		return false
  1372  	}
  1373  
  1374  	return atomic.LoadUint64(&c.lastKnownLeader) == uint64(0)
  1375  }
  1376  
  1377  func (c *Chain) newEvictionSuspector() *evictionSuspector {
  1378  	consenterCertificate := &ConsenterCertificate{
  1379  		Logger:               c.logger,
  1380  		ConsenterCertificate: c.opts.Cert,
  1381  		CryptoProvider:       c.CryptoProvider,
  1382  	}
  1383  
  1384  	return &evictionSuspector{
  1385  		amIInChannel:               consenterCertificate.IsConsenterOfChannel,
  1386  		evictionSuspicionThreshold: c.opts.EvictionSuspicion,
  1387  		writeBlock:                 c.support.Append,
  1388  		createPuller:               c.createPuller,
  1389  		height:                     c.support.Height,
  1390  		triggerCatchUp:             c.triggerCatchup,
  1391  		logger:                     c.logger,
  1392  		halt: func() {
  1393  			c.Halt()
  1394  		},
  1395  	}
  1396  }
  1397  
  1398  func (c *Chain) triggerCatchup(sn *raftpb.Snapshot) {
  1399  	select {
  1400  	case c.snapC <- sn:
  1401  	case <-c.doneC:
  1402  	}
  1403  }