github.com/bigzoro/my_simplechain@v0.0.0-20240315012955-8ad0a2a29bb9/consensus/raft/backend/handler.go

package backend

import (
	"context"
	"errors"
	"fmt"
	etcdlog "log"
	"net/http"
	"net/url"
	"os"
	"strconv"
	"sync"
	"time"

	"github.com/bigzoro/my_simplechain/common"
	"github.com/bigzoro/my_simplechain/consensus/raft"
	"github.com/bigzoro/my_simplechain/core"
	"github.com/bigzoro/my_simplechain/core/types"
	"github.com/bigzoro/my_simplechain/eth/downloader"
	"github.com/bigzoro/my_simplechain/event"
	"github.com/bigzoro/my_simplechain/log"
	"github.com/bigzoro/my_simplechain/miner"
	"github.com/bigzoro/my_simplechain/p2p"
	"github.com/bigzoro/my_simplechain/p2p/enode"
	"github.com/bigzoro/my_simplechain/p2p/enr"
	"github.com/bigzoro/my_simplechain/rlp"

	"github.com/coreos/etcd/etcdserver/stats"
	"github.com/coreos/etcd/pkg/fileutil"
	raftTypes "github.com/coreos/etcd/pkg/types"
	etcdRaft "github.com/coreos/etcd/raft"
	"github.com/coreos/etcd/raft/raftpb"
	"github.com/coreos/etcd/rafthttp"
	"github.com/coreos/etcd/snap"
	"github.com/coreos/etcd/wal"
	mapset "github.com/deckarep/golang-set"
	"github.com/syndtr/goleveldb/leveldb"
)

type ProtocolManager struct {
	mu       sync.RWMutex // For protecting concurrent JS access to "local peer" and "remote peer" state
	quitSync chan struct{}
	stopped  bool

	// Static configuration
	joinExisting   bool // Whether to join an existing cluster when a WAL doesn't already exist
	bootstrapNodes []*enode.Node
	raftId         uint16
	raftPort       uint16
	raftConfigPath string // add wsw

	// Local peer state (protected by mu vs concurrent access via JS)
	address       *raft.Address
	role          int    // Role: minter or verifier
	appliedIndex  uint64 // The index of the last-applied raft entry
	snapshotIndex uint64 // The index of the latest snapshot.

	// Remote peer state (protected by mu vs concurrent access via JS)
	leader       uint16
	peers        map[uint16]*raft.Peer
	removedPeers mapset.Set // *Permanently removed* peers

	// P2P transport
	p2pServer *p2p.Server // Initialized in start()

	// Blockchain services
	blockchain *core.BlockChain
	downloader *downloader.Downloader
	minter     *miner.Miner

	// Blockchain events
	eventMux      *event.TypeMux
	minedBlockSub *event.TypeMuxSubscription

	// Raft proposal events
	blockProposalC      chan *types.Block      // for mined blocks to raft
	confChangeProposalC chan raftpb.ConfChange // for config changes from js console to raft

	// Raft transport
	unsafeRawNode etcdRaft.Node
	transport     *rafthttp.Transport
	httpstopc     chan struct{}
	httpdonec     chan struct{}

	// Raft snapshotting
	snapshotter *snap.Snapshotter
	snapdir     string
	confState   raftpb.ConfState

	// Raft write-ahead log
	waldir string
	wal    *wal.WAL

	// Storage
	raftDb      *leveldb.DB             // Persistent storage for last-applied raft index
	raftStorage *etcdRaft.MemoryStorage // Volatile raft storage
}

//
// Public interface
//

func NewProtocolManager(raftId uint16, raftPort uint16, blockchain *core.BlockChain, mux *event.TypeMux, bootstrapNodes []*enode.Node, joinExisting bool, datadir string, minter *miner.Miner, downloader *downloader.Downloader) (*ProtocolManager, error) {
	waldir := fmt.Sprintf("%s/raft-wal", datadir)
	snapdir := fmt.Sprintf("%s/raft-snap", datadir)
	raftDbLoc := fmt.Sprintf("%s/raft-state", datadir)
	raftConfigDir := fmt.Sprintf("%s/sipe", datadir)

	manager := &ProtocolManager{
		bootstrapNodes:      bootstrapNodes,
		peers:               make(map[uint16]*raft.Peer),
		leader:              uint16(etcdRaft.None),
		removedPeers:        mapset.NewSet(),
		joinExisting:        joinExisting,
		blockchain:          blockchain,
		eventMux:            mux,
		blockProposalC:      make(chan *types.Block),
		confChangeProposalC: make(chan raftpb.ConfChange),
		httpstopc:           make(chan struct{}),
		httpdonec:           make(chan struct{}),
		waldir:              waldir,
		snapdir:             snapdir,
		raftConfigPath:      raftConfigDir,
		snapshotter:         snap.New(snapdir),
		raftId:              raftId,
		raftPort:            raftPort,
		quitSync:            make(chan struct{}),
		raftStorage:         etcdRaft.NewMemoryStorage(),
		minter:              minter,
		downloader:          downloader,
	}

	if db, err := openRaftDb(raftDbLoc); err != nil {
		return nil, err
	} else {
		manager.raftDb = db
	}

	return manager, nil
}

func (pm *ProtocolManager) Start(p2pServer *p2p.Server) {
	log.Info("starting raft protocol handler")

	pm.p2pServer = p2pServer
	pm.minedBlockSub = pm.eventMux.Subscribe(core.NewMinedBlockEvent{})
	pm.startRaft()
	go pm.minedBroadcastLoop()
}

func (pm *ProtocolManager) Stop() {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	defer log.Info("raft protocol handler stopped")

	if pm.stopped {
		return
	}

	log.Info("stopping raft protocol handler...")

	for raftId, peer := range pm.peers {
		pm.disconnectFromPeer(raftId, peer)
	}

	pm.minedBlockSub.Unsubscribe()

	if pm.transport != nil {
		pm.transport.Stop()
	}

	close(pm.httpstopc)
	<-pm.httpdonec
	close(pm.quitSync)

	if pm.unsafeRawNode != nil {
		pm.unsafeRawNode.Stop()
	}

	pm.raftDb.Close()

	pm.p2pServer = nil

	pm.minter.Stop()

	pm.stopped = true
}
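
// Illustrative sketch (not part of the original file): the expected lifecycle
// of a ProtocolManager, assuming the caller already holds a data directory,
// the bootstrap enode list, and running blockchain/miner/downloader/p2p
// instances. The variable names (datadir, bootstrapNodes, p2pSrv, mnr, dl,
// chain, mux) are hypothetical.
//
//	pm, err := NewProtocolManager(1, 50401, chain, mux, bootstrapNodes, false, datadir, mnr, dl)
//	if err != nil {
//		log.Error("raft backend init failed", "err", err)
//		return
//	}
//	pm.Start(p2pSrv) // wires the p2p server, subscribes to mined blocks, starts raft
//	defer pm.Stop()  // safe to call once; guarded by pm.stopped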

func (pm *ProtocolManager) NodeInfo() *RaftNodeInfo {
	pm.mu.RLock() // as we read role and peers
	defer pm.mu.RUnlock()

	var roleDescription string
	if pm.role == raft.MinterRole {
		roleDescription = "minter"
	} else {
		roleDescription = "verifier"
	}

	peerAddresses := make([]*raft.Address, len(pm.peers))
	peerIdx := 0
	for _, peer := range pm.peers {
		peerAddresses[peerIdx] = peer.Address
		peerIdx += 1
	}

	removedPeerIfaces := pm.removedPeers
	removedPeerIds := make([]uint16, removedPeerIfaces.Cardinality())
	i := 0
	for removedIface := range removedPeerIfaces.Iterator().C {
		removedPeerIds[i] = removedIface.(uint16)
		i++
	}

	//
	// NOTE: before exposing any new fields here, make sure that the underlying
	// ProtocolManager members are protected from concurrent access by pm.mu!
	//
	return &RaftNodeInfo{
		ClusterSize:    len(pm.peers) + 1,
		Role:           roleDescription,
		Address:        pm.address,
		PeerAddresses:  peerAddresses,
		RemovedPeerIds: removedPeerIds,
		AppliedIndex:   pm.appliedIndex,
		SnapshotIndex:  pm.snapshotIndex,
	}
}

// There seems to be a very rare race in raft where during `etcdRaft.StartNode`
// it will call back our `Process` method before it's finished returning the
// `raft.Node`, `pm.unsafeRawNode`, to us. This re-entrance through a separate
// thread will cause a nil pointer dereference. To work around this, this
// getter method should be used instead of reading `pm.unsafeRawNode` directly.
func (pm *ProtocolManager) rawNode() etcdRaft.Node {
	for pm.unsafeRawNode == nil {
		time.Sleep(100 * time.Millisecond)
	}

	return pm.unsafeRawNode
}

func (pm *ProtocolManager) nextRaftId() uint16 {
	pm.mu.RLock()
	defer pm.mu.RUnlock()

	maxId := pm.raftId

	for peerId := range pm.peers {
		if maxId < peerId {
			maxId = peerId
		}
	}

	removedPeerIfaces := pm.removedPeers
	for removedIface := range removedPeerIfaces.Iterator().C {
		removedId := removedIface.(uint16)

		if maxId < removedId {
			maxId = removedId
		}
	}

	return maxId + 1
}

func (pm *ProtocolManager) isRaftIdRemoved(id uint16) bool {
	pm.mu.RLock()
	defer pm.mu.RUnlock()

	return pm.removedPeers.Contains(id)
}

func (pm *ProtocolManager) isRaftIdUsed(raftId uint16) bool {
	if pm.raftId == raftId || pm.isRaftIdRemoved(raftId) {
		return true
	}

	pm.mu.RLock()
	defer pm.mu.RUnlock()

	return pm.peers[raftId] != nil
}

func (pm *ProtocolManager) isNodeAlreadyInCluster(node *enode.Node) error {
	pm.mu.RLock()
	defer pm.mu.RUnlock()

	for _, peer := range pm.peers {
		peerRaftId := peer.Address.RaftId
		peerNode := peer.P2pNode

		if peerNode.ID() == node.ID() {
			return fmt.Errorf("node with this enode has already been added to the cluster: %s", node.ID())
		}

		if peerNode.IP().Equal(node.IP()) {
			if peerNode.TCP() == node.TCP() {
				return fmt.Errorf("existing node %v with raft ID %v is already using eth p2p at %v:%v", peerNode.ID(), peerRaftId, node.IP(), node.TCP())
			} else if peer.Address.RaftPort == enr.RaftPort(node.RaftPort()) {
				return fmt.Errorf("existing node %v with raft ID %v is already using raft at %v:%v", peerNode.ID(), peerRaftId, node.IP(), node.RaftPort())
			}
		}
	}

	return nil
}

func (pm *ProtocolManager) ProposeNewPeer(enodeId string, raftId uint16) (uint16, error) {
	node, err := enode.ParseV4(enodeId)
	if err != nil {
		return 0, err
	}

	if len(node.IP()) != 4 {
		return 0, fmt.Errorf("expected IPv4 address (with length 4), but got IP of length %v", len(node.IP()))
	}

	if !node.HasRaftPort() {
		return 0, fmt.Errorf("enodeId is missing raftport querystring parameter: %v", enodeId)
	}

	if err := pm.isNodeAlreadyInCluster(node); err != nil {
		return 0, err // wsw add
	}
	address := raft.NewAddress(raftId, node.RaftPort(), node)

	pm.confChangeProposalC <- raftpb.ConfChange{
		Type:    raftpb.ConfChangeAddNode,
		NodeID:  uint64(raftId),
		Context: address.ToBytes(),
	}

	return raftId, nil
}
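
// Illustrative only: ProposeNewPeer above expects a v4 enode URL that carries
// the peer's raft port as a `raftport` query-string parameter. The URL below
// is made up; the pubkey must be the peer's 128-hex-char node ID.
//
//	raftId, err := pm.ProposeNewPeer(
//		"enode://<128-hex-char-pubkey>@10.0.0.2:30303?discport=0&raftport=50401",
//		pm.nextRaftId())
//
// The proposal is only enqueued on confChangeProposalC here; the peer is not
// actually added until the corresponding ConfChangeAddNode entry is committed
// and applied in eventLoop.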

func (pm *ProtocolManager) ProposePeerRemoval(raftId uint16) {
	pm.confChangeProposalC <- raftpb.ConfChange{
		Type:   raftpb.ConfChangeRemoveNode,
		NodeID: uint64(raftId),
	}
}

//
// MsgWriter interface (necessary for p2p.Send)
//

func (pm *ProtocolManager) WriteMsg(msg p2p.Msg) error {
	// read *into* buffer
	var buffer = make([]byte, msg.Size)
	msg.Payload.Read(buffer)

	return pm.rawNode().Propose(context.TODO(), buffer)
}

//
// Raft interface
//

func (pm *ProtocolManager) Process(ctx context.Context, m raftpb.Message) error {
	return pm.rawNode().Step(ctx, m)
}

func (pm *ProtocolManager) IsIDRemoved(id uint64) bool {
	return pm.isRaftIdRemoved(uint16(id))
}

func (pm *ProtocolManager) ReportUnreachable(id uint64) {
	log.Info("peer is currently unreachable", "peer id", id)

	pm.rawNode().ReportUnreachable(id)
}

func (pm *ProtocolManager) ReportSnapshot(id uint64, status etcdRaft.SnapshotStatus) {
	if status == etcdRaft.SnapshotFailure {
		log.Info("failed to send snapshot", "raft peer", id)
	} else if status == etcdRaft.SnapshotFinish {
		log.Info("finished sending snapshot", "raft peer", id)
	}

	pm.rawNode().ReportSnapshot(id, status)
}

//
// Private methods
//

func (pm *ProtocolManager) resetRaftId(enodeId string, raftId uint16) error {
	node, err := enode.ParseV4(enodeId)
	if err != nil {
		return err
	}
	address := raft.NewAddress(raftId, node.RaftPort(), node)
	pm.confChangeProposalC <- raftpb.ConfChange{
		Type:    raftpb.ConfChangeUpdateNode,
		NodeID:  uint64(raftId),
		Context: address.ToBytes(),
	}
	return nil
}

func (pm *ProtocolManager) startRaft() {
	if !fileutil.Exist(pm.snapdir) {
		if err := os.Mkdir(pm.snapdir, 0750); err != nil {
			raft.Fatalf("cannot create dir for snapshot (%v)", err)
		}
	}
	walExisted := wal.Exist(pm.waldir)
	lastAppliedIndex := pm.loadAppliedIndex()

	ss := &stats.ServerStats{}
	ss.Initialize()
	pm.transport = &rafthttp.Transport{
		ID:          raftTypes.ID(pm.raftId),
		ClusterID:   0x1000,
		Raft:        pm,
		ServerStats: ss,
		LeaderStats: stats.NewLeaderStats(strconv.Itoa(int(pm.raftId))),
		ErrorC:      make(chan error),
	}
	pm.transport.Start()

	// We load the snapshot to connect to prev peers before replaying the WAL,
	// which typically goes further into the future than the snapshot.

	var maybeRaftSnapshot *raftpb.Snapshot

	if walExisted {
		maybeRaftSnapshot = pm.loadSnapshot() // re-establishes peer connections
	}

	pm.wal = pm.replayWAL(maybeRaftSnapshot)

	if walExisted {
		if hardState, _, err := pm.raftStorage.InitialState(); err != nil {
			panic(fmt.Sprintf("failed to read initial state from raft while restarting: %v", err))
		} else {
			if lastPersistedCommittedIndex := hardState.Commit; lastPersistedCommittedIndex < lastAppliedIndex {
				log.Info("rolling back applied index to last-durably-committed", "last applied index", lastAppliedIndex, "last persisted index", lastPersistedCommittedIndex)

				// Roll back our applied index. See the logic and explanation around
				// the single call to `pm.applyNewChainHead` for more context.
				lastAppliedIndex = lastPersistedCommittedIndex
			}
		}
	}

	// NOTE: cockroach sets this to false for now until they've "worked out the
	// bugs"
	enablePreVote := true
	defaultLogger := &etcdRaft.DefaultLogger{Logger: etcdlog.New(os.Stderr, "raft", etcdlog.LstdFlags)}
	defaultLogger.EnableDebug()
	logger := etcdRaft.Logger(defaultLogger)

	raftConfig := &etcdRaft.Config{
		Applied:       lastAppliedIndex,
		ID:            uint64(pm.raftId),
		ElectionTick:  10, // NOTE: cockroach sets this to 15
		HeartbeatTick: 1,  // NOTE: cockroach sets this to 5
		Storage:       pm.raftStorage,

		// NOTE, from cockroach:
		// "PreVote and CheckQuorum are two ways of achieving the same thing.
		// PreVote is more compatible with quiesced ranges, so we want to switch
		// to it once we've worked out the bugs."
		//
		// TODO: vendor again?
		// PreVote: enablePreVote,
		CheckQuorum: !enablePreVote,

		// MaxSizePerMsg controls how many Raft log entries the leader will send to
		// followers in a single MsgApp.
		MaxSizePerMsg: 4096, // NOTE: in cockroachdb this is 16*1024

		// MaxInflightMsgs controls how many in-flight messages Raft will send to
		// a follower without hearing a response. The total number of Raft log
		// entries is a combination of this setting and MaxSizePerMsg.
		//
		// NOTE: Cockroach's settings (MaxSizePerMsg of 16k and MaxInflightMsgs
		// of 4) provide for up to 64 KB of raft log to be sent without
		// acknowledgement. With an average entry size of 1 KB that translates
		// to ~64 commands that might be executed in the handling of a single
		// etcdraft.Ready operation.
		MaxInflightMsgs: 256, // NOTE: in cockroachdb this is 4
		Logger:          logger,
	}
	raftConfig.Logger.Debug("raftlog startRaft", "raft ID", raftConfig.ID)
	log.Info("startRaft", "raft ID", raftConfig.ID)

	if walExisted {
		log.Info("remounting an existing raft log; connecting to peers.")
		pm.unsafeRawNode = etcdRaft.RestartNode(raftConfig)
	} else if pm.joinExisting {
		log.Info("newly joining an existing cluster; waiting for connections.")
		pm.unsafeRawNode = etcdRaft.StartNode(raftConfig, nil)
	} else {
		if numPeers := len(pm.bootstrapNodes); numPeers == 0 {
			panic("exiting due to empty raft peers list")
		} else {
			log.Info("starting a new raft log", "initial cluster size of", numPeers)
		}

		raftPeers, peerAddresses, localAddress := pm.makeInitialRaftPeers()

		pm.setLocalAddress(localAddress)

		// We add all peers up-front even though we will see a ConfChangeAddNode
		// for each shortly. This is because raft's ConfState will contain all of
		// these nodes before we see these log entries, and we always want our
		// snapshots to have all addresses for each of the nodes in the ConfState.
		for _, peerAddress := range peerAddresses {
			pm.addPeer(peerAddress)
		}

		pm.unsafeRawNode = etcdRaft.StartNode(raftConfig, raftPeers)
	}

	go pm.serveRaft()
	go pm.serveLocalProposals()
	go pm.eventLoop()
	go pm.handleRoleChange(pm.rawNode().RoleChan().Out())
}
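
// Illustrative only: the election and heartbeat settings above are expressed
// in ticks, and eventLoop drives one Tick() per raft.TickerMS milliseconds.
// Assuming, for example, raft.TickerMS were 100, the leader would heartbeat
// roughly every 100ms (HeartbeatTick: 1) and a follower would start an
// election after ~1s without hearing from the leader (ElectionTick: 10).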

func (pm *ProtocolManager) setLocalAddress(addr *raft.Address) {
	pm.mu.Lock()
	pm.address = addr
	pm.mu.Unlock()

	// By setting `URLs` on the raft transport, we advertise our URL (in an HTTP
	// header) to any recipient. This is necessary for a newcomer to the cluster
	// to be able to accept a snapshot from us to bootstrap them.
	if urls, err := raftTypes.NewURLs([]string{raftUrl(addr)}); err == nil {
		pm.transport.URLs = urls
	} else {
		panic(fmt.Sprintf("error: could not create URL from local address: %v", addr))
	}
}

func (pm *ProtocolManager) serveRaft() {
	urlString := fmt.Sprintf("http://0.0.0.0:%d", pm.raftPort)
	url, err := url.Parse(urlString)
	if err != nil {
		raft.Fatalf("Failed parsing URL (%v)", err)
	}

	listener, err := raft.NewStoppableListener(url.Host, pm.httpstopc)
	if err != nil {
		raft.Fatalf("Failed to listen rafthttp (%v)", err)
	}
	err = (&http.Server{Handler: pm.transport.Handler()}).Serve(listener)

	select {
	case <-pm.httpstopc:
	default:
		raft.Fatalf("Failed to serve rafthttp (%v)", err)
	}
	close(pm.httpdonec)
}

func (pm *ProtocolManager) handleRoleChange(roleC <-chan interface{}) {
	for {
		select {
		case role := <-roleC:
			intRole, ok := role.(int)

			if !ok {
				panic("Couldn't cast role to int")
			}

			if intRole == raft.MinterRole {
				pm.minter.Start(common.Address{})
			} else { // verifier
				pm.minter.Stop()
			}

			pm.mu.Lock()
			pm.role = intRole
			pm.mu.Unlock()

		case <-pm.quitSync:
			return
		}
	}
}

func (pm *ProtocolManager) minedBroadcastLoop() {
	for obj := range pm.minedBlockSub.Chan() {
		switch ev := obj.Data.(type) {
		case core.NewMinedBlockEvent:
			select {
			case pm.blockProposalC <- ev.Block:
			case <-pm.quitSync:
				return
			}
		}
	}
}
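
// Flow recap for locally mined blocks (all pieces are defined in this file):
// mined blocks arrive as core.NewMinedBlockEvent on the event mux (subscribed
// in Start); minedBroadcastLoop above forwards each block to blockProposalC;
// serveLocalProposals (below) RLP-encodes it and calls rawNode().Propose;
// once raft commits the entry, eventLoop decodes it and hands it to
// applyNewChainHead, which inserts it into the canonical chain.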

// Serve two channels to handle new blocks and raft configuration changes originating locally.
func (pm *ProtocolManager) serveLocalProposals() {
	//
	// TODO: does it matter that this will restart from 0 whenever we restart a cluster?
	//
	var confChangeCount uint64

	for {
		select {
		case block, ok := <-pm.blockProposalC:
			if !ok {
				log.Info("error: read from blockProposalC failed")
				return
			}

			size, r, err := rlp.EncodeToReader(block)
			if err != nil {
				panic(fmt.Sprintf("error: failed to send RLP-encoded block: %s", err.Error()))
			}
			var buffer = make([]byte, uint32(size))
			r.Read(buffer)

			// blocks until accepted by the raft state machine
			pm.rawNode().Propose(context.TODO(), buffer)
		case cc, ok := <-pm.confChangeProposalC:
			if !ok {
				log.Info("error: read from confChangeProposalC failed")
				return
			}

			confChangeCount++
			cc.ID = confChangeCount
			pm.rawNode().ProposeConfChange(context.TODO(), cc)
		case <-pm.quitSync:
			return
		}
	}
}

func (pm *ProtocolManager) entriesToApply(allEntries []raftpb.Entry) (entriesToApply []raftpb.Entry) {
	if len(allEntries) == 0 {
		return
	}

	first := allEntries[0].Index
	pm.mu.RLock()
	lastApplied := pm.appliedIndex
	pm.mu.RUnlock()

	if first > lastApplied+1 {
		raft.Fatalf("first index of committed entry [%d] should be <= appliedIndex [%d] + 1", first, lastApplied)
	}

	firstToApply := lastApplied - first + 1

	if firstToApply < uint64(len(allEntries)) {
		entriesToApply = allEntries[firstToApply:]
	}
	return
}
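
// Worked example for entriesToApply (the values are made up): if appliedIndex
// is 7 and raft hands us committed entries with indexes 5..9, then first == 5,
// firstToApply == 7-5+1 == 3, and only allEntries[3:] (indexes 8 and 9) are
// returned; entries at or below the applied index are skipped as already done.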

func raftUrl(address *raft.Address) string {
	return fmt.Sprintf("http://%s:%d", address.Ip, address.RaftPort)
}

func (pm *ProtocolManager) addPeer(address *raft.Address) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	raftId := address.RaftId

	// Quorum - RAFT - derive pubkey from nodeId
	pubKey, err := enode.HexPubkey(address.NodeId.String())
	if err != nil {
		log.Error("error decoding pub key from enodeId", "enodeId", address.NodeId.String(), "err", err)
		panic(err)
	}

	// Add P2P connection:
	p2pNode := enode.NewV4WithRaft(pubKey, address.Ip, int(address.P2pPort), 0, int(address.RaftPort))
	pm.p2pServer.AddPeer(p2pNode)

	// Add raft transport connection:
	pm.transport.AddPeer(raftTypes.ID(raftId), []string{raftUrl(address)})
	pm.peers[raftId] = &raft.Peer{Address: address, P2pNode: p2pNode}
}

func (pm *ProtocolManager) disconnectFromPeer(raftId uint16, peer *raft.Peer) {
	pm.p2pServer.RemovePeer(peer.P2pNode)
	pm.transport.RemovePeer(raftTypes.ID(raftId))
}

func (pm *ProtocolManager) removePeer(raftId uint16) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	if peer := pm.peers[raftId]; peer != nil {
		pm.disconnectFromPeer(raftId, peer)

		delete(pm.peers, raftId)
	}

	// This is only necessary sometimes, but it's idempotent. Also, we *always*
	// do this, and not just when there's still a peer in the map, because we
	// need to do it for our *own* raft ID before we get booted from the cluster
	// so that snapshots are identical on all nodes. It's important for a booted
	// node to have a snapshot identical to every other node because that node
	// can potentially re-enter the cluster with a new raft ID.
	pm.removedPeers.Add(raftId)
}
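
// Illustrative only: for a peer whose raft.Address has Ip 10.0.0.2 and
// RaftPort 50401 (made-up values), raftUrl above returns
// "http://10.0.0.2:50401"; addPeer registers that URL with the rafthttp
// transport and, in parallel, dials the peer's eth p2p endpoint via the
// p2p server.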

func (pm *ProtocolManager) eventLoop() {
	ticker := time.NewTicker(raft.TickerMS * time.Millisecond)
	defer ticker.Stop()
	defer pm.wal.Close()

	exitAfterApplying := false

	for {
		select {
		case <-ticker.C:
			pm.rawNode().Tick()

		// when the node is first ready it gives us entries to commit and messages
		// to immediately publish
		case rd := <-pm.rawNode().Ready():
			pm.wal.Save(rd.HardState, rd.Entries)

			if rd.SoftState != nil {
				pm.updateLeader(rd.SoftState.Lead)
			}

			if snap := rd.Snapshot; !etcdRaft.IsEmptySnap(snap) {
				pm.saveRaftSnapshot(snap)
				pm.applyRaftSnapshot(snap)
				pm.advanceAppliedIndex(snap.Metadata.Index)
			}

			// 1: Write HardState, Entries, and Snapshot to persistent storage if they
			// are not empty.
			pm.raftStorage.Append(rd.Entries)

			// 2: Send all Messages to the nodes named in the To field.
			pm.transport.Send(rd.Messages)

			// 3: Apply Snapshot (if any) and CommittedEntries to the state machine.
			for _, entry := range pm.entriesToApply(rd.CommittedEntries) {
				switch entry.Type {
				case raftpb.EntryNormal:
					if len(entry.Data) == 0 {
						break
					}
					var block types.Block
					err := rlp.DecodeBytes(entry.Data, &block)
					if err != nil {
						log.Error("error decoding block", "err", err)
					}

					if pm.blockchain.HasBlock(block.Hash(), block.NumberU64()) {
						// This can happen:
						//
						// if (1) we crashed after applying this block to the chain, but
						//        before writing appliedIndex to LDB.
						// or (2) we crashed in a scenario where we applied further than
						//        raft *durably persisted* its committed index (see
						//        https://github.com/coreos/etcd/pull/7899). In this
						//        scenario, when the node comes back up, we will re-apply
						//        a few entries.

						headBlockHash := pm.blockchain.CurrentBlock().Hash()
						log.Warn("not applying already-applied block", "block hash", block.Hash(), "parent", block.ParentHash(), "head", headBlockHash)
					} else {
						pm.applyNewChainHead(&block)
					}

				case raftpb.EntryConfChange:
					var cc raftpb.ConfChange
					cc.Unmarshal(entry.Data)
					raftId := uint16(cc.NodeID)

					pm.confState = *pm.rawNode().ApplyConfChange(cc)

					forceSnapshot := false

					switch cc.Type {
					case raftpb.ConfChangeAddNode:
						if pm.isRaftIdRemoved(raftId) {
							log.Info("ignoring ConfChangeAddNode for permanently-removed peer", "raft id", raftId)
						} else if pm.isRaftIdUsed(raftId) && raftId <= uint16(len(pm.bootstrapNodes)) {
							// See initial cluster logic in startRaft() for more information.
							log.Info("ignoring expected ConfChangeAddNode for initial peer", "raft id", raftId)

							// We need a snapshot to exist to reconnect to peers on start-up after a crash.
							forceSnapshot = true
						} else if pm.isRaftIdUsed(raftId) {
							log.Info("ignoring ConfChangeAddNode for already-used raft ID", "raft id", raftId)
						} else {
							log.Info("adding peer due to ConfChangeAddNode", "raft id", raftId)

							forceSnapshot = true
							pm.addPeer(raft.BytesToAddress(cc.Context))
						}

					case raftpb.ConfChangeRemoveNode:
						if pm.isRaftIdRemoved(raftId) {
							log.Info("ignoring ConfChangeRemoveNode for already-removed peer", "raft id", raftId)
						} else {
							log.Info("removing peer due to ConfChangeRemoveNode", "raft id", raftId)

							forceSnapshot = true

							if raftId == pm.raftId {
								exitAfterApplying = true
							}

							pm.removePeer(raftId)
						}

					case raftpb.ConfChangeUpdateNode:
						// NOTE: remember to forceSnapshot in this case, if we add support
						// for this.
						raft.Fatalf("not yet handled: ConfChangeUpdateNode")
					}

					if forceSnapshot {
						// We force a snapshot here to persist our updated confState, so we
						// know our fellow cluster members when we come back online.
						//
						// It is critical here to snapshot *before* writing our applied
						// index in LevelDB, otherwise a crash while/before snapshotting
						// (after advancing our applied index) would result in the loss of a
						// cluster member upon restart: we would re-mount with an old
						// ConfState.
						pm.triggerSnapshot(entry.Index)
					}
				}

				pm.advanceAppliedIndex(entry.Index)
			}

			pm.maybeTriggerSnapshot()

			if exitAfterApplying {
				log.Warn("permanently removing self from the cluster")
				pm.Stop()
				log.Warn("permanently exited the cluster")

				return
			}

			// 4: Call Node.Advance() to signal readiness for the next batch of
			// updates.
			pm.rawNode().Advance()

		case <-pm.quitSync:
			return
		}
	}
}

func (pm *ProtocolManager) makeInitialRaftPeers() (raftPeers []etcdRaft.Peer, peerAddresses []*raft.Address, localAddress *raft.Address) {
	initialNodes := pm.bootstrapNodes
	raftPeers = make([]etcdRaft.Peer, len(initialNodes))       // Entire cluster
	peerAddresses = make([]*raft.Address, len(initialNodes)-1) // Cluster without *this* node

	peersSeen := 0
	for i, node := range initialNodes {
		raftId, err := raft.GetRaftConfigJson(pm.raftConfigPath)
		if err != nil {
			panic(err)
		}
		log.Info("makeInitialRaftPeers", "raft id", raftId, "pm.raftid", pm.raftId)
		// We initially get the raftPort from the enode ID's query string. As an alternative, we can move away from
		// requiring the use of static peers for the initial set, and load them from e.g. another JSON file which
		// contains pairs of enodes and raft ports, or we can get this initial peer list from commandline flags.
		address := raft.NewAddress(raftId, node.RaftPort(), node)
		raftPeers[i] = etcdRaft.Peer{
			ID:      uint64(raftId),
			Context: address.ToBytes(),
		}

		if raftId == pm.raftId {
			localAddress = address
		} else {
			peerAddresses[peersSeen] = address
			peersSeen += 1
		}
	}

	return
}

func blockExtendsChain(block *types.Block, chain *core.BlockChain) bool {
	return block.ParentHash() == chain.CurrentBlock().Hash()
}

func (pm *ProtocolManager) applyNewChainHead(block *types.Block) {
	if !blockExtendsChain(block, pm.blockchain) {
		headBlock := pm.blockchain.CurrentBlock()
		log.Info("Non-extending block", "block", block.Hash(), "parent", block.ParentHash(), "head", headBlock.Hash())
		pm.minter.InvalidRaftOrdering() <- raft.InvalidRaftOrdering{HeadBlock: headBlock, InvalidBlock: block}
	} else {
		if existingBlock := pm.blockchain.GetBlockByHash(block.Hash()); nil == existingBlock {
			if err := pm.blockchain.Validator().ValidateBody(block); err != nil {
				panic(fmt.Sprintf("failed to validate block %x (%v)", block.Hash(), err))
			}
		}

		_, err := pm.blockchain.InsertChain([]*types.Block{block})

		if err != nil {
			panic(fmt.Sprintf("failed to extend chain: %s", err.Error()))
		}
	}
}

// Sets new appliedIndex in-memory, *and* writes this appliedIndex to LevelDB.
func (pm *ProtocolManager) advanceAppliedIndex(index uint64) {
	pm.writeAppliedIndex(index)

	pm.mu.Lock()
	pm.appliedIndex = index
	pm.mu.Unlock()
}

func (pm *ProtocolManager) updateLeader(leader uint64) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	pm.leader = uint16(leader)
}

// LeaderAddress returns the Address for the current leader, or an error if no leader is elected.
func (pm *ProtocolManager) LeaderAddress() (*raft.Address, error) {
	pm.mu.RLock()
	defer pm.mu.RUnlock()

	if raft.MinterRole == pm.role {
		return pm.address, nil
	} else if l, ok := pm.peers[pm.leader]; ok {
		return l.Address, nil
	}
	// We expect to reach this if pm.leader is 0, which is how etcd denotes the lack of a leader.
	return nil, errors.New("no leader is currently elected")
}

// FetchRaftId returns the raft id for a given enodeId.
func (pm *ProtocolManager) FetchRaftId(enodeId string) (uint16, error) {
	node, err := enode.ParseV4(enodeId)
	if err != nil {
		return 0, err
	}
	for raftId, peer := range pm.peers {
		if peer.P2pNode.ID() == node.ID() {
			return raftId, nil
		}
	}
	return 0, fmt.Errorf("node not found in the cluster: %v", enodeId)
}

func (pm *ProtocolManager) MaxRaftId() uint16 {
	maxId := pm.raftId

	for peerId := range pm.peers {
		if maxId < peerId {
			maxId = peerId
		}
	}
	return maxId
}
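
// Illustrative only: a sketch of how an RPC/admin layer might consume the
// public surface above. The surrounding context and the peerEnode variable
// are hypothetical.
//
//	info := pm.NodeInfo() // cluster size, role, applied/snapshot index
//	if addr, err := pm.LeaderAddress(); err == nil {
//		log.Info("raft leader", "raftId", addr.RaftId)
//	}
//	if id, err := pm.FetchRaftId(peerEnode); err == nil {
//		pm.ProposePeerRemoval(id) // enqueues a ConfChangeRemoveNode proposal
//	}
//	_ = info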