github.com/lzy4123/fabric@v2.1.1+incompatible/orderer/consensus/etcdraft/node.go

/*
Copyright IBM Corp. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package etcdraft

import (
	"context"
	"crypto/sha256"
	"sync"
	"sync/atomic"
	"time"

	"code.cloudfoundry.org/clock"
	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
	"github.com/hyperledger/fabric/common/flogging"
	"github.com/hyperledger/fabric/protoutil"
	"go.etcd.io/etcd/raft"
	"go.etcd.io/etcd/raft/raftpb"
)

// node wraps an etcd/raft Node together with the storage, RPC transport,
// metrics and bookkeeping a chain needs to drive consensus.
type node struct {
	chainID string
	logger  *flogging.FabricLogger
	metrics *Metrics

	unreachableLock sync.RWMutex
	unreachable     map[uint64]struct{}

	tracker *Tracker

	storage *RaftStorage
	config  *raft.Config

	rpc RPC

	chain *Chain

	tickInterval time.Duration
	clock        clock.Clock

	metadata *etcdraft.BlockMetadata

	subscriberC chan chan uint64

	raft.Node
}

// start boots the raft node and launches its main event loop. A fresh node
// either joins an existing channel (with no initial peers) or bootstraps a
// new one, in which case one deterministically chosen node proactively
// campaigns for leadership; a non-fresh node is restarted from persisted state.
func (n *node) start(fresh, join bool) {
	raftPeers := RaftPeers(n.metadata.ConsenterIds)
	n.logger.Debugf("Starting raft node: #peers: %v", len(raftPeers))

	var campaign bool
	if fresh {
		if join {
			raftPeers = nil
			n.logger.Info("Starting raft node to join an existing channel")
		} else {
			n.logger.Info("Starting raft node as part of a new channel")

			// Determine the node to start the campaign by selecting the node
			// whose ID equals hash(channelID) % cluster_size + 1.
			sha := sha256.Sum256([]byte(n.chainID))
			number, _ := proto.DecodeVarint(sha[24:])
			if n.config.ID == number%uint64(len(raftPeers))+1 {
				campaign = true
			}
		}
		n.Node = raft.StartNode(n.config, raftPeers)
	} else {
		n.logger.Info("Restarting raft node")
		n.Node = raft.RestartNode(n.config)
	}

	n.subscriberC = make(chan chan uint64)

	go n.run(campaign)
}
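
// campaignPickerID is a hypothetical helper, not part of the original file:
// a minimal sketch isolating the deterministic campaign-picker formula used
// in start, hash(channelID) % cluster_size + 1. Every node evaluates the
// same formula, so exactly one node picks itself to campaign first.
func campaignPickerID(channelID string, clusterSize uint64) uint64 {
	sha := sha256.Sum256([]byte(channelID))
	// Decode the tail of the digest as a varint to obtain a number.
	number, _ := proto.DecodeVarint(sha[24:])
	return number%clusterSize + 1
}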

// run is the node's main event loop: it ticks raft, persists Ready data,
// relays snapshots and committed entries to the chain, notifies leader-change
// subscribers, and sends outbound raft messages to the peers.
func (n *node) run(campaign bool) {
	electionTimeout := n.tickInterval.Seconds() * float64(n.config.ElectionTick)
	halfElectionTimeout := electionTimeout / 2

	raftTicker := n.clock.NewTicker(n.tickInterval)

	if s := n.storage.Snapshot(); !raft.IsEmptySnap(s) {
		n.chain.snapC <- &s
	}

	elected := make(chan struct{})
	if campaign {
		n.logger.Infof("This node is picked to start campaign")
		go func() {
			// Attempt to campaign every two HeartbeatTimeout intervals until a
			// leader is present - either this node successfully claims
			// leadership, or another leader already existed when this node
			// started. We could do this more lazily and exit the proactive
			// campaign once transitioned to Candidate state (not PreCandidate,
			// because other nodes might not have started yet, in which case
			// PreVote messages are dropped at recipients). But there is no
			// obvious reason (for now) to be lazy.
			//
			// 2*HeartbeatTick is used to avoid excessive campaigns when
			// network latency is significant and the Raft term keeps advancing
			// in this extreme case.
			campaignTicker := n.clock.NewTicker(n.tickInterval * time.Duration(n.config.HeartbeatTick) * 2)
			defer campaignTicker.Stop()

			for {
				select {
				case <-campaignTicker.C():
					n.Campaign(context.TODO())
				case <-elected:
					return
				case <-n.chain.doneC:
					return
				}
			}
		}()
	}

	var notifyLeaderChangeC chan uint64

	for {
		select {
		case <-raftTicker.C():
			// Grab the raft Status before ticking, so `RecentActive`
			// attributes are not reset yet.
			status := n.Status()

			n.Tick()
			n.tracker.Check(&status)

		case rd := <-n.Ready():
			startStoring := n.clock.Now()
			if err := n.storage.Store(rd.Entries, rd.HardState, rd.Snapshot); err != nil {
				n.logger.Panicf("Failed to persist etcd/raft data: %s", err)
			}
			duration := n.clock.Since(startStoring).Seconds()
			n.metrics.DataPersistDuration.Observe(float64(duration))
			if duration > halfElectionTimeout {
				n.logger.Warningf("WAL sync took %v seconds and the network is configured to start elections after %v seconds. Your disk is too slow and may cause loss of quorum and trigger leadership election.", duration, electionTimeout)
			}

			if !raft.IsEmptySnap(rd.Snapshot) {
				n.chain.snapC <- &rd.Snapshot
			}

			if notifyLeaderChangeC != nil && rd.SoftState != nil {
				if l := atomic.LoadUint64(&rd.SoftState.Lead); l != raft.None {
					select {
					case notifyLeaderChangeC <- l:
					default:
					}

					notifyLeaderChangeC = nil
				}
			}

			// Skip empty applies.
			if len(rd.CommittedEntries) != 0 || rd.SoftState != nil {
				n.chain.applyC <- apply{rd.CommittedEntries, rd.SoftState}
			}

			if campaign && rd.SoftState != nil {
				leader := atomic.LoadUint64(&rd.SoftState.Lead) // etcdraft requires atomic access to this var
				if leader != raft.None {
					n.logger.Infof("Leader %d is present, quit campaign", leader)
					campaign = false
					close(elected)
				}
			}

			n.Advance()

			// TODO(jay_guo) The leader can write to disk in parallel with
			// replicating to the followers and with them writing to their
			// disks. See section 10.2.1 in the Raft thesis.
			n.send(rd.Messages)

		case notifyLeaderChangeC = <-n.subscriberC:

		case <-n.chain.haltC:
			raftTicker.Stop()
			n.Stop()
			n.storage.Close()
			n.logger.Infof("Raft node stopped")
			close(n.chain.doneC) // close after all the artifacts are closed
			return
		}
	}
}

// send marshals outbound raft messages and delivers them over the RPC layer,
// reporting unreachable peers and snapshot delivery status back to raft.
func (n *node) send(msgs []raftpb.Message) {
	n.unreachableLock.RLock()
	defer n.unreachableLock.RUnlock()

	for _, msg := range msgs {
		if msg.To == 0 {
			continue
		}

		status := raft.SnapshotFinish

		msgBytes := protoutil.MarshalOrPanic(&msg)
		err := n.rpc.SendConsensus(msg.To, &orderer.ConsensusRequest{Channel: n.chainID, Payload: msgBytes})
		if err != nil {
			n.ReportUnreachable(msg.To)
			n.logSendFailure(msg.To, err)

			status = raft.SnapshotFailure
		} else if _, ok := n.unreachable[msg.To]; ok {
			n.logger.Infof("Successfully sent StepRequest to %d after failed attempt(s)", msg.To)
			delete(n.unreachable, msg.To)
		}

		if msg.Type == raftpb.MsgSnap {
			n.ReportSnapshot(msg.To, status)
		}
	}
}
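
// subscribeToLeaderChange is a hypothetical sketch, not part of the original
// file: it spells out the one-shot subscription protocol handled by run
// above. A caller registers a buffered channel via subscriberC and receives
// at most one leader ID on it; abdicateLeader below uses the same pattern
// inline.
func (n *node) subscribeToLeaderChange() <-chan uint64 {
	notifyC := make(chan uint64, 1) // buffered, so run never blocks on delivery
	select {
	case n.subscriberC <- notifyC:
	case <-n.chain.doneC: // chain is shutting down; nothing will be delivered
	}
	return notifyC
}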

// abdicateLeader attempts to hand off leadership. If called on the leader, it
// picks a recently active node and attempts to transfer leadership to it. If
// called on a follower, it simply waits for a leader change until the timeout
// (ElectionTimeout) elapses.
func (n *node) abdicateLeader(currentLead uint64) {
	status := n.Status()

	if status.Lead != raft.None && status.Lead != currentLead {
		n.logger.Warn("Leader has changed since asked to transfer leadership")
		return
	}

	// Register a leader-change subscriber.
	notifyc := make(chan uint64, 1)
	select {
	case n.subscriberC <- notifyc:
	case <-n.chain.doneC:
		return
	}

	// The leader initiates the leader transfer.
	if status.RaftState == raft.StateLeader {
		var transferee uint64
		for id, pr := range status.Progress {
			if id == status.ID {
				continue // skip self
			}

			if pr.RecentActive && !pr.Paused {
				transferee = id
				break
			}

			n.logger.Debugf("Node %d is not qualified as transferee because it's either paused or not active", id)
		}

		if transferee == raft.None {
			n.logger.Errorf("No follower is qualified as transferee, abort leader transfer")
			return
		}

		n.logger.Infof("Transferring leadership to %d", transferee)
		n.TransferLeadership(context.TODO(), status.ID, transferee)
	}

	timer := n.clock.NewTimer(time.Duration(n.config.ElectionTick) * n.tickInterval)
	defer timer.Stop() // prevent timer leak

	select {
	case <-timer.C():
		n.logger.Warn("Leader transfer timeout")
	case l := <-notifyc:
		n.logger.Infof("Leader has been transferred from %d to %d", currentLead, l)
	case <-n.chain.doneC:
	}
}

// logSendFailure logs the first send failure to a destination at Error level
// and subsequent failures at Debug level, to avoid flooding the log.
func (n *node) logSendFailure(dest uint64, err error) {
	if _, ok := n.unreachable[dest]; ok {
		n.logger.Debugf("Failed to send StepRequest to %d, because: %s", dest, err)
		return
	}

	n.logger.Errorf("Failed to send StepRequest to %d, because: %s", dest, err)
	n.unreachable[dest] = struct{}{}
}

// takeSnapshot asks the raft storage to create a snapshot at the given index.
func (n *node) takeSnapshot(index uint64, cs raftpb.ConfState, data []byte) {
	if err := n.storage.TakeSnapshot(index, cs, data); err != nil {
		n.logger.Errorf("Failed to create snapshot at index %d: %s", index, err)
	}
}

// lastIndex returns the last entry index held in the in-memory raft storage.
func (n *node) lastIndex() uint64 {
	i, _ := n.storage.ram.LastIndex()
	return i
}
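
// electionTimeout is a hypothetical helper, not part of the original file: it
// makes explicit the timeout arithmetic shared by run and abdicateLeader. The
// raft election timeout is ElectionTick ticks of tickInterval each.
func (n *node) electionTimeout() time.Duration {
	return time.Duration(n.config.ElectionTick) * n.tickInterval
}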