github.com/lzy4123/fabric@v2.1.1+incompatible/orderer/consensus/etcdraft/node.go

/*
Copyright IBM Corp. All Rights Reserved.

SPDX-License-Identifier: Apache-2.0
*/

package etcdraft

import (
	"context"
	"crypto/sha256"
	"sync"
	"sync/atomic"
	"time"

	"code.cloudfoundry.org/clock"
	"github.com/golang/protobuf/proto"
	"github.com/hyperledger/fabric-protos-go/orderer"
	"github.com/hyperledger/fabric-protos-go/orderer/etcdraft"
	"github.com/hyperledger/fabric/common/flogging"
	"github.com/hyperledger/fabric/protoutil"
	"go.etcd.io/etcd/raft"
	"go.etcd.io/etcd/raft/raftpb"
)

// node wraps an etcd/raft Node together with the storage, RPC transport,
// metrics and bookkeeping a chain needs to drive consensus.
type node struct {
	chainID string
	logger  *flogging.FabricLogger
	metrics *Metrics

	unreachableLock sync.RWMutex
	unreachable     map[uint64]struct{}

	tracker *Tracker

	storage *RaftStorage
	config  *raft.Config

	rpc RPC

	chain *Chain

	tickInterval time.Duration
	clock        clock.Clock

	metadata *etcdraft.BlockMetadata

	subscriberC chan chan uint64

	raft.Node
}

// start boots the raft node and launches its main event loop. A fresh node
// either joins an existing channel (with no initial peers) or bootstraps a
// new one, in which case one deterministically chosen node proactively
// campaigns for leadership; a non-fresh node is restarted from persisted state.
func (n *node) start(fresh, join bool) {
	raftPeers := RaftPeers(n.metadata.ConsenterIds)
	n.logger.Debugf("Starting raft node: #peers: %v", len(raftPeers))

	var campaign bool
	if fresh {
		if join {
			raftPeers = nil
			n.logger.Info("Starting raft node to join an existing channel")
		} else {
			n.logger.Info("Starting raft node as part of a new channel")

			// Determine the node to start the campaign by selecting the node
			// whose ID equals hash(channelID) % cluster_size + 1.
			sha := sha256.Sum256([]byte(n.chainID))
			number, _ := proto.DecodeVarint(sha[24:])
			if n.config.ID == number%uint64(len(raftPeers))+1 {
				campaign = true
			}
		}
		n.Node = raft.StartNode(n.config, raftPeers)
	} else {
		n.logger.Info("Restarting raft node")
		n.Node = raft.RestartNode(n.config)
	}

	n.subscriberC = make(chan chan uint64)

	go n.run(campaign)
}
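
// campaignPickerID is a hypothetical helper, not part of the original file:
// a minimal sketch isolating the deterministic campaign-picker formula used
// in start, hash(channelID) % cluster_size + 1. Every node evaluates the
// same formula, so exactly one node picks itself to campaign first.
func campaignPickerID(channelID string, clusterSize uint64) uint64 {
	sha := sha256.Sum256([]byte(channelID))
	// Decode the tail of the digest as a varint to obtain a number.
	number, _ := proto.DecodeVarint(sha[24:])
	return number%clusterSize + 1
}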

// run is the node's main event loop: it ticks raft, persists Ready data,
// relays snapshots and committed entries to the chain, notifies leader-change
// subscribers, and sends outbound raft messages to the peers.
func (n *node) run(campaign bool) {
	electionTimeout := n.tickInterval.Seconds() * float64(n.config.ElectionTick)
	halfElectionTimeout := electionTimeout / 2

	raftTicker := n.clock.NewTicker(n.tickInterval)

	if s := n.storage.Snapshot(); !raft.IsEmptySnap(s) {
		n.chain.snapC <- &s
	}

	elected := make(chan struct{})
	if campaign {
		n.logger.Infof("This node is picked to start campaign")
		go func() {
			// Attempt to campaign every two HeartbeatTimeout intervals until a
			// leader is present - either this node successfully claims
			// leadership, or another leader already existed when this node
			// started. We could do this more lazily and exit the proactive
			// campaign once transitioned to Candidate state (not PreCandidate,
			// because other nodes might not have started yet, in which case
			// PreVote messages are dropped at recipients). But there is no
			// obvious reason (for now) to be lazy.
			//
			// 2*HeartbeatTick is used to avoid excessive campaigns when
			// network latency is significant and the Raft term keeps advancing
			// in this extreme case.
			campaignTicker := n.clock.NewTicker(n.tickInterval * time.Duration(n.config.HeartbeatTick) * 2)
			defer campaignTicker.Stop()

			for {
				select {
				case <-campaignTicker.C():
					n.Campaign(context.TODO())
				case <-elected:
					return
				case <-n.chain.doneC:
					return
				}
			}
		}()
	}

	var notifyLeaderChangeC chan uint64

	for {
		select {
		case <-raftTicker.C():
			// Grab the raft Status before ticking, so `RecentActive`
			// attributes are not reset yet.
			status := n.Status()

			n.Tick()
			n.tracker.Check(&status)

		case rd := <-n.Ready():
			startStoring := n.clock.Now()
			if err := n.storage.Store(rd.Entries, rd.HardState, rd.Snapshot); err != nil {
				n.logger.Panicf("Failed to persist etcd/raft data: %s", err)
			}
			duration := n.clock.Since(startStoring).Seconds()
			n.metrics.DataPersistDuration.Observe(float64(duration))
			if duration > halfElectionTimeout {
				n.logger.Warningf("WAL sync took %v seconds and the network is configured to start elections after %v seconds. Your disk is too slow and may cause loss of quorum and trigger leadership election.", duration, electionTimeout)
			}

			if !raft.IsEmptySnap(rd.Snapshot) {
				n.chain.snapC <- &rd.Snapshot
			}

			if notifyLeaderChangeC != nil && rd.SoftState != nil {
				if l := atomic.LoadUint64(&rd.SoftState.Lead); l != raft.None {
					select {
					case notifyLeaderChangeC <- l:
					default:
					}

					notifyLeaderChangeC = nil
				}
			}

			// Skip empty applies.
			if len(rd.CommittedEntries) != 0 || rd.SoftState != nil {
				n.chain.applyC <- apply{rd.CommittedEntries, rd.SoftState}
			}

			if campaign && rd.SoftState != nil {
				leader := atomic.LoadUint64(&rd.SoftState.Lead) // etcdraft requires atomic access to this var
				if leader != raft.None {
					n.logger.Infof("Leader %d is present, quit campaign", leader)
					campaign = false
					close(elected)
				}
			}

			n.Advance()

			// TODO(jay_guo) The leader can write to disk in parallel with
			// replicating to the followers and with them writing to their
			// disks. See section 10.2.1 in the Raft thesis.
			n.send(rd.Messages)

		case notifyLeaderChangeC = <-n.subscriberC:

		case <-n.chain.haltC:
			raftTicker.Stop()
			n.Stop()
			n.storage.Close()
			n.logger.Infof("Raft node stopped")
			close(n.chain.doneC) // close after all the artifacts are closed
			return
		}
	}
}

// send marshals outbound raft messages and delivers them over the RPC layer,
// reporting unreachable peers and snapshot delivery status back to raft.
func (n *node) send(msgs []raftpb.Message) {
	n.unreachableLock.RLock()
	defer n.unreachableLock.RUnlock()

	for _, msg := range msgs {
		if msg.To == 0 {
			continue
		}

		status := raft.SnapshotFinish

		msgBytes := protoutil.MarshalOrPanic(&msg)
		err := n.rpc.SendConsensus(msg.To, &orderer.ConsensusRequest{Channel: n.chainID, Payload: msgBytes})
		if err != nil {
			n.ReportUnreachable(msg.To)
			n.logSendFailure(msg.To, err)

			status = raft.SnapshotFailure
		} else if _, ok := n.unreachable[msg.To]; ok {
			n.logger.Infof("Successfully sent StepRequest to %d after failed attempt(s)", msg.To)
			delete(n.unreachable, msg.To)
		}

		if msg.Type == raftpb.MsgSnap {
			n.ReportSnapshot(msg.To, status)
		}
	}
}
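
// subscribeToLeaderChange is a hypothetical sketch, not part of the original
// file: it spells out the one-shot subscription protocol handled by run
// above. A caller registers a buffered channel via subscriberC and receives
// at most one leader ID on it; abdicateLeader below uses the same pattern
// inline.
func (n *node) subscribeToLeaderChange() <-chan uint64 {
	notifyC := make(chan uint64, 1) // buffered, so run never blocks on delivery
	select {
	case n.subscriberC <- notifyC:
	case <-n.chain.doneC: // chain is shutting down; nothing will be delivered
	}
	return notifyC
}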

// abdicateLeader attempts to hand off leadership. If called on the leader, it
// picks a recently active node and attempts to transfer leadership to it. If
// called on a follower, it simply waits for a leader change until the timeout
// (ElectionTimeout) elapses.
func (n *node) abdicateLeader(currentLead uint64) {
	status := n.Status()

	if status.Lead != raft.None && status.Lead != currentLead {
		n.logger.Warn("Leader has changed since asked to transfer leadership")
		return
	}

	// Register a leader-change subscriber.
	notifyc := make(chan uint64, 1)
	select {
	case n.subscriberC <- notifyc:
	case <-n.chain.doneC:
		return
	}

	// The leader initiates the leader transfer.
	if status.RaftState == raft.StateLeader {
		var transferee uint64
		for id, pr := range status.Progress {
			if id == status.ID {
				continue // skip self
			}

			if pr.RecentActive && !pr.Paused {
				transferee = id
				break
			}

			n.logger.Debugf("Node %d is not qualified as transferee because it's either paused or not active", id)
		}

		if transferee == raft.None {
			n.logger.Errorf("No follower is qualified as transferee, abort leader transfer")
			return
		}

		n.logger.Infof("Transferring leadership to %d", transferee)
		n.TransferLeadership(context.TODO(), status.ID, transferee)
	}

	timer := n.clock.NewTimer(time.Duration(n.config.ElectionTick) * n.tickInterval)
	defer timer.Stop() // prevent timer leak

	select {
	case <-timer.C():
		n.logger.Warn("Leader transfer timeout")
	case l := <-notifyc:
		n.logger.Infof("Leader has been transferred from %d to %d", currentLead, l)
	case <-n.chain.doneC:
	}
}

// logSendFailure logs the first send failure to a destination at Error level
// and subsequent failures at Debug level, to avoid flooding the log.
func (n *node) logSendFailure(dest uint64, err error) {
	if _, ok := n.unreachable[dest]; ok {
		n.logger.Debugf("Failed to send StepRequest to %d, because: %s", dest, err)
		return
	}

	n.logger.Errorf("Failed to send StepRequest to %d, because: %s", dest, err)
	n.unreachable[dest] = struct{}{}
}

// takeSnapshot asks the raft storage to create a snapshot at the given index.
func (n *node) takeSnapshot(index uint64, cs raftpb.ConfState, data []byte) {
	if err := n.storage.TakeSnapshot(index, cs, data); err != nil {
		n.logger.Errorf("Failed to create snapshot at index %d: %s", index, err)
	}
}

// lastIndex returns the last entry index held in the in-memory raft storage.
func (n *node) lastIndex() uint64 {
	i, _ := n.storage.ram.LastIndex()
	return i
}
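
// electionTimeout is a hypothetical helper, not part of the original file: it
// makes explicit the timeout arithmetic shared by run and abdicateLeader. The
// raft election timeout is ElectionTick ticks of tickInterval each.
func (n *node) electionTimeout() time.Duration {
	return time.Duration(n.config.ElectionTick) * n.tickInterval
}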