github.com/okex/exchain@v1.8.0/libs/tendermint/consensus/consensus_main_routine.go (about) 1 package consensus 2 3 import ( 4 "bytes" 5 "fmt" 6 cfg "github.com/okex/exchain/libs/tendermint/config" 7 cstypes "github.com/okex/exchain/libs/tendermint/consensus/types" 8 "github.com/okex/exchain/libs/tendermint/libs/fail" 9 "github.com/okex/exchain/libs/tendermint/types" 10 tmtime "github.com/okex/exchain/libs/tendermint/types/time" 11 "reflect" 12 "runtime/debug" 13 "time" 14 ) 15 16 //----------------------------------------- 17 // the main go routines 18 19 // receiveRoutine handles messages which may cause state transitions. 20 // it's argument (n) is the number of messages to process before exiting - use 0 to run forever 21 // It keeps the RoundState and is the only thing that updates it. 22 // Updates (state transitions) happen on timeouts, complete proposals, and 2/3 majorities. 23 // State must be locked before any internal state is updated. 24 func (cs *State) receiveRoutine(maxSteps int) { 25 onExit := func(cs *State) { 26 // NOTE: the internalMsgQueue may have signed messages from our 27 // priv_val that haven't hit the WAL, but its ok because 28 // priv_val tracks LastSig 29 30 // close wal now that we're done writing to it 31 cs.wal.Stop() 32 cs.wal.Wait() 33 34 close(cs.done) 35 cs.done = nil 36 } 37 38 defer func() { 39 if r := recover(); r != nil { 40 cs.Logger.Error("CONSENSUS FAILURE!!!", "err", r, "stack", string(debug.Stack())) 41 // stop gracefully 42 // 43 // NOTE: We most probably shouldn't be running any further when there is 44 // some unexpected panic. Some unknown error happened, and so we don't 45 // know if that will result in the validator signing an invalid thing. It 46 // might be worthwhile to explore a mechanism for manual resuming via 47 // some console or secure RPC system, but for now, halting the chain upon 48 // unexpected consensus bugs sounds like the better option. 49 onExit(cs) 50 } 51 }() 52 53 for { 54 if maxSteps > 0 { 55 if cs.nSteps >= maxSteps { 56 cs.Logger.Info("reached max steps. exiting receive routine") 57 cs.nSteps = 0 58 return 59 } 60 } 61 rs := cs.RoundState 62 var mi msgInfo 63 64 select { 65 case <-cs.txNotifier.TxsAvailable(): 66 cs.handleTxsAvailable() 67 case mi = <-cs.peerMsgQueue: 68 // handles proposals, block parts, votes 69 // may generate internal events (votes, complete proposals, 2/3 majorities) 70 if cs.handleMsg(mi) { 71 cs.wal.Write(mi) 72 } 73 case mi = <-cs.internalMsgQueue: 74 err := cs.wal.WriteSync(mi) // NOTE: fsync 75 if err != nil { 76 panic(fmt.Sprintf("Failed to write %v msg to consensus wal due to %v. Check your FS and restart the node", mi, err)) 77 } 78 79 if _, ok := mi.Msg.(*VoteMessage); ok { 80 // we actually want to simulate failing during 81 // the previous WriteSync, but this isn't easy to do. 82 // Equivalent would be to fail here and manually remove 83 // some bytes from the end of the wal. 84 fail.Fail() // XXX 85 } 86 87 // handles proposals, block parts, votes 88 cs.handleMsg(mi) 89 case ti := <-cs.timeoutTicker.Chan(): // tockChan: 90 cs.wal.Write(ti) 91 // if the timeout is relevant to the rs 92 // go to the next step 93 cs.handleTimeout(ti, rs) 94 case <-cs.Quit(): 95 onExit(cs) 96 return 97 } 98 } 99 } 100 101 func (cs *State) handleAVCProposal(proposal *types.Proposal) { 102 if !GetActiveVC() || 103 cs.Height != proposal.Height || cs.Round != proposal.Round || 104 len(cs.taskResultChan) == 0 { 105 return 106 } 107 res := cs.getPreBlockResult(proposal.Height) 108 if res == nil { 109 cs.Logger.Error("handleAVCProposal get block nil", "cs height", cs.Height, "proposal height", proposal.Height) 110 return 111 } 112 if !bytes.Equal(proposal.BlockID.PartsHeader.Hash, res.blockParts.Header().Hash) || proposal.Height != res.block.Height { 113 return 114 } 115 cs.sendInternalMessage(msgInfo{&ProposalMessage{proposal}, ""}) 116 for i := 0; i < res.blockParts.Total(); i++ { 117 part := res.blockParts.GetPart(i) 118 cs.sendInternalMessage(msgInfo{&BlockPartMessage{cs.Height, cs.Round, part}, ""}) 119 } 120 } 121 122 // state transitions on complete-proposal, 2/3-any, 2/3-one 123 func (cs *State) handleMsg(mi msgInfo) (added bool) { 124 cs.mtx.Lock() 125 defer cs.mtx.Unlock() 126 127 var ( 128 err error 129 ) 130 msg, peerID := mi.Msg, mi.PeerID 131 switch msg := msg.(type) { 132 case *ProposeResponseMessage: 133 cs.handleAVCProposal(msg.Proposal) 134 135 case *ViewChangeMessage: 136 if !GetActiveVC() { 137 return 138 } 139 140 // no need to handle duplicate vcMsg 141 if cs.vcMsg != nil && cs.vcMsg.Height >= msg.Height { 142 return 143 } 144 145 // enterNewHeight use cs.vcMsg 146 if msg.Height == cs.Height+1 { 147 cs.vcMsg = msg 148 cs.Logger.Info("handle vcMsg", "height", cs.Height, "vcMsg", cs.vcMsg) 149 } else if msg.Height == cs.Height { 150 // ApplyBlock of height-1 has finished 151 // at this height, it has enterNewHeight 152 // vc immediately 153 cs.vcMsg = msg 154 cs.Logger.Info("handle vcMsg", "height", cs.Height, "vcMsg", cs.vcMsg) 155 if cs.Step != cstypes.RoundStepNewHeight && cs.Round == 0 { 156 _, val := cs.Validators.GetByAddress(msg.NewProposer) 157 cs.enterNewRoundAVC(cs.Height, 0, val) 158 } 159 } 160 161 case *ProposalMessage: 162 // will not cause transition. 163 // once proposal is set, we can receive block parts 164 if added, err = cs.setProposal(msg.Proposal); added { 165 cs.handleAVCProposal(msg.Proposal) 166 } 167 case *BlockPartMessage: 168 // if avc and has 2/3 votes, it can use the blockPartsHeader from votes 169 if cs.HasVC && cs.ProposalBlockParts == nil && cs.Round == 0 { 170 prevotes := cs.Votes.Prevotes(cs.Round) 171 blockID, hasTwoThirds := prevotes.TwoThirdsMajority() 172 if hasTwoThirds && !blockID.IsZero() { 173 cs.ProposalBlockParts = types.NewPartSetFromHeader(blockID.PartsHeader) 174 } 175 } 176 // if the proposal is complete, we'll enterPrevote or tryFinalizeCommit 177 added, err = cs.addProposalBlockPart(msg, peerID) 178 179 // We unlock here to yield to any routines that need to read the the RoundState. 180 // Previously, this code held the lock from the point at which the final block 181 // part was received until the block executed against the application. 182 // This prevented the reactor from being able to retrieve the most updated 183 // version of the RoundState. The reactor needs the updated RoundState to 184 // gossip the now completed block. 185 // 186 // This code can be further improved by either always operating on a copy 187 // of RoundState and only locking when switching out State's copy of 188 // RoundState with the updated copy or by emitting RoundState events in 189 // more places for routines depending on it to listen for. 190 191 cs.mtx.Unlock() 192 cs.mtx.Lock() 193 if added && cs.ProposalBlockParts.IsComplete() { 194 cs.handleCompleteProposal(msg.Height) 195 } 196 197 if added { 198 cs.statsMsgQueue <- mi 199 } 200 201 if err != nil && msg.Round != cs.Round { 202 cs.Logger.Debug( 203 "Received block part from wrong round", 204 "height", 205 cs.Height, 206 "csRound", 207 cs.Round, 208 "blockRound", 209 msg.Round) 210 err = nil 211 } 212 case *VoteMessage: 213 // attempt to add the vote and dupeout the validator if its a duplicate signature 214 // if the vote gives us a 2/3-any or 2/3-one, we transition 215 added, err = cs.tryAddVote(msg.Vote, peerID) 216 if added { 217 cs.statsMsgQueue <- mi 218 } 219 220 // if err == ErrAddingVote { 221 // TODO: punish peer 222 // We probably don't want to stop the peer here. The vote does not 223 // necessarily comes from a malicious peer but can be just broadcasted by 224 // a typical peer. 225 // https://github.com/tendermint/tendermint/issues/1281 226 // } 227 228 // NOTE: the vote is broadcast to peers by the reactor listening 229 // for vote events 230 231 // TODO: If rs.Height == vote.Height && rs.Round < vote.Round, 232 // the peer is sending us CatchupCommit precommits. 233 // We could make note of this and help filter in broadcastHasVoteMessage(). 234 default: 235 cs.Logger.Error("Unknown msg type", "type", reflect.TypeOf(msg)) 236 return 237 } 238 239 if err != nil { // nolint:staticcheck 240 // Causes TestReactorValidatorSetChanges to timeout 241 // https://github.com/tendermint/tendermint/issues/3406 242 // cs.Logger.Error("Error with msg", "height", cs.Height, "round", cs.Round, 243 // "peer", peerID, "err", err, "msg", msg) 244 } 245 return 246 } 247 248 func (cs *State) handleTimeout(ti timeoutInfo, rs cstypes.RoundState) { 249 cs.Logger.Debug("Received tock", "timeout", ti.Duration, "height", ti.Height, "round", ti.Round, "step", ti.Step) 250 251 // timeouts must be for current height, round, step 252 if ti.Height != rs.Height || ti.Round < rs.Round || (ti.Round == rs.Round && ti.Step < rs.Step) { 253 cs.Logger.Debug("Ignoring tock because we're ahead", "height", rs.Height, "round", rs.Round, "step", rs.Step) 254 return 255 } 256 257 // the timeout will now cause a state transition 258 cs.mtx.Lock() 259 defer cs.mtx.Unlock() 260 261 switch ti.Step { 262 case cstypes.RoundStepNewHeight: 263 // NewRound event fired from enterNewRound. 264 // XXX: should we fire timeout here (for timeout commit)? 265 cs.enterNewHeight(ti.Height) 266 case cstypes.RoundStepNewRound: 267 cs.enterPropose(ti.Height, 0) 268 case cstypes.RoundStepPropose: 269 cs.eventBus.PublishEventTimeoutPropose(cs.RoundStateEvent()) 270 cs.enterPrevote(ti.Height, ti.Round) 271 case cstypes.RoundStepPrevoteWait: 272 cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()) 273 cs.enterPrecommit(ti.Height, ti.Round) 274 case cstypes.RoundStepPrecommitWait: 275 cs.eventBus.PublishEventTimeoutWait(cs.RoundStateEvent()) 276 cs.enterPrecommit(ti.Height, ti.Round) 277 cs.enterNewRound(ti.Height, ti.Round+1) 278 default: 279 panic(fmt.Sprintf("Invalid timeout step: %v", ti.Step)) 280 } 281 282 } 283 284 // enterNewRound(height, 0) at cs.StartTime. 285 func (cs *State) scheduleRound0(rs *cstypes.RoundState) { 286 overDuration := tmtime.Now().Sub(cs.StartTime) 287 if overDuration < 0 { 288 overDuration = 0 289 } 290 sleepDuration := cfg.DynamicConfig.GetCsTimeoutCommit() - overDuration 291 if sleepDuration < 0 { 292 sleepDuration = 0 293 } 294 295 if !cs.config.Waiting { 296 sleepDuration = 0 297 } 298 299 if GetActiveVC() && cs.privValidator != nil { 300 select { 301 case cs.preBlockTaskChan <- &preBlockTask{cs.Height, sleepDuration}: 302 default: 303 } 304 305 } 306 307 cs.scheduleTimeout(sleepDuration, rs.Height, 0, cstypes.RoundStepNewHeight) 308 } 309 310 // requestForProposer FireEvent to broadcast ProposeRequestMessage 311 func (cs *State) requestForProposer(prMsg ProposeRequestMessage) { 312 if signature, err := cs.privValidator.SignBytes(prMsg.SignBytes()); err == nil { 313 prMsg.Signature = signature 314 cs.evsw.FireEvent(types.EventProposeRequest, &prMsg) 315 } else { 316 cs.Logger.Error("requestForProposer", "err", err) 317 } 318 } 319 320 // Attempt to schedule a timeout (by sending timeoutInfo on the tickChan) 321 func (cs *State) scheduleTimeout(duration time.Duration, height int64, round int, step cstypes.RoundStepType) { 322 cs.timeoutTicker.ScheduleTimeout(timeoutInfo{Duration: duration, Height: height, Round: round, Step: step}) 323 } 324 325 // send a msg into the receiveRoutine regarding our own proposal, block part, or vote 326 func (cs *State) sendInternalMessage(mi msgInfo) { 327 select { 328 case cs.internalMsgQueue <- mi: 329 default: 330 // NOTE: using the go-routine means our votes can 331 // be processed out of order. 332 // TODO: use CList here for strict determinism and 333 // attempt push to internalMsgQueue in receiveRoutine 334 cs.Logger.Info("Internal msg queue is full. Using a go-routine") 335 go func() { cs.internalMsgQueue <- mi }() 336 } 337 } 338 339 func (cs *State) handleTxsAvailable() { 340 cs.mtx.Lock() 341 defer cs.mtx.Unlock() 342 343 // We only need to do this for round 0. 344 if cs.Round != 0 { 345 return 346 } 347 348 switch cs.Step { 349 case cstypes.RoundStepNewHeight: // timeoutCommit phase 350 if cs.needProofBlock(cs.Height) { 351 // enterPropose will be called by enterNewRound 352 return 353 } 354 355 // +1ms to ensure RoundStepNewRound timeout always happens after RoundStepNewHeight 356 timeoutCommit := cs.StartTime.Sub(tmtime.Now()) + 1*time.Millisecond 357 cs.scheduleTimeout(timeoutCommit, cs.Height, 0, cstypes.RoundStepNewRound) 358 case cstypes.RoundStepNewRound: // after timeoutCommit 359 cs.enterPropose(cs.Height, 0) 360 } 361 } 362 363 func (cs *State) preMakeBlockRoutine() { 364 for { 365 select { 366 case task := <-cs.preBlockTaskChan: 367 if task.height == cs.Height { 368 cs.preMakeBlock(task.height, task.duration) 369 } 370 case <-cs.Quit(): 371 return 372 } 373 } 374 }