github.com/vipernet-xyz/tendermint-core@v0.32.0/blockchain/v1/reactor_fsm.go (about) 1 package v1 2 3 import ( 4 "errors" 5 "fmt" 6 "sync" 7 "time" 8 9 "github.com/tendermint/tendermint/libs/log" 10 "github.com/tendermint/tendermint/p2p" 11 "github.com/tendermint/tendermint/types" 12 ) 13 14 // Blockchain Reactor State 15 type bcReactorFSMState struct { 16 name string 17 18 // called when transitioning out of current state 19 handle func(*BcReactorFSM, bReactorEvent, bReactorEventData) (next *bcReactorFSMState, err error) 20 // called when entering the state 21 enter func(fsm *BcReactorFSM) 22 23 // timeout to ensure FSM is not stuck in a state forever 24 // the timer is owned and run by the fsm instance 25 timeout time.Duration 26 } 27 28 func (s *bcReactorFSMState) String() string { 29 return s.name 30 } 31 32 // BcReactorFSM is the datastructure for the Blockchain Reactor State Machine 33 type BcReactorFSM struct { 34 logger log.Logger 35 mtx sync.Mutex 36 37 startTime time.Time 38 39 state *bcReactorFSMState 40 stateTimer *time.Timer 41 pool *BlockPool 42 43 // interface used to call the Blockchain reactor to send StatusRequest, BlockRequest, reporting errors, etc. 44 toBcR bcReactor 45 } 46 47 // NewFSM creates a new reactor FSM. 48 func NewFSM(height int64, toBcR bcReactor) *BcReactorFSM { 49 return &BcReactorFSM{ 50 state: unknown, 51 startTime: time.Now(), 52 pool: NewBlockPool(height, toBcR), 53 toBcR: toBcR, 54 } 55 } 56 57 // bReactorEventData is part of the message sent by the reactor to the FSM and used by the state handlers. 58 type bReactorEventData struct { 59 peerID p2p.ID 60 err error // for peer error: timeout, slow; for processed block event if error occurred 61 base int64 // for status response 62 height int64 // for status response; for processed block event 63 block *types.Block // for block response 64 stateName string // for state timeout events 65 length int // for block response event, length of received block, used to detect slow peers 66 maxNumRequests int // for request needed event, maximum number of pending requests 67 } 68 69 // Blockchain Reactor Events (the input to the state machine) 70 type bReactorEvent uint 71 72 const ( 73 // message type events 74 startFSMEv = iota + 1 75 statusResponseEv 76 blockResponseEv 77 processedBlockEv 78 makeRequestsEv 79 stopFSMEv 80 81 // other events 82 peerRemoveEv = iota + 256 83 stateTimeoutEv 84 ) 85 86 func (msg *bcReactorMessage) String() string { 87 var dataStr string 88 89 switch msg.event { 90 case startFSMEv: 91 dataStr = "" 92 case statusResponseEv: 93 dataStr = fmt.Sprintf("peer=%v base=%v height=%v", msg.data.peerID, msg.data.base, msg.data.height) 94 case blockResponseEv: 95 dataStr = fmt.Sprintf("peer=%v block.height=%v length=%v", 96 msg.data.peerID, msg.data.block.Height, msg.data.length) 97 case processedBlockEv: 98 dataStr = fmt.Sprintf("error=%v", msg.data.err) 99 case makeRequestsEv: 100 dataStr = "" 101 case stopFSMEv: 102 dataStr = "" 103 case peerRemoveEv: 104 dataStr = fmt.Sprintf("peer: %v is being removed by the switch", msg.data.peerID) 105 case stateTimeoutEv: 106 dataStr = fmt.Sprintf("state=%v", msg.data.stateName) 107 default: 108 dataStr = fmt.Sprintf("cannot interpret message data") 109 } 110 111 return fmt.Sprintf("%v: %v", msg.event, dataStr) 112 } 113 114 func (ev bReactorEvent) String() string { 115 switch ev { 116 case startFSMEv: 117 return "startFSMEv" 118 case statusResponseEv: 119 return "statusResponseEv" 120 case blockResponseEv: 121 return "blockResponseEv" 122 case processedBlockEv: 123 return "processedBlockEv" 124 case makeRequestsEv: 125 return "makeRequestsEv" 126 case stopFSMEv: 127 return "stopFSMEv" 128 case peerRemoveEv: 129 return "peerRemoveEv" 130 case stateTimeoutEv: 131 return "stateTimeoutEv" 132 default: 133 return "event unknown" 134 } 135 136 } 137 138 // states 139 var ( 140 unknown *bcReactorFSMState 141 waitForPeer *bcReactorFSMState 142 waitForBlock *bcReactorFSMState 143 finished *bcReactorFSMState 144 ) 145 146 // timeouts for state timers 147 const ( 148 waitForPeerTimeout = 10 * time.Second 149 waitForBlockAtCurrentHeightTimeout = 60 * time.Second 150 ) 151 152 // errors 153 var ( 154 // internal to the package 155 errNoErrorFinished = errors.New("fast sync is finished") 156 errInvalidEvent = errors.New("invalid event in current state") 157 errMissingBlock = errors.New("missing blocks") 158 errNilPeerForBlockRequest = errors.New("peer for block request does not exist in the switch") 159 errSendQueueFull = errors.New("block request not made, send-queue is full") 160 errPeerTooShort = errors.New("peer height too low, old peer removed/ new peer not added") 161 errSwitchRemovesPeer = errors.New("switch is removing peer") 162 errTimeoutEventWrongState = errors.New("timeout event for a state different than the current one") 163 errNoTallerPeer = errors.New("fast sync timed out on waiting for a peer taller than this node") 164 165 // reported eventually to the switch 166 // handle return 167 errPeerLowersItsHeight = errors.New("fast sync peer reports a height lower than previous") 168 // handle return 169 errNoPeerResponseForCurrentHeights = errors.New("fast sync timed out on peer block response for current heights") 170 errNoPeerResponse = errors.New("fast sync timed out on peer block response") // xx 171 errBadDataFromPeer = errors.New("fast sync received block from wrong peer or block is bad") // xx 172 errDuplicateBlock = errors.New("fast sync received duplicate block from peer") 173 errBlockVerificationFailure = errors.New("fast sync block verification failure") // xx 174 errSlowPeer = errors.New("fast sync peer is not sending us data fast enough") // xx 175 176 ) 177 178 func init() { 179 unknown = &bcReactorFSMState{ 180 name: "unknown", 181 handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { 182 switch ev { 183 case startFSMEv: 184 // Broadcast Status message. Currently doesn't return non-nil error. 185 fsm.toBcR.sendStatusRequest() 186 return waitForPeer, nil 187 188 case stopFSMEv: 189 return finished, errNoErrorFinished 190 191 default: 192 return unknown, errInvalidEvent 193 } 194 }, 195 } 196 197 waitForPeer = &bcReactorFSMState{ 198 name: "waitForPeer", 199 timeout: waitForPeerTimeout, 200 enter: func(fsm *BcReactorFSM) { 201 // Stop when leaving the state. 202 fsm.resetStateTimer() 203 }, 204 handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { 205 switch ev { 206 case stateTimeoutEv: 207 if data.stateName != "waitForPeer" { 208 fsm.logger.Error("received a state timeout event for different state", 209 "state", data.stateName) 210 return waitForPeer, errTimeoutEventWrongState 211 } 212 // There was no statusResponse received from any peer. 213 // Should we send status request again? 214 return finished, errNoTallerPeer 215 216 case statusResponseEv: 217 if err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height); err != nil { 218 if fsm.pool.NumPeers() == 0 { 219 return waitForPeer, err 220 } 221 } 222 if fsm.stateTimer != nil { 223 fsm.stateTimer.Stop() 224 } 225 return waitForBlock, nil 226 227 case stopFSMEv: 228 if fsm.stateTimer != nil { 229 fsm.stateTimer.Stop() 230 } 231 return finished, errNoErrorFinished 232 233 default: 234 return waitForPeer, errInvalidEvent 235 } 236 }, 237 } 238 239 waitForBlock = &bcReactorFSMState{ 240 name: "waitForBlock", 241 timeout: waitForBlockAtCurrentHeightTimeout, 242 enter: func(fsm *BcReactorFSM) { 243 // Stop when leaving the state. 244 fsm.resetStateTimer() 245 }, 246 handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { 247 switch ev { 248 249 case statusResponseEv: 250 err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height) 251 if fsm.pool.NumPeers() == 0 { 252 return waitForPeer, err 253 } 254 if fsm.pool.ReachedMaxHeight() { 255 return finished, err 256 } 257 return waitForBlock, err 258 259 case blockResponseEv: 260 fsm.logger.Debug("blockResponseEv", "H", data.block.Height) 261 err := fsm.pool.AddBlock(data.peerID, data.block, data.length) 262 if err != nil { 263 // A block was received that was unsolicited, from unexpected peer, or that we already have it. 264 // Ignore block, remove peer and send error to switch. 265 fsm.pool.RemovePeer(data.peerID, err) 266 fsm.toBcR.sendPeerError(err, data.peerID) 267 } 268 if fsm.pool.NumPeers() == 0 { 269 return waitForPeer, err 270 } 271 return waitForBlock, err 272 273 case processedBlockEv: 274 if data.err != nil { 275 first, second, _ := fsm.pool.FirstTwoBlocksAndPeers() 276 fsm.logger.Error("error processing block", "err", data.err, 277 "first", first.block.Height, "second", second.block.Height) 278 fsm.logger.Error("send peer error for", "peer", first.peer.ID) 279 fsm.toBcR.sendPeerError(data.err, first.peer.ID) 280 fsm.logger.Error("send peer error for", "peer", second.peer.ID) 281 fsm.toBcR.sendPeerError(data.err, second.peer.ID) 282 // Remove the first two blocks. This will also remove the peers 283 fsm.pool.InvalidateFirstTwoBlocks(data.err) 284 } else { 285 fsm.pool.ProcessedCurrentHeightBlock() 286 // Since we advanced one block reset the state timer 287 fsm.resetStateTimer() 288 } 289 290 // Both cases above may result in achieving maximum height. 291 if fsm.pool.ReachedMaxHeight() { 292 return finished, nil 293 } 294 295 return waitForBlock, data.err 296 297 case peerRemoveEv: 298 // This event is sent by the switch to remove disconnected and errored peers. 299 fsm.pool.RemovePeer(data.peerID, data.err) 300 if fsm.pool.NumPeers() == 0 { 301 return waitForPeer, nil 302 } 303 if fsm.pool.ReachedMaxHeight() { 304 return finished, nil 305 } 306 return waitForBlock, nil 307 308 case makeRequestsEv: 309 fsm.makeNextRequests(data.maxNumRequests) 310 return waitForBlock, nil 311 312 case stateTimeoutEv: 313 if data.stateName != "waitForBlock" { 314 fsm.logger.Error("received a state timeout event for different state", 315 "state", data.stateName) 316 return waitForBlock, errTimeoutEventWrongState 317 } 318 // We haven't received the block at current height or height+1. Remove peer. 319 fsm.pool.RemovePeerAtCurrentHeights(errNoPeerResponseForCurrentHeights) 320 fsm.resetStateTimer() 321 if fsm.pool.NumPeers() == 0 { 322 return waitForPeer, errNoPeerResponseForCurrentHeights 323 } 324 if fsm.pool.ReachedMaxHeight() { 325 return finished, nil 326 } 327 return waitForBlock, errNoPeerResponseForCurrentHeights 328 329 case stopFSMEv: 330 if fsm.stateTimer != nil { 331 fsm.stateTimer.Stop() 332 } 333 return finished, errNoErrorFinished 334 335 default: 336 return waitForBlock, errInvalidEvent 337 } 338 }, 339 } 340 341 finished = &bcReactorFSMState{ 342 name: "finished", 343 enter: func(fsm *BcReactorFSM) { 344 fsm.logger.Info("Time to switch to consensus reactor!", "height", fsm.pool.Height) 345 fsm.toBcR.switchToConsensus() 346 fsm.cleanup() 347 }, 348 handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { 349 return finished, nil 350 }, 351 } 352 } 353 354 // Interface used by FSM for sending Block and Status requests, 355 // informing of peer errors and state timeouts 356 // Implemented by BlockchainReactor and tests 357 type bcReactor interface { 358 sendStatusRequest() 359 sendBlockRequest(peerID p2p.ID, height int64) error 360 sendPeerError(err error, peerID p2p.ID) 361 resetStateTimer(name string, timer **time.Timer, timeout time.Duration) 362 switchToConsensus() 363 } 364 365 // SetLogger sets the FSM logger. 366 func (fsm *BcReactorFSM) SetLogger(l log.Logger) { 367 fsm.logger = l 368 fsm.pool.SetLogger(l) 369 } 370 371 // Start starts the FSM. 372 func (fsm *BcReactorFSM) Start() { 373 _ = fsm.Handle(&bcReactorMessage{event: startFSMEv}) 374 } 375 376 // Handle processes messages and events sent to the FSM. 377 func (fsm *BcReactorFSM) Handle(msg *bcReactorMessage) error { 378 fsm.mtx.Lock() 379 defer fsm.mtx.Unlock() 380 fsm.logger.Debug("FSM received", "event", msg, "state", fsm.state) 381 382 if fsm.state == nil { 383 fsm.state = unknown 384 } 385 next, err := fsm.state.handle(fsm, msg.event, msg.data) 386 if err != nil { 387 fsm.logger.Error("FSM event handler returned", "err", err, 388 "state", fsm.state, "event", msg.event) 389 } 390 391 oldState := fsm.state.name 392 fsm.transition(next) 393 if oldState != fsm.state.name { 394 fsm.logger.Info("FSM changed state", "new_state", fsm.state) 395 } 396 return err 397 } 398 399 func (fsm *BcReactorFSM) transition(next *bcReactorFSMState) { 400 if next == nil { 401 return 402 } 403 if fsm.state != next { 404 fsm.state = next 405 if next.enter != nil { 406 next.enter(fsm) 407 } 408 } 409 } 410 411 // Called when entering an FSM state in order to detect lack of progress in the state machine. 412 // Note the use of the 'bcr' interface to facilitate testing without timer expiring. 413 func (fsm *BcReactorFSM) resetStateTimer() { 414 fsm.toBcR.resetStateTimer(fsm.state.name, &fsm.stateTimer, fsm.state.timeout) 415 } 416 417 func (fsm *BcReactorFSM) isCaughtUp() bool { 418 return fsm.state == finished 419 } 420 421 func (fsm *BcReactorFSM) makeNextRequests(maxNumRequests int) { 422 fsm.pool.MakeNextRequests(maxNumRequests) 423 } 424 425 func (fsm *BcReactorFSM) cleanup() { 426 fsm.pool.Cleanup() 427 } 428 429 // NeedsBlocks checks if more block requests are required. 430 func (fsm *BcReactorFSM) NeedsBlocks() bool { 431 fsm.mtx.Lock() 432 defer fsm.mtx.Unlock() 433 return fsm.state.name == "waitForBlock" && fsm.pool.NeedsBlocks() 434 } 435 436 // FirstTwoBlocks returns the two blocks at pool height and height+1 437 func (fsm *BcReactorFSM) FirstTwoBlocks() (first, second *types.Block, err error) { 438 fsm.mtx.Lock() 439 defer fsm.mtx.Unlock() 440 firstBP, secondBP, err := fsm.pool.FirstTwoBlocksAndPeers() 441 if err == nil { 442 first = firstBP.block 443 second = secondBP.block 444 } 445 return 446 } 447 448 // Status returns the pool's height and the maximum peer height. 449 func (fsm *BcReactorFSM) Status() (height, maxPeerHeight int64) { 450 fsm.mtx.Lock() 451 defer fsm.mtx.Unlock() 452 return fsm.pool.Height, fsm.pool.MaxPeerHeight 453 }