github.com/DFWallet/tendermint-cosmos@v0.0.2/blockchain/v1/reactor_fsm.go (about) 1 package v1 2 3 import ( 4 "errors" 5 "fmt" 6 "sync" 7 "time" 8 9 "github.com/DFWallet/tendermint-cosmos/libs/log" 10 "github.com/DFWallet/tendermint-cosmos/p2p" 11 "github.com/DFWallet/tendermint-cosmos/types" 12 ) 13 14 // Blockchain Reactor State 15 type bcReactorFSMState struct { 16 name string 17 18 // called when transitioning out of current state 19 handle func(*BcReactorFSM, bReactorEvent, bReactorEventData) (next *bcReactorFSMState, err error) 20 // called when entering the state 21 enter func(fsm *BcReactorFSM) 22 23 // timeout to ensure FSM is not stuck in a state forever 24 // the timer is owned and run by the fsm instance 25 timeout time.Duration 26 } 27 28 func (s *bcReactorFSMState) String() string { 29 return s.name 30 } 31 32 // BcReactorFSM is the datastructure for the Blockchain Reactor State Machine 33 type BcReactorFSM struct { 34 logger log.Logger 35 mtx sync.Mutex 36 37 startTime time.Time 38 39 state *bcReactorFSMState 40 stateTimer *time.Timer 41 pool *BlockPool 42 43 // interface used to call the Blockchain reactor to send StatusRequest, BlockRequest, reporting errors, etc. 44 toBcR bcReactor 45 } 46 47 // NewFSM creates a new reactor FSM. 48 func NewFSM(height int64, toBcR bcReactor) *BcReactorFSM { 49 return &BcReactorFSM{ 50 state: unknown, 51 startTime: time.Now(), 52 pool: NewBlockPool(height, toBcR), 53 toBcR: toBcR, 54 } 55 } 56 57 // bReactorEventData is part of the message sent by the reactor to the FSM and used by the state handlers. 58 type bReactorEventData struct { 59 peerID p2p.ID 60 err error // for peer error: timeout, slow; for processed block event if error occurred 61 base int64 // for status response 62 height int64 // for status response; for processed block event 63 block *types.Block // for block response 64 stateName string // for state timeout events 65 length int // for block response event, length of received block, used to detect slow peers 66 maxNumRequests int // for request needed event, maximum number of pending requests 67 } 68 69 // Blockchain Reactor Events (the input to the state machine) 70 type bReactorEvent uint 71 72 const ( 73 // message type events 74 startFSMEv = iota + 1 75 statusResponseEv 76 blockResponseEv 77 noBlockResponseEv 78 processedBlockEv 79 makeRequestsEv 80 stopFSMEv 81 82 // other events 83 peerRemoveEv = iota + 256 84 stateTimeoutEv 85 ) 86 87 func (msg *bcReactorMessage) String() string { 88 var dataStr string 89 90 switch msg.event { 91 case startFSMEv: 92 dataStr = "" 93 case statusResponseEv: 94 dataStr = fmt.Sprintf("peer=%v base=%v height=%v", msg.data.peerID, msg.data.base, msg.data.height) 95 case blockResponseEv: 96 dataStr = fmt.Sprintf("peer=%v block.height=%v length=%v", 97 msg.data.peerID, msg.data.block.Height, msg.data.length) 98 case noBlockResponseEv: 99 dataStr = fmt.Sprintf("peer=%v requested height=%v", 100 msg.data.peerID, msg.data.height) 101 case processedBlockEv: 102 dataStr = fmt.Sprintf("error=%v", msg.data.err) 103 case makeRequestsEv: 104 dataStr = "" 105 case stopFSMEv: 106 dataStr = "" 107 case peerRemoveEv: 108 dataStr = fmt.Sprintf("peer: %v is being removed by the switch", msg.data.peerID) 109 case stateTimeoutEv: 110 dataStr = fmt.Sprintf("state=%v", msg.data.stateName) 111 default: 112 dataStr = "cannot interpret message data" 113 } 114 115 return fmt.Sprintf("%v: %v", msg.event, dataStr) 116 } 117 118 func (ev bReactorEvent) String() string { 119 switch ev { 120 case startFSMEv: 121 return "startFSMEv" 122 case statusResponseEv: 123 return "statusResponseEv" 124 case blockResponseEv: 125 return "blockResponseEv" 126 case noBlockResponseEv: 127 return "noBlockResponseEv" 128 case processedBlockEv: 129 return "processedBlockEv" 130 case makeRequestsEv: 131 return "makeRequestsEv" 132 case stopFSMEv: 133 return "stopFSMEv" 134 case peerRemoveEv: 135 return "peerRemoveEv" 136 case stateTimeoutEv: 137 return "stateTimeoutEv" 138 default: 139 return "event unknown" 140 } 141 142 } 143 144 // states 145 var ( 146 unknown *bcReactorFSMState 147 waitForPeer *bcReactorFSMState 148 waitForBlock *bcReactorFSMState 149 finished *bcReactorFSMState 150 ) 151 152 // timeouts for state timers 153 const ( 154 waitForPeerTimeout = 3 * time.Second 155 waitForBlockAtCurrentHeightTimeout = 10 * time.Second 156 ) 157 158 // errors 159 var ( 160 // internal to the package 161 errNoErrorFinished = errors.New("fast sync is finished") 162 errInvalidEvent = errors.New("invalid event in current state") 163 errMissingBlock = errors.New("missing blocks") 164 errNilPeerForBlockRequest = errors.New("peer for block request does not exist in the switch") 165 errSendQueueFull = errors.New("block request not made, send-queue is full") 166 errPeerTooShort = errors.New("peer height too low, old peer removed/ new peer not added") 167 errSwitchRemovesPeer = errors.New("switch is removing peer") 168 errTimeoutEventWrongState = errors.New("timeout event for a state different than the current one") 169 errNoTallerPeer = errors.New("fast sync timed out on waiting for a peer taller than this node") 170 171 // reported eventually to the switch 172 // handle return 173 errPeerLowersItsHeight = errors.New("fast sync peer reports a height lower than previous") 174 // handle return 175 errNoPeerResponseForCurrentHeights = errors.New("fast sync timed out on peer block response for current heights") 176 errNoPeerResponse = errors.New("fast sync timed out on peer block response") // xx 177 errBadDataFromPeer = errors.New("fast sync received block from wrong peer or block is bad") // xx 178 errDuplicateBlock = errors.New("fast sync received duplicate block from peer") 179 errBlockVerificationFailure = errors.New("fast sync block verification failure") // xx 180 errSlowPeer = errors.New("fast sync peer is not sending us data fast enough") // xx 181 182 ) 183 184 func init() { 185 unknown = &bcReactorFSMState{ 186 name: "unknown", 187 handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { 188 switch ev { 189 case startFSMEv: 190 // Broadcast Status message. Currently doesn't return non-nil error. 191 fsm.toBcR.sendStatusRequest() 192 return waitForPeer, nil 193 194 case stopFSMEv: 195 return finished, errNoErrorFinished 196 197 default: 198 return unknown, errInvalidEvent 199 } 200 }, 201 } 202 203 waitForPeer = &bcReactorFSMState{ 204 name: "waitForPeer", 205 timeout: waitForPeerTimeout, 206 enter: func(fsm *BcReactorFSM) { 207 // Stop when leaving the state. 208 fsm.resetStateTimer() 209 }, 210 handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { 211 switch ev { 212 case stateTimeoutEv: 213 if data.stateName != "waitForPeer" { 214 fsm.logger.Error("received a state timeout event for different state", 215 "state", data.stateName) 216 return waitForPeer, errTimeoutEventWrongState 217 } 218 // There was no statusResponse received from any peer. 219 // Should we send status request again? 220 return finished, errNoTallerPeer 221 222 case statusResponseEv: 223 if err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height); err != nil { 224 if fsm.pool.NumPeers() == 0 { 225 return waitForPeer, err 226 } 227 } 228 if fsm.stateTimer != nil { 229 fsm.stateTimer.Stop() 230 } 231 return waitForBlock, nil 232 233 case stopFSMEv: 234 if fsm.stateTimer != nil { 235 fsm.stateTimer.Stop() 236 } 237 return finished, errNoErrorFinished 238 239 default: 240 return waitForPeer, errInvalidEvent 241 } 242 }, 243 } 244 245 waitForBlock = &bcReactorFSMState{ 246 name: "waitForBlock", 247 timeout: waitForBlockAtCurrentHeightTimeout, 248 enter: func(fsm *BcReactorFSM) { 249 // Stop when leaving the state. 250 fsm.resetStateTimer() 251 }, 252 handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { 253 switch ev { 254 255 case statusResponseEv: 256 err := fsm.pool.UpdatePeer(data.peerID, data.base, data.height) 257 if fsm.pool.NumPeers() == 0 { 258 return waitForPeer, err 259 } 260 if fsm.pool.ReachedMaxHeight() { 261 return finished, err 262 } 263 return waitForBlock, err 264 265 case blockResponseEv: 266 fsm.logger.Debug("blockResponseEv", "H", data.block.Height) 267 err := fsm.pool.AddBlock(data.peerID, data.block, data.length) 268 if err != nil { 269 // A block was received that was unsolicited, from unexpected peer, or that we already have it. 270 // Ignore block, remove peer and send error to switch. 271 fsm.pool.RemovePeer(data.peerID, err) 272 fsm.toBcR.sendPeerError(err, data.peerID) 273 } 274 if fsm.pool.NumPeers() == 0 { 275 return waitForPeer, err 276 } 277 return waitForBlock, err 278 case noBlockResponseEv: 279 fsm.logger.Error("peer does not have requested block", "peer", data.peerID) 280 281 return waitForBlock, nil 282 case processedBlockEv: 283 if data.err != nil { 284 first, second, _ := fsm.pool.FirstTwoBlocksAndPeers() 285 fsm.logger.Error("error processing block", "err", data.err, 286 "first", first.block.Height, "second", second.block.Height) 287 fsm.logger.Error("send peer error for", "peer", first.peer.ID) 288 fsm.toBcR.sendPeerError(data.err, first.peer.ID) 289 fsm.logger.Error("send peer error for", "peer", second.peer.ID) 290 fsm.toBcR.sendPeerError(data.err, second.peer.ID) 291 // Remove the first two blocks. This will also remove the peers 292 fsm.pool.InvalidateFirstTwoBlocks(data.err) 293 } else { 294 fsm.pool.ProcessedCurrentHeightBlock() 295 // Since we advanced one block reset the state timer 296 fsm.resetStateTimer() 297 } 298 299 // Both cases above may result in achieving maximum height. 300 if fsm.pool.ReachedMaxHeight() { 301 return finished, nil 302 } 303 304 return waitForBlock, data.err 305 306 case peerRemoveEv: 307 // This event is sent by the switch to remove disconnected and errored peers. 308 fsm.pool.RemovePeer(data.peerID, data.err) 309 if fsm.pool.NumPeers() == 0 { 310 return waitForPeer, nil 311 } 312 if fsm.pool.ReachedMaxHeight() { 313 return finished, nil 314 } 315 return waitForBlock, nil 316 317 case makeRequestsEv: 318 fsm.makeNextRequests(data.maxNumRequests) 319 return waitForBlock, nil 320 321 case stateTimeoutEv: 322 if data.stateName != "waitForBlock" { 323 fsm.logger.Error("received a state timeout event for different state", 324 "state", data.stateName) 325 return waitForBlock, errTimeoutEventWrongState 326 } 327 // We haven't received the block at current height or height+1. Remove peer. 328 fsm.pool.RemovePeerAtCurrentHeights(errNoPeerResponseForCurrentHeights) 329 fsm.resetStateTimer() 330 if fsm.pool.NumPeers() == 0 { 331 return waitForPeer, errNoPeerResponseForCurrentHeights 332 } 333 if fsm.pool.ReachedMaxHeight() { 334 return finished, nil 335 } 336 return waitForBlock, errNoPeerResponseForCurrentHeights 337 338 case stopFSMEv: 339 if fsm.stateTimer != nil { 340 fsm.stateTimer.Stop() 341 } 342 return finished, errNoErrorFinished 343 344 default: 345 return waitForBlock, errInvalidEvent 346 } 347 }, 348 } 349 350 finished = &bcReactorFSMState{ 351 name: "finished", 352 enter: func(fsm *BcReactorFSM) { 353 fsm.logger.Info("Time to switch to consensus reactor!", "height", fsm.pool.Height) 354 fsm.toBcR.switchToConsensus() 355 fsm.cleanup() 356 }, 357 handle: func(fsm *BcReactorFSM, ev bReactorEvent, data bReactorEventData) (*bcReactorFSMState, error) { 358 return finished, nil 359 }, 360 } 361 } 362 363 // Interface used by FSM for sending Block and Status requests, 364 // informing of peer errors and state timeouts 365 // Implemented by BlockchainReactor and tests 366 type bcReactor interface { 367 sendStatusRequest() 368 sendBlockRequest(peerID p2p.ID, height int64) error 369 sendPeerError(err error, peerID p2p.ID) 370 resetStateTimer(name string, timer **time.Timer, timeout time.Duration) 371 switchToConsensus() 372 } 373 374 // SetLogger sets the FSM logger. 375 func (fsm *BcReactorFSM) SetLogger(l log.Logger) { 376 fsm.logger = l 377 fsm.pool.SetLogger(l) 378 } 379 380 // Start starts the FSM. 381 func (fsm *BcReactorFSM) Start() { 382 _ = fsm.Handle(&bcReactorMessage{event: startFSMEv}) 383 } 384 385 // Handle processes messages and events sent to the FSM. 386 func (fsm *BcReactorFSM) Handle(msg *bcReactorMessage) error { 387 fsm.mtx.Lock() 388 defer fsm.mtx.Unlock() 389 fsm.logger.Debug("FSM received", "event", msg, "state", fsm.state) 390 391 if fsm.state == nil { 392 fsm.state = unknown 393 } 394 next, err := fsm.state.handle(fsm, msg.event, msg.data) 395 if err != nil { 396 fsm.logger.Error("FSM event handler returned", "err", err, 397 "state", fsm.state, "event", msg.event) 398 } 399 400 oldState := fsm.state.name 401 fsm.transition(next) 402 if oldState != fsm.state.name { 403 fsm.logger.Info("FSM changed state", "new_state", fsm.state) 404 } 405 return err 406 } 407 408 func (fsm *BcReactorFSM) transition(next *bcReactorFSMState) { 409 if next == nil { 410 return 411 } 412 if fsm.state != next { 413 fsm.state = next 414 if next.enter != nil { 415 next.enter(fsm) 416 } 417 } 418 } 419 420 // Called when entering an FSM state in order to detect lack of progress in the state machine. 421 // Note the use of the 'bcr' interface to facilitate testing without timer expiring. 422 func (fsm *BcReactorFSM) resetStateTimer() { 423 fsm.toBcR.resetStateTimer(fsm.state.name, &fsm.stateTimer, fsm.state.timeout) 424 } 425 426 func (fsm *BcReactorFSM) isCaughtUp() bool { 427 return fsm.state == finished 428 } 429 430 func (fsm *BcReactorFSM) makeNextRequests(maxNumRequests int) { 431 fsm.pool.MakeNextRequests(maxNumRequests) 432 } 433 434 func (fsm *BcReactorFSM) cleanup() { 435 fsm.pool.Cleanup() 436 } 437 438 // NeedsBlocks checks if more block requests are required. 439 func (fsm *BcReactorFSM) NeedsBlocks() bool { 440 fsm.mtx.Lock() 441 defer fsm.mtx.Unlock() 442 return fsm.state.name == "waitForBlock" && fsm.pool.NeedsBlocks() 443 } 444 445 // FirstTwoBlocks returns the two blocks at pool height and height+1 446 func (fsm *BcReactorFSM) FirstTwoBlocks() (first, second *types.Block, err error) { 447 fsm.mtx.Lock() 448 defer fsm.mtx.Unlock() 449 firstBP, secondBP, err := fsm.pool.FirstTwoBlocksAndPeers() 450 if err == nil { 451 first = firstBP.block 452 second = secondBP.block 453 } 454 return 455 } 456 457 // Status returns the pool's height and the maximum peer height. 458 func (fsm *BcReactorFSM) Status() (height, maxPeerHeight int64) { 459 fsm.mtx.Lock() 460 defer fsm.mtx.Unlock() 461 return fsm.pool.Height, fsm.pool.MaxPeerHeight 462 }