// Source: github.com/okex/exchain@v1.8.0/libs/tendermint/blockchain/v0/reactor.go

package v0

import (
	"errors"
	"fmt"
	"reflect"
	"sync"
	"time"

	amino "github.com/tendermint/go-amino"

	"github.com/okex/exchain/libs/tendermint/libs/log"
	"github.com/okex/exchain/libs/tendermint/p2p"
	sm "github.com/okex/exchain/libs/tendermint/state"
	"github.com/okex/exchain/libs/tendermint/store"
	"github.com/okex/exchain/libs/tendermint/types"
)

const (
	// BlockchainChannel is a channel for blocks and status updates (`BlockStore` height)
	BlockchainChannel = byte(0x40)

	// trySyncIntervalMS is the period, in milliseconds, of the ticker that
	// nudges poolRoutine to try processing the next block.
	trySyncIntervalMS = 10

	// stop syncing when last block's time is
	// within this much of the system time.
	// stopSyncingDurationMinutes = 10

	// ask for best height every 1s
	statusUpdateIntervalSeconds = 1
	// check if we should switch to consensus reactor
	switchToConsensusIntervalSeconds = 1

	// NOTE: keep up to date with bcBlockResponseMessage
	bcBlockResponseMessagePrefixSize   = 4
	bcBlockResponseMessageFieldKeySize = 1
	// maxMsgSize is the largest encoded message accepted by decodeMsg and
	// advertised as RecvMessageCapacity in GetChannels: a maximum-size block
	// plus the prefix and field-key overhead declared above.
	maxMsgSize = types.MaxBlockSizeBytes +
		bcBlockResponseMessagePrefixSize +
		bcBlockResponseMessageFieldKeySize
)

var (
	// MaxIntervalForFastSync is not referenced in this file.
	// NOTE(review): presumably a height gap consulted by the block pool when
	// deciding whether to fall back to fast sync; confirm against its users.
	MaxIntervalForFastSync int64 = 20
)

// consensusReactor is the subset of the consensus reactor this package needs;
// it is obtained at runtime via Switch.Reactor("CONSENSUS").
type consensusReactor interface {
	// SwitchToConsensus called when we switch from blockchain reactor and fast sync to
	// the consensus machine
	SwitchToConsensus(sm.State, uint64) bool

	// SwitchToFastSync called when we switch from the consensus machine to blockchain reactor and fast sync
	SwitchToFastSync() (sm.State, error)
}

// peerError pairs an error with the ID of the peer it concerns. Values are
// received from errorsCh in poolRoutine, which stops the offending peer.
type peerError struct {
	err    error
	peerID p2p.ID
}

// Error implements the error interface. It calls e.err.Error() directly, so
// e.err must be non-nil.
func (e peerError) Error() string {
	return fmt.Sprintf("error with peer %v: %s", e.peerID, e.err.Error())
}

// BlockchainReactor handles long-term catchup syncing.
65 type BlockchainReactor struct { 66 p2p.BaseReactor 67 68 // mutable 69 curState sm.State 70 71 blockExec *sm.BlockExecutor 72 store *store.BlockStore 73 pool *BlockPool 74 fastSync bool 75 autoFastSync bool 76 isSyncing bool 77 mtx sync.RWMutex 78 79 requestsCh <-chan BlockRequest 80 errorsCh <-chan peerError 81 82 finishCh chan struct{} 83 } 84 85 // NewBlockchainReactor returns new reactor instance. 86 func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *store.BlockStore, fastSync bool) *BlockchainReactor { 87 if state.LastBlockHeight != store.Height() { 88 panic(fmt.Sprintf("state (%v) and store (%v) height mismatch", state.LastBlockHeight, 89 store.Height())) 90 } 91 92 requestsCh := make(chan BlockRequest, maxTotalRequesters) 93 94 const capacity = 1000 // must be bigger than peers count 95 errorsCh := make(chan peerError, capacity) // so we don't block in #Receive#pool.AddBlock 96 97 finishCh := make(chan struct{}, 1) 98 99 pool := NewBlockPool( 100 store.Height()+1, 101 requestsCh, 102 errorsCh, 103 ) 104 105 bcR := &BlockchainReactor{ 106 curState: state, 107 blockExec: blockExec, 108 store: store, 109 pool: pool, 110 fastSync: fastSync, 111 mtx: sync.RWMutex{}, 112 requestsCh: requestsCh, 113 errorsCh: errorsCh, 114 finishCh: finishCh, 115 } 116 bcR.BaseReactor = *p2p.NewBaseReactor("BlockchainReactor", bcR) 117 return bcR 118 } 119 120 // SetLogger implements service.Service by setting the logger on reactor and pool. 121 func (bcR *BlockchainReactor) SetLogger(l log.Logger) { 122 bcR.BaseService.Logger = l 123 bcR.pool.Logger = l 124 } 125 126 // OnStart implements service.Service. 127 func (bcR *BlockchainReactor) OnStart() error { 128 if bcR.fastSync { 129 err := bcR.pool.Start() 130 if err != nil { 131 return err 132 } 133 go bcR.poolRoutine() 134 } 135 return nil 136 } 137 138 // OnStop implements service.Service. 
139 func (bcR *BlockchainReactor) OnStop() { 140 bcR.pool.Stop() 141 bcR.pool.Reset() 142 bcR.syncStopPoolRoutine() 143 } 144 145 func (bcR *BlockchainReactor) syncStopPoolRoutine() { 146 bcR.finishCh <- struct{}{} 147 for { 148 if !bcR.getIsSyncing() { 149 break 150 } 151 time.Sleep(10 * time.Millisecond) 152 } 153 } 154 155 // GetChannels implements Reactor 156 func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor { 157 return []*p2p.ChannelDescriptor{ 158 { 159 ID: BlockchainChannel, 160 Priority: 10, 161 SendQueueCapacity: 1000, 162 RecvBufferCapacity: 50 * 4096, 163 RecvMessageCapacity: maxMsgSize, 164 }, 165 } 166 } 167 168 // AddPeer implements Reactor by sending our state to peer. 169 func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) { 170 msgBytes := cdc.MustMarshalBinaryBare(&bcStatusResponseMessage{ 171 Height: bcR.store.Height(), 172 Base: bcR.store.Base(), 173 }) 174 peer.Send(BlockchainChannel, msgBytes) 175 // it's OK if send fails. will try later in poolRoutine 176 177 // peer is added to the pool once we receive the first 178 // bcStatusResponseMessage from the peer and call pool.SetPeerRange 179 } 180 181 // RemovePeer implements Reactor by removing peer from the pool. 182 func (bcR *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) { 183 bcR.pool.RemovePeer(peer.ID()) 184 } 185 186 // respondToPeer loads a block and sends it to the requesting peer, 187 // if we have it. Otherwise, we'll respond saying we don't have it. 
188 func (bcR *BlockchainReactor) respondToPeer(msg *bcBlockRequestMessage, 189 src p2p.Peer) (queued bool) { 190 191 block, blockExInfo := bcR.store.LoadBlockWithExInfo(msg.Height) 192 if block != nil { 193 msgBytes := cdc.MustMarshalBinaryBare(&bcBlockResponseMessage{Block: block, ExInfo: blockExInfo}) 194 return src.TrySend(BlockchainChannel, msgBytes) 195 } 196 197 bcR.Logger.Info("Peer asking for a block we don't have", "src", src, "height", msg.Height) 198 199 msgBytes := cdc.MustMarshalBinaryBare(&bcNoBlockResponseMessage{Height: msg.Height}) 200 return src.TrySend(BlockchainChannel, msgBytes) 201 } 202 203 // Receive implements Reactor by handling 4 types of messages (look below). 204 func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { 205 msg, err := decodeMsg(msgBytes) 206 if err != nil { 207 bcR.Logger.Error("Error decoding message", "src", src, "chId", chID, "msg", msg, "err", err, "bytes", msgBytes) 208 bcR.Switch.StopPeerForError(src, err) 209 return 210 } 211 212 if err = msg.ValidateBasic(); err != nil { 213 bcR.Logger.Error("Peer sent us invalid msg", "peer", src, "msg", msg, "err", err) 214 bcR.Switch.StopPeerForError(src, err) 215 return 216 } 217 218 bcR.Logger.Debug("Receive", "src", src, "chID", chID, "msg", msg) 219 220 switch msg := msg.(type) { 221 case *bcBlockRequestMessage: 222 bcR.respondToPeer(msg, src) 223 case *bcBlockResponseMessage: 224 bcR.Logger.Info("AddBlock.", "Height", msg.Block.Height, "Peer", src.ID()) 225 bcR.pool.AddBlock(src.ID(), msg, len(msgBytes)) 226 case *bcStatusRequestMessage: 227 // Send peer our state. 228 src.TrySend(BlockchainChannel, cdc.MustMarshalBinaryBare(&bcStatusResponseMessage{ 229 Height: bcR.store.Height(), 230 Base: bcR.store.Base(), 231 })) 232 case *bcStatusResponseMessage: 233 // Got a peer status. Unverified. 
TODO: should verify before SetPeerRange 234 shouldSync := bcR.pool.SetPeerRange(src.ID(), msg.Base, msg.Height, bcR.store.Height()) 235 // should switch to fast-sync when more than XX peers' height is greater than store.Height 236 if shouldSync { 237 bcR.Logger.Info("ShouldSync.", "Status peer", msg.Height, "now", bcR.store.Height()) 238 go bcR.poolRoutine() 239 } 240 case *bcNoBlockResponseMessage: 241 bcR.Logger.Debug("Peer does not have requested block", "peer", src, "height", msg.Height) 242 default: 243 bcR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) 244 } 245 } 246 247 // Handle messages from the poolReactor telling the reactor what to do. 248 // NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down! 249 func (bcR *BlockchainReactor) poolRoutine() { 250 bcR.mtx.Lock() 251 if bcR.isSyncing { 252 bcR.mtx.Unlock() 253 return 254 } 255 bcR.isSyncing = true 256 bcR.mtx.Unlock() 257 258 defer func() { 259 bcR.setIsSyncing(false) 260 }() 261 262 conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor) 263 if ok { 264 conState, err := conR.SwitchToFastSync() 265 if err == nil { 266 bcR.curState = conState 267 } 268 } 269 chainID := bcR.curState.ChainID 270 271 bcR.pool.SetHeight(bcR.curState.LastBlockHeight + 1) 272 bcR.pool.Stop() 273 bcR.pool.Reset() 274 bcR.pool.Start() 275 276 blocksSynced := uint64(0) 277 278 lastHundred := time.Now() 279 lastRate := 0.0 280 281 switchToConsensusTicker := time.NewTicker(switchToConsensusIntervalSeconds * time.Second) 282 trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond) 283 statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second) 284 285 didProcessCh := make(chan struct{}, 1) 286 287 go func() { 288 for { 289 select { 290 case <-bcR.Quit(): 291 return 292 case <-bcR.pool.Quit(): 293 return 294 case request := <-bcR.requestsCh: 295 peer := bcR.Switch.Peers().Get(request.PeerID) 296 if peer == nil { 297 continue 298 } 299 msgBytes := 
cdc.MustMarshalBinaryBare(&bcBlockRequestMessage{request.Height}) 300 queued := peer.TrySend(BlockchainChannel, msgBytes) 301 if !queued { 302 bcR.Logger.Debug("Send queue is full, drop block request", "peer", peer.ID(), "height", request.Height) 303 } 304 case err := <-bcR.errorsCh: 305 peer := bcR.Switch.Peers().Get(err.peerID) 306 if peer != nil { 307 bcR.Switch.StopPeerForError(peer, err) 308 } 309 310 case <-statusUpdateTicker.C: 311 // ask for status updates 312 go bcR.BroadcastStatusRequest() // nolint: errcheck 313 } 314 } 315 }() 316 317 FOR_LOOP: 318 for { 319 select { 320 case <-switchToConsensusTicker.C: 321 if bcR.SwitchToConsensus(bcR.curState) { 322 break FOR_LOOP 323 } 324 325 case <-trySyncTicker.C: // chan time 326 select { 327 case didProcessCh <- struct{}{}: 328 default: 329 } 330 331 case <-didProcessCh: 332 // NOTE: It is a subtle mistake to process more than a single block 333 // at a time (e.g. 10) here, because we only TrySend 1 request per 334 // loop. The ratio mismatch can result in starving of blocks, a 335 // sudden burst of requests and responses, and repeat. 336 // Consequently, it is better to split these routines rather than 337 // coupling them as it's written here. TODO uncouple from request 338 // routine. 339 340 // See if there are any blocks to sync. 341 first, second, firstParts := bcR.pool.PeekTwoBlocks() 342 //bcR.Logger.Info("TrySync peeked", "first", first, "second", second) 343 if first == nil || second == nil { 344 // We need both to sync the first block. 345 continue FOR_LOOP 346 } else { 347 // Try again quickly next loop. 
348 didProcessCh <- struct{}{} 349 } 350 bcR.Logger.Info("PeekTwoBlocks.", "First", first.Height, "Second", second.Height) 351 352 firstPartsHeader := firstParts.Header() 353 firstID := types.BlockID{Hash: first.Hash(), PartsHeader: firstPartsHeader} 354 // Finally, verify the first block using the second's commit 355 // NOTE: we can probably make this more efficient, but note that calling 356 // first.Hash() doesn't verify the tx contents, so MakePartSet() is 357 // currently necessary. 358 err := bcR.curState.Validators.VerifyCommitLight( 359 chainID, firstID, first.Height, second.LastCommit) 360 if err != nil { 361 bcR.Logger.Error("Error in validation", "err", err) 362 peerID := bcR.pool.RedoRequest(first.Height) 363 peer := bcR.Switch.Peers().Get(peerID) 364 if peer != nil { 365 // NOTE: we've already removed the peer's request, but we 366 // still need to clean up the rest. 367 bcR.Switch.StopPeerForError(peer, fmt.Errorf("blockchainReactor validation error: %v", err)) 368 } 369 peerID2 := bcR.pool.RedoRequest(second.Height) 370 peer2 := bcR.Switch.Peers().Get(peerID2) 371 if peer2 != nil && peer2 != peer { 372 // NOTE: we've already removed the peer's request, but we 373 // still need to clean up the rest. 374 bcR.Switch.StopPeerForError(peer2, fmt.Errorf("blockchainReactor validation error: %v", err)) 375 } 376 continue FOR_LOOP 377 } else { 378 bcR.pool.PopRequest() 379 380 // TODO: batch saves so we dont persist to disk every block 381 bcR.store.SaveBlock(first, firstParts, second.LastCommit) 382 383 // TODO: same thing for app - but we would need a way to 384 // get the hash without persisting the state 385 var err error 386 387 bcR.curState, _, err = bcR.blockExec.ApplyBlockWithTrace(bcR.curState, firstID, first) // rpc 388 if err != nil { 389 // TODO This is bad, are we zombie? 390 // The block can't be committed, do we need to delete it from store db? 
391 _, errDel := bcR.store.DeleteBlocksFromTop(first.Height - 1) 392 if errDel != nil { 393 bcR.Logger.Error("Failed to delete blocks from top", "height", first.Height-1, "err", errDel) 394 } 395 panic(fmt.Sprintf("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err)) 396 } 397 blocksSynced++ 398 399 if blocksSynced%100 == 0 { 400 lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds()) 401 bcR.Logger.Info("Fast Sync Rate", "height", bcR.pool.height, 402 "max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate) 403 lastHundred = time.Now() 404 } 405 } 406 continue FOR_LOOP 407 case <-bcR.finishCh: 408 break FOR_LOOP 409 case <-bcR.Quit(): 410 break FOR_LOOP 411 } 412 } 413 } 414 415 func (bcR *BlockchainReactor) CheckFastSyncCondition() { 416 // ask for status updates 417 bcR.Logger.Info("CheckFastSyncCondition.") 418 go bcR.BroadcastStatusRequest() 419 } 420 421 func (bcR *BlockchainReactor) SwitchToConsensus(state sm.State) bool { 422 if !bcR.getIsSyncing() { 423 return false 424 } 425 426 blocksSynced := uint64(0) 427 height, numPending, lenRequesters := bcR.pool.GetStatus() 428 outbound, inbound, _ := bcR.Switch.NumPeers() 429 bcR.Logger.Debug("Consensus ticker", "numPending", numPending, "total", lenRequesters, 430 "outbound", outbound, "inbound", inbound) 431 conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor) 432 if bcR.pool.IsCaughtUp() && ok { 433 bcR.Logger.Info("Time to switch to consensus reactor!", "height", height) 434 435 succeed := conR.SwitchToConsensus(state, blocksSynced) 436 if succeed { 437 bcR.pool.Stop() 438 return true 439 } 440 } 441 return false 442 } 443 444 // BroadcastStatusRequest broadcasts `BlockStore` base and height. 
445 func (bcR *BlockchainReactor) BroadcastStatusRequest() error { 446 msgBytes := cdc.MustMarshalBinaryBare(&bcStatusRequestMessage{ 447 Base: bcR.store.Base(), 448 Height: bcR.store.Height(), 449 }) 450 bcR.Switch.Broadcast(BlockchainChannel, msgBytes) 451 return nil 452 } 453 454 func (bcR *BlockchainReactor) setIsSyncing(value bool) { 455 bcR.mtx.Lock() 456 bcR.isSyncing = value 457 bcR.mtx.Unlock() 458 } 459 460 func (bcR *BlockchainReactor) getIsSyncing() bool { 461 bcR.mtx.Lock() 462 defer bcR.mtx.Unlock() 463 return bcR.isSyncing 464 } 465 466 //----------------------------------------------------------------------------- 467 // Messages 468 469 // BlockchainMessage is a generic message for this reactor. 470 type BlockchainMessage interface { 471 ValidateBasic() error 472 } 473 474 // RegisterBlockchainMessages registers the fast sync messages for amino encoding. 475 func RegisterBlockchainMessages(cdc *amino.Codec) { 476 cdc.RegisterInterface((*BlockchainMessage)(nil), nil) 477 cdc.RegisterConcrete(&bcBlockRequestMessage{}, "tendermint/blockchain/BlockRequest", nil) 478 cdc.RegisterConcrete(&bcBlockResponseMessage{}, "tendermint/blockchain/BlockResponse", nil) 479 cdc.RegisterConcrete(&bcNoBlockResponseMessage{}, "tendermint/blockchain/NoBlockResponse", nil) 480 cdc.RegisterConcrete(&bcStatusResponseMessage{}, "tendermint/blockchain/StatusResponse", nil) 481 cdc.RegisterConcrete(&bcStatusRequestMessage{}, "tendermint/blockchain/StatusRequest", nil) 482 } 483 484 func decodeMsg(bz []byte) (msg BlockchainMessage, err error) { 485 if len(bz) > maxMsgSize { 486 return msg, fmt.Errorf("msg exceeds max size (%d > %d)", len(bz), maxMsgSize) 487 } 488 err = cdc.UnmarshalBinaryBare(bz, &msg) 489 return 490 } 491 492 //------------------------------------- 493 494 type bcBlockRequestMessage struct { 495 Height int64 496 } 497 498 // ValidateBasic performs basic validation. 
499 func (m *bcBlockRequestMessage) ValidateBasic() error { 500 if m.Height < 0 { 501 return errors.New("negative Height") 502 } 503 return nil 504 } 505 506 func (m *bcBlockRequestMessage) String() string { 507 return fmt.Sprintf("[bcBlockRequestMessage %v]", m.Height) 508 } 509 510 type bcNoBlockResponseMessage struct { 511 Height int64 512 } 513 514 // ValidateBasic performs basic validation. 515 func (m *bcNoBlockResponseMessage) ValidateBasic() error { 516 if m.Height < 0 { 517 return errors.New("negative Height") 518 } 519 return nil 520 } 521 522 func (m *bcNoBlockResponseMessage) String() string { 523 return fmt.Sprintf("[bcNoBlockResponseMessage %d]", m.Height) 524 } 525 526 //------------------------------------- 527 528 type bcBlockResponseMessage struct { 529 Block *types.Block 530 ExInfo *types.BlockExInfo 531 } 532 533 // ValidateBasic performs basic validation. 534 func (m *bcBlockResponseMessage) ValidateBasic() error { 535 return m.Block.ValidateBasic() 536 } 537 538 func (m *bcBlockResponseMessage) String() string { 539 return fmt.Sprintf("[bcBlockResponseMessage %v]", m.Block.Height) 540 } 541 542 //------------------------------------- 543 544 type bcStatusRequestMessage struct { 545 Height int64 546 Base int64 547 } 548 549 // ValidateBasic performs basic validation. 550 func (m *bcStatusRequestMessage) ValidateBasic() error { 551 if m.Base < 0 { 552 return errors.New("negative Base") 553 } 554 if m.Height < 0 { 555 return errors.New("negative Height") 556 } 557 if m.Base > m.Height { 558 return fmt.Errorf("base %v cannot be greater than height %v", m.Base, m.Height) 559 } 560 return nil 561 } 562 563 func (m *bcStatusRequestMessage) String() string { 564 return fmt.Sprintf("[bcStatusRequestMessage %v:%v]", m.Base, m.Height) 565 } 566 567 //------------------------------------- 568 569 type bcStatusResponseMessage struct { 570 Height int64 571 Base int64 572 } 573 574 // ValidateBasic performs basic validation. 
575 func (m *bcStatusResponseMessage) ValidateBasic() error { 576 if m.Base < 0 { 577 return errors.New("negative Base") 578 } 579 if m.Height < 0 { 580 return errors.New("negative Height") 581 } 582 if m.Base > m.Height { 583 return fmt.Errorf("base %v cannot be greater than height %v", m.Base, m.Height) 584 } 585 return nil 586 } 587 588 func (m *bcStatusResponseMessage) String() string { 589 return fmt.Sprintf("[bcStatusResponseMessage %v:%v]", m.Base, m.Height) 590 }