github.com/lazyledger/lazyledger-core@v0.35.0-dev.0.20210613111200-4c651f053571/blockchain/v0/reactor.go (about) 1 package v0 2 3 import ( 4 "fmt" 5 "reflect" 6 "time" 7 8 bc "github.com/lazyledger/lazyledger-core/blockchain" 9 "github.com/lazyledger/lazyledger-core/libs/log" 10 "github.com/lazyledger/lazyledger-core/p2p" 11 bcproto "github.com/lazyledger/lazyledger-core/proto/tendermint/blockchain" 12 sm "github.com/lazyledger/lazyledger-core/state" 13 "github.com/lazyledger/lazyledger-core/store" 14 "github.com/lazyledger/lazyledger-core/types" 15 ) 16 17 const ( 18 // BlockchainChannel is a channel for blocks and status updates (`BlockStore` height) 19 BlockchainChannel = byte(0x40) 20 21 trySyncIntervalMS = 10 22 23 // stop syncing when last block's time is 24 // within this much of the system time. 25 // stopSyncingDurationMinutes = 10 26 27 // ask for best height every 10s 28 statusUpdateIntervalSeconds = 10 29 // check if we should switch to consensus reactor 30 switchToConsensusIntervalSeconds = 1 31 32 // switch to consensus after this duration of inactivity 33 syncTimeout = 60 * time.Second 34 ) 35 36 type consensusReactor interface { 37 // for when we switch from blockchain reactor and fast sync to 38 // the consensus machine 39 SwitchToConsensus(state sm.State, skipWAL bool) 40 } 41 42 type peerError struct { 43 err error 44 peerID p2p.ID 45 } 46 47 func (e peerError) Error() string { 48 return fmt.Sprintf("error with peer %v: %s", e.peerID, e.err.Error()) 49 } 50 51 // BlockchainReactor handles long-term catchup syncing. 52 type BlockchainReactor struct { 53 p2p.BaseReactor 54 55 // immutable 56 initialState sm.State 57 58 blockExec *sm.BlockExecutor 59 store *store.BlockStore 60 pool *BlockPool 61 fastSync bool 62 63 requestsCh <-chan BlockRequest 64 errorsCh <-chan peerError 65 } 66 67 // NewBlockchainReactor returns new reactor instance. 68 func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *store.BlockStore, 69 fastSync bool) *BlockchainReactor { 70 71 if state.LastBlockHeight != store.Height() { 72 panic(fmt.Sprintf("state (%v) and store (%v) height mismatch", state.LastBlockHeight, 73 store.Height())) 74 } 75 76 requestsCh := make(chan BlockRequest, maxTotalRequesters) 77 78 const capacity = 1000 // must be bigger than peers count 79 errorsCh := make(chan peerError, capacity) // so we don't block in #Receive#pool.AddBlock 80 81 startHeight := store.Height() + 1 82 if startHeight == 1 { 83 startHeight = state.InitialHeight 84 } 85 pool := NewBlockPool(startHeight, requestsCh, errorsCh) 86 87 bcR := &BlockchainReactor{ 88 initialState: state, 89 blockExec: blockExec, 90 store: store, 91 pool: pool, 92 fastSync: fastSync, 93 requestsCh: requestsCh, 94 errorsCh: errorsCh, 95 } 96 bcR.BaseReactor = *p2p.NewBaseReactor("BlockchainReactor", bcR) 97 return bcR 98 } 99 100 // SetLogger implements service.Service by setting the logger on reactor and pool. 101 func (bcR *BlockchainReactor) SetLogger(l log.Logger) { 102 bcR.BaseService.Logger = l 103 bcR.pool.Logger = l 104 } 105 106 // OnStart implements service.Service. 107 func (bcR *BlockchainReactor) OnStart() error { 108 if bcR.fastSync { 109 err := bcR.pool.Start() 110 if err != nil { 111 return err 112 } 113 go bcR.poolRoutine(false) 114 } 115 return nil 116 } 117 118 // SwitchToFastSync is called by the state sync reactor when switching to fast sync. 119 func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error { 120 bcR.fastSync = true 121 bcR.initialState = state 122 123 bcR.pool.height = state.LastBlockHeight + 1 124 err := bcR.pool.Start() 125 if err != nil { 126 return err 127 } 128 go bcR.poolRoutine(true) 129 return nil 130 } 131 132 // OnStop implements service.Service. 133 func (bcR *BlockchainReactor) OnStop() { 134 if bcR.fastSync { 135 if err := bcR.pool.Stop(); err != nil { 136 bcR.Logger.Error("Error stopping pool", "err", err) 137 } 138 } 139 } 140 141 // GetChannels implements Reactor 142 func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor { 143 return []*p2p.ChannelDescriptor{ 144 { 145 ID: BlockchainChannel, 146 Priority: 10, 147 SendQueueCapacity: 1000, 148 RecvBufferCapacity: 50 * 4096, 149 RecvMessageCapacity: bc.MaxMsgSize, 150 }, 151 } 152 } 153 154 // AddPeer implements Reactor by sending our state to peer. 155 func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) { 156 msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{ 157 Base: bcR.store.Base(), 158 Height: bcR.store.Height()}) 159 if err != nil { 160 bcR.Logger.Error("could not convert msg to protobuf", "err", err) 161 return 162 } 163 164 _ = peer.Send(BlockchainChannel, msgBytes) 165 // it's OK if send fails. will try later in poolRoutine 166 167 // peer is added to the pool once we receive the first 168 // bcStatusResponseMessage from the peer and call pool.SetPeerRange 169 } 170 171 // RemovePeer implements Reactor by removing peer from the pool. 172 func (bcR *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) { 173 bcR.pool.RemovePeer(peer.ID()) 174 } 175 176 // respondToPeer loads a block and sends it to the requesting peer, 177 // if we have it. Otherwise, we'll respond saying we don't have it. 178 func (bcR *BlockchainReactor) respondToPeer(msg *bcproto.BlockRequest, 179 src p2p.Peer) (queued bool) { 180 181 block := bcR.store.LoadBlock(msg.Height) 182 if block != nil { 183 bl, err := block.ToProto() 184 if err != nil { 185 bcR.Logger.Error("could not convert msg to protobuf", "err", err) 186 return false 187 } 188 189 msgBytes, err := bc.EncodeMsg(&bcproto.BlockResponse{Block: bl}) 190 if err != nil { 191 bcR.Logger.Error("could not marshal msg", "err", err) 192 return false 193 } 194 195 return src.TrySend(BlockchainChannel, msgBytes) 196 } 197 198 bcR.Logger.Info("Peer asking for a block we don't have", "src", src, "height", msg.Height) 199 200 msgBytes, err := bc.EncodeMsg(&bcproto.NoBlockResponse{Height: msg.Height}) 201 if err != nil { 202 bcR.Logger.Error("could not convert msg to protobuf", "err", err) 203 return false 204 } 205 206 return src.TrySend(BlockchainChannel, msgBytes) 207 } 208 209 // Receive implements Reactor by handling 4 types of messages (look below). 210 // XXX: do not call any methods that can block or incur heavy processing. 211 // https://github.com/tendermint/tendermint/issues/2888 212 func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) { 213 logger := bcR.Logger.With("src", src, "chId", chID) 214 215 msg, err := bc.DecodeMsg(msgBytes) 216 if err != nil { 217 logger.Error("Error decoding message", "err", err) 218 bcR.Switch.StopPeerForError(src, err) 219 return 220 } 221 222 if err = bc.ValidateMsg(msg); err != nil { 223 logger.Error("Peer sent us invalid msg", "msg", msg, "err", err) 224 bcR.Switch.StopPeerForError(src, err) 225 return 226 } 227 228 logger.Debug("Receive", "msg", msg) 229 230 switch msg := msg.(type) { 231 case *bcproto.BlockRequest: 232 bcR.respondToPeer(msg, src) 233 case *bcproto.BlockResponse: 234 bi, err := types.BlockFromProto(msg.Block) 235 if err != nil { 236 logger.Error("Block content is invalid", "err", err) 237 bcR.Switch.StopPeerForError(src, err) 238 return 239 } 240 bcR.pool.AddBlock(src.ID(), bi, len(msgBytes)) 241 case *bcproto.StatusRequest: 242 // Send peer our state. 243 msgBytes, err := bc.EncodeMsg(&bcproto.StatusResponse{ 244 Height: bcR.store.Height(), 245 Base: bcR.store.Base(), 246 }) 247 if err != nil { 248 logger.Error("could not convert msg to protobut", "err", err) 249 return 250 } 251 src.TrySend(BlockchainChannel, msgBytes) 252 case *bcproto.StatusResponse: 253 // Got a peer status. Unverified. 254 bcR.pool.SetPeerRange(src.ID(), msg.Base, msg.Height) 255 case *bcproto.NoBlockResponse: 256 logger.Debug("Peer does not have requested block", "height", msg.Height) 257 default: 258 logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg))) 259 } 260 } 261 262 // Handle messages from the poolReactor telling the reactor what to do. 263 // NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down! 264 func (bcR *BlockchainReactor) poolRoutine(stateSynced bool) { 265 var ( 266 trySyncTicker = time.NewTicker(trySyncIntervalMS * time.Millisecond) 267 statusUpdateTicker = time.NewTicker(statusUpdateIntervalSeconds * time.Second) 268 switchToConsensusTicker = time.NewTicker(switchToConsensusIntervalSeconds * time.Second) 269 270 blocksSynced = uint64(0) 271 272 chainID = bcR.initialState.ChainID 273 state = bcR.initialState 274 275 lastHundred = time.Now() 276 lastRate = 0.0 277 278 didProcessCh = make(chan struct{}, 1) 279 ) 280 281 go func() { 282 for { 283 select { 284 285 case <-bcR.Quit(): 286 return 287 288 case <-bcR.pool.Quit(): 289 return 290 291 case request := <-bcR.requestsCh: 292 peer := bcR.Switch.Peers().Get(request.PeerID) 293 if peer == nil { 294 bcR.Logger.Debug("Can't send request: no peer", "peer_id", request.PeerID) 295 continue 296 } 297 msgBytes, err := bc.EncodeMsg(&bcproto.BlockRequest{Height: request.Height}) 298 if err != nil { 299 bcR.Logger.Error("could not convert BlockRequest to proto", "err", err) 300 continue 301 } 302 303 queued := peer.TrySend(BlockchainChannel, msgBytes) 304 if !queued { 305 bcR.Logger.Debug("Send queue is full, drop block request", "peer", peer.ID(), "height", request.Height) 306 } 307 308 case err := <-bcR.errorsCh: 309 peer := bcR.Switch.Peers().Get(err.peerID) 310 if peer != nil { 311 bcR.Switch.StopPeerForError(peer, err) 312 } 313 314 case <-statusUpdateTicker.C: 315 // ask for status updates 316 go bcR.BroadcastStatusRequest() 317 } 318 } 319 }() 320 321 FOR_LOOP: 322 for { 323 select { 324 325 case <-switchToConsensusTicker.C: 326 var ( 327 height, numPending, lenRequesters = bcR.pool.GetStatus() 328 outbound, inbound, _ = bcR.Switch.NumPeers() 329 lastAdvance = bcR.pool.LastAdvance() 330 ) 331 332 bcR.Logger.Debug("Consensus ticker", 333 "numPending", numPending, 334 "total", lenRequesters) 335 336 switch { 337 case bcR.pool.IsCaughtUp(): 338 bcR.Logger.Info("Time to switch to consensus reactor!", "height", height) 339 case time.Since(lastAdvance) > syncTimeout: 340 bcR.Logger.Error(fmt.Sprintf("No progress since last advance: %v", lastAdvance)) 341 default: 342 bcR.Logger.Info("Not caught up yet", 343 "height", height, "max_peer_height", bcR.pool.MaxPeerHeight(), 344 "num_peers", outbound+inbound, 345 "timeout_in", syncTimeout-time.Since(lastAdvance)) 346 continue 347 } 348 349 if err := bcR.pool.Stop(); err != nil { 350 bcR.Logger.Error("Error stopping pool", "err", err) 351 } 352 conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor) 353 if ok { 354 conR.SwitchToConsensus(state, blocksSynced > 0 || stateSynced) 355 } 356 357 break FOR_LOOP 358 359 case <-trySyncTicker.C: // chan time 360 select { 361 case didProcessCh <- struct{}{}: 362 default: 363 } 364 365 case <-didProcessCh: 366 // NOTE: It is a subtle mistake to process more than a single block 367 // at a time (e.g. 10) here, because we only TrySend 1 request per 368 // loop. The ratio mismatch can result in starving of blocks, a 369 // sudden burst of requests and responses, and repeat. 370 // Consequently, it is better to split these routines rather than 371 // coupling them as it's written here. TODO uncouple from request 372 // routine. 373 374 // See if there are any blocks to sync. 375 first, second := bcR.pool.PeekTwoBlocks() 376 // bcR.Logger.Info("TrySync peeked", "first", first, "second", second) 377 if first == nil || second == nil { 378 // We need both to sync the first block. 379 continue FOR_LOOP 380 } else { 381 // Try again quickly next loop. 382 didProcessCh <- struct{}{} 383 } 384 385 var ( 386 firstParts = first.MakePartSet(types.BlockPartSizeBytes) 387 firstPartSetHeader = firstParts.Header() 388 firstID = types.BlockID{Hash: first.Hash(), PartSetHeader: firstPartSetHeader} 389 ) 390 391 // Finally, verify the first block using the second's commit 392 // NOTE: we can probably make this more efficient, but note that calling 393 // first.Hash() doesn't verify the tx contents, so MakePartSet() is 394 // currently necessary. 395 err := state.Validators.VerifyCommitLight(chainID, firstID, first.Height, second.LastCommit) 396 if err != nil { 397 err = fmt.Errorf("invalid last commit: %w", err) 398 bcR.Logger.Error(err.Error(), 399 "last_commit", second.LastCommit, "block_id", firstID, "height", first.Height) 400 401 peerID := bcR.pool.RedoRequest(first.Height) 402 peer := bcR.Switch.Peers().Get(peerID) 403 if peer != nil { 404 // NOTE: we've already removed the peer's request, but we still need 405 // to clean up the rest. 406 bcR.Switch.StopPeerForError(peer, err) 407 } 408 409 peerID2 := bcR.pool.RedoRequest(second.Height) 410 if peerID2 != peerID { 411 if peer2 := bcR.Switch.Peers().Get(peerID2); peer2 != nil { 412 bcR.Switch.StopPeerForError(peer2, err) 413 } 414 } 415 416 continue FOR_LOOP 417 } else { 418 bcR.pool.PopRequest() 419 420 // TODO: batch saves so we dont persist to disk every block 421 bcR.store.SaveBlock(first, firstParts, second.LastCommit) 422 423 // TODO: same thing for app - but we would need a way to get the hash 424 // without persisting the state. 425 var err error 426 state, _, err = bcR.blockExec.ApplyBlock(state, firstID, first) 427 if err != nil { 428 // TODO This is bad, are we zombie? 429 panic(fmt.Sprintf("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err)) 430 } 431 blocksSynced++ 432 433 if blocksSynced%100 == 0 { 434 lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds()) 435 bcR.Logger.Info("Fast Sync Rate", 436 "height", bcR.pool.height, "max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate) 437 lastHundred = time.Now() 438 } 439 } 440 continue FOR_LOOP 441 442 case <-bcR.Quit(): 443 break FOR_LOOP 444 } 445 } 446 } 447 448 // BroadcastStatusRequest broadcasts `BlockStore` base and height. 449 func (bcR *BlockchainReactor) BroadcastStatusRequest() { 450 bm, err := bc.EncodeMsg(&bcproto.StatusRequest{}) 451 if err != nil { 452 bcR.Logger.Error("could not convert StatusRequest to proto", "err", err) 453 return 454 } 455 456 // We don't care about whenever broadcast is successful or not. 457 _ = bcR.Switch.Broadcast(BlockchainChannel, bm) 458 }