package v0

import (
	"fmt"
	"reflect"
	"time"

	"github.com/gogo/protobuf/proto"

	bc "github.com/pure-x-eth/consensus_tm/blockchain"
	"github.com/pure-x-eth/consensus_tm/libs/log"
	"github.com/pure-x-eth/consensus_tm/p2p"
	bcproto "github.com/pure-x-eth/consensus_tm/proto/tendermint/blockchain"
	sm "github.com/pure-x-eth/consensus_tm/state"
	"github.com/pure-x-eth/consensus_tm/store"
	"github.com/pure-x-eth/consensus_tm/types"
)

const (
	// BlockchainChannel is a channel for blocks and status updates (`BlockStore` height)
	BlockchainChannel = byte(0x40)

	// how often (in ms) poolRoutine wakes up and tries to apply a block
	trySyncIntervalMS = 10

	// stop syncing when last block's time is
	// within this much of the system time.
	// stopSyncingDurationMinutes = 10

	// ask for best height every 10s
	statusUpdateIntervalSeconds = 10
	// check if we should switch to consensus reactor
	switchToConsensusIntervalSeconds = 1
)

// consensusReactor is the subset of the consensus reactor's interface this
// reactor needs in order to hand control over once fast sync has caught up.
type consensusReactor interface {
	// for when we switch from blockchain reactor and fast sync to
	// the consensus machine
	SwitchToConsensus(state sm.State, skipWAL bool)
}

// peerError pairs an error with the peer that caused it, so failures can be
// reported back through errorsCh and the peer stopped.
type peerError struct {
	err    error
	peerID p2p.ID
}

// Error implements the error interface.
func (e peerError) Error() string {
	return fmt.Sprintf("error with peer %v: %s", e.peerID, e.err.Error())
}

// BlockchainReactor handles long-term catchup syncing.
type BlockchainReactor struct {
	p2p.BaseReactor

	// immutable
	initialState sm.State

	blockExec *sm.BlockExecutor // validates and applies fetched blocks
	store     *store.BlockStore // persistent block storage
	pool      *BlockPool        // tracks peers and outstanding block requests
	fastSync  bool              // whether to run the fast-sync protocol on start

	requestsCh <-chan BlockRequest // pool -> reactor: blocks to request from peers
	errorsCh   <-chan peerError    // pool -> reactor: peers to disconnect
}

// NewBlockchainReactor returns new reactor instance.
67 func NewBlockchainReactor(state sm.State, blockExec *sm.BlockExecutor, store *store.BlockStore, 68 fastSync bool) *BlockchainReactor { 69 70 if state.LastBlockHeight != store.Height() { 71 panic(fmt.Sprintf("state (%v) and store (%v) height mismatch", state.LastBlockHeight, 72 store.Height())) 73 } 74 75 requestsCh := make(chan BlockRequest, maxTotalRequesters) 76 77 const capacity = 1000 // must be bigger than peers count 78 errorsCh := make(chan peerError, capacity) // so we don't block in #Receive#pool.AddBlock 79 80 startHeight := store.Height() + 1 81 if startHeight == 1 { 82 startHeight = state.InitialHeight 83 } 84 pool := NewBlockPool(startHeight, requestsCh, errorsCh) 85 86 bcR := &BlockchainReactor{ 87 initialState: state, 88 blockExec: blockExec, 89 store: store, 90 pool: pool, 91 fastSync: fastSync, 92 requestsCh: requestsCh, 93 errorsCh: errorsCh, 94 } 95 bcR.BaseReactor = *p2p.NewBaseReactor("BlockchainReactor", bcR) 96 return bcR 97 } 98 99 // SetLogger implements service.Service by setting the logger on reactor and pool. 100 func (bcR *BlockchainReactor) SetLogger(l log.Logger) { 101 bcR.BaseService.Logger = l 102 bcR.pool.Logger = l 103 } 104 105 // OnStart implements service.Service. 106 func (bcR *BlockchainReactor) OnStart() error { 107 if bcR.fastSync { 108 err := bcR.pool.Start() 109 if err != nil { 110 return err 111 } 112 go bcR.poolRoutine(false) 113 } 114 return nil 115 } 116 117 // SwitchToFastSync is called by the state sync reactor when switching to fast sync. 118 func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error { 119 bcR.fastSync = true 120 bcR.initialState = state 121 122 bcR.pool.height = state.LastBlockHeight + 1 123 err := bcR.pool.Start() 124 if err != nil { 125 return err 126 } 127 go bcR.poolRoutine(true) 128 return nil 129 } 130 131 // OnStop implements service.Service. 
132 func (bcR *BlockchainReactor) OnStop() { 133 if bcR.fastSync { 134 if err := bcR.pool.Stop(); err != nil { 135 bcR.Logger.Error("Error stopping pool", "err", err) 136 } 137 } 138 } 139 140 // GetChannels implements Reactor 141 func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor { 142 return []*p2p.ChannelDescriptor{ 143 { 144 ID: BlockchainChannel, 145 Priority: 5, 146 SendQueueCapacity: 1000, 147 RecvBufferCapacity: 50 * 4096, 148 RecvMessageCapacity: bc.MaxMsgSize, 149 MessageType: &bcproto.Message{}, 150 }, 151 } 152 } 153 154 // AddPeer implements Reactor by sending our state to peer. 155 func (bcR *BlockchainReactor) AddPeer(peer p2p.Peer) { 156 p2p.SendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck 157 ChannelID: BlockchainChannel, 158 Message: &bcproto.StatusResponse{ 159 Base: bcR.store.Base(), 160 Height: bcR.store.Height(), 161 }, 162 }, bcR.Logger) 163 // it's OK if send fails. will try later in poolRoutine 164 165 // peer is added to the pool once we receive the first 166 // bcStatusResponseMessage from the peer and call pool.SetPeerRange 167 } 168 169 // RemovePeer implements Reactor by removing peer from the pool. 170 func (bcR *BlockchainReactor) RemovePeer(peer p2p.Peer, reason interface{}) { 171 bcR.pool.RemovePeer(peer.ID()) 172 } 173 174 // respondToPeer loads a block and sends it to the requesting peer, 175 // if we have it. Otherwise, we'll respond saying we don't have it. 
func (bcR *BlockchainReactor) respondToPeer(msg *bcproto.BlockRequest,
	src p2p.Peer) (queued bool) {

	block := bcR.store.LoadBlock(msg.Height)
	if block != nil {
		bl, err := block.ToProto()
		if err != nil {
			bcR.Logger.Error("could not convert msg to protobuf", "err", err)
			return false
		}
		return p2p.TrySendEnvelopeShim(src, p2p.Envelope{ //nolint: staticcheck
			ChannelID: BlockchainChannel,
			Message:   &bcproto.BlockResponse{Block: bl},
		}, bcR.Logger)
	}

	// We don't have the requested block: say so explicitly so the peer can
	// re-request it from someone else.
	return p2p.TrySendEnvelopeShim(src, p2p.Envelope{ //nolint: staticcheck
		ChannelID: BlockchainChannel,
		Message:   &bcproto.NoBlockResponse{Height: msg.Height},
	}, bcR.Logger)
}

// ReceiveEnvelope validates an incoming, already-unwrapped message and
// dispatches it to the appropriate handler. Invalid messages stop the peer.
func (bcR *BlockchainReactor) ReceiveEnvelope(e p2p.Envelope) {
	if err := bc.ValidateMsg(e.Message); err != nil {
		bcR.Logger.Error("Peer sent us invalid msg", "peer", e.Src, "msg", e.Message, "err", err)
		bcR.Switch.StopPeerForError(e.Src, err)
		return
	}

	bcR.Logger.Debug("Receive", "e.Src", e.Src, "chID", e.ChannelID, "msg", e.Message)

	switch msg := e.Message.(type) {
	case *bcproto.BlockRequest:
		bcR.respondToPeer(msg, e.Src)
	case *bcproto.BlockResponse:
		bi, err := types.BlockFromProto(msg.Block)
		if err != nil {
			bcR.Logger.Error("Block content is invalid", "err", err)
			return
		}
		bcR.pool.AddBlock(e.Src.ID(), bi, msg.Block.Size())
	case *bcproto.StatusRequest:
		// Send peer our state.
		p2p.TrySendEnvelopeShim(e.Src, p2p.Envelope{ //nolint: staticcheck
			ChannelID: BlockchainChannel,
			Message: &bcproto.StatusResponse{
				Height: bcR.store.Height(),
				Base:   bcR.store.Base(),
			},
		}, bcR.Logger)
	case *bcproto.StatusResponse:
		// Got a peer status. Unverified.
		bcR.pool.SetPeerRange(e.Src.ID(), msg.Base, msg.Height)
	case *bcproto.NoBlockResponse:
		bcR.Logger.Debug("Peer does not have requested block", "peer", e.Src, "height", msg.Height)
	default:
		bcR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
	}
}

// Receive implements the legacy byte-slice Reactor API by decoding the wire
// bytes into a bcproto.Message and forwarding it to ReceiveEnvelope.
// NOTE(review): a malformed message from a peer panics here; this assumes the
// p2p layer recovers from reactor panics — confirm before relying on it.
func (bcR *BlockchainReactor) Receive(chID byte, peer p2p.Peer, msgBytes []byte) {
	msg := &bcproto.Message{}
	err := proto.Unmarshal(msgBytes, msg)
	if err != nil {
		panic(err)
	}
	uw, err := msg.Unwrap()
	if err != nil {
		panic(err)
	}
	bcR.ReceiveEnvelope(p2p.Envelope{
		ChannelID: chID,
		Src:       peer,
		Message:   uw,
	})
}

// Handle messages from the poolReactor telling the reactor what to do.
// NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down!
func (bcR *BlockchainReactor) poolRoutine(stateSynced bool) {

	trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond)
	defer trySyncTicker.Stop()

	statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second)
	defer statusUpdateTicker.Stop()

	switchToConsensusTicker := time.NewTicker(switchToConsensusIntervalSeconds * time.Second)
	defer switchToConsensusTicker.Stop()

	blocksSynced := uint64(0)

	chainID := bcR.initialState.ChainID
	state := bcR.initialState

	lastHundred := time.Now()
	lastRate := 0.0

	// Buffered (size 1) so trySyncTicker can schedule a processing pass
	// without blocking; see the select below.
	didProcessCh := make(chan struct{}, 1)

	// Forwarding goroutine: turns pool requests into peer sends, pool errors
	// into peer stops, and periodically broadcasts a status request.
	go func() {
		for {
			select {
			case <-bcR.Quit():
				return
			case <-bcR.pool.Quit():
				return
			case request := <-bcR.requestsCh:
				peer := bcR.Switch.Peers().Get(request.PeerID)
				if peer == nil {
					continue
				}
				queued := p2p.TrySendEnvelopeShim(peer, p2p.Envelope{ //nolint: staticcheck
					ChannelID: BlockchainChannel,
					Message:   &bcproto.BlockRequest{Height: request.Height},
				}, bcR.Logger)
				if !queued {
					bcR.Logger.Debug("Send queue is full, drop block request", "peer",
						peer.ID(), "height", request.Height)
				}
			case err := <-bcR.errorsCh:
				peer := bcR.Switch.Peers().Get(err.peerID)
				if peer != nil {
					bcR.Switch.StopPeerForError(peer, err)
				}

			case <-statusUpdateTicker.C:
				// ask for status updates
				go bcR.BroadcastStatusRequest() //nolint: errcheck

			}
		}
	}()

FOR_LOOP:
	for {
		select {
		case <-switchToConsensusTicker.C:
			height, numPending, lenRequesters := bcR.pool.GetStatus()
			outbound, inbound, _ := bcR.Switch.NumPeers()
			bcR.Logger.Debug("Consensus ticker", "numPending", numPending, "total", lenRequesters,
				"outbound", outbound, "inbound", inbound)
			if bcR.pool.IsCaughtUp() {
				bcR.Logger.Info("Time to switch to consensus reactor!", "height", height)
				if err := bcR.pool.Stop(); err != nil {
					bcR.Logger.Error("Error stopping pool", "err", err)
				}
				conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor)
				if ok {
					conR.SwitchToConsensus(state, blocksSynced > 0 || stateSynced)
				}
				// else {
				// should only happen during testing
				// }

				break FOR_LOOP
			}

		case <-trySyncTicker.C: // chan time
			// Non-blocking send: schedule a processing pass if one isn't
			// already pending.
			select {
			case didProcessCh <- struct{}{}:
			default:
			}

		case <-didProcessCh:
			// NOTE: It is a subtle mistake to process more than a single block
			// at a time (e.g. 10) here, because we only TrySend 1 request per
			// loop. The ratio mismatch can result in starving of blocks, a
			// sudden burst of requests and responses, and repeat.
			// Consequently, it is better to split these routines rather than
			// coupling them as it's written here. TODO uncouple from request
			// routine.

			// See if there are any blocks to sync.
			first, second := bcR.pool.PeekTwoBlocks()
			// bcR.Logger.Info("TrySync peeked", "first", first, "second", second)
			if first == nil || second == nil {
				// We need both to sync the first block.
				continue FOR_LOOP
			} else {
				// Try again quickly next loop.
				didProcessCh <- struct{}{}
			}

			firstParts := first.MakePartSet(types.BlockPartSizeBytes)
			firstPartSetHeader := firstParts.Header()
			firstID := types.BlockID{Hash: first.Hash(), PartSetHeader: firstPartSetHeader}
			// Finally, verify the first block using the second's commit
			// NOTE: we can probably make this more efficient, but note that calling
			// first.Hash() doesn't verify the tx contents, so MakePartSet() is
			// currently necessary.
			err := state.Validators.VerifyCommitLight(
				chainID, firstID, first.Height, second.LastCommit)

			if err == nil {
				// validate the block before we persist it
				err = bcR.blockExec.ValidateBlock(state, first)
			}

			if err != nil {
				// Validation failed: punish both the peer that served the
				// block and the one that served the commit, then re-request.
				bcR.Logger.Error("Error in validation", "err", err)
				peerID := bcR.pool.RedoRequest(first.Height)
				peer := bcR.Switch.Peers().Get(peerID)
				if peer != nil {
					// NOTE: we've already removed the peer's request, but we
					// still need to clean up the rest.
					bcR.Switch.StopPeerForError(peer, fmt.Errorf("blockchainReactor validation error: %v", err))
				}
				peerID2 := bcR.pool.RedoRequest(second.Height)
				peer2 := bcR.Switch.Peers().Get(peerID2)
				if peer2 != nil && peer2 != peer {
					// NOTE: we've already removed the peer's request, but we
					// still need to clean up the rest.
					bcR.Switch.StopPeerForError(peer2, fmt.Errorf("blockchainReactor validation error: %v", err))
				}
				continue FOR_LOOP
			}

			bcR.pool.PopRequest()

			// TODO: batch saves so we dont persist to disk every block
			bcR.store.SaveBlock(first, firstParts, second.LastCommit)

			// TODO: same thing for app - but we would need a way to
			// get the hash without persisting the state
			state, _, err = bcR.blockExec.ApplyBlock(state, firstID, first)
			if err != nil {
				// TODO This is bad, are we zombie?
				panic(fmt.Sprintf("Failed to process committed block (%d:%X): %v", first.Height, first.Hash(), err))
			}
			blocksSynced++

			if blocksSynced%100 == 0 {
				// Exponential moving average of blocks/s over 100-block windows.
				lastRate = 0.9*lastRate + 0.1*(100/time.Since(lastHundred).Seconds())
				bcR.Logger.Info("Fast Sync Rate", "height", bcR.pool.height,
					"max_peer_height", bcR.pool.MaxPeerHeight(), "blocks/s", lastRate)
				lastHundred = time.Now()
			}

			continue FOR_LOOP

		case <-bcR.Quit():
			break FOR_LOOP
		}
	}
}

// BroadcastStatusRequest broadcasts `BlockStore` base and height.
func (bcR *BlockchainReactor) BroadcastStatusRequest() error {
	bcR.Switch.BroadcastEnvelope(p2p.Envelope{
		ChannelID: BlockchainChannel,
		Message:   &bcproto.StatusRequest{},
	})
	return nil
}