github.com/gnolang/gno@v0.0.0-20240520182011-228e9d0192ce/tm2/pkg/bft/blockchain/pool.go (about) 1 package blockchain 2 3 import ( 4 "errors" 5 "fmt" 6 "log/slog" 7 "math" 8 "sync" 9 "sync/atomic" 10 "time" 11 12 "github.com/gnolang/gno/tm2/pkg/bft/types" 13 "github.com/gnolang/gno/tm2/pkg/flow" 14 "github.com/gnolang/gno/tm2/pkg/log" 15 "github.com/gnolang/gno/tm2/pkg/p2p" 16 "github.com/gnolang/gno/tm2/pkg/service" 17 ) 18 19 /* 20 eg, L = latency = 0.1s 21 P = num peers = 10 22 FN = num full nodes 23 BS = 1kB block size 24 CB = 1 Mbit/s = 128 kB/s 25 CB/P = 12.8 kB 26 B/S = CB/P/BS = 12.8 blocks/s 27 28 12.8 * 0.1 = 1.28 blocks on conn 29 */ 30 31 const ( 32 requestIntervalMS = 2 33 maxTotalRequesters = 600 34 maxPendingRequests = maxTotalRequesters 35 maxPendingRequestsPerPeer = 20 36 37 // Minimum recv rate to ensure we're receiving blocks from a peer fast 38 // enough. If a peer is not sending us data at at least that rate, we 39 // consider them to have timedout and we disconnect. 40 // 41 // Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s, 42 // sending data across atlantic ~ 7.5 KB/s. 43 minRecvRate = 7680 44 45 // Maximum difference between current and new block's height. 46 maxDiffBetweenCurrentAndReceivedBlockHeight = 100 47 ) 48 49 var peerTimeout = 15 * time.Second // not const so we can override with tests 50 51 /* 52 Peers self report their heights when we join the block pool. 53 Starting from our latest pool.height, we request blocks 54 in sequence from peers that reported higher heights than ours. 55 Every so often we ask peers what height they're on so we can keep going. 56 57 Requests are continuously made for blocks of higher heights until 58 the limit is reached. If most of the requests have no available peers, and we 59 are not at peer limits, we can probably switch to consensus reactor 60 */ 61 62 // BlockPool keeps track of the fast sync peers, block requests and block responses. 63 type BlockPool struct { 64 service.BaseService 65 startTime time.Time 66 67 mtx sync.Mutex 68 // block requests 69 requesters map[int64]*bpRequester 70 height int64 // the lowest key in requesters. 71 // peers 72 peers map[p2p.ID]*bpPeer 73 maxPeerHeight int64 // the biggest reported height 74 75 // atomic 76 numPending int32 // number of requests pending assignment or block response 77 78 requestsCh chan<- BlockRequest 79 errorsCh chan<- peerError 80 } 81 82 // NewBlockPool returns a new BlockPool with the height equal to start. Block 83 // requests and errors will be sent to requestsCh and errorsCh accordingly. 84 func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool { 85 bp := &BlockPool{ 86 peers: make(map[p2p.ID]*bpPeer), 87 88 requesters: make(map[int64]*bpRequester), 89 height: start, 90 numPending: 0, 91 92 requestsCh: requestsCh, 93 errorsCh: errorsCh, 94 } 95 bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp) 96 return bp 97 } 98 99 // OnStart implements service.Service by spawning requesters routine and recording 100 // pool's start time. 101 func (pool *BlockPool) OnStart() error { 102 go pool.makeRequestersRoutine() 103 pool.startTime = time.Now() 104 return nil 105 } 106 107 // spawns requesters as needed 108 func (pool *BlockPool) makeRequestersRoutine() { 109 for { 110 if !pool.IsRunning() { 111 break 112 } 113 114 _, numPending, lenRequesters := pool.GetStatus() 115 switch { 116 case numPending >= maxPendingRequests: 117 // sleep for a bit. 118 time.Sleep(requestIntervalMS * time.Millisecond) 119 // check for timed out peers 120 pool.removeTimedoutPeers() 121 case lenRequesters >= maxTotalRequesters: 122 // sleep for a bit. 123 time.Sleep(requestIntervalMS * time.Millisecond) 124 // check for timed out peers 125 pool.removeTimedoutPeers() 126 default: 127 // request for more blocks. 128 pool.makeNextRequester() 129 } 130 } 131 } 132 133 func (pool *BlockPool) removeTimedoutPeers() { 134 pool.mtx.Lock() 135 defer pool.mtx.Unlock() 136 137 for _, peer := range pool.peers { 138 if !peer.didTimeout && peer.numPending > 0 { 139 curRate := peer.recvMonitor.Status().CurRate 140 // curRate can be 0 on start 141 if curRate != 0 && curRate < minRecvRate { 142 err := errors.New("peer is not sending us data fast enough") 143 pool.sendError(err, peer.id) 144 pool.Logger.Error("SendTimeout", "peer", peer.id, 145 "reason", err, 146 "curRate", fmt.Sprintf("%d KB/s", curRate/1024), 147 "minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024)) 148 peer.didTimeout = true 149 } 150 } 151 if peer.didTimeout { 152 pool.removePeer(peer.id) 153 } 154 } 155 } 156 157 // GetStatus returns pool's height, numPending requests and the number of 158 // requesters. 159 func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) { 160 pool.mtx.Lock() 161 defer pool.mtx.Unlock() 162 163 return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters) 164 } 165 166 // IsCaughtUp returns true if this node is caught up, false - otherwise. 167 // TODO: relax conditions, prevent abuse. 168 func (pool *BlockPool) IsCaughtUp() bool { 169 pool.mtx.Lock() 170 defer pool.mtx.Unlock() 171 172 // Need at least 1 peer to be considered caught up. 173 if len(pool.peers) == 0 { 174 pool.Logger.Debug("Blockpool has no peers") 175 return false 176 } 177 178 // Some conditions to determine if we're caught up. 179 // Ensures we've either received a block or waited some amount of time, 180 // and that we're synced to the highest known height. 181 // Note we use maxPeerHeight - 1 because to sync block H requires block H+1 182 // to verify the LastCommit. 183 receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second 184 ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1) 185 isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers 186 return isCaughtUp 187 } 188 189 // PeekTwoBlocks returns blocks at pool.height and pool.height+1. 190 // We need to see the second block's Commit to validate the first block. 191 // So we peek two blocks at a time. 192 // The caller will verify the commit. 193 func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) { 194 pool.mtx.Lock() 195 defer pool.mtx.Unlock() 196 197 if r := pool.requesters[pool.height]; r != nil { 198 first = r.getBlock() 199 } 200 if r := pool.requesters[pool.height+1]; r != nil { 201 second = r.getBlock() 202 } 203 return 204 } 205 206 // PopRequest pops the first block at pool.height. 207 // It must have been validated by 'second'.Commit from PeekTwoBlocks(). 208 func (pool *BlockPool) PopRequest() { 209 pool.mtx.Lock() 210 defer pool.mtx.Unlock() 211 212 if r := pool.requesters[pool.height]; r != nil { 213 /* The block can disappear at any time, due to removePeer(). 214 if r := pool.requesters[pool.height]; r == nil || r.block == nil { 215 PanicSanity("PopRequest() requires a valid block") 216 } 217 */ 218 r.Stop() 219 delete(pool.requesters, pool.height) 220 pool.height++ 221 } else { 222 panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height)) 223 } 224 } 225 226 // RedoRequest invalidates the block at pool.height, 227 // Remove the peer and redo request from others. 228 // Returns the ID of the removed peer. 229 func (pool *BlockPool) RedoRequest(height int64) p2p.ID { 230 pool.mtx.Lock() 231 defer pool.mtx.Unlock() 232 233 request := pool.requesters[height] 234 peerID := request.getPeerID() 235 if peerID != p2p.ID("") { 236 // RemovePeer will redo all requesters associated with this peer. 237 pool.removePeer(peerID) 238 } 239 return peerID 240 } 241 242 // AddBlock validates that the block comes from the peer it was expected from and calls the requester to store it. 243 // TODO: ensure that blocks come in order for each peer. 244 func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) { 245 pool.mtx.Lock() 246 defer pool.mtx.Unlock() 247 248 requester := pool.requesters[block.Height] 249 if requester == nil { 250 pool.Logger.Info("peer sent us a block we didn't expect", "peer", peerID, "curHeight", pool.height, "blockHeight", block.Height) 251 diff := pool.height - block.Height 252 if diff < 0 { 253 diff *= -1 254 } 255 if diff > maxDiffBetweenCurrentAndReceivedBlockHeight { 256 pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID) 257 } 258 return 259 } 260 261 if requester.setBlock(block, peerID) { 262 atomic.AddInt32(&pool.numPending, -1) 263 peer := pool.peers[peerID] 264 if peer != nil { 265 peer.decrPending(blockSize) 266 } 267 } else { 268 pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height) 269 pool.sendError(errors.New("invalid peer"), peerID) 270 } 271 } 272 273 // MaxPeerHeight returns the highest reported height. 274 func (pool *BlockPool) MaxPeerHeight() int64 { 275 pool.mtx.Lock() 276 defer pool.mtx.Unlock() 277 return pool.maxPeerHeight 278 } 279 280 // SetPeerHeight sets the peer's alleged blockchain height. 281 func (pool *BlockPool) SetPeerHeight(peerID p2p.ID, height int64) { 282 pool.mtx.Lock() 283 defer pool.mtx.Unlock() 284 285 peer := pool.peers[peerID] 286 if peer != nil { 287 peer.height = height 288 } else { 289 peer = newBPPeer(pool, peerID, height) 290 peer.setLogger(pool.Logger.With("peer", peerID)) 291 pool.peers[peerID] = peer 292 } 293 294 if height > pool.maxPeerHeight { 295 pool.maxPeerHeight = height 296 } 297 } 298 299 // RemovePeer removes the peer with peerID from the pool. If there's no peer 300 // with peerID, function is a no-op. 301 func (pool *BlockPool) RemovePeer(peerID p2p.ID) { 302 pool.mtx.Lock() 303 defer pool.mtx.Unlock() 304 305 pool.removePeer(peerID) 306 } 307 308 func (pool *BlockPool) removePeer(peerID p2p.ID) { 309 for _, requester := range pool.requesters { 310 if requester.getPeerID() == peerID { 311 requester.redo(peerID) 312 } 313 } 314 315 peer, ok := pool.peers[peerID] 316 if ok { 317 if peer.timeout != nil { 318 peer.timeout.Stop() 319 } 320 321 delete(pool.peers, peerID) 322 323 // Find a new peer with the biggest height and update maxPeerHeight if the 324 // peer's height was the biggest. 325 if peer.height == pool.maxPeerHeight { 326 pool.updateMaxPeerHeight() 327 } 328 } 329 } 330 331 // If no peers are left, maxPeerHeight is set to 0. 332 func (pool *BlockPool) updateMaxPeerHeight() { 333 var max int64 334 for _, peer := range pool.peers { 335 if peer.height > max { 336 max = peer.height 337 } 338 } 339 pool.maxPeerHeight = max 340 } 341 342 // Pick an available peer with at least the given minHeight. 343 // If no peers are available, returns nil. 344 func (pool *BlockPool) pickIncrAvailablePeer(minHeight int64) *bpPeer { 345 pool.mtx.Lock() 346 defer pool.mtx.Unlock() 347 348 for _, peer := range pool.peers { 349 if peer.didTimeout { 350 pool.removePeer(peer.id) 351 continue 352 } 353 if peer.numPending >= maxPendingRequestsPerPeer { 354 continue 355 } 356 if peer.height < minHeight { 357 continue 358 } 359 peer.incrPending() 360 return peer 361 } 362 return nil 363 } 364 365 func (pool *BlockPool) makeNextRequester() { 366 pool.mtx.Lock() 367 defer pool.mtx.Unlock() 368 369 nextHeight := pool.height + pool.requestersLen() 370 if nextHeight > pool.maxPeerHeight { 371 return 372 } 373 374 request := newBPRequester(pool, nextHeight) 375 376 pool.requesters[nextHeight] = request 377 atomic.AddInt32(&pool.numPending, 1) 378 379 err := request.Start() 380 if err != nil { 381 request.Logger.Error("Error starting request", "err", err) 382 } 383 } 384 385 func (pool *BlockPool) requestersLen() int64 { 386 return int64(len(pool.requesters)) 387 } 388 389 func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) { 390 if !pool.IsRunning() { 391 return 392 } 393 pool.requestsCh <- BlockRequest{height, peerID} 394 } 395 396 func (pool *BlockPool) sendError(err error, peerID p2p.ID) { 397 if !pool.IsRunning() { 398 return 399 } 400 pool.errorsCh <- peerError{err, peerID} 401 } 402 403 // for debugging purposes 404 // 405 //nolint:unused 406 func (pool *BlockPool) debug() string { 407 pool.mtx.Lock() 408 defer pool.mtx.Unlock() 409 410 str := "" 411 nextHeight := pool.height + pool.requestersLen() 412 for h := pool.height; h < nextHeight; h++ { 413 if pool.requesters[h] == nil { 414 str += fmt.Sprintf("H(%v):X ", h) 415 } else { 416 str += fmt.Sprintf("H(%v):", h) 417 str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil) 418 } 419 } 420 return str 421 } 422 423 // ------------------------------------- 424 425 type bpPeer struct { 426 pool *BlockPool 427 id p2p.ID 428 recvMonitor *flow.Monitor 429 430 height int64 431 numPending int32 432 timeout *time.Timer 433 didTimeout bool 434 435 logger *slog.Logger 436 } 437 438 func newBPPeer(pool *BlockPool, peerID p2p.ID, height int64) *bpPeer { 439 peer := &bpPeer{ 440 pool: pool, 441 id: peerID, 442 height: height, 443 numPending: 0, 444 logger: log.NewNoopLogger(), 445 } 446 return peer 447 } 448 449 func (peer *bpPeer) setLogger(l *slog.Logger) { 450 peer.logger = l 451 } 452 453 func (peer *bpPeer) resetMonitor() { 454 peer.recvMonitor = flow.New(time.Second, time.Second*40) 455 initialValue := float64(minRecvRate) * math.E 456 peer.recvMonitor.SetREMA(initialValue) 457 } 458 459 func (peer *bpPeer) resetTimeout() { 460 if peer.timeout == nil { 461 peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout) 462 } else { 463 peer.timeout.Reset(peerTimeout) 464 } 465 } 466 467 func (peer *bpPeer) incrPending() { 468 if peer.numPending == 0 { 469 peer.resetMonitor() 470 peer.resetTimeout() 471 } 472 peer.numPending++ 473 } 474 475 func (peer *bpPeer) decrPending(recvSize int) { 476 peer.numPending-- 477 if peer.numPending == 0 { 478 peer.timeout.Stop() 479 } else { 480 peer.recvMonitor.Update(recvSize) 481 peer.resetTimeout() 482 } 483 } 484 485 func (peer *bpPeer) onTimeout() { 486 peer.pool.mtx.Lock() 487 defer peer.pool.mtx.Unlock() 488 489 err := errors.New("peer did not send us anything") 490 peer.pool.sendError(err, peer.id) 491 peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout) 492 peer.didTimeout = true 493 } 494 495 // ------------------------------------- 496 497 type bpRequester struct { 498 service.BaseService 499 pool *BlockPool 500 height int64 501 gotBlockCh chan struct{} 502 redoCh chan p2p.ID // redo may send multitime, add peerId to identify repeat 503 504 mtx sync.Mutex 505 peerID p2p.ID 506 block *types.Block 507 } 508 509 func newBPRequester(pool *BlockPool, height int64) *bpRequester { 510 bpr := &bpRequester{ 511 pool: pool, 512 height: height, 513 gotBlockCh: make(chan struct{}, 1), 514 redoCh: make(chan p2p.ID, 1), 515 516 peerID: "", 517 block: nil, 518 } 519 bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr) 520 return bpr 521 } 522 523 func (bpr *bpRequester) OnStart() error { 524 go bpr.requestRoutine() 525 return nil 526 } 527 528 // Returns true if the peer matches and block doesn't already exist. 529 func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool { 530 bpr.mtx.Lock() 531 if bpr.block != nil || bpr.peerID != peerID { 532 bpr.mtx.Unlock() 533 return false 534 } 535 bpr.block = block 536 bpr.mtx.Unlock() 537 538 select { 539 case bpr.gotBlockCh <- struct{}{}: 540 default: 541 } 542 return true 543 } 544 545 func (bpr *bpRequester) getBlock() *types.Block { 546 bpr.mtx.Lock() 547 defer bpr.mtx.Unlock() 548 return bpr.block 549 } 550 551 func (bpr *bpRequester) getPeerID() p2p.ID { 552 bpr.mtx.Lock() 553 defer bpr.mtx.Unlock() 554 return bpr.peerID 555 } 556 557 // This is called from the requestRoutine, upon redo(). 558 func (bpr *bpRequester) reset() { 559 bpr.mtx.Lock() 560 defer bpr.mtx.Unlock() 561 562 if bpr.block != nil { 563 atomic.AddInt32(&bpr.pool.numPending, 1) 564 } 565 566 bpr.peerID = "" 567 bpr.block = nil 568 } 569 570 // Tells bpRequester to pick another peer and try again. 571 // NOTE: Nonblocking, and does nothing if another redo 572 // was already requested. 573 func (bpr *bpRequester) redo(peerID p2p.ID) { 574 select { 575 case bpr.redoCh <- peerID: 576 default: 577 } 578 } 579 580 // Responsible for making more requests as necessary 581 // Returns only when a block is found (e.g. AddBlock() is called) 582 func (bpr *bpRequester) requestRoutine() { 583 OUTER_LOOP: 584 for { 585 // Pick a peer to send request to. 586 var peer *bpPeer 587 PICK_PEER_LOOP: 588 for { 589 if !bpr.IsRunning() || !bpr.pool.IsRunning() { 590 return 591 } 592 peer = bpr.pool.pickIncrAvailablePeer(bpr.height) 593 if peer == nil { 594 // log.Info("No peers available", "height", height) 595 time.Sleep(requestIntervalMS * time.Millisecond) 596 continue PICK_PEER_LOOP 597 } 598 break PICK_PEER_LOOP 599 } 600 bpr.mtx.Lock() 601 bpr.peerID = peer.id 602 bpr.mtx.Unlock() 603 604 // Send request and wait. 605 bpr.pool.sendRequest(bpr.height, peer.id) 606 WAIT_LOOP: 607 for { 608 select { 609 case <-bpr.pool.Quit(): 610 bpr.Stop() 611 return 612 case <-bpr.Quit(): 613 return 614 case peerID := <-bpr.redoCh: 615 if peerID == bpr.peerID { 616 bpr.reset() 617 continue OUTER_LOOP 618 } else { 619 continue WAIT_LOOP 620 } 621 case <-bpr.gotBlockCh: 622 // We got a block! 623 // Continue the for-loop and wait til Quit. 624 continue WAIT_LOOP 625 } 626 } 627 } 628 } 629 630 // BlockRequest stores a block request identified by the block Height and the PeerID responsible for 631 // delivering the block 632 type BlockRequest struct { 633 Height int64 634 PeerID p2p.ID 635 }