github.com/consideritdone/landslidecore@v0.0.0-20230718131026-a8b21c5cf8a7/blockchain/v0/pool.go (about)

package v0

import (
    "errors"
    "fmt"
    "math"
    "sync/atomic"
    "time"

    flow "github.com/consideritdone/landslidecore/libs/flowrate"
    "github.com/consideritdone/landslidecore/libs/log"
    "github.com/consideritdone/landslidecore/libs/service"
    tmsync "github.com/consideritdone/landslidecore/libs/sync"
    "github.com/consideritdone/landslidecore/p2p"
    "github.com/consideritdone/landslidecore/types"
)

/*
e.g., L = latency = 0.1s
    P = num peers = 10
    FN = num full nodes
    BS = 1 kB block size
    CB = 1 Mbit/s = 128 kB/s
    CB/P = 12.8 kB/s
    B/S = CB/P/BS = 12.8 blocks/s

    12.8 * 0.1 = 1.28 blocks on conn
*/

const (
    requestIntervalMS         = 2
    maxTotalRequesters        = 600
    maxPendingRequests        = maxTotalRequesters
    maxPendingRequestsPerPeer = 20

    // Minimum recv rate to ensure we're receiving blocks from a peer fast
    // enough. If a peer is not sending us data at least at that rate, we
    // consider them to have timed out and we disconnect.
    //
    // Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s,
    // sending data across the Atlantic ~ 7.5 KB/s.
    minRecvRate = 7680

    // Maximum difference between current and new block's height.
    maxDiffBetweenCurrentAndReceivedBlockHeight = 100
)

var peerTimeout = 15 * time.Second // not const so it can be overridden in tests

/*
Peers self-report their heights when we join the block pool.
Starting from our latest pool.height, we request blocks
in sequence from peers that reported higher heights than ours.
Every so often we ask peers what height they're on so we can keep going.

Requests are continuously made for blocks of higher heights until
the limit is reached. If most of the requests have no available peers, and we
are not at peer limits, we can probably switch to the consensus reactor.
*/

// BlockPool keeps track of the fast sync peers, block requests and block responses.
type BlockPool struct {
    service.BaseService
    startTime time.Time

    mtx tmsync.Mutex
    // block requests
    requesters map[int64]*bpRequester
    height     int64 // the lowest key in requesters.
    // peers
    peers         map[p2p.ID]*bpPeer
    maxPeerHeight int64 // the biggest reported height

    // atomic
    numPending int32 // number of requests pending assignment or block response

    requestsCh chan<- BlockRequest
    errorsCh   chan<- peerError
}

// NewBlockPool returns a new BlockPool with the height equal to start. Block
// requests and errors will be sent to requestsCh and errorsCh accordingly.
func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
    bp := &BlockPool{
        peers: make(map[p2p.ID]*bpPeer),

        requesters: make(map[int64]*bpRequester),
        height:     start,
        numPending: 0,

        requestsCh: requestsCh,
        errorsCh:   errorsCh,
    }
    bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
    return bp
}
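
// exampleNewPool is an illustrative sketch and is not part of the original
// file: it shows how a caller (e.g. the fast-sync reactor) might wire up a
// BlockPool with the two channels it reports on and start it. The channel
// capacities and the startHeight parameter are assumptions made for this
// example only.
//
//nolint:unused
func exampleNewPool(startHeight int64) (*BlockPool, chan BlockRequest, chan peerError) {
    requestsCh := make(chan BlockRequest, maxTotalRequesters)
    errorsCh := make(chan peerError, maxPendingRequests)

    pool := NewBlockPool(startHeight, requestsCh, errorsCh)
    if err := pool.Start(); err != nil {
        pool.Logger.Error("failed to start block pool", "err", err)
    }
    return pool, requestsCh, errorsCh
}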

// OnStart implements service.Service by spawning the requesters routine and
// recording the pool's start time.
func (pool *BlockPool) OnStart() error {
    go pool.makeRequestersRoutine()
    pool.startTime = time.Now()
    return nil
}

// spawns requesters as needed
func (pool *BlockPool) makeRequestersRoutine() {
    for {
        if !pool.IsRunning() {
            break
        }

        _, numPending, lenRequesters := pool.GetStatus()
        switch {
        case numPending >= maxPendingRequests:
            // sleep for a bit.
            time.Sleep(requestIntervalMS * time.Millisecond)
            // check for timed out peers
            pool.removeTimedoutPeers()
        case lenRequesters >= maxTotalRequesters:
            // sleep for a bit.
            time.Sleep(requestIntervalMS * time.Millisecond)
            // check for timed out peers
            pool.removeTimedoutPeers()
        default:
            // request more blocks.
            pool.makeNextRequester()
        }
    }
}

func (pool *BlockPool) removeTimedoutPeers() {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    for _, peer := range pool.peers {
        if !peer.didTimeout && peer.numPending > 0 {
            curRate := peer.recvMonitor.Status().CurRate
            // curRate can be 0 on start
            if curRate != 0 && curRate < minRecvRate {
                err := errors.New("peer is not sending us data fast enough")
                pool.sendError(err, peer.id)
                pool.Logger.Error("SendTimeout", "peer", peer.id,
                    "reason", err,
                    "curRate", fmt.Sprintf("%d KB/s", curRate/1024),
                    "minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
                peer.didTimeout = true
            }
        }
        if peer.didTimeout {
            pool.removePeer(peer.id)
        }
    }
}

// GetStatus returns the pool's height, the number of pending requests and the
// number of requesters.
func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
}

// IsCaughtUp returns true if this node is caught up, false otherwise.
// TODO: relax conditions, prevent abuse.
func (pool *BlockPool) IsCaughtUp() bool {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    // Need at least 1 peer to be considered caught up.
    if len(pool.peers) == 0 {
        pool.Logger.Debug("Blockpool has no peers")
        return false
    }

    // Some conditions to determine if we're caught up.
    // Ensures we've either received a block or waited some amount of time,
    // and that we're synced to the highest known height.
    // Note we use maxPeerHeight - 1 because to sync block H requires block H+1
    // to verify the LastCommit.
    receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second
    ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1)
    isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers
    return isCaughtUp
}

// PeekTwoBlocks returns blocks at pool.height and pool.height+1.
// We need to see the second block's Commit to validate the first block.
// So we peek two blocks at a time.
// The caller will verify the commit.
func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    if r := pool.requesters[pool.height]; r != nil {
        first = r.getBlock()
    }
    if r := pool.requesters[pool.height+1]; r != nil {
        second = r.getBlock()
    }
    return
}
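
// examplePeekPopLoop is an illustrative sketch and is not part of the
// original file: it shows the consumer side of the pool, peeking at the
// blocks at heights H and H+1, verifying the first against the second via a
// caller-supplied verify function (an assumption standing in for the
// reactor's real LastCommit verification), and then popping the first block.
//
//nolint:unused
func examplePeekPopLoop(pool *BlockPool, verify func(first, second *types.Block) error) {
    for pool.IsRunning() && !pool.IsCaughtUp() {
        first, second := pool.PeekTwoBlocks()
        if first == nil || second == nil {
            // Not enough blocks buffered yet; try again shortly.
            time.Sleep(requestIntervalMS * time.Millisecond)
            continue
        }
        if err := verify(first, second); err != nil {
            // The first block failed verification: drop the peer that sent it
            // and have the pool re-request that height from someone else.
            pool.RedoRequest(first.Height)
            continue
        }
        // Verified: advance pool.height past it.
        pool.PopRequest()
    }
}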

// PopRequest pops the first block at pool.height.
// It must have been validated by the second block's Commit from PeekTwoBlocks().
func (pool *BlockPool) PopRequest() {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    if r := pool.requesters[pool.height]; r != nil {
        /*  The block can disappear at any time, due to removePeer().
        if r := pool.requesters[pool.height]; r == nil || r.block == nil {
            PanicSanity("PopRequest() requires a valid block")
        }
        */
        if err := r.Stop(); err != nil {
            pool.Logger.Error("Error stopping requester", "err", err)
        }
        delete(pool.requesters, pool.height)
        pool.height++
    } else {
        panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
    }
}

// RedoRequest invalidates the block at the given height, removes the peer
// that sent it, and redoes the request from other peers.
// Returns the ID of the removed peer.
func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    request := pool.requesters[height]
    peerID := request.getPeerID()
    if peerID != p2p.ID("") {
        // RemovePeer will redo all requesters associated with this peer.
        pool.removePeer(peerID)
    }
    return peerID
}

// AddBlock validates that the block comes from the peer it was expected from
// and calls the requester to store it.
// TODO: ensure that blocks come in order for each peer.
func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    requester := pool.requesters[block.Height]
    if requester == nil {
        pool.Logger.Info(
            "peer sent us a block we didn't expect",
            "peer", peerID,
            "curHeight", pool.height,
            "blockHeight", block.Height)
        diff := pool.height - block.Height
        if diff < 0 {
            diff *= -1
        }
        if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
            pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
        }
        return
    }

    if requester.setBlock(block, peerID) {
        atomic.AddInt32(&pool.numPending, -1)
        peer := pool.peers[peerID]
        if peer != nil {
            peer.decrPending(blockSize)
        }
    } else {
        pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height)
        pool.sendError(errors.New("invalid peer"), peerID)
    }
}

// MaxPeerHeight returns the highest reported height.
func (pool *BlockPool) MaxPeerHeight() int64 {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()
    return pool.maxPeerHeight
}

// SetPeerRange sets the peer's alleged blockchain base and height.
func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    peer := pool.peers[peerID]
    if peer != nil {
        peer.base = base
        peer.height = height
    } else {
        peer = newBPPeer(pool, peerID, base, height)
        peer.setLogger(pool.Logger.With("peer", peerID))
        pool.peers[peerID] = peer
    }

    if height > pool.maxPeerHeight {
        pool.maxPeerHeight = height
    }
}
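
// exampleFeedPool is an illustrative sketch and is not part of the original
// file: it shows the producer side, where the caller records a peer's
// advertised base/height via SetPeerRange and hands each received block to
// AddBlock. The peerID, base, height, block and blockSize values are assumed
// to come from the p2p layer.
//
//nolint:unused
func exampleFeedPool(pool *BlockPool, peerID p2p.ID, base, height int64, block *types.Block, blockSize int) {
    // From a peer status message: remember which heights this peer can serve.
    pool.SetPeerRange(peerID, base, height)

    // From a block response: let the pool route it to the matching requester.
    if block != nil {
        pool.AddBlock(peerID, block, blockSize)
    }
}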

// RemovePeer removes the peer with peerID from the pool. If there's no peer
// with peerID, the function is a no-op.
func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    pool.removePeer(peerID)
}

func (pool *BlockPool) removePeer(peerID p2p.ID) {
    for _, requester := range pool.requesters {
        if requester.getPeerID() == peerID {
            requester.redo(peerID)
        }
    }

    peer, ok := pool.peers[peerID]
    if ok {
        if peer.timeout != nil {
            peer.timeout.Stop()
        }

        delete(pool.peers, peerID)

        // Find a new peer with the biggest height and update maxPeerHeight if the
        // peer's height was the biggest.
        if peer.height == pool.maxPeerHeight {
            pool.updateMaxPeerHeight()
        }
    }
}

// If no peers are left, maxPeerHeight is set to 0.
func (pool *BlockPool) updateMaxPeerHeight() {
    var max int64
    for _, peer := range pool.peers {
        if peer.height > max {
            max = peer.height
        }
    }
    pool.maxPeerHeight = max
}

// Pick an available peer that can serve the given height.
// If no peers are available, returns nil.
func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    for _, peer := range pool.peers {
        if peer.didTimeout {
            pool.removePeer(peer.id)
            continue
        }
        if peer.numPending >= maxPendingRequestsPerPeer {
            continue
        }
        if height < peer.base || height > peer.height {
            continue
        }
        peer.incrPending()
        return peer
    }
    return nil
}

func (pool *BlockPool) makeNextRequester() {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    nextHeight := pool.height + pool.requestersLen()
    if nextHeight > pool.maxPeerHeight {
        return
    }

    request := newBPRequester(pool, nextHeight)

    pool.requesters[nextHeight] = request
    atomic.AddInt32(&pool.numPending, 1)

    err := request.Start()
    if err != nil {
        request.Logger.Error("Error starting request", "err", err)
    }
}

func (pool *BlockPool) requestersLen() int64 {
    return int64(len(pool.requesters))
}

func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
    if !pool.IsRunning() {
        return
    }
    pool.requestsCh <- BlockRequest{height, peerID}
}

func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
    if !pool.IsRunning() {
        return
    }
    pool.errorsCh <- peerError{err, peerID}
}

// for debugging purposes
//
//nolint:unused
func (pool *BlockPool) debug() string {
    pool.mtx.Lock()
    defer pool.mtx.Unlock()

    str := ""
    nextHeight := pool.height + pool.requestersLen()
    for h := pool.height; h < nextHeight; h++ {
        if pool.requesters[h] == nil {
            str += fmt.Sprintf("H(%v):X ", h)
        } else {
            str += fmt.Sprintf("H(%v):", h)
            str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
        }
    }
    return str
}

//-------------------------------------

type bpPeer struct {
    didTimeout  bool
    numPending  int32
    height      int64
    base        int64
    pool        *BlockPool
    id          p2p.ID
    recvMonitor *flow.Monitor

    timeout *time.Timer

    logger log.Logger
}

func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer {
    peer := &bpPeer{
        pool:       pool,
        id:         peerID,
        base:       base,
        height:     height,
        numPending: 0,
        logger:     log.NewNopLogger(),
    }
    return peer
}

func (peer *bpPeer) setLogger(l log.Logger) {
    peer.logger = l
}
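
// resetMonitor (below) seeds the flowrate estimate at minRecvRate * math.E
// (7680 B/s * 2.718... ≈ 20.9 kB/s), i.e. well above the minRecvRate cutoff
// checked in removeTimedoutPeers, presumably so a freshly (re)started peer is
// not flagged as too slow before real samples arrive.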
func (peer *bpPeer) resetMonitor() {
    peer.recvMonitor = flow.New(time.Second, time.Second*40)
    initialValue := float64(minRecvRate) * math.E
    peer.recvMonitor.SetREMA(initialValue)
}

func (peer *bpPeer) resetTimeout() {
    if peer.timeout == nil {
        peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
    } else {
        peer.timeout.Reset(peerTimeout)
    }
}

func (peer *bpPeer) incrPending() {
    if peer.numPending == 0 {
        peer.resetMonitor()
        peer.resetTimeout()
    }
    peer.numPending++
}

func (peer *bpPeer) decrPending(recvSize int) {
    peer.numPending--
    if peer.numPending == 0 {
        peer.timeout.Stop()
    } else {
        peer.recvMonitor.Update(recvSize)
        peer.resetTimeout()
    }
}

func (peer *bpPeer) onTimeout() {
    peer.pool.mtx.Lock()
    defer peer.pool.mtx.Unlock()

    err := errors.New("peer did not send us anything")
    peer.pool.sendError(err, peer.id)
    peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
    peer.didTimeout = true
}

//-------------------------------------

type bpRequester struct {
    service.BaseService
    pool       *BlockPool
    height     int64
    gotBlockCh chan struct{}
    redoCh     chan p2p.ID // redo may be sent multiple times; the peer ID identifies repeats

    mtx    tmsync.Mutex
    peerID p2p.ID
    block  *types.Block
}

func newBPRequester(pool *BlockPool, height int64) *bpRequester {
    bpr := &bpRequester{
        pool:       pool,
        height:     height,
        gotBlockCh: make(chan struct{}, 1),
        redoCh:     make(chan p2p.ID, 1),

        peerID: "",
        block:  nil,
    }
    bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
    return bpr
}

func (bpr *bpRequester) OnStart() error {
    go bpr.requestRoutine()
    return nil
}

// Returns true if the peer matches and the block doesn't already exist.
func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool {
    bpr.mtx.Lock()
    if bpr.block != nil || bpr.peerID != peerID {
        bpr.mtx.Unlock()
        return false
    }
    bpr.block = block
    bpr.mtx.Unlock()

    select {
    case bpr.gotBlockCh <- struct{}{}:
    default:
    }
    return true
}

func (bpr *bpRequester) getBlock() *types.Block {
    bpr.mtx.Lock()
    defer bpr.mtx.Unlock()
    return bpr.block
}

func (bpr *bpRequester) getPeerID() p2p.ID {
    bpr.mtx.Lock()
    defer bpr.mtx.Unlock()
    return bpr.peerID
}

// This is called from the requestRoutine, upon redo().
func (bpr *bpRequester) reset() {
    bpr.mtx.Lock()
    defer bpr.mtx.Unlock()

    if bpr.block != nil {
        atomic.AddInt32(&bpr.pool.numPending, 1)
    }

    bpr.peerID = ""
    bpr.block = nil
}

// Tells bpRequester to pick another peer and try again.
// NOTE: Nonblocking, and does nothing if another redo
// was already requested.
func (bpr *bpRequester) redo(peerID p2p.ID) {
    select {
    case bpr.redoCh <- peerID:
    default:
    }
}

// Responsible for making more requests as necessary.
// Returns only when the requester or the pool is stopped.
func (bpr *bpRequester) requestRoutine() {
OUTER_LOOP:
    for {
        // Pick a peer to send the request to.
        var peer *bpPeer
    PICK_PEER_LOOP:
        for {
            if !bpr.IsRunning() || !bpr.pool.IsRunning() {
                return
            }
            peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
            if peer == nil {
                // log.Info("No peers available", "height", height)
                time.Sleep(requestIntervalMS * time.Millisecond)
                continue PICK_PEER_LOOP
            }
            break PICK_PEER_LOOP
        }
        bpr.mtx.Lock()
        bpr.peerID = peer.id
        bpr.mtx.Unlock()

        // Send request and wait.
        bpr.pool.sendRequest(bpr.height, peer.id)
    WAIT_LOOP:
        for {
            select {
            case <-bpr.pool.Quit():
                if err := bpr.Stop(); err != nil {
                    bpr.Logger.Error("Error stopping requester", "err", err)
                }
                return
            case <-bpr.Quit():
                return
            case peerID := <-bpr.redoCh:
                if peerID == bpr.peerID {
                    bpr.reset()
                    continue OUTER_LOOP
                } else {
                    continue WAIT_LOOP
                }
            case <-bpr.gotBlockCh:
                // We got a block!
                // Continue the for-loop and wait til Quit.
                continue WAIT_LOOP
            }
        }
    }
}

// BlockRequest stores a block request identified by the block Height and the
// PeerID responsible for delivering the block.
type BlockRequest struct {
    Height int64
    PeerID p2p.ID
}
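
// examplePumpChannels is an illustrative sketch and is not part of the
// original file: it shows how a caller might drain the two channels the pool
// writes to, forwarding each BlockRequest to the p2p layer and handling each
// peerError. The send and onError callbacks are assumptions standing in for
// the reactor's real message sending and peer-stopping logic.
//
//nolint:unused
func examplePumpChannels(
    pool *BlockPool,
    requestsCh <-chan BlockRequest,
    errorsCh <-chan peerError,
    send func(peerID p2p.ID, height int64),
    onError func(peerError),
) {
    for {
        select {
        case <-pool.Quit():
            return
        case req := <-requestsCh:
            // The pool wants block req.Height from req.PeerID.
            send(req.PeerID, req.Height)
        case err := <-errorsCh:
            // The pool reported a misbehaving or too-slow peer.
            onError(err)
        }
    }
}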