github.com/vipernet-xyz/tm@v0.34.24/blockchain/v0/pool.go (about) 1 package v0 2 3 import ( 4 "errors" 5 "fmt" 6 "math" 7 "sync/atomic" 8 "time" 9 10 flow "github.com/vipernet-xyz/tm/libs/flowrate" 11 "github.com/vipernet-xyz/tm/libs/log" 12 "github.com/vipernet-xyz/tm/libs/service" 13 tmsync "github.com/vipernet-xyz/tm/libs/sync" 14 "github.com/vipernet-xyz/tm/p2p" 15 "github.com/vipernet-xyz/tm/types" 16 ) 17 18 /* 19 eg, L = latency = 0.1s 20 P = num peers = 10 21 FN = num full nodes 22 BS = 1kB block size 23 CB = 1 Mbit/s = 128 kB/s 24 CB/P = 12.8 kB 25 B/S = CB/P/BS = 12.8 blocks/s 26 27 12.8 * 0.1 = 1.28 blocks on conn 28 */ 29 30 const ( 31 requestIntervalMS = 2 32 maxTotalRequesters = 600 33 maxPendingRequests = maxTotalRequesters 34 maxPendingRequestsPerPeer = 20 35 requestRetrySeconds = 30 36 37 // Minimum recv rate to ensure we're receiving blocks from a peer fast 38 // enough. If a peer is not sending us data at at least that rate, we 39 // consider them to have timedout and we disconnect. 40 // 41 // Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s, 42 // sending data across atlantic ~ 7.5 KB/s. 43 minRecvRate = 7680 44 45 // Maximum difference between current and new block's height. 46 maxDiffBetweenCurrentAndReceivedBlockHeight = 100 47 ) 48 49 var peerTimeout = 15 * time.Second // not const so we can override with tests 50 51 /* 52 Peers self report their heights when we join the block pool. 53 Starting from our latest pool.height, we request blocks 54 in sequence from peers that reported higher heights than ours. 55 Every so often we ask peers what height they're on so we can keep going. 56 57 Requests are continuously made for blocks of higher heights until 58 the limit is reached. If most of the requests have no available peers, and we 59 are not at peer limits, we can probably switch to consensus reactor 60 */ 61 62 // BlockPool keeps track of the fast sync peers, block requests and block responses. 63 type BlockPool struct { 64 service.BaseService 65 startTime time.Time 66 67 mtx tmsync.Mutex 68 // block requests 69 requesters map[int64]*bpRequester 70 height int64 // the lowest key in requesters. 71 // peers 72 peers map[p2p.ID]*bpPeer 73 maxPeerHeight int64 // the biggest reported height 74 75 // atomic 76 numPending int32 // number of requests pending assignment or block response 77 78 requestsCh chan<- BlockRequest 79 errorsCh chan<- peerError 80 } 81 82 // NewBlockPool returns a new BlockPool with the height equal to start. Block 83 // requests and errors will be sent to requestsCh and errorsCh accordingly. 84 func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool { 85 bp := &BlockPool{ 86 peers: make(map[p2p.ID]*bpPeer), 87 88 requesters: make(map[int64]*bpRequester), 89 height: start, 90 numPending: 0, 91 92 requestsCh: requestsCh, 93 errorsCh: errorsCh, 94 } 95 bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp) 96 return bp 97 } 98 99 // OnStart implements service.Service by spawning requesters routine and recording 100 // pool's start time. 101 func (pool *BlockPool) OnStart() error { 102 go pool.makeRequestersRoutine() 103 pool.startTime = time.Now() 104 return nil 105 } 106 107 // spawns requesters as needed 108 func (pool *BlockPool) makeRequestersRoutine() { 109 for { 110 if !pool.IsRunning() { 111 break 112 } 113 114 _, numPending, lenRequesters := pool.GetStatus() 115 switch { 116 case numPending >= maxPendingRequests: 117 // sleep for a bit. 118 time.Sleep(requestIntervalMS * time.Millisecond) 119 // check for timed out peers 120 pool.removeTimedoutPeers() 121 case lenRequesters >= maxTotalRequesters: 122 // sleep for a bit. 123 time.Sleep(requestIntervalMS * time.Millisecond) 124 // check for timed out peers 125 pool.removeTimedoutPeers() 126 default: 127 // request for more blocks. 128 pool.makeNextRequester() 129 } 130 } 131 } 132 133 func (pool *BlockPool) removeTimedoutPeers() { 134 pool.mtx.Lock() 135 defer pool.mtx.Unlock() 136 137 for _, peer := range pool.peers { 138 if !peer.didTimeout && peer.numPending > 0 { 139 curRate := peer.recvMonitor.Status().CurRate 140 // curRate can be 0 on start 141 if curRate != 0 && curRate < minRecvRate { 142 err := errors.New("peer is not sending us data fast enough") 143 pool.sendError(err, peer.id) 144 pool.Logger.Error("SendTimeout", "peer", peer.id, 145 "reason", err, 146 "curRate", fmt.Sprintf("%d KB/s", curRate/1024), 147 "minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024)) 148 peer.didTimeout = true 149 } 150 } 151 if peer.didTimeout { 152 pool.removePeer(peer.id) 153 } 154 } 155 } 156 157 // GetStatus returns pool's height, numPending requests and the number of 158 // requesters. 159 func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) { 160 pool.mtx.Lock() 161 defer pool.mtx.Unlock() 162 163 return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters) 164 } 165 166 // IsCaughtUp returns true if this node is caught up, false - otherwise. 167 // TODO: relax conditions, prevent abuse. 168 func (pool *BlockPool) IsCaughtUp() bool { 169 pool.mtx.Lock() 170 defer pool.mtx.Unlock() 171 172 // Need at least 1 peer to be considered caught up. 173 if len(pool.peers) == 0 { 174 pool.Logger.Debug("Blockpool has no peers") 175 return false 176 } 177 178 // Some conditions to determine if we're caught up. 179 // Ensures we've either received a block or waited some amount of time, 180 // and that we're synced to the highest known height. 181 // Note we use maxPeerHeight - 1 because to sync block H requires block H+1 182 // to verify the LastCommit. 183 receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second 184 ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1) 185 isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers 186 return isCaughtUp 187 } 188 189 // PeekTwoBlocks returns blocks at pool.height and pool.height+1. 190 // We need to see the second block's Commit to validate the first block. 191 // So we peek two blocks at a time. 192 // The caller will verify the commit. 193 func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) { 194 pool.mtx.Lock() 195 defer pool.mtx.Unlock() 196 197 if r := pool.requesters[pool.height]; r != nil { 198 first = r.getBlock() 199 } 200 if r := pool.requesters[pool.height+1]; r != nil { 201 second = r.getBlock() 202 } 203 return 204 } 205 206 // PopRequest pops the first block at pool.height. 207 // It must have been validated by 'second'.Commit from PeekTwoBlocks(). 208 func (pool *BlockPool) PopRequest() { 209 pool.mtx.Lock() 210 defer pool.mtx.Unlock() 211 212 if r := pool.requesters[pool.height]; r != nil { 213 /* The block can disappear at any time, due to removePeer(). 214 if r := pool.requesters[pool.height]; r == nil || r.block == nil { 215 PanicSanity("PopRequest() requires a valid block") 216 } 217 */ 218 if err := r.Stop(); err != nil { 219 pool.Logger.Error("Error stopping requester", "err", err) 220 } 221 delete(pool.requesters, pool.height) 222 pool.height++ 223 } else { 224 panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height)) 225 } 226 } 227 228 // RedoRequest invalidates the block at pool.height, 229 // Remove the peer and redo request from others. 230 // Returns the ID of the removed peer. 231 func (pool *BlockPool) RedoRequest(height int64) p2p.ID { 232 pool.mtx.Lock() 233 defer pool.mtx.Unlock() 234 235 request := pool.requesters[height] 236 peerID := request.getPeerID() 237 if peerID != p2p.ID("") { 238 // RemovePeer will redo all requesters associated with this peer. 239 pool.removePeer(peerID) 240 } 241 return peerID 242 } 243 244 // AddBlock validates that the block comes from the peer it was expected from and calls the requester to store it. 245 // TODO: ensure that blocks come in order for each peer. 246 func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) { 247 pool.mtx.Lock() 248 defer pool.mtx.Unlock() 249 250 requester := pool.requesters[block.Height] 251 if requester == nil { 252 pool.Logger.Info( 253 "peer sent us a block we didn't expect", 254 "peer", 255 peerID, 256 "curHeight", 257 pool.height, 258 "blockHeight", 259 block.Height) 260 diff := pool.height - block.Height 261 if diff < 0 { 262 diff *= -1 263 } 264 if diff > maxDiffBetweenCurrentAndReceivedBlockHeight { 265 pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID) 266 } 267 return 268 } 269 270 if requester.setBlock(block, peerID) { 271 atomic.AddInt32(&pool.numPending, -1) 272 peer := pool.peers[peerID] 273 if peer != nil { 274 peer.decrPending(blockSize) 275 } 276 } else { 277 pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height) 278 pool.sendError(errors.New("invalid peer"), peerID) 279 } 280 } 281 282 // MaxPeerHeight returns the highest reported height. 283 func (pool *BlockPool) MaxPeerHeight() int64 { 284 pool.mtx.Lock() 285 defer pool.mtx.Unlock() 286 return pool.maxPeerHeight 287 } 288 289 // SetPeerRange sets the peer's alleged blockchain base and height. 290 func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) { 291 pool.mtx.Lock() 292 defer pool.mtx.Unlock() 293 294 peer := pool.peers[peerID] 295 if peer != nil { 296 peer.base = base 297 peer.height = height 298 } else { 299 peer = newBPPeer(pool, peerID, base, height) 300 peer.setLogger(pool.Logger.With("peer", peerID)) 301 pool.peers[peerID] = peer 302 } 303 304 if height > pool.maxPeerHeight { 305 pool.maxPeerHeight = height 306 } 307 } 308 309 // RemovePeer removes the peer with peerID from the pool. If there's no peer 310 // with peerID, function is a no-op. 311 func (pool *BlockPool) RemovePeer(peerID p2p.ID) { 312 pool.mtx.Lock() 313 defer pool.mtx.Unlock() 314 315 pool.removePeer(peerID) 316 } 317 318 func (pool *BlockPool) removePeer(peerID p2p.ID) { 319 for _, requester := range pool.requesters { 320 if requester.getPeerID() == peerID { 321 requester.redo(peerID) 322 } 323 } 324 325 peer, ok := pool.peers[peerID] 326 if ok { 327 if peer.timeout != nil { 328 peer.timeout.Stop() 329 } 330 331 delete(pool.peers, peerID) 332 333 // Find a new peer with the biggest height and update maxPeerHeight if the 334 // peer's height was the biggest. 335 if peer.height == pool.maxPeerHeight { 336 pool.updateMaxPeerHeight() 337 } 338 } 339 } 340 341 // If no peers are left, maxPeerHeight is set to 0. 342 func (pool *BlockPool) updateMaxPeerHeight() { 343 var max int64 344 for _, peer := range pool.peers { 345 if peer.height > max { 346 max = peer.height 347 } 348 } 349 pool.maxPeerHeight = max 350 } 351 352 // Pick an available peer with the given height available. 353 // If no peers are available, returns nil. 354 func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer { 355 pool.mtx.Lock() 356 defer pool.mtx.Unlock() 357 358 for _, peer := range pool.peers { 359 if peer.didTimeout { 360 pool.removePeer(peer.id) 361 continue 362 } 363 if peer.numPending >= maxPendingRequestsPerPeer { 364 continue 365 } 366 if height < peer.base || height > peer.height { 367 continue 368 } 369 peer.incrPending() 370 return peer 371 } 372 return nil 373 } 374 375 func (pool *BlockPool) makeNextRequester() { 376 pool.mtx.Lock() 377 defer pool.mtx.Unlock() 378 379 nextHeight := pool.height + pool.requestersLen() 380 if nextHeight > pool.maxPeerHeight { 381 return 382 } 383 384 request := newBPRequester(pool, nextHeight) 385 386 pool.requesters[nextHeight] = request 387 atomic.AddInt32(&pool.numPending, 1) 388 389 err := request.Start() 390 if err != nil { 391 request.Logger.Error("Error starting request", "err", err) 392 } 393 } 394 395 func (pool *BlockPool) requestersLen() int64 { 396 return int64(len(pool.requesters)) 397 } 398 399 func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) { 400 if !pool.IsRunning() { 401 return 402 } 403 pool.requestsCh <- BlockRequest{height, peerID} 404 } 405 406 func (pool *BlockPool) sendError(err error, peerID p2p.ID) { 407 if !pool.IsRunning() { 408 return 409 } 410 pool.errorsCh <- peerError{err, peerID} 411 } 412 413 // for debugging purposes 414 // 415 //nolint:unused 416 func (pool *BlockPool) debug() string { 417 pool.mtx.Lock() 418 defer pool.mtx.Unlock() 419 420 str := "" 421 nextHeight := pool.height + pool.requestersLen() 422 for h := pool.height; h < nextHeight; h++ { 423 if pool.requesters[h] == nil { 424 str += fmt.Sprintf("H(%v):X ", h) 425 } else { 426 str += fmt.Sprintf("H(%v):", h) 427 str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil) 428 } 429 } 430 return str 431 } 432 433 //------------------------------------- 434 435 type bpPeer struct { 436 didTimeout bool 437 numPending int32 438 height int64 439 base int64 440 pool *BlockPool 441 id p2p.ID 442 recvMonitor *flow.Monitor 443 444 timeout *time.Timer 445 446 logger log.Logger 447 } 448 449 func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer { 450 peer := &bpPeer{ 451 pool: pool, 452 id: peerID, 453 base: base, 454 height: height, 455 numPending: 0, 456 logger: log.NewNopLogger(), 457 } 458 return peer 459 } 460 461 func (peer *bpPeer) setLogger(l log.Logger) { 462 peer.logger = l 463 } 464 465 func (peer *bpPeer) resetMonitor() { 466 peer.recvMonitor = flow.New(time.Second, time.Second*40) 467 initialValue := float64(minRecvRate) * math.E 468 peer.recvMonitor.SetREMA(initialValue) 469 } 470 471 func (peer *bpPeer) resetTimeout() { 472 if peer.timeout == nil { 473 peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout) 474 } else { 475 peer.timeout.Reset(peerTimeout) 476 } 477 } 478 479 func (peer *bpPeer) incrPending() { 480 if peer.numPending == 0 { 481 peer.resetMonitor() 482 peer.resetTimeout() 483 } 484 peer.numPending++ 485 } 486 487 func (peer *bpPeer) decrPending(recvSize int) { 488 peer.numPending-- 489 if peer.numPending == 0 { 490 peer.timeout.Stop() 491 } else { 492 peer.recvMonitor.Update(recvSize) 493 peer.resetTimeout() 494 } 495 } 496 497 func (peer *bpPeer) onTimeout() { 498 peer.pool.mtx.Lock() 499 defer peer.pool.mtx.Unlock() 500 501 err := errors.New("peer did not send us anything") 502 peer.pool.sendError(err, peer.id) 503 peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout) 504 peer.didTimeout = true 505 } 506 507 //------------------------------------- 508 509 type bpRequester struct { 510 service.BaseService 511 pool *BlockPool 512 height int64 513 gotBlockCh chan struct{} 514 redoCh chan p2p.ID // redo may send multitime, add peerId to identify repeat 515 516 mtx tmsync.Mutex 517 peerID p2p.ID 518 block *types.Block 519 } 520 521 func newBPRequester(pool *BlockPool, height int64) *bpRequester { 522 bpr := &bpRequester{ 523 pool: pool, 524 height: height, 525 gotBlockCh: make(chan struct{}, 1), 526 redoCh: make(chan p2p.ID, 1), 527 528 peerID: "", 529 block: nil, 530 } 531 bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr) 532 return bpr 533 } 534 535 func (bpr *bpRequester) OnStart() error { 536 go bpr.requestRoutine() 537 return nil 538 } 539 540 // Returns true if the peer matches and block doesn't already exist. 541 func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool { 542 bpr.mtx.Lock() 543 if bpr.block != nil || bpr.peerID != peerID { 544 bpr.mtx.Unlock() 545 return false 546 } 547 bpr.block = block 548 bpr.mtx.Unlock() 549 550 select { 551 case bpr.gotBlockCh <- struct{}{}: 552 default: 553 } 554 return true 555 } 556 557 func (bpr *bpRequester) getBlock() *types.Block { 558 bpr.mtx.Lock() 559 defer bpr.mtx.Unlock() 560 return bpr.block 561 } 562 563 func (bpr *bpRequester) getPeerID() p2p.ID { 564 bpr.mtx.Lock() 565 defer bpr.mtx.Unlock() 566 return bpr.peerID 567 } 568 569 // This is called from the requestRoutine, upon redo(). 570 func (bpr *bpRequester) reset() { 571 bpr.mtx.Lock() 572 defer bpr.mtx.Unlock() 573 574 if bpr.block != nil { 575 atomic.AddInt32(&bpr.pool.numPending, 1) 576 } 577 578 bpr.peerID = "" 579 bpr.block = nil 580 } 581 582 // Tells bpRequester to pick another peer and try again. 583 // NOTE: Nonblocking, and does nothing if another redo 584 // was already requested. 585 func (bpr *bpRequester) redo(peerID p2p.ID) { 586 select { 587 case bpr.redoCh <- peerID: 588 default: 589 } 590 } 591 592 // Responsible for making more requests as necessary 593 // Returns only when a block is found (e.g. AddBlock() is called) 594 func (bpr *bpRequester) requestRoutine() { 595 OUTER_LOOP: 596 for { 597 // Pick a peer to send request to. 598 var peer *bpPeer 599 PICK_PEER_LOOP: 600 for { 601 if !bpr.IsRunning() || !bpr.pool.IsRunning() { 602 return 603 } 604 peer = bpr.pool.pickIncrAvailablePeer(bpr.height) 605 if peer == nil { 606 bpr.Logger.Debug("No peers currently available; will retry shortly", "height", bpr.height) 607 time.Sleep(requestIntervalMS * time.Millisecond) 608 continue PICK_PEER_LOOP 609 } 610 break PICK_PEER_LOOP 611 } 612 bpr.mtx.Lock() 613 bpr.peerID = peer.id 614 bpr.mtx.Unlock() 615 616 to := time.NewTimer(requestRetrySeconds * time.Second) 617 // Send request and wait. 618 bpr.pool.sendRequest(bpr.height, peer.id) 619 WAIT_LOOP: 620 for { 621 select { 622 case <-bpr.pool.Quit(): 623 if err := bpr.Stop(); err != nil { 624 bpr.Logger.Error("Error stopped requester", "err", err) 625 } 626 return 627 case <-bpr.Quit(): 628 return 629 case <-to.C: 630 bpr.Logger.Debug("Retrying block request after timeout", "height", bpr.height, "peer", bpr.peerID) 631 // Simulate a redo 632 bpr.reset() 633 continue OUTER_LOOP 634 case peerID := <-bpr.redoCh: 635 if peerID == bpr.peerID { 636 bpr.reset() 637 continue OUTER_LOOP 638 } else { 639 continue WAIT_LOOP 640 } 641 case <-bpr.gotBlockCh: 642 // We got a block! 643 // Continue the for-loop and wait til Quit. 644 continue WAIT_LOOP 645 } 646 } 647 } 648 } 649 650 // BlockRequest stores a block request identified by the block Height and the PeerID responsible for 651 // delivering the block 652 type BlockRequest struct { 653 Height int64 654 PeerID p2p.ID 655 }