github.com/number571/tendermint@v0.34.11-gost/internal/blockchain/v0/pool.go

package v0

import (
	"errors"
	"fmt"
	"math"
	"sync/atomic"
	"time"

	flow "github.com/number571/tendermint/internal/libs/flowrate"
	tmsync "github.com/number571/tendermint/internal/libs/sync"
	"github.com/number571/tendermint/libs/log"
	"github.com/number571/tendermint/libs/service"
	"github.com/number571/tendermint/types"
)

/*
eg, L = latency = 0.1s
	P = num peers = 10
	FN = num full nodes
	BS = 1kB block size
	CB = 1 Mbit/s = 128 kB/s
	CB/P = 12.8 kB/s (bandwidth per peer)
	B/S = CB/P/BS = 12.8 blocks/s (blocks per second per peer)

	12.8 blocks/s * 0.1s latency = 1.28 blocks in flight per connection
*/

const (
	requestIntervalMS         = 2
	maxTotalRequesters        = 600
	maxPeerErrBuffer          = 1000
	maxPendingRequests        = maxTotalRequesters
	maxPendingRequestsPerPeer = 20

	// Minimum recv rate to ensure we're receiving blocks from a peer fast
	// enough. If a peer is not sending us data at least at that rate, we
	// consider them to have timed out and we disconnect.
	//
	// Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s,
	// sending data across the Atlantic ~ 7.5 KB/s.
	minRecvRate = 7680

	// Maximum difference between the current and the received block's height.
	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
)

var peerTimeout = 15 * time.Second // not const so we can override it in tests

/*
Peers self-report their heights when we join the block pool.
Starting from our latest pool.height, we request blocks
in sequence from peers that reported higher heights than ours.
Every so often we ask peers what height they're on so we can keep going.

Requests are continuously made for blocks of higher heights until
the limit is reached. If most of the requests have no available peers, and we
are not at peer limits, we can probably switch to the consensus reactor.
*/

// BlockRequest stores a block request identified by the block Height and the
// PeerID responsible for delivering the block.
type BlockRequest struct {
	Height int64
	PeerID types.NodeID
}

// BlockPool keeps track of the fast sync peers, block requests and block responses.
type BlockPool struct {
	service.BaseService
	lastAdvance time.Time

	mtx tmsync.RWMutex
	// block requests
	requesters map[int64]*bpRequester
	height     int64 // the lowest key in requesters
	// peers
	peers         map[types.NodeID]*bpPeer
	maxPeerHeight int64 // the biggest reported height

	// atomic
	numPending int32 // number of requests pending assignment or block response

	requestsCh chan<- BlockRequest
	errorsCh   chan<- peerError

	startHeight               int64
	lastHundredBlockTimeStamp time.Time
	lastSyncRate              float64
}

// NewBlockPool returns a new BlockPool with the height equal to start. Block
// requests and errors will be sent to requestsCh and errorsCh respectively.
func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
	bp := &BlockPool{
		peers: make(map[types.NodeID]*bpPeer),

		requesters:  make(map[int64]*bpRequester),
		height:      start,
		startHeight: start,
		numPending:  0,

		requestsCh:   requestsCh,
		errorsCh:     errorsCh,
		lastSyncRate: 0,
	}
	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
	return bp
}
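// exampleNewPool is an illustrative sketch of typical construction and
// startup of a BlockPool; the channel capacities, the start height of 1, and
// the peer ID are assumptions for the example, not values prescribed by this
// package.
//nolint:unused
func exampleNewPool() {
	// Buffered channels keep the pool's internal sends from blocking until
	// the reactor loop starts draining them.
	requestsCh := make(chan BlockRequest, maxTotalRequesters)
	errorsCh := make(chan peerError, maxPeerErrBuffer)

	pool := NewBlockPool(1, requestsCh, errorsCh)
	if err := pool.Start(); err != nil {
		panic(err) // a real caller would propagate this error
	}
	defer func() { _ = pool.Stop() }()

	// Peers are registered as their status messages arrive, e.g.:
	pool.SetPeerRange("peer1", 1, 1000)
}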
// OnStart implements service.Service by spawning the requesters routine and
// recording the pool's start time.
func (pool *BlockPool) OnStart() error {
	pool.lastAdvance = time.Now()
	pool.lastHundredBlockTimeStamp = pool.lastAdvance
	go pool.makeRequestersRoutine()
	return nil
}

// spawns requesters as needed
func (pool *BlockPool) makeRequestersRoutine() {
	for {
		if !pool.IsRunning() {
			break
		}

		_, numPending, lenRequesters := pool.GetStatus()
		switch {
		case numPending >= maxPendingRequests:
			// sleep for a bit.
			time.Sleep(requestIntervalMS * time.Millisecond)
			// check for timed out peers
			pool.removeTimedoutPeers()
		case lenRequesters >= maxTotalRequesters:
			// sleep for a bit.
			time.Sleep(requestIntervalMS * time.Millisecond)
			// check for timed out peers
			pool.removeTimedoutPeers()
		default:
			// request more blocks.
			pool.makeNextRequester()
		}
	}
}

func (pool *BlockPool) removeTimedoutPeers() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		// check if peer timed out
		if !peer.didTimeout && peer.numPending > 0 {
			curRate := peer.recvMonitor.Status().CurRate
			// curRate can be 0 on start
			if curRate != 0 && curRate < minRecvRate {
				err := errors.New("peer is not sending us data fast enough")
				pool.sendError(err, peer.id)
				pool.Logger.Error("SendTimeout", "peer", peer.id,
					"reason", err,
					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
				peer.didTimeout = true
			}
		}

		if peer.didTimeout {
			pool.removePeer(peer.id)
		}
	}
}

// GetStatus returns the pool's height, the number of pending requests, and
// the number of requesters.
func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()

	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
}

// IsCaughtUp returns true if this node is caught up, false otherwise.
func (pool *BlockPool) IsCaughtUp() bool {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()

	// Need at least 1 peer to be considered caught up.
	if len(pool.peers) == 0 {
		return false
	}

	// NOTE: we use maxPeerHeight - 1 because to sync block H we require block
	// H+1 to verify the LastCommit.
	return pool.height >= (pool.maxPeerHeight - 1)
}

// PeekTwoBlocks returns blocks at pool.height and pool.height+1.
// We need to see the second block's Commit to validate the first block.
// So we peek two blocks at a time.
// The caller will verify the commit.
func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()

	if r := pool.requesters[pool.height]; r != nil {
		first = r.getBlock()
	}
	if r := pool.requesters[pool.height+1]; r != nil {
		second = r.getBlock()
	}
	return
}
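// exampleSwitchCheck is an illustrative sketch of how a caller might poll the
// pool to decide when to hand off to the consensus reactor, as the package
// comment above suggests. The 60-second stall window is an assumption, not a
// value taken from this package.
//nolint:unused
func exampleSwitchCheck(pool *BlockPool) bool {
	// Within one block of the best peer, or no progress for a full stall
	// window: stop fast syncing.
	return pool.IsCaughtUp() || time.Since(pool.LastAdvance()) > 60*time.Second
}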
// PopRequest pops the first block at pool.height.
// It must have been validated by the second block's Commit from PeekTwoBlocks().
func (pool *BlockPool) PopRequest() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if r := pool.requesters[pool.height]; r != nil {
		if err := r.Stop(); err != nil {
			pool.Logger.Error("Error stopping requester", "err", err)
		}
		delete(pool.requesters, pool.height)
		pool.height++
		pool.lastAdvance = time.Now()

		// lastSyncRate is updated every 100 blocks. It uses an adaptive filter
		// to smooth the block sync rate; the unit is blocks per second.
		if (pool.height-pool.startHeight)%100 == 0 {
			newSyncRate := 100 / time.Since(pool.lastHundredBlockTimeStamp).Seconds()
			if pool.lastSyncRate == 0 {
				pool.lastSyncRate = newSyncRate
			} else {
				pool.lastSyncRate = 0.9*pool.lastSyncRate + 0.1*newSyncRate
			}
			pool.lastHundredBlockTimeStamp = time.Now()
		}
	} else {
		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
	}
}

// RedoRequest invalidates the block at the given height, removes the peer
// responsible for it, and redoes the request using other peers.
// Returns the ID of the removed peer.
func (pool *BlockPool) RedoRequest(height int64) types.NodeID {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	request := pool.requesters[height]
	peerID := request.getPeerID()
	if peerID != types.NodeID("") {
		// RemovePeer will redo all requesters associated with this peer.
		pool.removePeer(peerID)
	}
	return peerID
}

// AddBlock validates that the block comes from the peer it was expected from
// and calls the requester to store it.
// TODO: ensure that blocks come in order for each peer.
func (pool *BlockPool) AddBlock(peerID types.NodeID, block *types.Block, blockSize int) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	requester := pool.requesters[block.Height]
	if requester == nil {
		pool.Logger.Error("peer sent us a block we didn't expect",
			"peer", peerID, "curHeight", pool.height, "blockHeight", block.Height)
		diff := pool.height - block.Height
		if diff < 0 {
			diff *= -1
		}
		if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
			pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
		}
		return
	}

	if requester.setBlock(block, peerID) {
		atomic.AddInt32(&pool.numPending, -1)
		peer := pool.peers[peerID]
		if peer != nil {
			peer.decrPending(blockSize)
		}
	} else {
		err := errors.New("requester is different or block already exists")
		pool.Logger.Error(err.Error(), "peer", peerID, "requester", requester.getPeerID(), "blockHeight", block.Height)
		pool.sendError(err, peerID)
	}
}

// MaxPeerHeight returns the highest reported height.
func (pool *BlockPool) MaxPeerHeight() int64 {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()
	return pool.maxPeerHeight
}

// LastAdvance returns the time when the last block was processed (or the
// start time if no blocks were processed).
func (pool *BlockPool) LastAdvance() time.Time {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()
	return pool.lastAdvance
}
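// exampleConsumeBlocks is an illustrative sketch of the consumer pattern
// implied by PeekTwoBlocks and PopRequest: peek two blocks, verify the first
// against the second's Commit, then pop. The verify callback is an assumption
// standing in for the reactor's real verification logic.
//nolint:unused
func exampleConsumeBlocks(pool *BlockPool, verify func(first, second *types.Block) error) {
	for pool.IsRunning() {
		first, second := pool.PeekTwoBlocks()
		if first == nil || second == nil {
			// Not enough blocks buffered yet; back off briefly.
			time.Sleep(requestIntervalMS * time.Millisecond)
			continue
		}
		if err := verify(first, second); err != nil {
			// The block at pool.height is bad: drop the peer that sent it
			// and re-request the height from someone else.
			pool.RedoRequest(first.Height)
			continue
		}
		pool.PopRequest()
	}
}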
// SetPeerRange sets the peer's alleged blockchain base and height.
func (pool *BlockPool) SetPeerRange(peerID types.NodeID, base int64, height int64) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	peer := pool.peers[peerID]
	if peer != nil {
		peer.base = base
		peer.height = height
	} else {
		peer = newBPPeer(pool, peerID, base, height)
		peer.setLogger(pool.Logger.With("peer", peerID))
		pool.peers[peerID] = peer
	}

	if height > pool.maxPeerHeight {
		pool.maxPeerHeight = height
	}
}

// RemovePeer removes the peer with peerID from the pool. If there's no peer
// with peerID, the function is a no-op.
func (pool *BlockPool) RemovePeer(peerID types.NodeID) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	pool.removePeer(peerID)
}

func (pool *BlockPool) removePeer(peerID types.NodeID) {
	for _, requester := range pool.requesters {
		if requester.getPeerID() == peerID {
			requester.redo(peerID)
		}
	}

	peer, ok := pool.peers[peerID]
	if ok {
		if peer.timeout != nil {
			peer.timeout.Stop()
		}

		delete(pool.peers, peerID)

		// Find the peer with the biggest height and update maxPeerHeight if
		// the removed peer's height was the biggest.
		if peer.height == pool.maxPeerHeight {
			pool.updateMaxPeerHeight()
		}
	}
}

// If no peers are left, maxPeerHeight is set to 0.
func (pool *BlockPool) updateMaxPeerHeight() {
	var max int64
	for _, peer := range pool.peers {
		if peer.height > max {
			max = peer.height
		}
	}
	pool.maxPeerHeight = max
}

// Pick an available peer that has the given height available.
// If no peers are available, returns nil.
func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		if peer.didTimeout {
			pool.removePeer(peer.id)
			continue
		}
		if peer.numPending >= maxPendingRequestsPerPeer {
			continue
		}
		if height < peer.base || height > peer.height {
			continue
		}
		peer.incrPending()
		return peer
	}
	return nil
}

func (pool *BlockPool) makeNextRequester() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	nextHeight := pool.height + pool.requestersLen()
	if nextHeight > pool.maxPeerHeight {
		return
	}

	request := newBPRequester(pool, nextHeight)

	pool.requesters[nextHeight] = request
	atomic.AddInt32(&pool.numPending, 1)

	err := request.Start()
	if err != nil {
		request.Logger.Error("Error starting request", "err", err)
	}
}

func (pool *BlockPool) requestersLen() int64 {
	return int64(len(pool.requesters))
}

func (pool *BlockPool) sendRequest(height int64, peerID types.NodeID) {
	if !pool.IsRunning() {
		return
	}
	pool.requestsCh <- BlockRequest{height, peerID}
}

func (pool *BlockPool) sendError(err error, peerID types.NodeID) {
	if !pool.IsRunning() {
		return
	}
	pool.errorsCh <- peerError{err, peerID}
}

// for debugging purposes
//nolint:unused
func (pool *BlockPool) debug() string {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	str := ""
	nextHeight := pool.height + pool.requestersLen()
	for h := pool.height; h < nextHeight; h++ {
		if pool.requesters[h] == nil {
			str += fmt.Sprintf("H(%v):X ", h)
		} else {
			str += fmt.Sprintf("H(%v):", h)
			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
		}
	}
	return str
}
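// exampleServeRequests is an illustrative sketch of the reactor-side loop
// that drains requestsCh, which sendRequest above feeds. The send callback is
// an assumption standing in for the real p2p send.
//nolint:unused
func exampleServeRequests(requestsCh <-chan BlockRequest, send func(peerID types.NodeID, height int64)) {
	for req := range requestsCh {
		// Each BlockRequest pairs a height with the peer the pool picked
		// for it; forward the request to exactly that peer.
		send(req.PeerID, req.Height)
	}
}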
func (pool *BlockPool) targetSyncBlocks() int64 {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()

	return pool.maxPeerHeight - pool.startHeight + 1
}

func (pool *BlockPool) getLastSyncRate() float64 {
	pool.mtx.RLock()
	defer pool.mtx.RUnlock()

	return pool.lastSyncRate
}

//-------------------------------------

type bpPeer struct {
	didTimeout  bool
	numPending  int32
	height      int64
	base        int64
	pool        *BlockPool
	id          types.NodeID
	recvMonitor *flow.Monitor

	timeout *time.Timer

	logger log.Logger
}

func newBPPeer(pool *BlockPool, peerID types.NodeID, base int64, height int64) *bpPeer {
	peer := &bpPeer{
		pool:       pool,
		id:         peerID,
		base:       base,
		height:     height,
		numPending: 0,
		logger:     log.NewNopLogger(),
	}
	return peer
}

func (peer *bpPeer) setLogger(l log.Logger) {
	peer.logger = l
}

func (peer *bpPeer) resetMonitor() {
	peer.recvMonitor = flow.New(time.Second, time.Second*40)
	// Seed the rate EMA above minRecvRate so a fresh peer is not flagged as
	// slow before any samples arrive.
	initialValue := float64(minRecvRate) * math.E
	peer.recvMonitor.SetREMA(initialValue)
}

func (peer *bpPeer) resetTimeout() {
	if peer.timeout == nil {
		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
	} else {
		peer.timeout.Reset(peerTimeout)
	}
}

func (peer *bpPeer) incrPending() {
	if peer.numPending == 0 {
		peer.resetMonitor()
		peer.resetTimeout()
	}
	peer.numPending++
}

func (peer *bpPeer) decrPending(recvSize int) {
	peer.numPending--
	if peer.numPending == 0 {
		peer.timeout.Stop()
	} else {
		peer.recvMonitor.Update(recvSize)
		peer.resetTimeout()
	}
}

func (peer *bpPeer) onTimeout() {
	peer.pool.mtx.Lock()
	defer peer.pool.mtx.Unlock()

	err := errors.New("peer did not send us anything")
	peer.pool.sendError(err, peer.id)
	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
	peer.didTimeout = true
}
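// exampleSyncETA is an illustrative sketch combining the pool's progress
// fields into a rough time-to-completion estimate. The helper is an
// assumption for illustration, not part of the package's API.
//nolint:unused
func exampleSyncETA(pool *BlockPool) time.Duration {
	rate := pool.getLastSyncRate() // smoothed blocks/s; zero until 100 blocks have synced
	if rate == 0 {
		return 0
	}
	height, _, _ := pool.GetStatus()
	// startHeight is set once at construction, so reading it unlocked is
	// acceptable for a rough estimate.
	remaining := pool.targetSyncBlocks() - (height - pool.startHeight)
	return time.Duration(float64(remaining) / rate * float64(time.Second))
}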
//-------------------------------------

type bpRequester struct {
	service.BaseService
	pool       *BlockPool
	height     int64
	gotBlockCh chan struct{}
	redoCh     chan types.NodeID // redo may be signaled multiple times; the peer ID identifies repeats

	mtx    tmsync.Mutex
	peerID types.NodeID
	block  *types.Block
}

func newBPRequester(pool *BlockPool, height int64) *bpRequester {
	bpr := &bpRequester{
		pool:       pool,
		height:     height,
		gotBlockCh: make(chan struct{}, 1),
		redoCh:     make(chan types.NodeID, 1),

		peerID: "",
		block:  nil,
	}
	bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
	return bpr
}

func (bpr *bpRequester) OnStart() error {
	go bpr.requestRoutine()
	return nil
}

// Returns true if the peer matches and the block doesn't already exist.
func (bpr *bpRequester) setBlock(block *types.Block, peerID types.NodeID) bool {
	bpr.mtx.Lock()
	if bpr.block != nil || bpr.peerID != peerID {
		bpr.mtx.Unlock()
		return false
	}
	bpr.block = block
	bpr.mtx.Unlock()

	select {
	case bpr.gotBlockCh <- struct{}{}:
	default:
	}
	return true
}

func (bpr *bpRequester) getBlock() *types.Block {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.block
}

func (bpr *bpRequester) getPeerID() types.NodeID {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.peerID
}

// This is called from the requestRoutine, upon redo().
func (bpr *bpRequester) reset() {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()

	if bpr.block != nil {
		atomic.AddInt32(&bpr.pool.numPending, 1)
	}

	bpr.peerID = ""
	bpr.block = nil
}

// Tells bpRequester to pick another peer and try again.
// NOTE: Nonblocking, and does nothing if another redo
// was already requested.
func (bpr *bpRequester) redo(peerID types.NodeID) {
	select {
	case bpr.redoCh <- peerID:
	default:
	}
}

// Responsible for picking a peer, sending the request, and re-requesting on
// redo. Returns only when the requester or the pool is stopped.
func (bpr *bpRequester) requestRoutine() {
OUTER_LOOP:
	for {
		// Pick a peer to send the request to.
		var peer *bpPeer
	PICK_PEER_LOOP:
		for {
			if !bpr.IsRunning() || !bpr.pool.IsRunning() {
				return
			}
			peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
			if peer == nil {
				time.Sleep(requestIntervalMS * time.Millisecond)
				continue PICK_PEER_LOOP
			}
			break PICK_PEER_LOOP
		}
		bpr.mtx.Lock()
		bpr.peerID = peer.id
		bpr.mtx.Unlock()

		// Send the request and wait.
		bpr.pool.sendRequest(bpr.height, peer.id)
	WAIT_LOOP:
		for {
			select {
			case <-bpr.pool.Quit():
				if err := bpr.Stop(); err != nil {
					bpr.Logger.Error("Error stopping requester", "err", err)
				}
				return
			case <-bpr.Quit():
				return
			case peerID := <-bpr.redoCh:
				if peerID == bpr.peerID {
					bpr.reset()
					continue OUTER_LOOP
				}
				continue WAIT_LOOP
			case <-bpr.gotBlockCh:
				// We got a block!
				// Continue the for-loop and wait until Quit.
				continue WAIT_LOOP
			}
		}
	}
}
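// exampleBlockMsg is a hypothetical envelope type used only by the
// exampleWiring sketch below; the real reactor uses its own message plumbing.
type exampleBlockMsg struct {
	peer  types.NodeID
	block *types.Block
	size  int
}

// exampleWiring is an illustrative sketch tying the pieces above together:
// start the pool, forward outgoing requests, feed incoming blocks back in,
// and watch the error channel. The send callback and the recv channel are
// assumptions standing in for the real p2p layer.
//nolint:unused
func exampleWiring(startHeight int64, send func(types.NodeID, int64), recv <-chan exampleBlockMsg) error {
	requestsCh := make(chan BlockRequest, maxTotalRequesters)
	errorsCh := make(chan peerError, maxPeerErrBuffer)

	pool := NewBlockPool(startHeight, requestsCh, errorsCh)
	if err := pool.Start(); err != nil {
		return err
	}

	for {
		select {
		case req := <-requestsCh:
			// The pool assigned a height to a peer: ask that peer for the block.
			send(req.PeerID, req.Height)
		case m := <-recv:
			// A block arrived: hand it to the pool, which routes it to the
			// requester for m.block.Height.
			pool.AddBlock(m.peer, m.block, m.size)
		case <-errorsCh:
			// A peer misbehaved or timed out: a real reactor would
			// disconnect it here.
		case <-pool.Quit():
			return nil
		}
	}
}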