// Source: github.com/okex/exchain@v1.8.0/libs/tendermint/blockchain/v0/pool.go

package v0

import (
	"errors"
	"fmt"
	"math"
	"sync"
	"sync/atomic"
	"time"

	flow "github.com/okex/exchain/libs/tendermint/libs/flowrate"
	"github.com/okex/exchain/libs/tendermint/libs/log"
	"github.com/okex/exchain/libs/tendermint/libs/service"
	"github.com/okex/exchain/libs/tendermint/p2p"
	"github.com/okex/exchain/libs/tendermint/types"
)

/*
Back-of-the-envelope sizing example, eg:

	L    = latency = 0.1s
	P    = num peers = 10
	FN   = num full nodes
	BS   = 1kB block size
	CB   = 1 Mbit/s = 128 kB/s
	CB/P = 12.8 kB
	B/S  = CB/P/BS = 12.8 blocks/s

	12.8 * 0.1 = 1.28 blocks on conn
*/

const (
	// requestIntervalMS is the pause, in milliseconds, used by the pool's
	// polling loops between retries.
	requestIntervalMS = 2
	// maxTotalRequesters caps the number of requesters that may exist at once.
	maxTotalRequesters = 600
	// maxPendingRequests caps the number of requests that are still waiting
	// for a peer assignment or a block response.
	maxPendingRequests = maxTotalRequesters
	// maxPendingRequestsPerPeer caps the outstanding requests assigned to a
	// single peer.
	maxPendingRequestsPerPeer = 20

	// Minimum recv rate to ensure we're receiving blocks from a peer fast
	// enough. If a peer is not sending us data at that rate or faster, we
	// consider it to have timed out and we disconnect.
	//
	// Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s,
	// sending data across atlantic ~ 7.5 KB/s.
	minRecvRate = 7680

	// Maximum difference between current and new block's height.
	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
)

// peerTimeout is how long a peer with outstanding requests may stay silent
// before it is flagged as timed out.
var peerTimeout = 15 * time.Second // not const so we can override with tests

/*
Peers self report their heights when we join the block pool.
Starting from our latest pool.height, we request blocks
in sequence from peers that reported higher heights than ours.
Every so often we ask peers what height they're on so we can keep going.

Requests are continuously made for blocks of higher heights until
the limit is reached. If most of the requests have no available peers, and we
are not at peer limits, we can probably switch to consensus reactor.
*/

// BlockPool keeps track of the fast sync peers, block requests and block responses.
62 type BlockPool struct { 63 service.BaseService 64 startTime time.Time 65 66 mtx sync.Mutex 67 // block requests 68 requesters map[int64]*bpRequester 69 height int64 // the lowest key in requesters. 70 // peers 71 peers map[p2p.ID]*bpPeer 72 maxPeerHeight int64 // the biggest reported height 73 74 // atomic 75 numPending int32 // number of requests pending assignment or block response 76 77 requestsCh chan<- BlockRequest 78 errorsCh chan<- peerError 79 } 80 81 // NewBlockPool returns a new BlockPool with the height equal to start. Block 82 // requests and errors will be sent to requestsCh and errorsCh accordingly. 83 func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool { 84 bp := &BlockPool{ 85 peers: make(map[p2p.ID]*bpPeer), 86 87 requesters: make(map[int64]*bpRequester), 88 height: start, 89 numPending: 0, 90 91 requestsCh: requestsCh, 92 errorsCh: errorsCh, 93 } 94 bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp) 95 return bp 96 } 97 98 func (pool *BlockPool) SetHeight(height int64) { 99 pool.mtx.Lock() 100 defer pool.mtx.Unlock() 101 102 pool.height = height 103 } 104 105 // OnStart implements service.Service by spawning requesters routine and recording 106 // pool's start time. 107 func (pool *BlockPool) OnStart() error { 108 go pool.makeRequestersRoutine() 109 pool.startTime = time.Now() 110 return nil 111 } 112 113 func (pool *BlockPool) OnReset() error { 114 // clear up all requesters 115 pool.mtx.Lock() 116 defer pool.mtx.Unlock() 117 118 for height, r := range pool.requesters { 119 r.Stop() 120 delete(pool.requesters, height) 121 } 122 pool.numPending = 0 123 124 return nil 125 } 126 127 // spawns requesters as needed 128 func (pool *BlockPool) makeRequestersRoutine() { 129 for { 130 if !pool.IsRunning() { 131 break 132 } 133 134 _, numPending, lenRequesters := pool.GetStatus() 135 switch { 136 case numPending >= maxPendingRequests: 137 // sleep for a bit. 
138 time.Sleep(requestIntervalMS * time.Millisecond) 139 // check for timed out peers 140 pool.removeTimedoutPeers() 141 case lenRequesters >= maxTotalRequesters: 142 // sleep for a bit. 143 time.Sleep(requestIntervalMS * time.Millisecond) 144 // check for timed out peers 145 pool.removeTimedoutPeers() 146 default: 147 // request for more blocks. 148 pool.makeNextRequester() 149 } 150 } 151 } 152 153 func (pool *BlockPool) removeTimedoutPeers() { 154 pool.mtx.Lock() 155 defer pool.mtx.Unlock() 156 157 for _, peer := range pool.peers { 158 if !peer.didTimeout && peer.numPending > 0 { 159 curRate := peer.recvMonitor.Status().CurRate 160 // curRate can be 0 on start 161 if curRate != 0 && curRate < minRecvRate { 162 err := errors.New("peer is not sending us data fast enough") 163 pool.sendError(err, peer.id) 164 pool.Logger.Error("SendTimeout", "peer", peer.id, 165 "reason", err, 166 "curRate", fmt.Sprintf("%d KB/s", curRate/1024), 167 "minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024)) 168 peer.didTimeout = true 169 } 170 } 171 if peer.didTimeout { 172 pool.removePeer(peer.id) 173 } 174 } 175 } 176 177 // GetStatus returns pool's height, numPending requests and the number of 178 // requesters. 179 func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) { 180 pool.mtx.Lock() 181 defer pool.mtx.Unlock() 182 183 return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters) 184 } 185 186 // IsCaughtUp returns true if this node is caught up, false - otherwise. 187 // TODO: relax conditions, prevent abuse. 188 func (pool *BlockPool) IsCaughtUp() bool { 189 pool.mtx.Lock() 190 defer pool.mtx.Unlock() 191 192 // Need at least 1 peer to be considered caught up. 193 if len(pool.peers) == 0 { 194 pool.Logger.Debug("Blockpool has no peers") 195 return false 196 } 197 198 // Some conditions to determine if we're caught up. 
199 // Ensures we've either received a block or waited some amount of time, 200 // and that we're synced to the highest known height. 201 // Note we use maxPeerHeight - 1 because to sync block H requires block H+1 202 // to verify the LastCommit. 203 // TODO: should change judge conditions 204 receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second 205 ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1) 206 isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers 207 return isCaughtUp 208 } 209 210 // PeekTwoBlocks returns blocks at pool.height and pool.height+1. 211 // We need to see the second block's Commit to validate the first block. 212 // So we peek two blocks at a time. 213 // The caller will verify the commit. 214 func (pool *BlockPool) PeekTwoBlocks() (first, second *types.Block, firstParts *types.PartSet) { 215 pool.mtx.Lock() 216 defer pool.mtx.Unlock() 217 218 if r := pool.requesters[pool.height]; r != nil { 219 first, firstParts = r.getBlock() 220 } 221 if r := pool.requesters[pool.height+1]; r != nil { 222 second, _ = r.getBlock() 223 } 224 return 225 } 226 227 // PopRequest pops the first block at pool.height. 228 // It must have been validated by 'second'.Commit from PeekTwoBlocks(). 229 func (pool *BlockPool) PopRequest() { 230 pool.mtx.Lock() 231 defer pool.mtx.Unlock() 232 233 if r := pool.requesters[pool.height]; r != nil { 234 /* The block can disappear at any time, due to removePeer(). 235 if r := pool.requesters[pool.height]; r == nil || r.block == nil { 236 PanicSanity("PopRequest() requires a valid block") 237 } 238 */ 239 r.Stop() 240 delete(pool.requesters, pool.height) 241 pool.height++ 242 } else { 243 panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height)) 244 } 245 } 246 247 // RedoRequest invalidates the block at pool.height, 248 // Remove the peer and redo request from others. 
249 // Returns the ID of the removed peer. 250 func (pool *BlockPool) RedoRequest(height int64) p2p.ID { 251 pool.mtx.Lock() 252 defer pool.mtx.Unlock() 253 254 request := pool.requesters[height] 255 peerID := request.getPeerID() 256 if peerID != p2p.ID("") { 257 // RemovePeer will redo all requesters associated with this peer. 258 pool.removePeer(peerID) 259 } 260 return peerID 261 } 262 263 // AddBlock validates that the block comes from the peer it was expected from and calls the requester to store it. 264 // TODO: ensure that blocks come in order for each peer. 265 func (pool *BlockPool) AddBlock(peerID p2p.ID, msg *bcBlockResponseMessage, blockSize int) { 266 pool.mtx.Lock() 267 defer pool.mtx.Unlock() 268 269 block := msg.Block 270 requester := pool.requesters[block.Height] 271 if requester == nil { 272 pool.Logger.Info( 273 "peer sent us a block we didn't expect", 274 "peer", 275 peerID, 276 "curHeight", 277 pool.height, 278 "blockHeight", 279 block.Height) 280 diff := pool.height - block.Height 281 if diff < 0 { 282 diff *= -1 283 } 284 if diff > maxDiffBetweenCurrentAndReceivedBlockHeight { 285 pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID) 286 } 287 return 288 } 289 290 if requester.setBlock(block, msg.ExInfo, peerID) { 291 atomic.AddInt32(&pool.numPending, -1) 292 peer := pool.peers[peerID] 293 if peer != nil { 294 peer.decrPending(blockSize) 295 } 296 } else { 297 pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height) 298 pool.sendError(errors.New("invalid peer"), peerID) 299 } 300 } 301 302 // MaxPeerHeight returns the highest reported height. 303 func (pool *BlockPool) MaxPeerHeight() int64 { 304 pool.mtx.Lock() 305 defer pool.mtx.Unlock() 306 return pool.maxPeerHeight 307 } 308 309 // SetPeerRange sets the peer's alleged blockchain base and height. 
310 func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64, storeHeight int64) bool { 311 pool.mtx.Lock() 312 defer pool.mtx.Unlock() 313 314 peer := pool.peers[peerID] 315 if peer != nil { 316 peer.base = base 317 peer.height = height 318 } else { 319 peer = newBPPeer(pool, peerID, base, height) 320 peer.setLogger(pool.Logger.With("peer", peerID)) 321 pool.peers[peerID] = peer 322 } 323 324 if height > pool.maxPeerHeight { 325 pool.maxPeerHeight = height 326 } 327 328 // compute how many peers' height is greater than height 329 if !pool.IsRunning() && storeHeight+MaxIntervalForFastSync <= height { 330 return true 331 } 332 333 return false 334 } 335 336 // RemovePeer removes the peer with peerID from the pool. If there's no peer 337 // with peerID, function is a no-op. 338 func (pool *BlockPool) RemovePeer(peerID p2p.ID) { 339 pool.mtx.Lock() 340 defer pool.mtx.Unlock() 341 342 pool.removePeer(peerID) 343 } 344 345 func (pool *BlockPool) removePeer(peerID p2p.ID) { 346 for _, requester := range pool.requesters { 347 if requester.getPeerID() == peerID { 348 requester.redo(peerID) 349 } 350 } 351 352 peer, ok := pool.peers[peerID] 353 if ok { 354 if peer.timeout != nil { 355 peer.timeout.Stop() 356 } 357 358 delete(pool.peers, peerID) 359 360 // Find a new peer with the biggest height and update maxPeerHeight if the 361 // peer's height was the biggest. 362 if peer.height == pool.maxPeerHeight { 363 pool.updateMaxPeerHeight() 364 } 365 } 366 } 367 368 // If no peers are left, maxPeerHeight is set to 0. 369 func (pool *BlockPool) updateMaxPeerHeight() { 370 var max int64 371 for _, peer := range pool.peers { 372 if peer.height > max { 373 max = peer.height 374 } 375 } 376 pool.maxPeerHeight = max 377 } 378 379 // Pick an available peer with the given height available. 380 // If no peers are available, returns nil. 
381 func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer { 382 pool.mtx.Lock() 383 defer pool.mtx.Unlock() 384 385 for _, peer := range pool.peers { 386 if peer.didTimeout { 387 pool.removePeer(peer.id) 388 continue 389 } 390 if peer.numPending >= maxPendingRequestsPerPeer { 391 continue 392 } 393 if height < peer.base || height > peer.height { 394 continue 395 } 396 peer.incrPending() 397 return peer 398 } 399 return nil 400 } 401 402 func (pool *BlockPool) makeNextRequester() { 403 pool.mtx.Lock() 404 defer pool.mtx.Unlock() 405 406 nextHeight := pool.height + pool.requestersLen() 407 if nextHeight > pool.maxPeerHeight { 408 return 409 } 410 411 request := newBPRequester(pool, nextHeight) 412 413 pool.requesters[nextHeight] = request 414 atomic.AddInt32(&pool.numPending, 1) 415 416 err := request.Start() 417 if err != nil { 418 request.Logger.Error("Error starting request", "err", err) 419 } 420 } 421 422 func (pool *BlockPool) requestersLen() int64 { 423 return int64(len(pool.requesters)) 424 } 425 426 func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) { 427 if !pool.IsRunning() { 428 return 429 } 430 pool.requestsCh <- BlockRequest{height, peerID} 431 } 432 433 func (pool *BlockPool) sendError(err error, peerID p2p.ID) { 434 if !pool.IsRunning() { 435 return 436 } 437 pool.errorsCh <- peerError{err, peerID} 438 } 439 440 // for debugging purposes 441 //nolint:unused 442 func (pool *BlockPool) debug() string { 443 pool.mtx.Lock() 444 defer pool.mtx.Unlock() 445 446 str := "" 447 nextHeight := pool.height + pool.requestersLen() 448 for h := pool.height; h < nextHeight; h++ { 449 if pool.requesters[h] == nil { 450 str += fmt.Sprintf("H(%v):X ", h) 451 } else { 452 str += fmt.Sprintf("H(%v):", h) 453 str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil) 454 } 455 } 456 return str 457 } 458 459 //------------------------------------- 460 461 type bpPeer struct { 462 didTimeout bool 463 numPending int32 464 height int64 465 base 
int64 466 pool *BlockPool 467 id p2p.ID 468 recvMonitor *flow.Monitor 469 470 timeout *time.Timer 471 472 logger log.Logger 473 } 474 475 func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer { 476 peer := &bpPeer{ 477 pool: pool, 478 id: peerID, 479 base: base, 480 height: height, 481 numPending: 0, 482 logger: log.NewNopLogger(), 483 } 484 return peer 485 } 486 487 func (peer *bpPeer) setLogger(l log.Logger) { 488 peer.logger = l 489 } 490 491 func (peer *bpPeer) resetMonitor() { 492 peer.recvMonitor = flow.New(time.Second, time.Second*40) 493 initialValue := float64(minRecvRate) * math.E 494 peer.recvMonitor.SetREMA(initialValue) 495 } 496 497 func (peer *bpPeer) resetTimeout() { 498 if peer.timeout == nil { 499 peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout) 500 } else { 501 peer.timeout.Reset(peerTimeout) 502 } 503 } 504 505 func (peer *bpPeer) incrPending() { 506 if peer.numPending == 0 { 507 peer.resetMonitor() 508 peer.resetTimeout() 509 } 510 peer.numPending++ 511 } 512 513 func (peer *bpPeer) decrPending(recvSize int) { 514 peer.numPending-- 515 if peer.numPending == 0 { 516 peer.timeout.Stop() 517 } else { 518 peer.recvMonitor.Update(recvSize) 519 peer.resetTimeout() 520 } 521 } 522 523 func (peer *bpPeer) onTimeout() { 524 peer.pool.mtx.Lock() 525 defer peer.pool.mtx.Unlock() 526 527 err := errors.New("peer did not send us anything") 528 peer.pool.sendError(err, peer.id) 529 peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout) 530 peer.didTimeout = true 531 } 532 533 //------------------------------------- 534 535 type bpRequester struct { 536 service.BaseService 537 pool *BlockPool 538 height int64 539 gotBlockCh chan struct{} 540 redoCh chan p2p.ID //redo may send multitime, add peerId to identify repeat 541 542 mtx sync.Mutex 543 peerID p2p.ID 544 block *types.Block 545 blockParts *types.PartSet 546 } 547 548 func newBPRequester(pool *BlockPool, height int64) *bpRequester { 549 bpr := 
&bpRequester{ 550 pool: pool, 551 height: height, 552 gotBlockCh: make(chan struct{}, 1), 553 redoCh: make(chan p2p.ID, 1), 554 555 peerID: "", 556 block: nil, 557 } 558 bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr) 559 return bpr 560 } 561 562 func (bpr *bpRequester) OnStart() error { 563 go bpr.requestRoutine() 564 return nil 565 } 566 567 // Returns true if the peer matches and block doesn't already exist. 568 func (bpr *bpRequester) setBlock(block *types.Block, exInfo *types.BlockExInfo, peerID p2p.ID) bool { 569 bpr.mtx.Lock() 570 if bpr.block != nil || bpr.peerID != peerID { 571 bpr.mtx.Unlock() 572 return false 573 } 574 bpr.block = block 575 bpr.blockParts = block.MakePartSetByExInfo(exInfo) 576 577 bpr.mtx.Unlock() 578 579 select { 580 case bpr.gotBlockCh <- struct{}{}: 581 default: 582 } 583 return true 584 } 585 586 func (bpr *bpRequester) getBlock() (*types.Block, *types.PartSet) { 587 bpr.mtx.Lock() 588 defer bpr.mtx.Unlock() 589 return bpr.block, bpr.blockParts 590 } 591 592 func (bpr *bpRequester) getPeerID() p2p.ID { 593 bpr.mtx.Lock() 594 defer bpr.mtx.Unlock() 595 return bpr.peerID 596 } 597 598 // This is called from the requestRoutine, upon redo(). 599 func (bpr *bpRequester) reset() { 600 bpr.mtx.Lock() 601 defer bpr.mtx.Unlock() 602 603 if bpr.block != nil { 604 atomic.AddInt32(&bpr.pool.numPending, 1) 605 } 606 607 bpr.peerID = "" 608 bpr.block = nil 609 } 610 611 // Tells bpRequester to pick another peer and try again. 612 // NOTE: Nonblocking, and does nothing if another redo 613 // was already requested. 614 func (bpr *bpRequester) redo(peerID p2p.ID) { 615 select { 616 case bpr.redoCh <- peerID: 617 default: 618 } 619 } 620 621 // Responsible for making more requests as necessary 622 // Returns only when a block is found (e.g. AddBlock() is called) 623 func (bpr *bpRequester) requestRoutine() { 624 OUTER_LOOP: 625 for { 626 // Pick a peer to send request to. 
627 var peer *bpPeer 628 PICK_PEER_LOOP: 629 for { 630 if !bpr.IsRunning() || !bpr.pool.IsRunning() { 631 return 632 } 633 peer = bpr.pool.pickIncrAvailablePeer(bpr.height) 634 if peer == nil { 635 //log.Info("No peers available", "height", height) 636 time.Sleep(requestIntervalMS * time.Millisecond) 637 continue PICK_PEER_LOOP 638 } 639 break PICK_PEER_LOOP 640 } 641 bpr.mtx.Lock() 642 bpr.peerID = peer.id 643 bpr.mtx.Unlock() 644 645 // Send request and wait. 646 bpr.pool.sendRequest(bpr.height, peer.id) 647 WAIT_LOOP: 648 for { 649 select { 650 case <-bpr.pool.Quit(): 651 bpr.Stop() 652 return 653 case <-bpr.Quit(): 654 return 655 case peerID := <-bpr.redoCh: 656 if peerID == bpr.peerID { 657 bpr.reset() 658 continue OUTER_LOOP 659 } else { 660 continue WAIT_LOOP 661 } 662 case <-bpr.gotBlockCh: 663 // We got a block! 664 // Continue the for-loop and wait til Quit. 665 continue WAIT_LOOP 666 } 667 } 668 } 669 } 670 671 // BlockRequest stores a block request identified by the block Height and the PeerID responsible for 672 // delivering the block 673 type BlockRequest struct { 674 Height int64 675 PeerID p2p.ID 676 }