github.com/DFWallet/tendermint-cosmos@v0.0.2/blockchain/v0/pool.go

package v0

import (
	"errors"
	"fmt"
	"math"
	"sync/atomic"
	"time"

	flow "github.com/DFWallet/tendermint-cosmos/libs/flowrate"
	"github.com/DFWallet/tendermint-cosmos/libs/log"
	"github.com/DFWallet/tendermint-cosmos/libs/service"
	tmsync "github.com/DFWallet/tendermint-cosmos/libs/sync"
	"github.com/DFWallet/tendermint-cosmos/p2p"
	"github.com/DFWallet/tendermint-cosmos/types"
)

/*
eg, L = latency = 0.1s
    P = num peers = 10
    FN = num full nodes
    BS = 1kB block size
    CB = 1 Mbit/s = 128 kB/s
    CB/P = 12.8 kB/s
    B/S = CB/P/BS = 12.8 blocks/s

    12.8 * 0.1 = 1.28 blocks on conn
*/

const (
	requestIntervalMS         = 2
	maxTotalRequesters        = 600
	maxPendingRequests        = maxTotalRequesters
	maxPendingRequestsPerPeer = 20

	// Minimum recv rate to ensure we're receiving blocks from a peer fast
	// enough. If a peer is not sending us data at least at that rate, we
	// consider them to have timed out and we disconnect.
	//
	// Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s,
	// sending data across the Atlantic ~ 7.5 KB/s.
	minRecvRate = 7680

	// Maximum difference between current and new block's height.
	maxDiffBetweenCurrentAndReceivedBlockHeight = 100
)

var peerTimeout = 15 * time.Second // not const so we can override with tests

/*
Peers self-report their heights when we join the block pool.
Starting from our latest pool.height, we request blocks
in sequence from peers that reported higher heights than ours.
Every so often we ask peers what height they're on so we can keep going.

Requests are continuously made for blocks of higher heights until
the limit is reached. If most of the requests have no available peers, and we
are not at peer limits, we can probably switch to the consensus reactor.
*/

// BlockPool keeps track of the fast sync peers, block requests and block responses.
type BlockPool struct {
	service.BaseService
	startTime time.Time

	mtx tmsync.Mutex
	// block requests
	requesters map[int64]*bpRequester
	height     int64 // the lowest key in requesters.
	// peers
	peers         map[p2p.ID]*bpPeer
	maxPeerHeight int64 // the biggest reported height

	// atomic
	numPending int32 // number of requests pending assignment or block response

	requestsCh chan<- BlockRequest
	errorsCh   chan<- peerError
}

// NewBlockPool returns a new BlockPool with the height equal to start. Block
// requests and errors will be sent to requestsCh and errorsCh accordingly.
func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool {
	bp := &BlockPool{
		peers: make(map[p2p.ID]*bpPeer),

		requesters: make(map[int64]*bpRequester),
		height:     start,
		numPending: 0,

		requestsCh: requestsCh,
		errorsCh:   errorsCh,
	}
	bp.BaseService = *service.NewBaseService(nil, "BlockPool", bp)
	return bp
}
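// A minimal sketch of how a caller might construct and drive the pool. The
// channel capacities and startHeight are illustrative assumptions, not
// definitions from this file:
//
//	requestsCh := make(chan BlockRequest, maxTotalRequesters)
//	errorsCh := make(chan peerError, maxTotalRequesters)
//	pool := NewBlockPool(startHeight, requestsCh, errorsCh)
//	if err := pool.Start(); err != nil {
//		panic(err)
//	}
//	// The owning reactor then services requestsCh and errorsCh
//	// (see the sketch after BlockRequest at the end of this file).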
// OnStart implements service.Service by spawning requesters routine and recording
// pool's start time.
func (pool *BlockPool) OnStart() error {
	go pool.makeRequestersRoutine()
	pool.startTime = time.Now()
	return nil
}

// spawns requesters as needed
func (pool *BlockPool) makeRequestersRoutine() {
	for {
		if !pool.IsRunning() {
			break
		}

		_, numPending, lenRequesters := pool.GetStatus()
		switch {
		case numPending >= maxPendingRequests:
			// sleep for a bit.
			time.Sleep(requestIntervalMS * time.Millisecond)
			// check for timed out peers
			pool.removeTimedoutPeers()
		case lenRequesters >= maxTotalRequesters:
			// sleep for a bit.
			time.Sleep(requestIntervalMS * time.Millisecond)
			// check for timed out peers
			pool.removeTimedoutPeers()
		default:
			// request more blocks.
			pool.makeNextRequester()
		}
	}
}

func (pool *BlockPool) removeTimedoutPeers() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		if !peer.didTimeout && peer.numPending > 0 {
			curRate := peer.recvMonitor.Status().CurRate
			// curRate can be 0 on start
			if curRate != 0 && curRate < minRecvRate {
				err := errors.New("peer is not sending us data fast enough")
				pool.sendError(err, peer.id)
				pool.Logger.Error("SendTimeout", "peer", peer.id,
					"reason", err,
					"curRate", fmt.Sprintf("%d KB/s", curRate/1024),
					"minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024))
				peer.didTimeout = true
			}
		}
		if peer.didTimeout {
			pool.removePeer(peer.id)
		}
	}
}

// GetStatus returns pool's height, numPending requests and the number of
// requesters.
func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters)
}

// IsCaughtUp returns true if this node is caught up, false otherwise.
// TODO: relax conditions, prevent abuse.
func (pool *BlockPool) IsCaughtUp() bool {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	// Need at least 1 peer to be considered caught up.
	if len(pool.peers) == 0 {
		pool.Logger.Debug("Blockpool has no peers")
		return false
	}

	// Some conditions to determine if we're caught up.
	// Ensures we've either received a block or waited some amount of time,
	// and that we're synced to the highest known height.
	// Note we use maxPeerHeight - 1 because to sync block H requires block H+1
	// to verify the LastCommit.
	receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second
	ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1)
	isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers
	return isCaughtUp
}

// PeekTwoBlocks returns blocks at pool.height and pool.height+1.
// We need to see the second block's Commit to validate the first block.
// So we peek two blocks at a time.
// The caller will verify the commit.
func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if r := pool.requesters[pool.height]; r != nil {
		first = r.getBlock()
	}
	if r := pool.requesters[pool.height+1]; r != nil {
		second = r.getBlock()
	}
	return
}
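// A minimal sketch of the peek/verify/pop cycle a consumer might run on top of
// PeekTwoBlocks and PopRequest; verifyCommit is a hypothetical stand-in for
// the caller's actual commit verification:
//
//	first, second := pool.PeekTwoBlocks()
//	if first != nil && second != nil {
//		// second.LastCommit commits first, so verify it before advancing.
//		if err := verifyCommit(first, second.LastCommit); err != nil {
//			peerID := pool.RedoRequest(first.Height) // drop the bad peer and re-request
//			_ = peerID
//		} else {
//			pool.PopRequest() // advances pool.height past first.Height
//		}
//	}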
// PopRequest pops the first block at pool.height.
// It must have been validated by the second block's Commit from PeekTwoBlocks().
func (pool *BlockPool) PopRequest() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	if r := pool.requesters[pool.height]; r != nil {
		/* The block can disappear at any time, due to removePeer().
		if r := pool.requesters[pool.height]; r == nil || r.block == nil {
			PanicSanity("PopRequest() requires a valid block")
		}
		*/
		if err := r.Stop(); err != nil {
			pool.Logger.Error("Error stopping requester", "err", err)
		}
		delete(pool.requesters, pool.height)
		pool.height++
	} else {
		panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height))
	}
}

// RedoRequest invalidates the block at the given height, removes the peer
// that sent it, and redoes the request from other peers.
// Returns the ID of the removed peer.
func (pool *BlockPool) RedoRequest(height int64) p2p.ID {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	request := pool.requesters[height]
	peerID := request.getPeerID()
	if peerID != p2p.ID("") {
		// RemovePeer will redo all requesters associated with this peer.
		pool.removePeer(peerID)
	}
	return peerID
}

// AddBlock validates that the block comes from the peer it was expected from
// and calls the requester to store it.
// TODO: ensure that blocks come in order for each peer.
func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	requester := pool.requesters[block.Height]
	if requester == nil {
		pool.Logger.Info(
			"peer sent us a block we didn't expect",
			"peer", peerID,
			"curHeight", pool.height,
			"blockHeight", block.Height)
		diff := pool.height - block.Height
		if diff < 0 {
			diff *= -1
		}
		if diff > maxDiffBetweenCurrentAndReceivedBlockHeight {
			pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID)
		}
		return
	}

	if requester.setBlock(block, peerID) {
		atomic.AddInt32(&pool.numPending, -1)
		peer := pool.peers[peerID]
		if peer != nil {
			peer.decrPending(blockSize)
		}
	} else {
		pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height)
		pool.sendError(errors.New("invalid peer"), peerID)
	}
}

// MaxPeerHeight returns the highest reported height.
func (pool *BlockPool) MaxPeerHeight() int64 {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()
	return pool.maxPeerHeight
}

// SetPeerRange sets the peer's alleged blockchain base and height.
func (pool *BlockPool) SetPeerRange(peerID p2p.ID, base int64, height int64) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	peer := pool.peers[peerID]
	if peer != nil {
		peer.base = base
		peer.height = height
	} else {
		peer = newBPPeer(pool, peerID, base, height)
		peer.setLogger(pool.Logger.With("peer", peerID))
		pool.peers[peerID] = peer
	}

	if height > pool.maxPeerHeight {
		pool.maxPeerHeight = height
	}
}
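// A minimal sketch of how the owning reactor might feed peer status into the
// pool. someReactor and bcStatusResponseMessage are illustrative assumptions
// about the surrounding reactor, not definitions from this file:
//
//	func (r *someReactor) receiveStatus(src p2p.Peer, msg *bcStatusResponseMessage) {
//		r.pool.SetPeerRange(src.ID(), msg.Base, msg.Height)
//	}
//
//	func (r *someReactor) RemovePeer(peer p2p.Peer, reason interface{}) {
//		r.pool.RemovePeer(peer.ID())
//	}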
// RemovePeer removes the peer with peerID from the pool. If there's no peer
// with peerID, function is a no-op.
func (pool *BlockPool) RemovePeer(peerID p2p.ID) {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	pool.removePeer(peerID)
}

func (pool *BlockPool) removePeer(peerID p2p.ID) {
	for _, requester := range pool.requesters {
		if requester.getPeerID() == peerID {
			requester.redo(peerID)
		}
	}

	peer, ok := pool.peers[peerID]
	if ok {
		if peer.timeout != nil {
			peer.timeout.Stop()
		}

		delete(pool.peers, peerID)

		// If the removed peer's height was the biggest, find the peer with
		// the next-biggest height and update maxPeerHeight.
		if peer.height == pool.maxPeerHeight {
			pool.updateMaxPeerHeight()
		}
	}
}

// If no peers are left, maxPeerHeight is set to 0.
func (pool *BlockPool) updateMaxPeerHeight() {
	var max int64
	for _, peer := range pool.peers {
		if peer.height > max {
			max = peer.height
		}
	}
	pool.maxPeerHeight = max
}

// Picks an available peer whose reported range [base, height] includes the
// given height, and increments its pending count.
// If no peers are available, returns nil.
func (pool *BlockPool) pickIncrAvailablePeer(height int64) *bpPeer {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	for _, peer := range pool.peers {
		if peer.didTimeout {
			pool.removePeer(peer.id)
			continue
		}
		if peer.numPending >= maxPendingRequestsPerPeer {
			continue
		}
		if height < peer.base || height > peer.height {
			continue
		}
		peer.incrPending()
		return peer
	}
	return nil
}

func (pool *BlockPool) makeNextRequester() {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	nextHeight := pool.height + pool.requestersLen()
	if nextHeight > pool.maxPeerHeight {
		return
	}

	request := newBPRequester(pool, nextHeight)

	pool.requesters[nextHeight] = request
	atomic.AddInt32(&pool.numPending, 1)

	err := request.Start()
	if err != nil {
		request.Logger.Error("Error starting request", "err", err)
	}
}

func (pool *BlockPool) requestersLen() int64 {
	return int64(len(pool.requesters))
}

func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.requestsCh <- BlockRequest{height, peerID}
}

func (pool *BlockPool) sendError(err error, peerID p2p.ID) {
	if !pool.IsRunning() {
		return
	}
	pool.errorsCh <- peerError{err, peerID}
}

// for debugging purposes
//nolint:unused
func (pool *BlockPool) debug() string {
	pool.mtx.Lock()
	defer pool.mtx.Unlock()

	str := ""
	nextHeight := pool.height + pool.requestersLen()
	for h := pool.height; h < nextHeight; h++ {
		if pool.requesters[h] == nil {
			str += fmt.Sprintf("H(%v):X ", h)
		} else {
			str += fmt.Sprintf("H(%v):", h)
			str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil)
		}
	}
	return str
}

//-------------------------------------

type bpPeer struct {
	didTimeout  bool
	numPending  int32
	height      int64
	base        int64
	pool        *BlockPool
	id          p2p.ID
	recvMonitor *flow.Monitor

	timeout *time.Timer

	logger log.Logger
}

func newBPPeer(pool *BlockPool, peerID p2p.ID, base int64, height int64) *bpPeer {
	peer := &bpPeer{
		pool:       pool,
		id:         peerID,
		base:       base,
		height:     height,
		numPending: 0,
		logger:     log.NewNopLogger(),
	}
	return peer
}

func (peer *bpPeer) setLogger(l log.Logger) {
	peer.logger = l
}
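// resetMonitor (below) seeds the recv-rate estimate above the timeout
// threshold. A worked example of the arithmetic, assuming the intent is to
// give a fresh peer headroom before the rate check in removeTimedoutPeers()
// can trip:
//
//	minRecvRate  = 7680 B/s (~7.5 KB/s)
//	initialValue = 7680 * e ≈ 7680 * 2.71828 ≈ 20876 B/s (~20.4 KB/s)
//
// flow.New(time.Second, 40*time.Second) samples once per second over a
// 40-second window, so the seeded estimate must decay below minRecvRate
// before the peer is considered too slow.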
func (peer *bpPeer) resetMonitor() {
	peer.recvMonitor = flow.New(time.Second, time.Second*40)
	initialValue := float64(minRecvRate) * math.E
	peer.recvMonitor.SetREMA(initialValue)
}

func (peer *bpPeer) resetTimeout() {
	if peer.timeout == nil {
		peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout)
	} else {
		peer.timeout.Reset(peerTimeout)
	}
}

func (peer *bpPeer) incrPending() {
	if peer.numPending == 0 {
		peer.resetMonitor()
		peer.resetTimeout()
	}
	peer.numPending++
}

func (peer *bpPeer) decrPending(recvSize int) {
	peer.numPending--
	if peer.numPending == 0 {
		peer.timeout.Stop()
	} else {
		peer.recvMonitor.Update(recvSize)
		peer.resetTimeout()
	}
}

func (peer *bpPeer) onTimeout() {
	peer.pool.mtx.Lock()
	defer peer.pool.mtx.Unlock()

	err := errors.New("peer did not send us anything")
	peer.pool.sendError(err, peer.id)
	peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout)
	peer.didTimeout = true
}

//-------------------------------------

type bpRequester struct {
	service.BaseService
	pool       *BlockPool
	height     int64
	gotBlockCh chan struct{}
	redoCh     chan p2p.ID // redo may be signaled multiple times; the peerID identifies the repeat

	mtx    tmsync.Mutex
	peerID p2p.ID
	block  *types.Block
}

func newBPRequester(pool *BlockPool, height int64) *bpRequester {
	bpr := &bpRequester{
		pool:       pool,
		height:     height,
		gotBlockCh: make(chan struct{}, 1),
		redoCh:     make(chan p2p.ID, 1),

		peerID: "",
		block:  nil,
	}
	bpr.BaseService = *service.NewBaseService(nil, "bpRequester", bpr)
	return bpr
}

func (bpr *bpRequester) OnStart() error {
	go bpr.requestRoutine()
	return nil
}

// Returns true if the peer matches and block doesn't already exist.
func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool {
	bpr.mtx.Lock()
	if bpr.block != nil || bpr.peerID != peerID {
		bpr.mtx.Unlock()
		return false
	}
	bpr.block = block
	bpr.mtx.Unlock()

	select {
	case bpr.gotBlockCh <- struct{}{}:
	default:
	}
	return true
}

func (bpr *bpRequester) getBlock() *types.Block {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.block
}

func (bpr *bpRequester) getPeerID() p2p.ID {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()
	return bpr.peerID
}

// This is called from the requestRoutine, upon redo().
func (bpr *bpRequester) reset() {
	bpr.mtx.Lock()
	defer bpr.mtx.Unlock()

	if bpr.block != nil {
		atomic.AddInt32(&bpr.pool.numPending, 1)
	}

	bpr.peerID = ""
	bpr.block = nil
}

// Tells bpRequester to pick another peer and try again.
// NOTE: Nonblocking, and does nothing if another redo
// was already requested.
func (bpr *bpRequester) redo(peerID p2p.ID) {
	select {
	case bpr.redoCh <- peerID:
	default:
	}
}
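// Because redoCh is buffered with capacity 1 and the send in redo() is
// non-blocking, repeated redo calls while one is already queued are dropped.
// A minimal standalone illustration of that pattern (names are illustrative):
//
//	ch := make(chan p2p.ID, 1)
//	trigger := func(id p2p.ID) {
//		select {
//		case ch <- id:
//		default: // a redo is already pending; drop the duplicate
//		}
//	}
//	trigger("peerA")
//	trigger("peerA") // dropped; only one redo will be processed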
// Responsible for making more requests as necessary.
// Keeps running until the requester or the pool is stopped; a received block
// keeps it waiting in WAIT_LOOP rather than returning.
func (bpr *bpRequester) requestRoutine() {
OUTER_LOOP:
	for {
		// Pick a peer to send request to.
		var peer *bpPeer
	PICK_PEER_LOOP:
		for {
			if !bpr.IsRunning() || !bpr.pool.IsRunning() {
				return
			}
			peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
			if peer == nil {
				// log.Info("No peers available", "height", height)
				time.Sleep(requestIntervalMS * time.Millisecond)
				continue PICK_PEER_LOOP
			}
			break PICK_PEER_LOOP
		}
		bpr.mtx.Lock()
		bpr.peerID = peer.id
		bpr.mtx.Unlock()

		// Send request and wait.
		bpr.pool.sendRequest(bpr.height, peer.id)
	WAIT_LOOP:
		for {
			select {
			case <-bpr.pool.Quit():
				if err := bpr.Stop(); err != nil {
					bpr.Logger.Error("Error stopping requester", "err", err)
				}
				return
			case <-bpr.Quit():
				return
			case peerID := <-bpr.redoCh:
				if peerID == bpr.peerID {
					bpr.reset()
					continue OUTER_LOOP
				}
				continue WAIT_LOOP
			case <-bpr.gotBlockCh:
				// We got a block!
				// Continue the for-loop and wait until Quit.
				continue WAIT_LOOP
			}
		}
	}
}

// BlockRequest stores a block request identified by the block Height and the
// PeerID responsible for delivering the block.
type BlockRequest struct {
	Height int64
	PeerID p2p.ID
}
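// A minimal sketch of how the owning reactor might service the two channels
// the pool writes to; r.Switch and sendBlockRequest are illustrative
// assumptions about the surrounding reactor, not definitions from this file:
//
//	for {
//		select {
//		case request := <-requestsCh:
//			peer := r.Switch.Peers().Get(request.PeerID)
//			if peer == nil {
//				continue // peer has since disconnected
//			}
//			// Ask that peer for the block at request.Height.
//			sendBlockRequest(peer, request.Height)
//		case err := <-errorsCh:
//			peer := r.Switch.Peers().Get(err.peerID)
//			if peer != nil {
//				r.Switch.StopPeerForError(peer, err)
//			}
//		}
//	}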