github.com/evdatsion/aphelion-dpos-bft@v0.32.1/blockchain/pool.go (about) 1 package blockchain 2 3 import ( 4 "errors" 5 "fmt" 6 "math" 7 "sync" 8 "sync/atomic" 9 "time" 10 11 cmn "github.com/evdatsion/aphelion-dpos-bft/libs/common" 12 flow "github.com/evdatsion/aphelion-dpos-bft/libs/flowrate" 13 "github.com/evdatsion/aphelion-dpos-bft/libs/log" 14 15 "github.com/evdatsion/aphelion-dpos-bft/p2p" 16 "github.com/evdatsion/aphelion-dpos-bft/types" 17 ) 18 19 /* 20 eg, L = latency = 0.1s 21 P = num peers = 10 22 FN = num full nodes 23 BS = 1kB block size 24 CB = 1 Mbit/s = 128 kB/s 25 CB/P = 12.8 kB 26 B/S = CB/P/BS = 12.8 blocks/s 27 28 12.8 * 0.1 = 1.28 blocks on conn 29 */ 30 31 const ( 32 requestIntervalMS = 2 33 maxTotalRequesters = 600 34 maxPendingRequests = maxTotalRequesters 35 maxPendingRequestsPerPeer = 20 36 37 // Minimum recv rate to ensure we're receiving blocks from a peer fast 38 // enough. If a peer is not sending us data at at least that rate, we 39 // consider them to have timedout and we disconnect. 40 // 41 // Assuming a DSL connection (not a good choice) 128 Kbps (upload) ~ 15 KB/s, 42 // sending data across atlantic ~ 7.5 KB/s. 43 minRecvRate = 7680 44 45 // Maximum difference between current and new block's height. 46 maxDiffBetweenCurrentAndReceivedBlockHeight = 100 47 ) 48 49 var peerTimeout = 15 * time.Second // not const so we can override with tests 50 51 /* 52 Peers self report their heights when we join the block pool. 53 Starting from our latest pool.height, we request blocks 54 in sequence from peers that reported higher heights than ours. 55 Every so often we ask peers what height they're on so we can keep going. 56 57 Requests are continuously made for blocks of higher heights until 58 the limit is reached. If most of the requests have no available peers, and we 59 are not at peer limits, we can probably switch to consensus reactor 60 */ 61 62 type BlockPool struct { 63 cmn.BaseService 64 startTime time.Time 65 66 mtx sync.Mutex 67 // block requests 68 requesters map[int64]*bpRequester 69 height int64 // the lowest key in requesters. 70 // peers 71 peers map[p2p.ID]*bpPeer 72 maxPeerHeight int64 // the biggest reported height 73 74 // atomic 75 numPending int32 // number of requests pending assignment or block response 76 77 requestsCh chan<- BlockRequest 78 errorsCh chan<- peerError 79 } 80 81 // NewBlockPool returns a new BlockPool with the height equal to start. Block 82 // requests and errors will be sent to requestsCh and errorsCh accordingly. 83 func NewBlockPool(start int64, requestsCh chan<- BlockRequest, errorsCh chan<- peerError) *BlockPool { 84 bp := &BlockPool{ 85 peers: make(map[p2p.ID]*bpPeer), 86 87 requesters: make(map[int64]*bpRequester), 88 height: start, 89 numPending: 0, 90 91 requestsCh: requestsCh, 92 errorsCh: errorsCh, 93 } 94 bp.BaseService = *cmn.NewBaseService(nil, "BlockPool", bp) 95 return bp 96 } 97 98 // OnStart implements cmn.Service by spawning requesters routine and recording 99 // pool's start time. 100 func (pool *BlockPool) OnStart() error { 101 go pool.makeRequestersRoutine() 102 pool.startTime = time.Now() 103 return nil 104 } 105 106 // spawns requesters as needed 107 func (pool *BlockPool) makeRequestersRoutine() { 108 for { 109 if !pool.IsRunning() { 110 break 111 } 112 113 _, numPending, lenRequesters := pool.GetStatus() 114 if numPending >= maxPendingRequests { 115 // sleep for a bit. 116 time.Sleep(requestIntervalMS * time.Millisecond) 117 // check for timed out peers 118 pool.removeTimedoutPeers() 119 } else if lenRequesters >= maxTotalRequesters { 120 // sleep for a bit. 121 time.Sleep(requestIntervalMS * time.Millisecond) 122 // check for timed out peers 123 pool.removeTimedoutPeers() 124 } else { 125 // request for more blocks. 126 pool.makeNextRequester() 127 } 128 } 129 } 130 131 func (pool *BlockPool) removeTimedoutPeers() { 132 pool.mtx.Lock() 133 defer pool.mtx.Unlock() 134 135 for _, peer := range pool.peers { 136 if !peer.didTimeout && peer.numPending > 0 { 137 curRate := peer.recvMonitor.Status().CurRate 138 // curRate can be 0 on start 139 if curRate != 0 && curRate < minRecvRate { 140 err := errors.New("peer is not sending us data fast enough") 141 pool.sendError(err, peer.id) 142 pool.Logger.Error("SendTimeout", "peer", peer.id, 143 "reason", err, 144 "curRate", fmt.Sprintf("%d KB/s", curRate/1024), 145 "minRate", fmt.Sprintf("%d KB/s", minRecvRate/1024)) 146 peer.didTimeout = true 147 } 148 } 149 if peer.didTimeout { 150 pool.removePeer(peer.id) 151 } 152 } 153 } 154 155 // GetStatus returns pool's height, numPending requests and the number of 156 // requesters. 157 func (pool *BlockPool) GetStatus() (height int64, numPending int32, lenRequesters int) { 158 pool.mtx.Lock() 159 defer pool.mtx.Unlock() 160 161 return pool.height, atomic.LoadInt32(&pool.numPending), len(pool.requesters) 162 } 163 164 // IsCaughtUp returns true if this node is caught up, false - otherwise. 165 // TODO: relax conditions, prevent abuse. 166 func (pool *BlockPool) IsCaughtUp() bool { 167 pool.mtx.Lock() 168 defer pool.mtx.Unlock() 169 170 // Need at least 1 peer to be considered caught up. 171 if len(pool.peers) == 0 { 172 pool.Logger.Debug("Blockpool has no peers") 173 return false 174 } 175 176 // Some conditions to determine if we're caught up. 177 // Ensures we've either received a block or waited some amount of time, 178 // and that we're synced to the highest known height. 179 // Note we use maxPeerHeight - 1 because to sync block H requires block H+1 180 // to verify the LastCommit. 181 receivedBlockOrTimedOut := pool.height > 0 || time.Since(pool.startTime) > 5*time.Second 182 ourChainIsLongestAmongPeers := pool.maxPeerHeight == 0 || pool.height >= (pool.maxPeerHeight-1) 183 isCaughtUp := receivedBlockOrTimedOut && ourChainIsLongestAmongPeers 184 return isCaughtUp 185 } 186 187 // We need to see the second block's Commit to validate the first block. 188 // So we peek two blocks at a time. 189 // The caller will verify the commit. 190 func (pool *BlockPool) PeekTwoBlocks() (first *types.Block, second *types.Block) { 191 pool.mtx.Lock() 192 defer pool.mtx.Unlock() 193 194 if r := pool.requesters[pool.height]; r != nil { 195 first = r.getBlock() 196 } 197 if r := pool.requesters[pool.height+1]; r != nil { 198 second = r.getBlock() 199 } 200 return 201 } 202 203 // Pop the first block at pool.height 204 // It must have been validated by 'second'.Commit from PeekTwoBlocks(). 205 func (pool *BlockPool) PopRequest() { 206 pool.mtx.Lock() 207 defer pool.mtx.Unlock() 208 209 if r := pool.requesters[pool.height]; r != nil { 210 /* The block can disappear at any time, due to removePeer(). 211 if r := pool.requesters[pool.height]; r == nil || r.block == nil { 212 PanicSanity("PopRequest() requires a valid block") 213 } 214 */ 215 r.Stop() 216 delete(pool.requesters, pool.height) 217 pool.height++ 218 } else { 219 panic(fmt.Sprintf("Expected requester to pop, got nothing at height %v", pool.height)) 220 } 221 } 222 223 // Invalidates the block at pool.height, 224 // Remove the peer and redo request from others. 225 // Returns the ID of the removed peer. 226 func (pool *BlockPool) RedoRequest(height int64) p2p.ID { 227 pool.mtx.Lock() 228 defer pool.mtx.Unlock() 229 230 request := pool.requesters[height] 231 peerID := request.getPeerID() 232 if peerID != p2p.ID("") { 233 // RemovePeer will redo all requesters associated with this peer. 234 pool.removePeer(peerID) 235 } 236 return peerID 237 } 238 239 // TODO: ensure that blocks come in order for each peer. 240 func (pool *BlockPool) AddBlock(peerID p2p.ID, block *types.Block, blockSize int) { 241 pool.mtx.Lock() 242 defer pool.mtx.Unlock() 243 244 requester := pool.requesters[block.Height] 245 if requester == nil { 246 pool.Logger.Info("peer sent us a block we didn't expect", "peer", peerID, "curHeight", pool.height, "blockHeight", block.Height) 247 diff := pool.height - block.Height 248 if diff < 0 { 249 diff *= -1 250 } 251 if diff > maxDiffBetweenCurrentAndReceivedBlockHeight { 252 pool.sendError(errors.New("peer sent us a block we didn't expect with a height too far ahead/behind"), peerID) 253 } 254 return 255 } 256 257 if requester.setBlock(block, peerID) { 258 atomic.AddInt32(&pool.numPending, -1) 259 peer := pool.peers[peerID] 260 if peer != nil { 261 peer.decrPending(blockSize) 262 } 263 } else { 264 pool.Logger.Info("invalid peer", "peer", peerID, "blockHeight", block.Height) 265 pool.sendError(errors.New("invalid peer"), peerID) 266 } 267 } 268 269 // MaxPeerHeight returns the highest reported height. 270 func (pool *BlockPool) MaxPeerHeight() int64 { 271 pool.mtx.Lock() 272 defer pool.mtx.Unlock() 273 return pool.maxPeerHeight 274 } 275 276 // SetPeerHeight sets the peer's alleged blockchain height. 277 func (pool *BlockPool) SetPeerHeight(peerID p2p.ID, height int64) { 278 pool.mtx.Lock() 279 defer pool.mtx.Unlock() 280 281 peer := pool.peers[peerID] 282 if peer != nil { 283 peer.height = height 284 } else { 285 peer = newBPPeer(pool, peerID, height) 286 peer.setLogger(pool.Logger.With("peer", peerID)) 287 pool.peers[peerID] = peer 288 } 289 290 if height > pool.maxPeerHeight { 291 pool.maxPeerHeight = height 292 } 293 } 294 295 // RemovePeer removes the peer with peerID from the pool. If there's no peer 296 // with peerID, function is a no-op. 297 func (pool *BlockPool) RemovePeer(peerID p2p.ID) { 298 pool.mtx.Lock() 299 defer pool.mtx.Unlock() 300 301 pool.removePeer(peerID) 302 } 303 304 func (pool *BlockPool) removePeer(peerID p2p.ID) { 305 for _, requester := range pool.requesters { 306 if requester.getPeerID() == peerID { 307 requester.redo(peerID) 308 } 309 } 310 311 peer, ok := pool.peers[peerID] 312 if ok { 313 if peer.timeout != nil { 314 peer.timeout.Stop() 315 } 316 317 delete(pool.peers, peerID) 318 319 // Find a new peer with the biggest height and update maxPeerHeight if the 320 // peer's height was the biggest. 321 if peer.height == pool.maxPeerHeight { 322 pool.updateMaxPeerHeight() 323 } 324 } 325 } 326 327 // If no peers are left, maxPeerHeight is set to 0. 328 func (pool *BlockPool) updateMaxPeerHeight() { 329 var max int64 330 for _, peer := range pool.peers { 331 if peer.height > max { 332 max = peer.height 333 } 334 } 335 pool.maxPeerHeight = max 336 } 337 338 // Pick an available peer with at least the given minHeight. 339 // If no peers are available, returns nil. 340 func (pool *BlockPool) pickIncrAvailablePeer(minHeight int64) *bpPeer { 341 pool.mtx.Lock() 342 defer pool.mtx.Unlock() 343 344 for _, peer := range pool.peers { 345 if peer.didTimeout { 346 pool.removePeer(peer.id) 347 continue 348 } 349 if peer.numPending >= maxPendingRequestsPerPeer { 350 continue 351 } 352 if peer.height < minHeight { 353 continue 354 } 355 peer.incrPending() 356 return peer 357 } 358 return nil 359 } 360 361 func (pool *BlockPool) makeNextRequester() { 362 pool.mtx.Lock() 363 defer pool.mtx.Unlock() 364 365 nextHeight := pool.height + pool.requestersLen() 366 if nextHeight > pool.maxPeerHeight { 367 return 368 } 369 370 request := newBPRequester(pool, nextHeight) 371 372 pool.requesters[nextHeight] = request 373 atomic.AddInt32(&pool.numPending, 1) 374 375 err := request.Start() 376 if err != nil { 377 request.Logger.Error("Error starting request", "err", err) 378 } 379 } 380 381 func (pool *BlockPool) requestersLen() int64 { 382 return int64(len(pool.requesters)) 383 } 384 385 func (pool *BlockPool) sendRequest(height int64, peerID p2p.ID) { 386 if !pool.IsRunning() { 387 return 388 } 389 pool.requestsCh <- BlockRequest{height, peerID} 390 } 391 392 func (pool *BlockPool) sendError(err error, peerID p2p.ID) { 393 if !pool.IsRunning() { 394 return 395 } 396 pool.errorsCh <- peerError{err, peerID} 397 } 398 399 // for debugging purposes 400 //nolint:unused 401 func (pool *BlockPool) debug() string { 402 pool.mtx.Lock() 403 defer pool.mtx.Unlock() 404 405 str := "" 406 nextHeight := pool.height + pool.requestersLen() 407 for h := pool.height; h < nextHeight; h++ { 408 if pool.requesters[h] == nil { 409 str += fmt.Sprintf("H(%v):X ", h) 410 } else { 411 str += fmt.Sprintf("H(%v):", h) 412 str += fmt.Sprintf("B?(%v) ", pool.requesters[h].block != nil) 413 } 414 } 415 return str 416 } 417 418 //------------------------------------- 419 420 type bpPeer struct { 421 pool *BlockPool 422 id p2p.ID 423 recvMonitor *flow.Monitor 424 425 height int64 426 numPending int32 427 timeout *time.Timer 428 didTimeout bool 429 430 logger log.Logger 431 } 432 433 func newBPPeer(pool *BlockPool, peerID p2p.ID, height int64) *bpPeer { 434 peer := &bpPeer{ 435 pool: pool, 436 id: peerID, 437 height: height, 438 numPending: 0, 439 logger: log.NewNopLogger(), 440 } 441 return peer 442 } 443 444 func (peer *bpPeer) setLogger(l log.Logger) { 445 peer.logger = l 446 } 447 448 func (peer *bpPeer) resetMonitor() { 449 peer.recvMonitor = flow.New(time.Second, time.Second*40) 450 initialValue := float64(minRecvRate) * math.E 451 peer.recvMonitor.SetREMA(initialValue) 452 } 453 454 func (peer *bpPeer) resetTimeout() { 455 if peer.timeout == nil { 456 peer.timeout = time.AfterFunc(peerTimeout, peer.onTimeout) 457 } else { 458 peer.timeout.Reset(peerTimeout) 459 } 460 } 461 462 func (peer *bpPeer) incrPending() { 463 if peer.numPending == 0 { 464 peer.resetMonitor() 465 peer.resetTimeout() 466 } 467 peer.numPending++ 468 } 469 470 func (peer *bpPeer) decrPending(recvSize int) { 471 peer.numPending-- 472 if peer.numPending == 0 { 473 peer.timeout.Stop() 474 } else { 475 peer.recvMonitor.Update(recvSize) 476 peer.resetTimeout() 477 } 478 } 479 480 func (peer *bpPeer) onTimeout() { 481 peer.pool.mtx.Lock() 482 defer peer.pool.mtx.Unlock() 483 484 err := errors.New("peer did not send us anything") 485 peer.pool.sendError(err, peer.id) 486 peer.logger.Error("SendTimeout", "reason", err, "timeout", peerTimeout) 487 peer.didTimeout = true 488 } 489 490 //------------------------------------- 491 492 type bpRequester struct { 493 cmn.BaseService 494 pool *BlockPool 495 height int64 496 gotBlockCh chan struct{} 497 redoCh chan p2p.ID //redo may send multitime, add peerId to identify repeat 498 499 mtx sync.Mutex 500 peerID p2p.ID 501 block *types.Block 502 } 503 504 func newBPRequester(pool *BlockPool, height int64) *bpRequester { 505 bpr := &bpRequester{ 506 pool: pool, 507 height: height, 508 gotBlockCh: make(chan struct{}, 1), 509 redoCh: make(chan p2p.ID, 1), 510 511 peerID: "", 512 block: nil, 513 } 514 bpr.BaseService = *cmn.NewBaseService(nil, "bpRequester", bpr) 515 return bpr 516 } 517 518 func (bpr *bpRequester) OnStart() error { 519 go bpr.requestRoutine() 520 return nil 521 } 522 523 // Returns true if the peer matches and block doesn't already exist. 524 func (bpr *bpRequester) setBlock(block *types.Block, peerID p2p.ID) bool { 525 bpr.mtx.Lock() 526 if bpr.block != nil || bpr.peerID != peerID { 527 bpr.mtx.Unlock() 528 return false 529 } 530 bpr.block = block 531 bpr.mtx.Unlock() 532 533 select { 534 case bpr.gotBlockCh <- struct{}{}: 535 default: 536 } 537 return true 538 } 539 540 func (bpr *bpRequester) getBlock() *types.Block { 541 bpr.mtx.Lock() 542 defer bpr.mtx.Unlock() 543 return bpr.block 544 } 545 546 func (bpr *bpRequester) getPeerID() p2p.ID { 547 bpr.mtx.Lock() 548 defer bpr.mtx.Unlock() 549 return bpr.peerID 550 } 551 552 // This is called from the requestRoutine, upon redo(). 553 func (bpr *bpRequester) reset() { 554 bpr.mtx.Lock() 555 defer bpr.mtx.Unlock() 556 557 if bpr.block != nil { 558 atomic.AddInt32(&bpr.pool.numPending, 1) 559 } 560 561 bpr.peerID = "" 562 bpr.block = nil 563 } 564 565 // Tells bpRequester to pick another peer and try again. 566 // NOTE: Nonblocking, and does nothing if another redo 567 // was already requested. 568 func (bpr *bpRequester) redo(peerId p2p.ID) { 569 select { 570 case bpr.redoCh <- peerId: 571 default: 572 } 573 } 574 575 // Responsible for making more requests as necessary 576 // Returns only when a block is found (e.g. AddBlock() is called) 577 func (bpr *bpRequester) requestRoutine() { 578 OUTER_LOOP: 579 for { 580 // Pick a peer to send request to. 581 var peer *bpPeer 582 PICK_PEER_LOOP: 583 for { 584 if !bpr.IsRunning() || !bpr.pool.IsRunning() { 585 return 586 } 587 peer = bpr.pool.pickIncrAvailablePeer(bpr.height) 588 if peer == nil { 589 //log.Info("No peers available", "height", height) 590 time.Sleep(requestIntervalMS * time.Millisecond) 591 continue PICK_PEER_LOOP 592 } 593 break PICK_PEER_LOOP 594 } 595 bpr.mtx.Lock() 596 bpr.peerID = peer.id 597 bpr.mtx.Unlock() 598 599 // Send request and wait. 600 bpr.pool.sendRequest(bpr.height, peer.id) 601 WAIT_LOOP: 602 for { 603 select { 604 case <-bpr.pool.Quit(): 605 bpr.Stop() 606 return 607 case <-bpr.Quit(): 608 return 609 case peerID := <-bpr.redoCh: 610 if peerID == bpr.peerID { 611 bpr.reset() 612 continue OUTER_LOOP 613 } else { 614 continue WAIT_LOOP 615 } 616 case <-bpr.gotBlockCh: 617 // We got a block! 618 // Continue the for-loop and wait til Quit. 619 continue WAIT_LOOP 620 } 621 } 622 } 623 } 624 625 //------------------------------------- 626 627 type BlockRequest struct { 628 Height int64 629 PeerID p2p.ID 630 }