github.com/pure-x-eth/consensus_tm@v0.0.0-20230502163723-e3c2ff987250/blockchain/v2/scheduler.go (about) 1 package v2 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "math" 8 "sort" 9 "time" 10 11 "github.com/pure-x-eth/consensus_tm/p2p" 12 "github.com/pure-x-eth/consensus_tm/types" 13 ) 14 15 // Events generated by the scheduler: 16 // all blocks have been processed 17 type scFinishedEv struct { 18 priorityNormal 19 reason string 20 } 21 22 func (e scFinishedEv) String() string { 23 return fmt.Sprintf("scFinishedEv{%v}", e.reason) 24 } 25 26 // send a blockRequest message 27 type scBlockRequest struct { 28 priorityNormal 29 peerID p2p.ID 30 height int64 31 } 32 33 func (e scBlockRequest) String() string { 34 return fmt.Sprintf("scBlockRequest{%d from %v}", e.height, e.peerID) 35 } 36 37 // a block has been received and validated by the scheduler 38 type scBlockReceived struct { 39 priorityNormal 40 peerID p2p.ID 41 block *types.Block 42 } 43 44 func (e scBlockReceived) String() string { 45 return fmt.Sprintf("scBlockReceived{%d#%X from %v}", e.block.Height, e.block.Hash(), e.peerID) 46 } 47 48 // scheduler detected a peer error 49 type scPeerError struct { 50 priorityHigh 51 peerID p2p.ID 52 reason error 53 } 54 55 func (e scPeerError) String() string { 56 return fmt.Sprintf("scPeerError{%v errored with %v}", e.peerID, e.reason) 57 } 58 59 // scheduler removed a set of peers (timed out or slow peer) 60 type scPeersPruned struct { 61 priorityHigh 62 peers []p2p.ID 63 } 64 65 func (e scPeersPruned) String() string { 66 return fmt.Sprintf("scPeersPruned{%v}", e.peers) 67 } 68 69 // XXX: make this fatal? 70 // scheduler encountered a fatal error 71 type scSchedulerFail struct { 72 priorityHigh 73 reason error 74 } 75 76 func (e scSchedulerFail) String() string { 77 return fmt.Sprintf("scSchedulerFail{%v}", e.reason) 78 } 79 80 type blockState int 81 82 const ( 83 blockStateUnknown blockState = iota + 1 // no known peer has this block 84 blockStateNew // indicates that a peer has reported having this block 85 blockStatePending // indicates that this block has been requested from a peer 86 blockStateReceived // indicates that this block has been received by a peer 87 blockStateProcessed // indicates that this block has been applied 88 ) 89 90 func (e blockState) String() string { 91 switch e { 92 case blockStateUnknown: 93 return "Unknown" 94 case blockStateNew: 95 return "New" 96 case blockStatePending: 97 return "Pending" 98 case blockStateReceived: 99 return "Received" 100 case blockStateProcessed: 101 return "Processed" 102 default: 103 return fmt.Sprintf("invalid blockState: %d", e) 104 } 105 } 106 107 type peerState int 108 109 const ( 110 peerStateNew = iota + 1 111 peerStateReady 112 peerStateRemoved 113 ) 114 115 func (e peerState) String() string { 116 switch e { 117 case peerStateNew: 118 return "New" 119 case peerStateReady: 120 return "Ready" 121 case peerStateRemoved: 122 return "Removed" 123 default: 124 panic(fmt.Sprintf("unknown peerState: %d", e)) 125 } 126 } 127 128 type scPeer struct { 129 peerID p2p.ID 130 131 // initialized as New when peer is added, updated to Ready when statusUpdate is received, 132 // updated to Removed when peer is removed 133 state peerState 134 135 base int64 // updated when statusResponse is received 136 height int64 // updated when statusResponse is received 137 lastTouched time.Time 138 lastRate int64 // last receive rate in bytes 139 } 140 141 func (p scPeer) String() string { 142 return fmt.Sprintf("{state %v, base %d, height %d, lastTouched %v, lastRate %d, id %v}", 143 p.state, p.base, p.height, p.lastTouched, p.lastRate, p.peerID) 144 } 145 146 func newScPeer(peerID p2p.ID) *scPeer { 147 return &scPeer{ 148 peerID: peerID, 149 state: peerStateNew, 150 base: -1, 151 height: -1, 152 lastTouched: time.Time{}, 153 } 154 } 155 156 // The scheduler keep track of the state of each block and each peer. The 157 // scheduler will attempt to schedule new block requests with `trySchedule` 158 // events and remove slow peers with `tryPrune` events. 159 type scheduler struct { 160 initHeight int64 161 162 // next block that needs to be processed. All blocks with smaller height are 163 // in Processed state. 164 height int64 165 166 // lastAdvance tracks the last time a block execution happened. 167 // syncTimeout is the maximum time the scheduler waits to advance in the fast sync process before finishing. 168 // This covers the cases where there are no peers or all peers have a lower height. 169 lastAdvance time.Time 170 syncTimeout time.Duration 171 172 // a map of peerID to scheduler specific peer struct `scPeer` used to keep 173 // track of peer specific state 174 peers map[p2p.ID]*scPeer 175 peerTimeout time.Duration // maximum response time from a peer otherwise prune 176 minRecvRate int64 // minimum receive rate from peer otherwise prune 177 178 // the maximum number of blocks that should be New, Received or Pending at any point 179 // in time. This is used to enforce a limit on the blockStates map. 180 targetPending int 181 // a list of blocks to be scheduled (New), Pending or Received. Its length should be 182 // smaller than targetPending. 183 blockStates map[int64]blockState 184 185 // a map of heights to the peer we are waiting a response from 186 pendingBlocks map[int64]p2p.ID 187 188 // the time at which a block was put in blockStatePending 189 pendingTime map[int64]time.Time 190 191 // a map of heights to the peers that put the block in blockStateReceived 192 receivedBlocks map[int64]p2p.ID 193 } 194 195 func (sc scheduler) String() string { 196 return fmt.Sprintf("ih: %d, bst: %v, peers: %v, pblks: %v, ptm %v, rblks: %v", 197 sc.initHeight, sc.blockStates, sc.peers, sc.pendingBlocks, sc.pendingTime, sc.receivedBlocks) 198 } 199 200 func newScheduler(initHeight int64, startTime time.Time) *scheduler { 201 sc := scheduler{ 202 initHeight: initHeight, 203 lastAdvance: startTime, 204 syncTimeout: 60 * time.Second, 205 height: initHeight, 206 blockStates: make(map[int64]blockState), 207 peers: make(map[p2p.ID]*scPeer), 208 pendingBlocks: make(map[int64]p2p.ID), 209 pendingTime: make(map[int64]time.Time), 210 receivedBlocks: make(map[int64]p2p.ID), 211 targetPending: 10, // TODO - pass as param 212 peerTimeout: 15 * time.Second, // TODO - pass as param 213 minRecvRate: 0, // int64(7680), TODO - pass as param 214 } 215 216 return &sc 217 } 218 219 func (sc *scheduler) ensurePeer(peerID p2p.ID) *scPeer { 220 if _, ok := sc.peers[peerID]; !ok { 221 sc.peers[peerID] = newScPeer(peerID) 222 } 223 return sc.peers[peerID] 224 } 225 226 func (sc *scheduler) touchPeer(peerID p2p.ID, time time.Time) error { 227 peer, ok := sc.peers[peerID] 228 if !ok { 229 return fmt.Errorf("couldn't find peer %s", peerID) 230 } 231 232 if peer.state != peerStateReady { 233 return fmt.Errorf("tried to touch peer in state %s, must be Ready", peer.state) 234 } 235 236 peer.lastTouched = time 237 238 return nil 239 } 240 241 func (sc *scheduler) removePeer(peerID p2p.ID) { 242 peer, ok := sc.peers[peerID] 243 if !ok { 244 return 245 } 246 if peer.state == peerStateRemoved { 247 return 248 } 249 250 for height, pendingPeerID := range sc.pendingBlocks { 251 if pendingPeerID == peerID { 252 sc.setStateAtHeight(height, blockStateNew) 253 delete(sc.pendingTime, height) 254 delete(sc.pendingBlocks, height) 255 } 256 } 257 258 for height, rcvPeerID := range sc.receivedBlocks { 259 if rcvPeerID == peerID { 260 sc.setStateAtHeight(height, blockStateNew) 261 delete(sc.receivedBlocks, height) 262 } 263 } 264 265 // remove the blocks from blockStates if the peer removal causes the max peer height to be lower. 266 peer.state = peerStateRemoved 267 maxPeerHeight := int64(0) 268 for _, otherPeer := range sc.peers { 269 if otherPeer.state != peerStateReady { 270 continue 271 } 272 if otherPeer.peerID != peer.peerID && otherPeer.height > maxPeerHeight { 273 maxPeerHeight = otherPeer.height 274 } 275 } 276 for h := range sc.blockStates { 277 if h > maxPeerHeight { 278 delete(sc.blockStates, h) 279 } 280 } 281 } 282 283 // check if the blockPool is running low and add new blocks in New state to be requested. 284 // This function is called when there is an increase in the maximum peer height or when 285 // blocks are processed. 286 func (sc *scheduler) addNewBlocks() { 287 if len(sc.blockStates) >= sc.targetPending { 288 return 289 } 290 291 for i := sc.height; i < int64(sc.targetPending)+sc.height; i++ { 292 if i > sc.maxHeight() { 293 break 294 } 295 if sc.getStateAtHeight(i) == blockStateUnknown { 296 sc.setStateAtHeight(i, blockStateNew) 297 } 298 } 299 } 300 301 func (sc *scheduler) setPeerRange(peerID p2p.ID, base int64, height int64) error { 302 peer := sc.ensurePeer(peerID) 303 304 if peer.state == peerStateRemoved { 305 return nil // noop 306 } 307 308 if height < peer.height { 309 sc.removePeer(peerID) 310 return fmt.Errorf("cannot move peer height lower. from %d to %d", peer.height, height) 311 } 312 313 if base > height { 314 sc.removePeer(peerID) 315 return fmt.Errorf("cannot set peer base higher than its height") 316 } 317 318 peer.base = base 319 peer.height = height 320 peer.state = peerStateReady 321 322 sc.addNewBlocks() 323 return nil 324 } 325 326 func (sc *scheduler) getStateAtHeight(height int64) blockState { 327 if height < sc.height { 328 return blockStateProcessed 329 } else if state, ok := sc.blockStates[height]; ok { 330 return state 331 } else { 332 return blockStateUnknown 333 } 334 } 335 336 func (sc *scheduler) getPeersWithHeight(height int64) []p2p.ID { 337 peers := make([]p2p.ID, 0) 338 for _, peer := range sc.peers { 339 if peer.state != peerStateReady { 340 continue 341 } 342 if peer.base <= height && peer.height >= height { 343 peers = append(peers, peer.peerID) 344 } 345 } 346 return peers 347 } 348 349 func (sc *scheduler) prunablePeers(peerTimout time.Duration, minRecvRate int64, now time.Time) []p2p.ID { 350 prunable := make([]p2p.ID, 0) 351 for peerID, peer := range sc.peers { 352 if peer.state != peerStateReady { 353 continue 354 } 355 if now.Sub(peer.lastTouched) > peerTimout || peer.lastRate < minRecvRate { 356 prunable = append(prunable, peerID) 357 } 358 } 359 // Tests for handleTryPrunePeer() may fail without sort due to range non-determinism 360 sort.Sort(PeerByID(prunable)) 361 return prunable 362 } 363 364 func (sc *scheduler) setStateAtHeight(height int64, state blockState) { 365 sc.blockStates[height] = state 366 } 367 368 // CONTRACT: peer exists and in Ready state. 369 func (sc *scheduler) markReceived(peerID p2p.ID, height int64, size int, now time.Time) error { 370 peer := sc.peers[peerID] 371 372 if state := sc.getStateAtHeight(height); state != blockStatePending || sc.pendingBlocks[height] != peerID { 373 return fmt.Errorf("received block %d from peer %s without being requested", height, peerID) 374 } 375 376 pendingTime, ok := sc.pendingTime[height] 377 if !ok || now.Sub(pendingTime) <= 0 { 378 return fmt.Errorf("clock error: block %d received at %s but requested at %s", 379 height, pendingTime, now) 380 } 381 382 peer.lastRate = int64(size) / now.Sub(pendingTime).Nanoseconds() 383 384 sc.setStateAtHeight(height, blockStateReceived) 385 delete(sc.pendingBlocks, height) 386 delete(sc.pendingTime, height) 387 388 sc.receivedBlocks[height] = peerID 389 390 return nil 391 } 392 393 func (sc *scheduler) markPending(peerID p2p.ID, height int64, time time.Time) error { 394 state := sc.getStateAtHeight(height) 395 if state != blockStateNew { 396 return fmt.Errorf("block %d should be in blockStateNew but is %s", height, state) 397 } 398 399 peer, ok := sc.peers[peerID] 400 if !ok { 401 return fmt.Errorf("cannot find peer %s", peerID) 402 } 403 404 if peer.state != peerStateReady { 405 return fmt.Errorf("cannot schedule %d from %s in %s", height, peerID, peer.state) 406 } 407 408 if height > peer.height { 409 return fmt.Errorf("cannot request height %d from peer %s that is at height %d", 410 height, peerID, peer.height) 411 } 412 413 if height < peer.base { 414 return fmt.Errorf("cannot request height %d for peer %s with base %d", 415 height, peerID, peer.base) 416 } 417 418 sc.setStateAtHeight(height, blockStatePending) 419 sc.pendingBlocks[height] = peerID 420 sc.pendingTime[height] = time 421 422 return nil 423 } 424 425 func (sc *scheduler) markProcessed(height int64) error { 426 // It is possible that a peer error or timeout is handled after the processor 427 // has processed the block but before the scheduler received this event, so 428 // when pcBlockProcessed event is received, the block had been requested 429 // again => don't check the block state. 430 sc.lastAdvance = time.Now() 431 sc.height = height + 1 432 delete(sc.pendingBlocks, height) 433 delete(sc.pendingTime, height) 434 delete(sc.receivedBlocks, height) 435 delete(sc.blockStates, height) 436 sc.addNewBlocks() 437 return nil 438 } 439 440 func (sc *scheduler) allBlocksProcessed() bool { 441 if len(sc.peers) == 0 { 442 return false 443 } 444 return sc.height >= sc.maxHeight() 445 } 446 447 // returns max peer height or the last processed block, i.e. sc.height 448 func (sc *scheduler) maxHeight() int64 { 449 max := sc.height - 1 450 for _, peer := range sc.peers { 451 if peer.state != peerStateReady { 452 continue 453 } 454 if max < peer.height { 455 max = peer.height 456 } 457 } 458 return max 459 } 460 461 // lowest block in sc.blockStates with state == blockStateNew or -1 if no new blocks 462 func (sc *scheduler) nextHeightToSchedule() int64 { 463 var min int64 = math.MaxInt64 464 for height, state := range sc.blockStates { 465 if state == blockStateNew && height < min { 466 min = height 467 } 468 } 469 if min == math.MaxInt64 { 470 min = -1 471 } 472 return min 473 } 474 475 func (sc *scheduler) pendingFrom(peerID p2p.ID) []int64 { 476 var heights []int64 477 for height, pendingPeerID := range sc.pendingBlocks { 478 if pendingPeerID == peerID { 479 heights = append(heights, height) 480 } 481 } 482 return heights 483 } 484 485 func (sc *scheduler) selectPeer(height int64) (p2p.ID, error) { 486 peers := sc.getPeersWithHeight(height) 487 if len(peers) == 0 { 488 return "", fmt.Errorf("cannot find peer for height %d", height) 489 } 490 491 // create a map from number of pending requests to a list 492 // of peers having that number of pending requests. 493 pendingFrom := make(map[int][]p2p.ID) 494 for _, peerID := range peers { 495 numPending := len(sc.pendingFrom(peerID)) 496 pendingFrom[numPending] = append(pendingFrom[numPending], peerID) 497 } 498 499 // find the set of peers with minimum number of pending requests. 500 var minPending int64 = math.MaxInt64 501 for mp := range pendingFrom { 502 if int64(mp) < minPending { 503 minPending = int64(mp) 504 } 505 } 506 507 sort.Sort(PeerByID(pendingFrom[int(minPending)])) 508 return pendingFrom[int(minPending)][0], nil 509 } 510 511 // PeerByID is a list of peers sorted by peerID. 512 type PeerByID []p2p.ID 513 514 func (peers PeerByID) Len() int { 515 return len(peers) 516 } 517 func (peers PeerByID) Less(i, j int) bool { 518 return bytes.Compare([]byte(peers[i]), []byte(peers[j])) == -1 519 } 520 521 func (peers PeerByID) Swap(i, j int) { 522 peers[i], peers[j] = peers[j], peers[i] 523 } 524 525 // Handlers 526 527 // This handler gets the block, performs some validation and then passes it on to the processor. 528 func (sc *scheduler) handleBlockResponse(event bcBlockResponse) (Event, error) { 529 err := sc.touchPeer(event.peerID, event.time) 530 if err != nil { 531 // peer does not exist OR not ready 532 return noOp, nil 533 } 534 535 err = sc.markReceived(event.peerID, event.block.Height, event.block.Size(), event.time) 536 if err != nil { 537 sc.removePeer(event.peerID) 538 return scPeerError{peerID: event.peerID, reason: err}, nil 539 } 540 541 return scBlockReceived{peerID: event.peerID, block: event.block}, nil 542 } 543 544 func (sc *scheduler) handleNoBlockResponse(event bcNoBlockResponse) (Event, error) { 545 // No such peer or peer was removed. 546 peer, ok := sc.peers[event.peerID] 547 if !ok || peer.state == peerStateRemoved { 548 return noOp, nil 549 } 550 551 // The peer may have been just removed due to errors, low speed or timeouts. 552 sc.removePeer(event.peerID) 553 554 return scPeerError{peerID: event.peerID, 555 reason: fmt.Errorf("peer %v with base %d height %d claims no block for %d", 556 event.peerID, peer.base, peer.height, event.height)}, nil 557 } 558 559 func (sc *scheduler) handleBlockProcessed(event pcBlockProcessed) (Event, error) { 560 if event.height != sc.height { 561 panic(fmt.Sprintf("processed height %d, but expected height %d", event.height, sc.height)) 562 } 563 564 err := sc.markProcessed(event.height) 565 if err != nil { 566 return scSchedulerFail{reason: err}, nil 567 } 568 569 if sc.allBlocksProcessed() { 570 return scFinishedEv{reason: "processed all blocks"}, nil 571 } 572 573 return noOp, nil 574 } 575 576 // Handles an error from the processor. The processor had already cleaned the blocks from 577 // the peers included in this event. Just attempt to remove the peers. 578 func (sc *scheduler) handleBlockProcessError(event pcBlockVerificationFailure) (Event, error) { 579 // The peers may have been just removed due to errors, low speed or timeouts. 580 sc.removePeer(event.firstPeerID) 581 if event.firstPeerID != event.secondPeerID { 582 sc.removePeer(event.secondPeerID) 583 } 584 585 if sc.allBlocksProcessed() { 586 return scFinishedEv{reason: "error on last block"}, nil 587 } 588 589 return noOp, nil 590 } 591 592 func (sc *scheduler) handleAddNewPeer(event bcAddNewPeer) (Event, error) { 593 sc.ensurePeer(event.peerID) 594 return noOp, nil 595 } 596 597 func (sc *scheduler) handleRemovePeer(event bcRemovePeer) (Event, error) { 598 sc.removePeer(event.peerID) 599 600 if sc.allBlocksProcessed() { 601 return scFinishedEv{reason: "removed peer"}, nil 602 } 603 604 // Return scPeerError so the peer (and all associated blocks) is removed from 605 // the processor. 606 return scPeerError{peerID: event.peerID, reason: errors.New("peer was stopped")}, nil 607 } 608 609 func (sc *scheduler) handleTryPrunePeer(event rTryPrunePeer) (Event, error) { 610 // Check behavior of peer responsible to deliver block at sc.height. 611 timeHeightAsked, ok := sc.pendingTime[sc.height] 612 if ok && time.Since(timeHeightAsked) > sc.peerTimeout { 613 // A request was sent to a peer for block at sc.height but a response was not received 614 // from that peer within sc.peerTimeout. Remove the peer. This is to ensure that a peer 615 // will be timed out even if it sends blocks at higher heights but prevents progress by 616 // not sending the block at current height. 617 sc.removePeer(sc.pendingBlocks[sc.height]) 618 } 619 620 prunablePeers := sc.prunablePeers(sc.peerTimeout, sc.minRecvRate, event.time) 621 if len(prunablePeers) == 0 { 622 return noOp, nil 623 } 624 for _, peerID := range prunablePeers { 625 sc.removePeer(peerID) 626 } 627 628 // If all blocks are processed we should finish. 629 if sc.allBlocksProcessed() { 630 return scFinishedEv{reason: "after try prune"}, nil 631 } 632 633 return scPeersPruned{peers: prunablePeers}, nil 634 } 635 636 func (sc *scheduler) handleResetState(event bcResetState) (Event, error) { 637 initHeight := event.state.LastBlockHeight + 1 638 if initHeight == 1 { 639 initHeight = event.state.InitialHeight 640 } 641 sc.initHeight = initHeight 642 sc.height = initHeight 643 sc.lastAdvance = time.Now() 644 sc.addNewBlocks() 645 return noOp, nil 646 } 647 648 func (sc *scheduler) handleTrySchedule(event rTrySchedule) (Event, error) { 649 if time.Since(sc.lastAdvance) > sc.syncTimeout { 650 return scFinishedEv{reason: "timeout, no advance"}, nil 651 } 652 653 nextHeight := sc.nextHeightToSchedule() 654 if nextHeight == -1 { 655 return noOp, nil 656 } 657 658 bestPeerID, err := sc.selectPeer(nextHeight) 659 if err != nil { 660 return scSchedulerFail{reason: err}, nil 661 } 662 if err := sc.markPending(bestPeerID, nextHeight, event.time); err != nil { 663 return scSchedulerFail{reason: err}, nil // XXX: peerError might be more appropriate 664 } 665 return scBlockRequest{peerID: bestPeerID, height: nextHeight}, nil 666 667 } 668 669 func (sc *scheduler) handleStatusResponse(event bcStatusResponse) (Event, error) { 670 err := sc.setPeerRange(event.peerID, event.base, event.height) 671 if err != nil { 672 return scPeerError{peerID: event.peerID, reason: err}, nil 673 } 674 return noOp, nil 675 } 676 677 func (sc *scheduler) handle(event Event) (Event, error) { 678 switch event := event.(type) { 679 case bcResetState: 680 nextEvent, err := sc.handleResetState(event) 681 return nextEvent, err 682 case bcStatusResponse: 683 nextEvent, err := sc.handleStatusResponse(event) 684 return nextEvent, err 685 case bcBlockResponse: 686 nextEvent, err := sc.handleBlockResponse(event) 687 return nextEvent, err 688 case bcNoBlockResponse: 689 nextEvent, err := sc.handleNoBlockResponse(event) 690 return nextEvent, err 691 case rTrySchedule: 692 nextEvent, err := sc.handleTrySchedule(event) 693 return nextEvent, err 694 case bcAddNewPeer: 695 nextEvent, err := sc.handleAddNewPeer(event) 696 return nextEvent, err 697 case bcRemovePeer: 698 nextEvent, err := sc.handleRemovePeer(event) 699 return nextEvent, err 700 case rTryPrunePeer: 701 nextEvent, err := sc.handleTryPrunePeer(event) 702 return nextEvent, err 703 case pcBlockProcessed: 704 nextEvent, err := sc.handleBlockProcessed(event) 705 return nextEvent, err 706 case pcBlockVerificationFailure: 707 nextEvent, err := sc.handleBlockProcessError(event) 708 return nextEvent, err 709 default: 710 return scSchedulerFail{reason: fmt.Errorf("unknown event %v", event)}, nil 711 } 712 }