github.com/number571/tendermint@v0.34.11-gost/internal/blockchain/v2/scheduler.go (about) 1 package v2 2 3 import ( 4 "bytes" 5 "errors" 6 "fmt" 7 "math" 8 "sort" 9 "time" 10 11 "github.com/number571/tendermint/types" 12 ) 13 14 // Events generated by the scheduler: 15 // all blocks have been processed 16 type scFinishedEv struct { 17 priorityNormal 18 reason string 19 } 20 21 func (e scFinishedEv) String() string { 22 return fmt.Sprintf("scFinishedEv{%v}", e.reason) 23 } 24 25 // send a blockRequest message 26 type scBlockRequest struct { 27 priorityNormal 28 peerID types.NodeID 29 height int64 30 } 31 32 func (e scBlockRequest) String() string { 33 return fmt.Sprintf("scBlockRequest{%d from %v}", e.height, e.peerID) 34 } 35 36 // a block has been received and validated by the scheduler 37 type scBlockReceived struct { 38 priorityNormal 39 peerID types.NodeID 40 block *types.Block 41 } 42 43 func (e scBlockReceived) String() string { 44 return fmt.Sprintf("scBlockReceived{%d#%X from %v}", e.block.Height, e.block.Hash(), e.peerID) 45 } 46 47 // scheduler detected a peer error 48 type scPeerError struct { 49 priorityHigh 50 peerID types.NodeID 51 reason error 52 } 53 54 func (e scPeerError) String() string { 55 return fmt.Sprintf("scPeerError{%v errored with %v}", e.peerID, e.reason) 56 } 57 58 // scheduler removed a set of peers (timed out or slow peer) 59 type scPeersPruned struct { 60 priorityHigh 61 peers []types.NodeID 62 } 63 64 func (e scPeersPruned) String() string { 65 return fmt.Sprintf("scPeersPruned{%v}", e.peers) 66 } 67 68 // XXX: make this fatal? 69 // scheduler encountered a fatal error 70 type scSchedulerFail struct { 71 priorityHigh 72 reason error 73 } 74 75 func (e scSchedulerFail) String() string { 76 return fmt.Sprintf("scSchedulerFail{%v}", e.reason) 77 } 78 79 type blockState int 80 81 const ( 82 blockStateUnknown blockState = iota + 1 // no known peer has this block 83 blockStateNew // indicates that a peer has reported having this block 84 blockStatePending // indicates that this block has been requested from a peer 85 blockStateReceived // indicates that this block has been received by a peer 86 blockStateProcessed // indicates that this block has been applied 87 ) 88 89 func (e blockState) String() string { 90 switch e { 91 case blockStateUnknown: 92 return "Unknown" 93 case blockStateNew: 94 return "New" 95 case blockStatePending: 96 return "Pending" 97 case blockStateReceived: 98 return "Received" 99 case blockStateProcessed: 100 return "Processed" 101 default: 102 return fmt.Sprintf("invalid blockState: %d", e) 103 } 104 } 105 106 type peerState int 107 108 const ( 109 peerStateNew = iota + 1 110 peerStateReady 111 peerStateRemoved 112 ) 113 114 func (e peerState) String() string { 115 switch e { 116 case peerStateNew: 117 return "New" 118 case peerStateReady: 119 return "Ready" 120 case peerStateRemoved: 121 return "Removed" 122 default: 123 panic(fmt.Sprintf("unknown peerState: %d", e)) 124 } 125 } 126 127 type scPeer struct { 128 peerID types.NodeID 129 130 // initialized as New when peer is added, updated to Ready when statusUpdate is received, 131 // updated to Removed when peer is removed 132 state peerState 133 134 base int64 // updated when statusResponse is received 135 height int64 // updated when statusResponse is received 136 lastTouched time.Time 137 lastRate int64 // last receive rate in bytes 138 } 139 140 func (p scPeer) String() string { 141 return fmt.Sprintf("{state %v, base %d, height %d, lastTouched %v, lastRate %d, id %v}", 142 p.state, p.base, p.height, p.lastTouched, p.lastRate, p.peerID) 143 } 144 145 func newScPeer(peerID types.NodeID) *scPeer { 146 return &scPeer{ 147 peerID: peerID, 148 state: peerStateNew, 149 base: -1, 150 height: -1, 151 lastTouched: time.Time{}, 152 } 153 } 154 155 // The scheduler keep track of the state of each block and each peer. The 156 // scheduler will attempt to schedule new block requests with `trySchedule` 157 // events and remove slow peers with `tryPrune` events. 158 type scheduler struct { 159 initHeight int64 160 161 // next block that needs to be processed. All blocks with smaller height are 162 // in Processed state. 163 height int64 164 165 // lastAdvance tracks the last time a block execution happened. 166 // syncTimeout is the maximum time the scheduler waits to advance in the fast sync process before finishing. 167 // This covers the cases where there are no peers or all peers have a lower height. 168 lastAdvance time.Time 169 syncTimeout time.Duration 170 171 // a map of peerID to scheduler specific peer struct `scPeer` used to keep 172 // track of peer specific state 173 peers map[types.NodeID]*scPeer 174 peerTimeout time.Duration // maximum response time from a peer otherwise prune 175 minRecvRate int64 // minimum receive rate from peer otherwise prune 176 177 // the maximum number of blocks that should be New, Received or Pending at any point 178 // in time. This is used to enforce a limit on the blockStates map. 179 targetPending int 180 // a list of blocks to be scheduled (New), Pending or Received. Its length should be 181 // smaller than targetPending. 182 blockStates map[int64]blockState 183 184 // a map of heights to the peer we are waiting a response from 185 pendingBlocks map[int64]types.NodeID 186 187 // the time at which a block was put in blockStatePending 188 pendingTime map[int64]time.Time 189 190 // a map of heights to the peers that put the block in blockStateReceived 191 receivedBlocks map[int64]types.NodeID 192 } 193 194 func (sc scheduler) String() string { 195 return fmt.Sprintf("ih: %d, bst: %v, peers: %v, pblks: %v, ptm %v, rblks: %v", 196 sc.initHeight, sc.blockStates, sc.peers, sc.pendingBlocks, sc.pendingTime, sc.receivedBlocks) 197 } 198 199 func newScheduler(initHeight int64, startTime time.Time) *scheduler { 200 sc := scheduler{ 201 initHeight: initHeight, 202 lastAdvance: startTime, 203 syncTimeout: 60 * time.Second, 204 height: initHeight, 205 blockStates: make(map[int64]blockState), 206 peers: make(map[types.NodeID]*scPeer), 207 pendingBlocks: make(map[int64]types.NodeID), 208 pendingTime: make(map[int64]time.Time), 209 receivedBlocks: make(map[int64]types.NodeID), 210 targetPending: 10, // TODO - pass as param 211 peerTimeout: 15 * time.Second, // TODO - pass as param 212 minRecvRate: 0, // int64(7680), TODO - pass as param 213 } 214 215 return &sc 216 } 217 218 func (sc *scheduler) ensurePeer(peerID types.NodeID) *scPeer { 219 if _, ok := sc.peers[peerID]; !ok { 220 sc.peers[peerID] = newScPeer(peerID) 221 } 222 return sc.peers[peerID] 223 } 224 225 func (sc *scheduler) touchPeer(peerID types.NodeID, time time.Time) error { 226 peer, ok := sc.peers[peerID] 227 if !ok { 228 return fmt.Errorf("couldn't find peer %s", peerID) 229 } 230 231 if peer.state != peerStateReady { 232 return fmt.Errorf("tried to touch peer in state %s, must be Ready", peer.state) 233 } 234 235 peer.lastTouched = time 236 237 return nil 238 } 239 240 func (sc *scheduler) removePeer(peerID types.NodeID) { 241 peer, ok := sc.peers[peerID] 242 if !ok { 243 return 244 } 245 if peer.state == peerStateRemoved { 246 return 247 } 248 249 for height, pendingPeerID := range sc.pendingBlocks { 250 if pendingPeerID == peerID { 251 sc.setStateAtHeight(height, blockStateNew) 252 delete(sc.pendingTime, height) 253 delete(sc.pendingBlocks, height) 254 } 255 } 256 257 for height, rcvPeerID := range sc.receivedBlocks { 258 if rcvPeerID == peerID { 259 sc.setStateAtHeight(height, blockStateNew) 260 delete(sc.receivedBlocks, height) 261 } 262 } 263 264 // remove the blocks from blockStates if the peer removal causes the max peer height to be lower. 265 peer.state = peerStateRemoved 266 maxPeerHeight := int64(0) 267 for _, otherPeer := range sc.peers { 268 if otherPeer.state != peerStateReady { 269 continue 270 } 271 if otherPeer.peerID != peer.peerID && otherPeer.height > maxPeerHeight { 272 maxPeerHeight = otherPeer.height 273 } 274 } 275 for h := range sc.blockStates { 276 if h > maxPeerHeight { 277 delete(sc.blockStates, h) 278 } 279 } 280 } 281 282 // check if the blockPool is running low and add new blocks in New state to be requested. 283 // This function is called when there is an increase in the maximum peer height or when 284 // blocks are processed. 285 func (sc *scheduler) addNewBlocks() { 286 if len(sc.blockStates) >= sc.targetPending { 287 return 288 } 289 290 for i := sc.height; i < int64(sc.targetPending)+sc.height; i++ { 291 if i > sc.maxHeight() { 292 break 293 } 294 if sc.getStateAtHeight(i) == blockStateUnknown { 295 sc.setStateAtHeight(i, blockStateNew) 296 } 297 } 298 } 299 300 func (sc *scheduler) setPeerRange(peerID types.NodeID, base int64, height int64) error { 301 peer := sc.ensurePeer(peerID) 302 303 if peer.state == peerStateRemoved { 304 return nil // noop 305 } 306 307 if height < peer.height { 308 sc.removePeer(peerID) 309 return fmt.Errorf("cannot move peer height lower. from %d to %d", peer.height, height) 310 } 311 312 if base > height { 313 sc.removePeer(peerID) 314 return fmt.Errorf("cannot set peer base higher than its height") 315 } 316 317 peer.base = base 318 peer.height = height 319 peer.state = peerStateReady 320 321 sc.addNewBlocks() 322 return nil 323 } 324 325 func (sc *scheduler) getStateAtHeight(height int64) blockState { 326 if height < sc.height { 327 return blockStateProcessed 328 } else if state, ok := sc.blockStates[height]; ok { 329 return state 330 } else { 331 return blockStateUnknown 332 } 333 } 334 335 func (sc *scheduler) getPeersWithHeight(height int64) []types.NodeID { 336 peers := make([]types.NodeID, 0) 337 for _, peer := range sc.peers { 338 if peer.state != peerStateReady { 339 continue 340 } 341 if peer.base <= height && peer.height >= height { 342 peers = append(peers, peer.peerID) 343 } 344 } 345 return peers 346 } 347 348 func (sc *scheduler) prunablePeers(peerTimout time.Duration, minRecvRate int64, now time.Time) []types.NodeID { 349 prunable := make([]types.NodeID, 0) 350 for peerID, peer := range sc.peers { 351 if peer.state != peerStateReady { 352 continue 353 } 354 if now.Sub(peer.lastTouched) > peerTimout || peer.lastRate < minRecvRate { 355 prunable = append(prunable, peerID) 356 } 357 } 358 // Tests for handleTryPrunePeer() may fail without sort due to range non-determinism 359 sort.Sort(PeerByID(prunable)) 360 return prunable 361 } 362 363 func (sc *scheduler) setStateAtHeight(height int64, state blockState) { 364 sc.blockStates[height] = state 365 } 366 367 // CONTRACT: peer exists and in Ready state. 368 func (sc *scheduler) markReceived(peerID types.NodeID, height int64, size int64, now time.Time) error { 369 peer := sc.peers[peerID] 370 371 if state := sc.getStateAtHeight(height); state != blockStatePending || sc.pendingBlocks[height] != peerID { 372 return fmt.Errorf("received block %d from peer %s without being requested", height, peerID) 373 } 374 375 pendingTime, ok := sc.pendingTime[height] 376 if !ok || now.Sub(pendingTime) <= 0 { 377 return fmt.Errorf("clock error: block %d received at %s but requested at %s", 378 height, pendingTime, now) 379 } 380 381 peer.lastRate = size / now.Sub(pendingTime).Nanoseconds() 382 383 sc.setStateAtHeight(height, blockStateReceived) 384 delete(sc.pendingBlocks, height) 385 delete(sc.pendingTime, height) 386 387 sc.receivedBlocks[height] = peerID 388 389 return nil 390 } 391 392 func (sc *scheduler) markPending(peerID types.NodeID, height int64, time time.Time) error { 393 state := sc.getStateAtHeight(height) 394 if state != blockStateNew { 395 return fmt.Errorf("block %d should be in blockStateNew but is %s", height, state) 396 } 397 398 peer, ok := sc.peers[peerID] 399 if !ok { 400 return fmt.Errorf("cannot find peer %s", peerID) 401 } 402 403 if peer.state != peerStateReady { 404 return fmt.Errorf("cannot schedule %d from %s in %s", height, peerID, peer.state) 405 } 406 407 if height > peer.height { 408 return fmt.Errorf("cannot request height %d from peer %s that is at height %d", 409 height, peerID, peer.height) 410 } 411 412 if height < peer.base { 413 return fmt.Errorf("cannot request height %d for peer %s with base %d", 414 height, peerID, peer.base) 415 } 416 417 sc.setStateAtHeight(height, blockStatePending) 418 sc.pendingBlocks[height] = peerID 419 sc.pendingTime[height] = time 420 421 return nil 422 } 423 424 func (sc *scheduler) markProcessed(height int64) error { 425 // It is possible that a peer error or timeout is handled after the processor 426 // has processed the block but before the scheduler received this event, so 427 // when pcBlockProcessed event is received, the block had been requested 428 // again => don't check the block state. 429 sc.lastAdvance = time.Now() 430 sc.height = height + 1 431 delete(sc.pendingBlocks, height) 432 delete(sc.pendingTime, height) 433 delete(sc.receivedBlocks, height) 434 delete(sc.blockStates, height) 435 sc.addNewBlocks() 436 return nil 437 } 438 439 func (sc *scheduler) allBlocksProcessed() bool { 440 if len(sc.peers) == 0 { 441 return false 442 } 443 return sc.height >= sc.maxHeight() 444 } 445 446 // returns max peer height or the last processed block, i.e. sc.height 447 func (sc *scheduler) maxHeight() int64 { 448 max := sc.height - 1 449 for _, peer := range sc.peers { 450 if peer.state != peerStateReady { 451 continue 452 } 453 if max < peer.height { 454 max = peer.height 455 } 456 } 457 return max 458 } 459 460 // lowest block in sc.blockStates with state == blockStateNew or -1 if no new blocks 461 func (sc *scheduler) nextHeightToSchedule() int64 { 462 var min int64 = math.MaxInt64 463 for height, state := range sc.blockStates { 464 if state == blockStateNew && height < min { 465 min = height 466 } 467 } 468 if min == math.MaxInt64 { 469 min = -1 470 } 471 return min 472 } 473 474 func (sc *scheduler) pendingFrom(peerID types.NodeID) []int64 { 475 var heights []int64 476 for height, pendingPeerID := range sc.pendingBlocks { 477 if pendingPeerID == peerID { 478 heights = append(heights, height) 479 } 480 } 481 return heights 482 } 483 484 func (sc *scheduler) selectPeer(height int64) (types.NodeID, error) { 485 peers := sc.getPeersWithHeight(height) 486 if len(peers) == 0 { 487 return "", fmt.Errorf("cannot find peer for height %d", height) 488 } 489 490 // create a map from number of pending requests to a list 491 // of peers having that number of pending requests. 492 pendingFrom := make(map[int][]types.NodeID) 493 for _, peerID := range peers { 494 numPending := len(sc.pendingFrom(peerID)) 495 pendingFrom[numPending] = append(pendingFrom[numPending], peerID) 496 } 497 498 // find the set of peers with minimum number of pending requests. 499 var minPending int64 = math.MaxInt64 500 for mp := range pendingFrom { 501 if int64(mp) < minPending { 502 minPending = int64(mp) 503 } 504 } 505 506 sort.Sort(PeerByID(pendingFrom[int(minPending)])) 507 return pendingFrom[int(minPending)][0], nil 508 } 509 510 // PeerByID is a list of peers sorted by peerID. 511 type PeerByID []types.NodeID 512 513 func (peers PeerByID) Len() int { 514 return len(peers) 515 } 516 func (peers PeerByID) Less(i, j int) bool { 517 return bytes.Compare([]byte(peers[i]), []byte(peers[j])) == -1 518 } 519 520 func (peers PeerByID) Swap(i, j int) { 521 peers[i], peers[j] = peers[j], peers[i] 522 } 523 524 // Handlers 525 526 // This handler gets the block, performs some validation and then passes it on to the processor. 527 func (sc *scheduler) handleBlockResponse(event bcBlockResponse) (Event, error) { 528 err := sc.touchPeer(event.peerID, event.time) 529 if err != nil { 530 // peer does not exist OR not ready 531 return noOp, nil 532 } 533 534 err = sc.markReceived(event.peerID, event.block.Height, event.size, event.time) 535 if err != nil { 536 sc.removePeer(event.peerID) 537 return scPeerError{peerID: event.peerID, reason: err}, nil 538 } 539 540 return scBlockReceived{peerID: event.peerID, block: event.block}, nil 541 } 542 543 func (sc *scheduler) handleNoBlockResponse(event bcNoBlockResponse) (Event, error) { 544 // No such peer or peer was removed. 545 peer, ok := sc.peers[event.peerID] 546 if !ok || peer.state == peerStateRemoved { 547 return noOp, nil 548 } 549 550 // The peer may have been just removed due to errors, low speed or timeouts. 551 sc.removePeer(event.peerID) 552 553 return scPeerError{peerID: event.peerID, 554 reason: fmt.Errorf("peer %v with base %d height %d claims no block for %d", 555 event.peerID, peer.base, peer.height, event.height)}, nil 556 } 557 558 func (sc *scheduler) handleBlockProcessed(event pcBlockProcessed) (Event, error) { 559 if event.height != sc.height { 560 panic(fmt.Sprintf("processed height %d, but expected height %d", event.height, sc.height)) 561 } 562 563 err := sc.markProcessed(event.height) 564 if err != nil { 565 return scSchedulerFail{reason: err}, nil 566 } 567 568 if sc.allBlocksProcessed() { 569 return scFinishedEv{reason: "processed all blocks"}, nil 570 } 571 572 return noOp, nil 573 } 574 575 // Handles an error from the processor. The processor had already cleaned the blocks from 576 // the peers included in this event. Just attempt to remove the peers. 577 func (sc *scheduler) handleBlockProcessError(event pcBlockVerificationFailure) (Event, error) { 578 // The peers may have been just removed due to errors, low speed or timeouts. 579 sc.removePeer(event.firstPeerID) 580 if event.firstPeerID != event.secondPeerID { 581 sc.removePeer(event.secondPeerID) 582 } 583 584 if sc.allBlocksProcessed() { 585 return scFinishedEv{reason: "error on last block"}, nil 586 } 587 588 return noOp, nil 589 } 590 591 func (sc *scheduler) handleAddNewPeer(event bcAddNewPeer) (Event, error) { 592 sc.ensurePeer(event.peerID) 593 return noOp, nil 594 } 595 596 func (sc *scheduler) handleRemovePeer(event bcRemovePeer) (Event, error) { 597 sc.removePeer(event.peerID) 598 599 if sc.allBlocksProcessed() { 600 return scFinishedEv{reason: "removed peer"}, nil 601 } 602 603 // Return scPeerError so the peer (and all associated blocks) is removed from 604 // the processor. 605 return scPeerError{peerID: event.peerID, reason: errors.New("peer was stopped")}, nil 606 } 607 608 func (sc *scheduler) handleTryPrunePeer(event rTryPrunePeer) (Event, error) { 609 // Check behavior of peer responsible to deliver block at sc.height. 610 timeHeightAsked, ok := sc.pendingTime[sc.height] 611 if ok && time.Since(timeHeightAsked) > sc.peerTimeout { 612 // A request was sent to a peer for block at sc.height but a response was not received 613 // from that peer within sc.peerTimeout. Remove the peer. This is to ensure that a peer 614 // will be timed out even if it sends blocks at higher heights but prevents progress by 615 // not sending the block at current height. 616 sc.removePeer(sc.pendingBlocks[sc.height]) 617 } 618 619 prunablePeers := sc.prunablePeers(sc.peerTimeout, sc.minRecvRate, event.time) 620 if len(prunablePeers) == 0 { 621 return noOp, nil 622 } 623 for _, peerID := range prunablePeers { 624 sc.removePeer(peerID) 625 } 626 627 // If all blocks are processed we should finish. 628 if sc.allBlocksProcessed() { 629 return scFinishedEv{reason: "after try prune"}, nil 630 } 631 632 return scPeersPruned{peers: prunablePeers}, nil 633 } 634 635 func (sc *scheduler) handleResetState(event bcResetState) (Event, error) { 636 initHeight := event.state.LastBlockHeight + 1 637 if initHeight == 1 { 638 initHeight = event.state.InitialHeight 639 } 640 sc.initHeight = initHeight 641 sc.height = initHeight 642 sc.lastAdvance = time.Now() 643 sc.addNewBlocks() 644 return noOp, nil 645 } 646 647 func (sc *scheduler) handleTrySchedule(event rTrySchedule) (Event, error) { 648 if time.Since(sc.lastAdvance) > sc.syncTimeout { 649 return scFinishedEv{reason: "timeout, no advance"}, nil 650 } 651 652 nextHeight := sc.nextHeightToSchedule() 653 if nextHeight == -1 { 654 return noOp, nil 655 } 656 657 bestPeerID, err := sc.selectPeer(nextHeight) 658 if err != nil { 659 return scSchedulerFail{reason: err}, nil 660 } 661 if err := sc.markPending(bestPeerID, nextHeight, event.time); err != nil { 662 return scSchedulerFail{reason: err}, nil // XXX: peerError might be more appropriate 663 } 664 return scBlockRequest{peerID: bestPeerID, height: nextHeight}, nil 665 666 } 667 668 func (sc *scheduler) handleStatusResponse(event bcStatusResponse) (Event, error) { 669 err := sc.setPeerRange(event.peerID, event.base, event.height) 670 if err != nil { 671 return scPeerError{peerID: event.peerID, reason: err}, nil 672 } 673 return noOp, nil 674 } 675 676 func (sc *scheduler) handle(event Event) (Event, error) { 677 switch event := event.(type) { 678 case bcResetState: 679 nextEvent, err := sc.handleResetState(event) 680 return nextEvent, err 681 case bcStatusResponse: 682 nextEvent, err := sc.handleStatusResponse(event) 683 return nextEvent, err 684 case bcBlockResponse: 685 nextEvent, err := sc.handleBlockResponse(event) 686 return nextEvent, err 687 case bcNoBlockResponse: 688 nextEvent, err := sc.handleNoBlockResponse(event) 689 return nextEvent, err 690 case rTrySchedule: 691 nextEvent, err := sc.handleTrySchedule(event) 692 return nextEvent, err 693 case bcAddNewPeer: 694 nextEvent, err := sc.handleAddNewPeer(event) 695 return nextEvent, err 696 case bcRemovePeer: 697 nextEvent, err := sc.handleRemovePeer(event) 698 return nextEvent, err 699 case rTryPrunePeer: 700 nextEvent, err := sc.handleTryPrunePeer(event) 701 return nextEvent, err 702 case pcBlockProcessed: 703 nextEvent, err := sc.handleBlockProcessed(event) 704 return nextEvent, err 705 case pcBlockVerificationFailure: 706 nextEvent, err := sc.handleBlockProcessError(event) 707 return nextEvent, err 708 default: 709 return scSchedulerFail{reason: fmt.Errorf("unknown event %v", event)}, nil 710 } 711 }