github.com/MetalBlockchain/metalgo@v1.11.9/snow/engine/snowman/bootstrap/bootstrapper.go

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package bootstrap

import (
	"context"
	"errors"
	"fmt"
	"math"
	"sync"
	"time"

	"go.uber.org/zap"

	"github.com/MetalBlockchain/metalgo/database"
	"github.com/MetalBlockchain/metalgo/genesis"
	"github.com/MetalBlockchain/metalgo/ids"
	"github.com/MetalBlockchain/metalgo/proto/pb/p2p"
	"github.com/MetalBlockchain/metalgo/snow"
	"github.com/MetalBlockchain/metalgo/snow/consensus/snowman"
	"github.com/MetalBlockchain/metalgo/snow/consensus/snowman/bootstrapper"
	"github.com/MetalBlockchain/metalgo/snow/engine/common"
	"github.com/MetalBlockchain/metalgo/snow/engine/snowman/block"
	"github.com/MetalBlockchain/metalgo/snow/engine/snowman/bootstrap/interval"
	"github.com/MetalBlockchain/metalgo/utils/bimap"
	"github.com/MetalBlockchain/metalgo/utils/set"
	"github.com/MetalBlockchain/metalgo/utils/timer"
	"github.com/MetalBlockchain/metalgo/version"
)

const (
	// Delay bootstrapping to avoid potential CPU burns
	bootstrappingDelay = 10 * time.Second

	// statusUpdateFrequency is how many containers should be processed between
	// logs
	statusUpdateFrequency = 5000

	// maxOutstandingBroadcastRequests is the maximum number of requests to have
	// outstanding when broadcasting.
	maxOutstandingBroadcastRequests = 50

	epsilon = 1e-6 // small amount to add to time to avoid division by 0
)

var (
	_ common.BootstrapableEngine = (*Bootstrapper)(nil)

	errUnexpectedTimeout = errors.New("unexpected timeout fired")
)

// Bootstrapper repeatedly performs the bootstrapping protocol.
//
//  1. Wait until a sufficient amount of stake is connected.
//  2. Sample a small number of nodes to get the last accepted block ID.
//  3. Verify against the full network that the last accepted block ID received
//     in step 2 is an accepted block.
//  4. Sync the full ancestry of the last accepted block.
//  5. Execute all the fetched blocks that haven't already been executed.
//  6. Restart the bootstrapping protocol until the number of blocks being
//     accepted during a bootstrapping round stops decreasing.
//
// Note: Because of step 6, the bootstrapping protocol will generally be
// performed multiple times.
//
// Invariant: The VM is not guaranteed to be initialized until Start has been
// called, so it must be guaranteed the VM is not used until after Start.
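//
// A minimal wiring sketch (hypothetical: [engine] stands in for whatever
// consensus engine takes over once bootstrapping completes, and the Config
// construction is elided):
//
//	b, err := New(config, func(ctx context.Context, lastReqID uint32) error {
//		// hand control to the consensus engine
//		return engine.Start(ctx, lastReqID)
//	})
//	if err != nil {
//		return err
//	}
//	return b.Start(ctx, startReqID)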
type Bootstrapper struct {
	Config
	common.Halter
	*metrics

	// list of NoOpsHandler for messages dropped by bootstrapper
	common.StateSummaryFrontierHandler
	common.AcceptedStateSummaryHandler
	common.PutHandler
	common.QueryHandler
	common.ChitsHandler
	common.AppHandler

	requestID uint32 // Tracks the last requestID that was used in a request

	started   bool
	restarted bool

	minority bootstrapper.Poll
	majority bootstrapper.Poll

	// Greatest height of the blocks passed in startSyncing
	tipHeight uint64
	// Height of the last accepted block when bootstrapping starts
	startingHeight uint64
	// Number of blocks that were fetched on startSyncing
	initiallyFetched uint64
	// Time that startSyncing was last called
	startTime time.Time

	// tracks which validators were asked for which containers in which requests
	outstandingRequests     *bimap.BiMap[common.Request, ids.ID]
	outstandingRequestTimes map[common.Request]time.Time

	// number of state transitions executed
	executedStateTransitions uint64
	awaitingTimeout          bool

	tree            *interval.Tree
	missingBlockIDs set.Set[ids.ID]

	// bootstrappedOnce ensures that the [Bootstrapped] callback is only invoked
	// once, even if bootstrapping is retried.
	bootstrappedOnce sync.Once

	// Called when bootstrapping is done on a specific chain
	onFinished func(ctx context.Context, lastReqID uint32) error
}

func New(config Config, onFinished func(ctx context.Context, lastReqID uint32) error) (*Bootstrapper, error) {
	metrics, err := newMetrics(config.Ctx.Registerer)
	return &Bootstrapper{
		Config:                      config,
		metrics:                     metrics,
		StateSummaryFrontierHandler: common.NewNoOpStateSummaryFrontierHandler(config.Ctx.Log),
		AcceptedStateSummaryHandler: common.NewNoOpAcceptedStateSummaryHandler(config.Ctx.Log),
		PutHandler:                  common.NewNoOpPutHandler(config.Ctx.Log),
		QueryHandler:                common.NewNoOpQueryHandler(config.Ctx.Log),
		ChitsHandler:                common.NewNoOpChitsHandler(config.Ctx.Log),
		AppHandler:                  config.VM,

		minority: bootstrapper.Noop,
		majority: bootstrapper.Noop,

		outstandingRequests:     bimap.New[common.Request, ids.ID](),
		outstandingRequestTimes: make(map[common.Request]time.Time),

		executedStateTransitions: math.MaxInt,
		onFinished:               onFinished,
	}, err
}

func (b *Bootstrapper) Context() *snow.ConsensusContext {
	return b.Ctx
}

func (b *Bootstrapper) Clear(context.Context) error {
	b.Ctx.Lock.Lock()
	defer b.Ctx.Lock.Unlock()

	return database.AtomicClear(b.DB, b.DB)
}

func (b *Bootstrapper) Start(ctx context.Context, startReqID uint32) error {
	b.Ctx.State.Set(snow.EngineState{
		Type:  p2p.EngineType_ENGINE_TYPE_SNOWMAN,
		State: snow.Bootstrapping,
	})
	if err := b.VM.SetState(ctx, snow.Bootstrapping); err != nil {
		return fmt.Errorf("failed to notify VM that bootstrapping has started: %w", err)
	}

	lastAccepted, err := b.getLastAccepted(ctx)
	if err != nil {
		return err
	}

	lastAcceptedHeight := lastAccepted.Height()
	b.Ctx.Log.Info("starting bootstrapper",
		zap.Stringer("lastAcceptedID", lastAccepted.ID()),
		zap.Uint64("lastAcceptedHeight", lastAcceptedHeight),
	)

	// Set the starting height
	b.startingHeight = lastAcceptedHeight
	b.requestID = startReqID

	b.tree, err = interval.NewTree(b.DB)
	if err != nil {
		return fmt.Errorf("failed to initialize interval tree: %w", err)
	}

	b.missingBlockIDs, err = getMissingBlockIDs(ctx, b.DB, b.VM, b.tree, b.startingHeight)
	if err != nil {
		return fmt.Errorf("failed to initialize missing block IDs: %w", err)
	}

	return b.tryStartBootstrapping(ctx)
}
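
// Connected and Disconnected keep both the VM and the StartupTracker informed
// of peer membership. Bootstrapping itself is kicked off lazily from Connected:
// once the tracker reports that sufficient stake is online, the [started] flag
// ensures the protocol is only initiated once from this path.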
func (b *Bootstrapper) Connected(ctx context.Context, nodeID ids.NodeID, nodeVersion *version.Application) error {
	if err := b.VM.Connected(ctx, nodeID, nodeVersion); err != nil {
		return err
	}

	if err := b.StartupTracker.Connected(ctx, nodeID, nodeVersion); err != nil {
		return err
	}

	return b.tryStartBootstrapping(ctx)
}

func (b *Bootstrapper) Disconnected(ctx context.Context, nodeID ids.NodeID) error {
	if err := b.VM.Disconnected(ctx, nodeID); err != nil {
		return err
	}
	return b.StartupTracker.Disconnected(ctx, nodeID)
}

// tryStartBootstrapping will start bootstrapping the first time it is called
// while the startupTracker is reporting that the protocol should start.
func (b *Bootstrapper) tryStartBootstrapping(ctx context.Context) error {
	if b.started || !b.StartupTracker.ShouldStart() {
		return nil
	}

	b.started = true
	return b.startBootstrapping(ctx)
}

func (b *Bootstrapper) startBootstrapping(ctx context.Context) error {
	currentBeacons := b.Beacons.GetMap(b.Ctx.SubnetID)
	nodeWeights := make(map[ids.NodeID]uint64, len(currentBeacons))
	for nodeID, beacon := range currentBeacons {
		nodeWeights[nodeID] = beacon.Weight
	}

	frontierNodes, err := bootstrapper.Sample(nodeWeights, b.SampleK)
	if err != nil {
		return err
	}

	b.Ctx.Log.Debug("sampled nodes to seed bootstrapping frontier",
		zap.Reflect("sampledNodes", frontierNodes),
		zap.Int("numNodes", len(nodeWeights)),
	)

	b.minority = bootstrapper.NewMinority(
		b.Ctx.Log,
		frontierNodes,
		maxOutstandingBroadcastRequests,
	)
	b.majority = bootstrapper.NewMajority(
		b.Ctx.Log,
		nodeWeights,
		maxOutstandingBroadcastRequests,
	)

	if accepted, finalized := b.majority.Result(ctx); finalized {
		b.Ctx.Log.Info("bootstrapping skipped",
			zap.String("reason", "no provided bootstraps"),
		)
		return b.startSyncing(ctx, accepted)
	}

	b.requestID++
	return b.sendBootstrappingMessagesOrFinish(ctx)
}
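
// sendBootstrappingMessagesOrFinish drives the two-phase poll that seeds
// syncing. Roughly, with the state below:
//
//	minority has peers pending -> SendGetAcceptedFrontier and wait
//	minority finalized         -> SendGetAccepted(frontier) to majority peers
//	majority finalized         -> startSyncing(accepted), or restart the
//	                              bootstrap if nothing was accepted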
func (b *Bootstrapper) sendBootstrappingMessagesOrFinish(ctx context.Context) error {
	if peers := b.minority.GetPeers(ctx); peers.Len() > 0 {
		b.Sender.SendGetAcceptedFrontier(ctx, peers, b.requestID)
		return nil
	}

	potentialAccepted, finalized := b.minority.Result(ctx)
	if !finalized {
		// We haven't finalized the accepted frontier, so we should wait for the
		// outstanding requests.
		return nil
	}

	if peers := b.majority.GetPeers(ctx); peers.Len() > 0 {
		b.Sender.SendGetAccepted(ctx, peers, b.requestID, potentialAccepted)
		return nil
	}

	accepted, finalized := b.majority.Result(ctx)
	if !finalized {
		// We haven't finalized the accepted set, so we should wait for the
		// outstanding requests.
		return nil
	}

	numAccepted := len(accepted)
	if numAccepted == 0 {
		b.Ctx.Log.Debug("restarting bootstrap",
			zap.String("reason", "no blocks accepted"),
			zap.Int("numBeacons", b.Beacons.Count(b.Ctx.SubnetID)),
		)
		// Invariant: These functions are mutually recursive. However, when
		// [startBootstrapping] calls [sendBootstrappingMessagesOrFinish], it is
		// guaranteed to exit when sending GetAcceptedFrontier requests.
		return b.startBootstrapping(ctx)
	}

	return b.startSyncing(ctx, accepted)
}

func (b *Bootstrapper) AcceptedFrontier(ctx context.Context, nodeID ids.NodeID, requestID uint32, containerID ids.ID) error {
	if requestID != b.requestID {
		b.Ctx.Log.Debug("received out-of-sync AcceptedFrontier message",
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("expectedRequestID", b.requestID),
			zap.Uint32("requestID", requestID),
		)
		return nil
	}

	if err := b.minority.RecordOpinion(ctx, nodeID, set.Of(containerID)); err != nil {
		return err
	}
	return b.sendBootstrappingMessagesOrFinish(ctx)
}

func (b *Bootstrapper) GetAcceptedFrontierFailed(ctx context.Context, nodeID ids.NodeID, requestID uint32) error {
	if requestID != b.requestID {
		b.Ctx.Log.Debug("received out-of-sync GetAcceptedFrontierFailed message",
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("expectedRequestID", b.requestID),
			zap.Uint32("requestID", requestID),
		)
		return nil
	}

	if err := b.minority.RecordOpinion(ctx, nodeID, nil); err != nil {
		return err
	}
	return b.sendBootstrappingMessagesOrFinish(ctx)
}

func (b *Bootstrapper) Accepted(ctx context.Context, nodeID ids.NodeID, requestID uint32, containerIDs set.Set[ids.ID]) error {
	if requestID != b.requestID {
		b.Ctx.Log.Debug("received out-of-sync Accepted message",
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("expectedRequestID", b.requestID),
			zap.Uint32("requestID", requestID),
		)
		return nil
	}

	if err := b.majority.RecordOpinion(ctx, nodeID, containerIDs); err != nil {
		return err
	}
	return b.sendBootstrappingMessagesOrFinish(ctx)
}

func (b *Bootstrapper) GetAcceptedFailed(ctx context.Context, nodeID ids.NodeID, requestID uint32) error {
	if requestID != b.requestID {
		b.Ctx.Log.Debug("received out-of-sync GetAcceptedFailed message",
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("expectedRequestID", b.requestID),
			zap.Uint32("requestID", requestID),
		)
		return nil
	}

	if err := b.majority.RecordOpinion(ctx, nodeID, nil); err != nil {
		return err
	}
	return b.sendBootstrappingMessagesOrFinish(ctx)
}
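
// startSyncing begins fetching the ancestry of [acceptedBlockIDs], together
// with any checkpointed block IDs for this network and chain and any
// previously recorded missing blocks, and immediately processes whichever of
// those blocks the VM already has locally.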
func (b *Bootstrapper) startSyncing(ctx context.Context, acceptedBlockIDs []ids.ID) error {
	knownBlockIDs := genesis.GetCheckpoints(b.Ctx.NetworkID, b.Ctx.ChainID)
	b.missingBlockIDs.Union(knownBlockIDs)
	b.missingBlockIDs.Add(acceptedBlockIDs...)
	numMissingBlockIDs := b.missingBlockIDs.Len()

	log := b.Ctx.Log.Info
	if b.restarted {
		log = b.Ctx.Log.Debug
	}
	log("starting to fetch blocks",
		zap.Int("numKnownBlocks", knownBlockIDs.Len()),
		zap.Int("numAcceptedBlocks", len(acceptedBlockIDs)),
		zap.Int("numMissingBlocks", numMissingBlockIDs),
	)

	toProcess := make([]snowman.Block, 0, numMissingBlockIDs)
	for blkID := range b.missingBlockIDs {
		// TODO: if `GetBlock` returns an error other than
		// `database.ErrNotFound`, then the error should be propagated.
		blk, err := b.VM.GetBlock(ctx, blkID)
		if err != nil {
			if err := b.fetch(ctx, blkID); err != nil {
				return err
			}
			continue
		}
		toProcess = append(toProcess, blk)
	}

	b.initiallyFetched = b.tree.Len()
	b.startTime = time.Now()

	// Process received blocks
	for _, blk := range toProcess {
		if err := b.process(ctx, blk, nil); err != nil {
			return err
		}
	}

	return b.tryStartExecuting(ctx)
}

// Get block [blkID] and its ancestors from a validator
func (b *Bootstrapper) fetch(ctx context.Context, blkID ids.ID) error {
	// Make sure we haven't already requested this block
	if b.outstandingRequests.HasValue(blkID) {
		return nil
	}

	nodeID, ok := b.PeerTracker.SelectPeer()
	if !ok {
		// If we aren't connected to any peers, we send a request to ourself
		// which is guaranteed to fail. We send this message to use the message
		// timeout as a retry mechanism. Once we are connected to another node
		// again we will select them to sample from.
		nodeID = b.Ctx.NodeID
	}

	b.PeerTracker.RegisterRequest(nodeID)

	b.requestID++
	request := common.Request{
		NodeID:    nodeID,
		RequestID: b.requestID,
	}
	b.outstandingRequests.Put(request, blkID)
	b.outstandingRequestTimes[request] = time.Now()
	b.Config.Sender.SendGetAncestors(ctx, nodeID, b.requestID, blkID) // request block and ancestors
	return nil
}
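
// Responses to fetch requests are scored by approximate bandwidth so that the
// PeerTracker favors fast peers: bandwidth = numBytes / (latencySeconds +
// epsilon). Empty, unparseable, or mismatched responses register a failure
// instead, and the wanted block is re-fetched from a newly selected peer.
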
// Ancestors handles the receipt of multiple containers. Should be received in
// response to a GetAncestors message to [nodeID] with request ID [requestID]
func (b *Bootstrapper) Ancestors(ctx context.Context, nodeID ids.NodeID, requestID uint32, blks [][]byte) error {
	// Make sure this is in response to a request we made
	request := common.Request{
		NodeID:    nodeID,
		RequestID: requestID,
	}
	wantedBlkID, ok := b.outstandingRequests.DeleteKey(request)
	if !ok { // this message isn't in response to a request we made
		b.Ctx.Log.Debug("received unexpected Ancestors",
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("requestID", requestID),
		)
		return nil
	}
	requestTime := b.outstandingRequestTimes[request]
	delete(b.outstandingRequestTimes, request)

	lenBlks := len(blks)
	if lenBlks == 0 {
		b.Ctx.Log.Debug("received Ancestors with no block",
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("requestID", requestID),
		)

		b.PeerTracker.RegisterFailure(nodeID)

		// Send another request for this
		return b.fetch(ctx, wantedBlkID)
	}

	if lenBlks > b.Config.AncestorsMaxContainersReceived {
		blks = blks[:b.Config.AncestorsMaxContainersReceived]
		b.Ctx.Log.Debug("ignoring containers in Ancestors",
			zap.Int("numContainers", lenBlks-b.Config.AncestorsMaxContainersReceived),
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("requestID", requestID),
		)
	}

	blocks, err := block.BatchedParseBlock(ctx, b.VM, blks)
	if err != nil { // the provided blocks couldn't be parsed
		b.Ctx.Log.Debug("failed to parse blocks in Ancestors",
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("requestID", requestID),
			zap.Error(err),
		)
		b.PeerTracker.RegisterFailure(nodeID)
		return b.fetch(ctx, wantedBlkID)
	}

	if len(blocks) == 0 {
		b.Ctx.Log.Debug("parsing blocks returned an empty set of blocks",
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("requestID", requestID),
		)
		b.PeerTracker.RegisterFailure(nodeID)
		return b.fetch(ctx, wantedBlkID)
	}

	requestedBlock := blocks[0]
	if actualID := requestedBlock.ID(); actualID != wantedBlkID {
		b.Ctx.Log.Debug("first block is not the requested block",
			zap.Stringer("expectedBlkID", wantedBlkID),
			zap.Stringer("blkID", actualID),
		)
		b.PeerTracker.RegisterFailure(nodeID)
		return b.fetch(ctx, wantedBlkID)
	}

	var (
		numBytes  = len(requestedBlock.Bytes())
		ancestors = make(map[ids.ID]snowman.Block, len(blocks))
	)
	for _, block := range blocks[1:] {
		numBytes += len(block.Bytes())
		ancestors[block.ID()] = block
	}

	// TODO: Calculate bandwidth based on the blocks that were persisted to
	// disk.
	var (
		requestLatency = time.Since(requestTime).Seconds() + epsilon
		bandwidth      = float64(numBytes) / requestLatency
	)
	b.PeerTracker.RegisterResponse(nodeID, bandwidth)

	if err := b.process(ctx, requestedBlock, ancestors); err != nil {
		return err
	}

	return b.tryStartExecuting(ctx)
}

func (b *Bootstrapper) GetAncestorsFailed(ctx context.Context, nodeID ids.NodeID, requestID uint32) error {
	request := common.Request{
		NodeID:    nodeID,
		RequestID: requestID,
	}
	blkID, ok := b.outstandingRequests.DeleteKey(request)
	if !ok {
		b.Ctx.Log.Debug("unexpectedly called GetAncestorsFailed",
			zap.Stringer("nodeID", nodeID),
			zap.Uint32("requestID", requestID),
		)
		return nil
	}
	delete(b.outstandingRequestTimes, request)

	// This node timed out their request.
	b.PeerTracker.RegisterFailure(nodeID)

	// Send another request for this
	return b.fetch(ctx, blkID)
}
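
// The process method below delegates persistence to the package-level process
// helper: the helper writes newly fetched blocks into the interval tree via a
// database batch and reports at most one newly discovered missing ancestor,
// which the method then fetches after recording metrics and progress logs.
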
// process a series of consecutive blocks starting at [blk].
//
//   - blk is a block that is assumed to have been marked as acceptable by the
//     bootstrapping engine.
//   - ancestors is a set of blocks that can be used to optimistically lookup
//     parent blocks. This enables the engine to process multiple blocks without
//     relying on the VM to have stored blocks during `ParseBlock`.
func (b *Bootstrapper) process(
	ctx context.Context,
	blk snowman.Block,
	ancestors map[ids.ID]snowman.Block,
) error {
	lastAccepted, err := b.getLastAccepted(ctx)
	if err != nil {
		return err
	}

	numPreviouslyFetched := b.tree.Len()

	batch := b.DB.NewBatch()
	missingBlockID, foundNewMissingID, err := process(
		batch,
		b.tree,
		b.missingBlockIDs,
		lastAccepted.Height(),
		blk,
		ancestors,
	)
	if err != nil {
		return err
	}

	// Update metrics and log statuses
	{
		numFetched := b.tree.Len()
		b.numFetched.Add(float64(b.tree.Len() - numPreviouslyFetched))

		height := blk.Height()
		b.tipHeight = max(b.tipHeight, height)

		if numPreviouslyFetched/statusUpdateFrequency != numFetched/statusUpdateFrequency {
			totalBlocksToFetch := b.tipHeight - b.startingHeight
			eta := timer.EstimateETA(
				b.startTime,
				numFetched-b.initiallyFetched,         // Number of blocks we have fetched during this run
				totalBlocksToFetch-b.initiallyFetched, // Number of blocks we expect to fetch during this run
			)

			if !b.restarted {
				b.Ctx.Log.Info("fetching blocks",
					zap.Uint64("numFetchedBlocks", numFetched),
					zap.Uint64("numTotalBlocks", totalBlocksToFetch),
					zap.Duration("eta", eta),
				)
			} else {
				b.Ctx.Log.Debug("fetching blocks",
					zap.Uint64("numFetchedBlocks", numFetched),
					zap.Uint64("numTotalBlocks", totalBlocksToFetch),
					zap.Duration("eta", eta),
				)
			}
		}
	}

	if err := batch.Write(); err != nil || !foundNewMissingID {
		return err
	}

	b.missingBlockIDs.Add(missingBlockID)
	// Attempt to fetch the newly discovered block
	return b.fetch(ctx, missingBlockID)
}
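
// Worked example for the restart condition in tryStartExecuting below: with
// previouslyExecuted = 10,000, a round that executes 4,999 blocks triggers
// another bootstrapping round, while a round that executes 5,000 or more (or
// exactly 0) falls through and finishes. Because each restart requires the
// executed count to at least halve, the number of rounds is at most
// logarithmic in the size of the first round's backlog.
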
// tryStartExecuting executes all pending blocks if there are no more blocks
// being fetched. After executing all pending blocks it will either restart
// bootstrapping, or transition into normal operations.
func (b *Bootstrapper) tryStartExecuting(ctx context.Context) error {
	if numMissingBlockIDs := b.missingBlockIDs.Len(); numMissingBlockIDs != 0 {
		return nil
	}

	if b.Ctx.State.Get().State == snow.NormalOp || b.awaitingTimeout {
		return nil
	}

	lastAccepted, err := b.getLastAccepted(ctx)
	if err != nil {
		return err
	}

	log := b.Ctx.Log.Info
	if b.restarted {
		log = b.Ctx.Log.Debug
	}

	numToExecute := b.tree.Len()
	err = execute(
		ctx,
		b,
		log,
		b.DB,
		&parseAcceptor{
			parser:      b.VM,
			ctx:         b.Ctx,
			numAccepted: b.numAccepted,
		},
		b.tree,
		lastAccepted.Height(),
	)
	if err != nil {
		// If a fatal error has occurred, include the last accepted block
		// information.
		lastAccepted, lastAcceptedErr := b.getLastAccepted(ctx)
		if lastAcceptedErr != nil {
			return fmt.Errorf("%w after %w", lastAcceptedErr, err)
		}
		return fmt.Errorf("%w with last accepted %s (height=%d)",
			err,
			lastAccepted.ID(),
			lastAccepted.Height(),
		)
	}
	if b.Halted() {
		return nil
	}

	previouslyExecuted := b.executedStateTransitions
	b.executedStateTransitions = numToExecute

	// Note that executedBlocks < c*previouslyExecuted ( 0 <= c < 1 ) is enforced
	// so that the bootstrapping process will terminate even as new blocks are
	// being issued.
	if numToExecute > 0 && numToExecute < previouslyExecuted/2 {
		return b.restartBootstrapping(ctx)
	}

	// If there is an additional callback, notify them that this chain has been
	// synced.
	if b.Bootstrapped != nil {
		b.bootstrappedOnce.Do(b.Bootstrapped)
	}

	// Notify the subnet that this chain is synced
	b.Config.BootstrapTracker.Bootstrapped(b.Ctx.ChainID)

	// If the subnet hasn't finished bootstrapping, this chain should remain
	// syncing.
	if !b.Config.BootstrapTracker.IsBootstrapped() {
		log("waiting for the remaining chains in this subnet to finish syncing")
		// Restart bootstrapping after [bootstrappingDelay] to keep up to date
		// on the latest tip.
		b.Config.Timer.RegisterTimeout(bootstrappingDelay)
		b.awaitingTimeout = true
		return nil
	}
	return b.onFinished(ctx, b.requestID)
}

func (b *Bootstrapper) getLastAccepted(ctx context.Context) (snowman.Block, error) {
	lastAcceptedID, err := b.VM.LastAccepted(ctx)
	if err != nil {
		return nil, fmt.Errorf("couldn't get last accepted ID: %w", err)
	}
	lastAccepted, err := b.VM.GetBlock(ctx, lastAcceptedID)
	if err != nil {
		return nil, fmt.Errorf("couldn't get last accepted block %s: %w", lastAcceptedID, err)
	}
	return lastAccepted, nil
}

func (b *Bootstrapper) Timeout(ctx context.Context) error {
	if !b.awaitingTimeout {
		return errUnexpectedTimeout
	}
	b.awaitingTimeout = false

	if !b.Config.BootstrapTracker.IsBootstrapped() {
		return b.restartBootstrapping(ctx)
	}
	return b.onFinished(ctx, b.requestID)
}

func (b *Bootstrapper) restartBootstrapping(ctx context.Context) error {
	b.Ctx.Log.Debug("Checking for new frontiers")
	b.restarted = true
	b.outstandingRequests = bimap.New[common.Request, ids.ID]()
	b.outstandingRequestTimes = make(map[common.Request]time.Time)
	return b.startBootstrapping(ctx)
}

func (b *Bootstrapper) Notify(_ context.Context, msg common.Message) error {
	if msg != common.StateSyncDone {
		b.Ctx.Log.Warn("received an unexpected message from the VM",
			zap.Stringer("msg", msg),
		)
		return nil
	}

	b.Ctx.StateSyncing.Set(false)
	return nil
}

func (b *Bootstrapper) HealthCheck(ctx context.Context) (interface{}, error) {
	b.Ctx.Lock.Lock()
	defer b.Ctx.Lock.Unlock()

	vmIntf, vmErr := b.VM.HealthCheck(ctx)
	intf := map[string]interface{}{
		"consensus": struct{}{},
		"vm":        vmIntf,
	}
	return intf, vmErr
}

func (b *Bootstrapper) Shutdown(ctx context.Context) error {
	b.Ctx.Log.Info("shutting down bootstrapper")

	b.Ctx.Lock.Lock()
	defer b.Ctx.Lock.Unlock()

	return b.VM.Shutdown(ctx)
}

func (*Bootstrapper) Gossip(context.Context) error {
	return nil
}