github.com/koko1123/flow-go-1@v0.29.6/engine/access/ingestion/engine.go (about) 1 // (c) 2019 Dapper Labs - ALL RIGHTS RESERVED 2 3 package ingestion 4 5 import ( 6 "context" 7 "errors" 8 "fmt" 9 "time" 10 11 "github.com/rs/zerolog" 12 13 "github.com/koko1123/flow-go-1/consensus/hotstuff/model" 14 "github.com/koko1123/flow-go-1/engine" 15 "github.com/koko1123/flow-go-1/engine/access/rpc" 16 "github.com/koko1123/flow-go-1/engine/common/fifoqueue" 17 "github.com/koko1123/flow-go-1/model/flow" 18 "github.com/koko1123/flow-go-1/model/flow/filter" 19 "github.com/koko1123/flow-go-1/module" 20 "github.com/koko1123/flow-go-1/module/component" 21 "github.com/koko1123/flow-go-1/module/irrecoverable" 22 "github.com/koko1123/flow-go-1/module/mempool/stdmap" 23 "github.com/koko1123/flow-go-1/network" 24 "github.com/koko1123/flow-go-1/network/channels" 25 "github.com/koko1123/flow-go-1/state/protocol" 26 "github.com/koko1123/flow-go-1/storage" 27 "github.com/koko1123/flow-go-1/utils/logging" 28 ) 29 30 // time to wait for the all the missing collections to be received at node startup 31 const collectionCatchupTimeout = 30 * time.Second 32 33 // time to poll the storage to check if missing collections have been received 34 const collectionCatchupDBPollInterval = 10 * time.Millisecond 35 36 // time to update the FullBlockHeight index 37 const fullBlockUpdateInterval = 1 * time.Minute 38 39 // a threshold of number of blocks with missing collections beyond which collections should be re-requested 40 // this is to prevent spamming the collection nodes with request 41 const missingCollsForBlkThreshold = 100 42 43 // a threshold of block height beyond which collections should be re-requested (regardless of the number of blocks for which collection are missing) 44 // this is to ensure that if a collection is missing for a long time (in terms of block height) it is eventually re-requested 45 const missingCollsForAgeThreshold = 100 46 47 // default queue capacity 48 const defaultQueueCapacity = 10_000 49 50 var defaultCollectionCatchupTimeout = collectionCatchupTimeout 51 var defaultCollectionCatchupDBPollInterval = collectionCatchupDBPollInterval 52 var defaultFullBlockUpdateInterval = fullBlockUpdateInterval 53 var defaultMissingCollsForBlkThreshold = missingCollsForBlkThreshold 54 var defaultMissingCollsForAgeThreshold = missingCollsForAgeThreshold 55 56 // Engine represents the ingestion engine, used to funnel data from other nodes 57 // to a centralized location that can be queried by a user 58 type Engine struct { 59 *component.ComponentManager 60 messageHandler *engine.MessageHandler 61 executionReceiptsNotifier engine.Notifier 62 executionReceiptsQueue engine.MessageStore 63 finalizedBlockNotifier engine.Notifier 64 finalizedBlockQueue engine.MessageStore 65 66 log zerolog.Logger // used to log relevant actions with context 67 state protocol.State // used to access the protocol state 68 me module.Local // used to access local node information 69 request module.Requester // used to request collections 70 71 // storage 72 // FIX: remove direct DB access by substituting indexer module 73 blocks storage.Blocks 74 headers storage.Headers 75 collections storage.Collections 76 transactions storage.Transactions 77 executionReceipts storage.ExecutionReceipts 78 executionResults storage.ExecutionResults 79 80 // metrics 81 transactionMetrics module.TransactionMetrics 82 collectionsToMarkFinalized *stdmap.Times 83 collectionsToMarkExecuted *stdmap.Times 84 blocksToMarkExecuted *stdmap.Times 85 86 rpcEngine *rpc.Engine 87 } 88 89 // New creates a new access ingestion engine 90 func New( 91 log zerolog.Logger, 92 net network.Network, 93 state protocol.State, 94 me module.Local, 95 request module.Requester, 96 blocks storage.Blocks, 97 headers storage.Headers, 98 collections storage.Collections, 99 transactions storage.Transactions, 100 executionResults storage.ExecutionResults, 101 executionReceipts storage.ExecutionReceipts, 102 transactionMetrics module.TransactionMetrics, 103 collectionsToMarkFinalized *stdmap.Times, 104 collectionsToMarkExecuted *stdmap.Times, 105 blocksToMarkExecuted *stdmap.Times, 106 rpcEngine *rpc.Engine, 107 ) (*Engine, error) { 108 executionReceiptsRawQueue, err := fifoqueue.NewFifoQueue(defaultQueueCapacity) 109 if err != nil { 110 return nil, fmt.Errorf("could not create execution receipts queue: %w", err) 111 } 112 113 executionReceiptsQueue := &engine.FifoMessageStore{FifoQueue: executionReceiptsRawQueue} 114 115 finalizedBlocksRawQueue, err := fifoqueue.NewFifoQueue(defaultQueueCapacity) 116 if err != nil { 117 return nil, fmt.Errorf("could not create finalized block queue: %w", err) 118 } 119 120 finalizedBlocksQueue := &engine.FifoMessageStore{FifoQueue: finalizedBlocksRawQueue} 121 122 messageHandler := engine.NewMessageHandler( 123 log, 124 engine.NewNotifier(), 125 engine.Pattern{ 126 Match: func(msg *engine.Message) bool { 127 _, ok := msg.Payload.(*model.Block) 128 return ok 129 }, 130 Store: finalizedBlocksQueue, 131 }, 132 engine.Pattern{ 133 Match: func(msg *engine.Message) bool { 134 _, ok := msg.Payload.(*flow.ExecutionReceipt) 135 return ok 136 }, 137 Store: executionReceiptsQueue, 138 }, 139 ) 140 141 // initialize the propagation engine with its dependencies 142 e := &Engine{ 143 log: log.With().Str("engine", "ingestion").Logger(), 144 state: state, 145 me: me, 146 request: request, 147 blocks: blocks, 148 headers: headers, 149 collections: collections, 150 transactions: transactions, 151 executionResults: executionResults, 152 executionReceipts: executionReceipts, 153 transactionMetrics: transactionMetrics, 154 collectionsToMarkFinalized: collectionsToMarkFinalized, 155 collectionsToMarkExecuted: collectionsToMarkExecuted, 156 blocksToMarkExecuted: blocksToMarkExecuted, 157 rpcEngine: rpcEngine, 158 159 // queue / notifier for execution receipts 160 executionReceiptsNotifier: engine.NewNotifier(), 161 executionReceiptsQueue: executionReceiptsQueue, 162 163 // queue / notifier for finalized blocks 164 finalizedBlockNotifier: engine.NewNotifier(), 165 finalizedBlockQueue: finalizedBlocksQueue, 166 167 messageHandler: messageHandler, 168 } 169 170 // Add workers 171 e.ComponentManager = component.NewComponentManagerBuilder(). 172 AddWorker(e.processBackground). 173 AddWorker(e.processExecutionReceipts). 174 AddWorker(e.processFinalizedBlocks). 175 Build() 176 177 // register engine with the execution receipt provider 178 _, err = net.Register(channels.ReceiveReceipts, e) 179 if err != nil { 180 return nil, fmt.Errorf("could not register for results: %w", err) 181 } 182 183 return e, nil 184 } 185 186 func (e *Engine) Start(parent irrecoverable.SignalerContext) { 187 rootBlock, err := e.state.Params().Root() 188 if err != nil { 189 parent.Throw(fmt.Errorf("failed to get root block: %w", err)) 190 } 191 192 // if spork root snapshot 193 rootSnapshot := e.state.AtBlockID(rootBlock.ID()) 194 195 isSporkRootSnapshot, err := protocol.IsSporkRootSnapshot(rootSnapshot) 196 if err != nil { 197 parent.Throw(fmt.Errorf("could not check if root snapshot is a spork root snapshot: %w", err)) 198 } 199 200 // This is useful for dynamically bootstrapped access node, they will request missing collections. In order to ensure all txs 201 // from the missing collections can be verified, we must ensure they are referencing to known blocks. 202 // That's why we set the full block height to be rootHeight + TransactionExpiry, so that we only request missing collections 203 // in blocks above that height. 204 if isSporkRootSnapshot { 205 // for snapshot with a single block in the sealing segment the first full block is the root block. 206 err := e.blocks.InsertLastFullBlockHeightIfNotExists(rootBlock.Height) 207 if err != nil { 208 parent.Throw(fmt.Errorf("failed to update last full block height during ingestion engine startup: %w", err)) 209 } 210 } else { 211 // for midspork snapshots with a sealing segment that has more than 1 block add the transaction expiry to the root block height to avoid 212 // requesting resources for blocks below the expiry. 213 firstFullHeight := rootBlock.Height + flow.DefaultTransactionExpiry 214 err := e.blocks.InsertLastFullBlockHeightIfNotExists(firstFullHeight) 215 if err != nil { 216 parent.Throw(fmt.Errorf("failed to update last full block height during ingestion engine startup: %w", err)) 217 } 218 } 219 220 e.ComponentManager.Start(parent) 221 } 222 223 func (e *Engine) processBackground(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 224 // context with timeout 225 requestCtx, cancel := context.WithTimeout(ctx, defaultCollectionCatchupTimeout) 226 defer cancel() 227 228 // request missing collections 229 err := e.requestMissingCollections(requestCtx) 230 if err != nil { 231 e.log.Error().Err(err).Msg("requesting missing collections failed") 232 } 233 ready() 234 235 ticker := time.NewTicker(defaultFullBlockUpdateInterval) 236 for { 237 select { 238 case <-ctx.Done(): 239 return 240 case <-ticker.C: 241 e.updateLastFullBlockReceivedIndex() 242 } 243 } 244 } 245 246 func (e *Engine) processExecutionReceipts(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 247 ready() 248 notifier := e.executionReceiptsNotifier.Channel() 249 250 for { 251 select { 252 case <-ctx.Done(): 253 return 254 case <-notifier: 255 err := e.processAvailableExecutionReceipts(ctx) 256 if err != nil { 257 // if an error reaches this point, it is unexpected 258 ctx.Throw(err) 259 return 260 } 261 } 262 } 263 } 264 265 func (e *Engine) processAvailableExecutionReceipts(ctx context.Context) error { 266 for { 267 select { 268 case <-ctx.Done(): 269 return nil 270 default: 271 } 272 msg, ok := e.executionReceiptsQueue.Get() 273 if !ok { 274 return nil 275 } 276 277 receipt := msg.Payload.(*flow.ExecutionReceipt) 278 279 if err := e.handleExecutionReceipt(msg.OriginID, receipt); err != nil { 280 return err 281 } 282 } 283 284 } 285 286 func (e *Engine) processFinalizedBlocks(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 287 ready() 288 notifier := e.finalizedBlockNotifier.Channel() 289 290 for { 291 select { 292 case <-ctx.Done(): 293 return 294 case <-notifier: 295 _ = e.processAvailableFinalizedBlocks(ctx) 296 } 297 } 298 } 299 300 func (e *Engine) processAvailableFinalizedBlocks(ctx context.Context) error { 301 for { 302 select { 303 case <-ctx.Done(): 304 return nil 305 default: 306 } 307 308 msg, ok := e.finalizedBlockQueue.Get() 309 if !ok { 310 return nil 311 } 312 313 hb := msg.Payload.(*model.Block) 314 blockID := hb.BlockID 315 316 if err := e.processFinalizedBlock(blockID); err != nil { 317 e.log.Error().Err(err).Hex("block_id", blockID[:]).Msg("failed to process block") 318 continue 319 } 320 321 e.trackFinalizedMetricForBlock(hb) 322 } 323 } 324 325 // process processes the given ingestion engine event. Events that are given 326 // to this function originate within the expulsion engine on the node with the 327 // given origin ID. 328 func (e *Engine) process(originID flow.Identifier, event interface{}) error { 329 select { 330 case <-e.ComponentManager.ShutdownSignal(): 331 return component.ErrComponentShutdown 332 default: 333 } 334 335 switch event.(type) { 336 case *flow.ExecutionReceipt: 337 err := e.messageHandler.Process(originID, event) 338 e.executionReceiptsNotifier.Notify() 339 return err 340 case *model.Block: 341 err := e.messageHandler.Process(originID, event) 342 e.finalizedBlockNotifier.Notify() 343 return err 344 default: 345 return fmt.Errorf("invalid event type (%T)", event) 346 } 347 } 348 349 // SubmitLocal submits an event originating on the local node. 350 func (e *Engine) SubmitLocal(event interface{}) { 351 err := e.process(e.me.NodeID(), event) 352 if err != nil { 353 engine.LogError(e.log, err) 354 } 355 } 356 357 // Submit submits the given event from the node with the given origin ID 358 // for processing in a non-blocking manner. It returns instantly and logs 359 // a potential processing error internally when done. 360 func (e *Engine) Submit(channel channels.Channel, originID flow.Identifier, event interface{}) { 361 err := e.process(originID, event) 362 if err != nil { 363 engine.LogError(e.log, err) 364 } 365 } 366 367 // ProcessLocal processes an event originating on the local node. 368 func (e *Engine) ProcessLocal(event interface{}) error { 369 return e.process(e.me.NodeID(), event) 370 } 371 372 // Process processes the given event from the node with the given origin ID in 373 // a blocking manner. It returns the potential processing error when done. 374 func (e *Engine) Process(channel channels.Channel, originID flow.Identifier, event interface{}) error { 375 return e.process(originID, event) 376 } 377 378 // OnFinalizedBlock is called by the follower engine after a block has been finalized and the state has been updated 379 func (e *Engine) OnFinalizedBlock(hb *model.Block) { 380 _ = e.ProcessLocal(hb) 381 } 382 383 // processBlock handles an incoming finalized block. 384 func (e *Engine) processFinalizedBlock(blockID flow.Identifier) error { 385 386 // TODO: consider using storage.Index.ByBlockID, the index contains collection id and seals ID 387 block, err := e.blocks.ByID(blockID) 388 if err != nil { 389 return fmt.Errorf("failed to lookup block: %w", err) 390 } 391 392 // Notify rpc handler of new finalized block height 393 e.rpcEngine.SubmitLocal(block) 394 395 // FIX: we can't index guarantees here, as we might have more than one block 396 // with the same collection as long as it is not finalized 397 398 // TODO: substitute an indexer module as layer between engine and storage 399 400 // index the block storage with each of the collection guarantee 401 err = e.blocks.IndexBlockForCollections(block.Header.ID(), flow.GetIDs(block.Payload.Guarantees)) 402 if err != nil { 403 return fmt.Errorf("could not index block for collections: %w", err) 404 } 405 406 // loop through seals and index ID -> result ID 407 for _, seal := range block.Payload.Seals { 408 err := e.executionResults.Index(seal.BlockID, seal.ResultID) 409 if err != nil { 410 return fmt.Errorf("could not index block for execution result: %w", err) 411 } 412 } 413 414 // skip requesting collections, if this block is below the last full block height 415 // this means that either we have already received these collections, or the block 416 // may contain unverifiable guarantees (in case this node has just joined the network) 417 lastFullBlockHeight, err := e.blocks.GetLastFullBlockHeight() 418 if err != nil { 419 return fmt.Errorf("could not get last full block height: %w", err) 420 } 421 422 if block.Header.Height <= lastFullBlockHeight { 423 e.log.Info().Msgf("skipping requesting collections for finalized block below last full block height (%d<=%d)", block.Header.Height, lastFullBlockHeight) 424 return nil 425 } 426 427 // queue requesting each of the collections from the collection node 428 e.requestCollectionsInFinalizedBlock(block.Payload.Guarantees) 429 430 return nil 431 } 432 433 func (e *Engine) trackFinalizedMetricForBlock(hb *model.Block) { 434 // TODO: consider using storage.Index.ByBlockID, the index contains collection id and seals ID 435 // retrieve the block 436 block, err := e.blocks.ByID(hb.BlockID) 437 if err != nil { 438 e.log.Warn().Err(err).Msg("could not track tx finalized metric: finalized block not found locally") 439 return 440 } 441 442 // TODO lookup actual finalization time by looking at the block finalizing `b` 443 now := time.Now().UTC() 444 445 // mark all transactions as finalized 446 // TODO: sample to reduce performance overhead 447 for _, g := range block.Payload.Guarantees { 448 l, err := e.collections.LightByID(g.CollectionID) 449 if errors.Is(err, storage.ErrNotFound) { 450 e.collectionsToMarkFinalized.Add(g.CollectionID, now) 451 continue 452 } else if err != nil { 453 e.log.Warn().Err(err).Str("collection_id", g.CollectionID.String()). 454 Msg("could not track tx finalized metric: finalized collection not found locally") 455 continue 456 } 457 458 for _, t := range l.Transactions { 459 e.transactionMetrics.TransactionFinalized(t, now) 460 } 461 } 462 463 if ti, found := e.blocksToMarkExecuted.ByID(hb.BlockID); found { 464 e.trackExecutedMetricForBlock(block, ti) 465 e.transactionMetrics.UpdateExecutionReceiptMaxHeight(block.Header.Height) 466 e.blocksToMarkExecuted.Remove(hb.BlockID) 467 } 468 } 469 470 func (e *Engine) handleExecutionReceipt(originID flow.Identifier, r *flow.ExecutionReceipt) error { 471 // persist the execution receipt locally, storing will also index the receipt 472 err := e.executionReceipts.Store(r) 473 if err != nil { 474 return fmt.Errorf("failed to store execution receipt: %w", err) 475 } 476 477 e.trackExecutionReceiptMetrics(r) 478 return nil 479 } 480 481 func (e *Engine) trackExecutionReceiptMetrics(r *flow.ExecutionReceipt) { 482 // TODO add actual execution time to execution receipt? 483 now := time.Now().UTC() 484 485 // retrieve the block 486 // TODO: consider using storage.Index.ByBlockID, the index contains collection id and seals ID 487 b, err := e.blocks.ByID(r.ExecutionResult.BlockID) 488 489 if errors.Is(err, storage.ErrNotFound) { 490 e.blocksToMarkExecuted.Add(r.ExecutionResult.BlockID, now) 491 return 492 } 493 494 if err != nil { 495 e.log.Warn().Err(err).Msg("could not track tx executed metric: executed block not found locally") 496 return 497 } 498 499 e.transactionMetrics.UpdateExecutionReceiptMaxHeight(b.Header.Height) 500 501 e.trackExecutedMetricForBlock(b, now) 502 } 503 504 func (e *Engine) trackExecutedMetricForBlock(block *flow.Block, ti time.Time) { 505 // mark all transactions as executed 506 // TODO: sample to reduce performance overhead 507 for _, g := range block.Payload.Guarantees { 508 l, err := e.collections.LightByID(g.CollectionID) 509 if errors.Is(err, storage.ErrNotFound) { 510 e.collectionsToMarkExecuted.Add(g.CollectionID, ti) 511 continue 512 } else if err != nil { 513 e.log.Warn().Err(err).Str("collection_id", g.CollectionID.String()). 514 Msg("could not track tx executed metric: executed collection not found locally") 515 continue 516 } 517 518 for _, t := range l.Transactions { 519 e.transactionMetrics.TransactionExecuted(t, ti) 520 } 521 } 522 } 523 524 // handleCollection handles the response of the a collection request made earlier when a block was received 525 func (e *Engine) handleCollection(originID flow.Identifier, entity flow.Entity) error { 526 527 // convert the entity to a strictly typed collection 528 collection, ok := entity.(*flow.Collection) 529 if !ok { 530 return fmt.Errorf("invalid entity type (%T)", entity) 531 } 532 533 light := collection.Light() 534 535 if ti, found := e.collectionsToMarkFinalized.ByID(light.ID()); found { 536 for _, t := range light.Transactions { 537 e.transactionMetrics.TransactionFinalized(t, ti) 538 } 539 e.collectionsToMarkFinalized.Remove(light.ID()) 540 } 541 542 if ti, found := e.collectionsToMarkExecuted.ByID(light.ID()); found { 543 for _, t := range light.Transactions { 544 e.transactionMetrics.TransactionExecuted(t, ti) 545 } 546 e.collectionsToMarkExecuted.Remove(light.ID()) 547 } 548 549 // FIX: we can't index guarantees here, as we might have more than one block 550 // with the same collection as long as it is not finalized 551 552 // store the light collection (collection minus the transaction body - those are stored separately) 553 // and add transaction ids as index 554 err := e.collections.StoreLightAndIndexByTransaction(&light) 555 if err != nil { 556 // ignore collection if already seen 557 if errors.Is(err, storage.ErrAlreadyExists) { 558 e.log.Debug(). 559 Hex("collection_id", logging.Entity(light)). 560 Msg("collection is already seen") 561 return nil 562 } 563 return err 564 } 565 566 // now store each of the transaction body 567 for _, tx := range collection.Transactions { 568 err := e.transactions.Store(tx) 569 if err != nil { 570 return fmt.Errorf("could not store transaction (%x): %w", tx.ID(), err) 571 } 572 } 573 574 return nil 575 } 576 577 func (e *Engine) OnCollection(originID flow.Identifier, entity flow.Entity) { 578 err := e.handleCollection(originID, entity) 579 if err != nil { 580 e.log.Error().Err(err).Msg("could not handle collection") 581 return 582 } 583 } 584 585 // OnBlockIncorporated is a noop for this engine since access node is only dealing with finalized blocks 586 func (e *Engine) OnBlockIncorporated(*model.Block) { 587 } 588 589 // OnDoubleProposeDetected is a noop for this engine since access node is only dealing with finalized blocks 590 func (e *Engine) OnDoubleProposeDetected(*model.Block, *model.Block) { 591 } 592 593 // requestMissingCollections requests missing collections for all blocks in the local db storage once at startup 594 func (e *Engine) requestMissingCollections(ctx context.Context) error { 595 596 var startHeight, endHeight uint64 597 598 // get the height of the last block for which all collections were received 599 lastFullHeight, err := e.blocks.GetLastFullBlockHeight() 600 if err != nil { 601 return fmt.Errorf("failed to complete requests for missing collections: %w", err) 602 } 603 604 // start from the next block 605 startHeight = lastFullHeight + 1 606 607 // end at the finalized block 608 finalBlk, err := e.state.Final().Head() 609 if err != nil { 610 return err 611 } 612 endHeight = finalBlk.Height 613 614 e.log.Info(). 615 Uint64("start_height", startHeight). 616 Uint64("end_height", endHeight). 617 Msg("starting collection catchup") 618 619 // collect all missing collection ids in a map 620 var missingCollMap = make(map[flow.Identifier]struct{}) 621 622 // iterate through the complete chain and request the missing collections 623 for i := startHeight; i <= endHeight; i++ { 624 625 // if deadline exceeded or someone cancelled the context 626 if ctx.Err() != nil { 627 return fmt.Errorf("failed to complete requests for missing collections: %w", ctx.Err()) 628 } 629 630 missingColls, err := e.missingCollectionsAtHeight(i) 631 if err != nil { 632 return fmt.Errorf("failed to retreive missing collections by height %d during collection catchup: %w", i, err) 633 } 634 635 // request the missing collections 636 e.requestCollectionsInFinalizedBlock(missingColls) 637 638 // add them to the missing collection id map to track later 639 for _, cg := range missingColls { 640 missingCollMap[cg.CollectionID] = struct{}{} 641 } 642 } 643 644 // if no collections were found to be missing we are done. 645 if len(missingCollMap) == 0 { 646 // nothing more to do 647 e.log.Info().Msg("no missing collections found") 648 return nil 649 } 650 651 // the collection catchup needs to happen ASAP when the node starts up. Hence, force the requester to dispatch all request 652 e.request.Force() 653 654 // track progress of retrieving all the missing collections by polling the db periodically 655 ticker := time.NewTicker(defaultCollectionCatchupDBPollInterval) 656 defer ticker.Stop() 657 658 // while there are still missing collections, keep polling 659 for len(missingCollMap) > 0 { 660 select { 661 case <-ctx.Done(): 662 // context may have expired 663 return fmt.Errorf("failed to complete collection retreival: %w", ctx.Err()) 664 case <-ticker.C: 665 666 // log progress 667 e.log.Info(). 668 Int("total_missing_collections", len(missingCollMap)). 669 Msg("retrieving missing collections...") 670 671 var foundColls []flow.Identifier 672 // query db to find if collections are still missing 673 for collId := range missingCollMap { 674 found, err := e.lookupCollection(collId) 675 if err != nil { 676 return err 677 } 678 // if collection found in local db, remove it from missingColls later 679 if found { 680 foundColls = append(foundColls, collId) 681 } 682 } 683 684 // update the missingColls list by removing collections that have now been received 685 for _, c := range foundColls { 686 delete(missingCollMap, c) 687 } 688 } 689 } 690 691 e.log.Info().Msg("collection catchup done") 692 return nil 693 } 694 695 // updateLastFullBlockReceivedIndex keeps the FullBlockHeight index upto date and requests missing collections if 696 // the number of blocks missing collection have reached the defaultMissingCollsForBlkThreshold value. 697 // (The FullBlockHeight index indicates that block for which all collections have been received) 698 func (e *Engine) updateLastFullBlockReceivedIndex() { 699 700 logError := func(err error) { 701 e.log.Error().Err(err).Msg("failed to update the last full block height") 702 } 703 704 lastFullHeight, err := e.blocks.GetLastFullBlockHeight() 705 if err != nil { 706 if !errors.Is(err, storage.ErrNotFound) { 707 logError(err) 708 return 709 } 710 // use the root height as the last full height 711 header, err := e.state.Params().Root() 712 if err != nil { 713 logError(err) 714 return 715 } 716 lastFullHeight = header.Height 717 } 718 719 e.log.Debug().Uint64("last_full_block_height", lastFullHeight).Msg("updating LastFullBlockReceived index...") 720 721 finalBlk, err := e.state.Final().Head() 722 if err != nil { 723 logError(err) 724 return 725 } 726 finalizedHeight := finalBlk.Height 727 728 // track number of incomplete blocks 729 incompleteBlksCnt := 0 730 731 // track the latest contiguous full height 732 latestFullHeight := lastFullHeight 733 734 // collect all missing collections 735 var allMissingColls []*flow.CollectionGuarantee 736 737 // start from the next block till we either hit the finalized block or cross the max collection missing threshold 738 for i := lastFullHeight + 1; i <= finalizedHeight && incompleteBlksCnt < defaultMissingCollsForBlkThreshold; i++ { 739 740 // find missing collections for block at height i 741 missingColls, err := e.missingCollectionsAtHeight(i) 742 if err != nil { 743 logError(err) 744 return 745 } 746 747 // if there are missing collections 748 if len(missingColls) > 0 { 749 750 // increment number of incomplete blocks 751 incompleteBlksCnt++ 752 753 // collect the missing collections for requesting later 754 allMissingColls = append(allMissingColls, missingColls...) 755 756 continue 757 } 758 759 // if there are no missing collections so far, advance the latestFullHeight pointer 760 if incompleteBlksCnt == 0 { 761 latestFullHeight = i 762 } 763 } 764 765 // if more contiguous blocks are now complete, update db 766 if latestFullHeight > lastFullHeight { 767 err = e.blocks.UpdateLastFullBlockHeight(latestFullHeight) 768 if err != nil { 769 logError(err) 770 return 771 } 772 } 773 774 // additionally, if more than threshold blocks have missing collection OR collections are missing since defaultMissingCollsForAgeThreshold, re-request those collections 775 if incompleteBlksCnt >= defaultMissingCollsForBlkThreshold || (finalizedHeight-lastFullHeight) > uint64(defaultMissingCollsForAgeThreshold) { 776 // warn log since this should generally not happen 777 e.log.Warn(). 778 Int("missing_collection_blk_count", incompleteBlksCnt). 779 Int("threshold", defaultMissingCollsForBlkThreshold). 780 Uint64("last_full_blk_height", latestFullHeight). 781 Msg("re-requesting missing collections") 782 e.requestCollectionsInFinalizedBlock(allMissingColls) 783 } 784 785 e.log.Debug().Uint64("last_full_blk_height", latestFullHeight).Msg("updated LastFullBlockReceived index") 786 } 787 788 // missingCollectionsAtHeight returns all missing collection guarantees at a given height 789 func (e *Engine) missingCollectionsAtHeight(h uint64) ([]*flow.CollectionGuarantee, error) { 790 blk, err := e.blocks.ByHeight(h) 791 if err != nil { 792 return nil, fmt.Errorf("failed to retreive block by height %d: %w", h, err) 793 } 794 795 var missingColls []*flow.CollectionGuarantee 796 for _, guarantee := range blk.Payload.Guarantees { 797 798 collID := guarantee.CollectionID 799 found, err := e.lookupCollection(collID) 800 if err != nil { 801 return nil, err 802 } 803 if !found { 804 missingColls = append(missingColls, guarantee) 805 } 806 } 807 return missingColls, nil 808 } 809 810 // lookupCollection looks up the collection from the collection db with collID 811 func (e *Engine) lookupCollection(collId flow.Identifier) (bool, error) { 812 _, err := e.collections.LightByID(collId) 813 if err == nil { 814 return true, nil 815 } 816 if errors.Is(err, storage.ErrNotFound) { 817 return false, nil 818 } 819 return false, fmt.Errorf("failed to retreive collection %s: %w", collId.String(), err) 820 } 821 822 // requestCollectionsInFinalizedBlock registers collection requests with the requester engine 823 func (e *Engine) requestCollectionsInFinalizedBlock(missingColls []*flow.CollectionGuarantee) { 824 for _, cg := range missingColls { 825 // TODO: move this query out of for loop? 826 guarantors, err := protocol.FindGuarantors(e.state, cg) 827 if err != nil { 828 // failed to find guarantors for guarantees contained in a finalized block is fatal error 829 e.log.Fatal().Err(err).Msgf("could not find guarantors for guarantee %v", cg.ID()) 830 } 831 e.request.EntityByID(cg.ID(), filter.HasNodeID(guarantors...)) 832 } 833 }