github.com/onflow/flow-go@v0.33.17/cmd/execution_builder.go (about) 1 package cmd 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "os" 8 "path" 9 "path/filepath" 10 goruntime "runtime" 11 "strings" 12 "time" 13 14 awsconfig "github.com/aws/aws-sdk-go-v2/config" 15 "github.com/aws/aws-sdk-go-v2/service/s3" 16 badgerDB "github.com/dgraph-io/badger/v2" 17 "github.com/ipfs/boxo/bitswap" 18 "github.com/ipfs/go-cid" 19 badger "github.com/ipfs/go-ds-badger2" 20 "github.com/onflow/flow-core-contracts/lib/go/templates" 21 "github.com/rs/zerolog" 22 "github.com/rs/zerolog/log" 23 "github.com/shirou/gopsutil/v3/cpu" 24 "github.com/shirou/gopsutil/v3/host" 25 "github.com/shirou/gopsutil/v3/mem" 26 "github.com/vmihailenco/msgpack" 27 "go.uber.org/atomic" 28 29 "github.com/onflow/flow-go/admin/commands" 30 executionCommands "github.com/onflow/flow-go/admin/commands/execution" 31 stateSyncCommands "github.com/onflow/flow-go/admin/commands/state_synchronization" 32 storageCommands "github.com/onflow/flow-go/admin/commands/storage" 33 uploaderCommands "github.com/onflow/flow-go/admin/commands/uploader" 34 "github.com/onflow/flow-go/cmd/build" 35 "github.com/onflow/flow-go/consensus" 36 "github.com/onflow/flow-go/consensus/hotstuff" 37 "github.com/onflow/flow-go/consensus/hotstuff/committees" 38 "github.com/onflow/flow-go/consensus/hotstuff/notifications" 39 "github.com/onflow/flow-go/consensus/hotstuff/notifications/pubsub" 40 "github.com/onflow/flow-go/consensus/hotstuff/signature" 41 "github.com/onflow/flow-go/consensus/hotstuff/validator" 42 "github.com/onflow/flow-go/consensus/hotstuff/verification" 43 recovery "github.com/onflow/flow-go/consensus/recovery/protocol" 44 "github.com/onflow/flow-go/engine" 45 followereng "github.com/onflow/flow-go/engine/common/follower" 46 "github.com/onflow/flow-go/engine/common/provider" 47 "github.com/onflow/flow-go/engine/common/requester" 48 "github.com/onflow/flow-go/engine/common/synchronization" 49 "github.com/onflow/flow-go/engine/execution/checker" 50 "github.com/onflow/flow-go/engine/execution/computation" 51 "github.com/onflow/flow-go/engine/execution/computation/committer" 52 "github.com/onflow/flow-go/engine/execution/ingestion" 53 "github.com/onflow/flow-go/engine/execution/ingestion/fetcher" 54 "github.com/onflow/flow-go/engine/execution/ingestion/loader" 55 "github.com/onflow/flow-go/engine/execution/ingestion/stop" 56 "github.com/onflow/flow-go/engine/execution/ingestion/uploader" 57 exeprovider "github.com/onflow/flow-go/engine/execution/provider" 58 "github.com/onflow/flow-go/engine/execution/rpc" 59 "github.com/onflow/flow-go/engine/execution/scripts" 60 "github.com/onflow/flow-go/engine/execution/state" 61 "github.com/onflow/flow-go/engine/execution/state/bootstrap" 62 "github.com/onflow/flow-go/engine/execution/storehouse" 63 "github.com/onflow/flow-go/fvm" 64 "github.com/onflow/flow-go/fvm/storage/snapshot" 65 "github.com/onflow/flow-go/fvm/systemcontracts" 66 ledgerpkg "github.com/onflow/flow-go/ledger" 67 "github.com/onflow/flow-go/ledger/common/pathfinder" 68 ledger "github.com/onflow/flow-go/ledger/complete" 69 "github.com/onflow/flow-go/ledger/complete/wal" 70 bootstrapFilenames "github.com/onflow/flow-go/model/bootstrap" 71 modelbootstrap "github.com/onflow/flow-go/model/bootstrap" 72 "github.com/onflow/flow-go/model/flow" 73 "github.com/onflow/flow-go/model/flow/filter" 74 "github.com/onflow/flow-go/model/messages" 75 "github.com/onflow/flow-go/module" 76 "github.com/onflow/flow-go/module/blobs" 77 "github.com/onflow/flow-go/module/chainsync" 
78 "github.com/onflow/flow-go/module/executiondatasync/execution_data" 79 execdatacache "github.com/onflow/flow-go/module/executiondatasync/execution_data/cache" 80 exedataprovider "github.com/onflow/flow-go/module/executiondatasync/provider" 81 "github.com/onflow/flow-go/module/executiondatasync/pruner" 82 "github.com/onflow/flow-go/module/executiondatasync/tracker" 83 "github.com/onflow/flow-go/module/finalizedreader" 84 finalizer "github.com/onflow/flow-go/module/finalizer/consensus" 85 "github.com/onflow/flow-go/module/mempool/herocache" 86 "github.com/onflow/flow-go/module/mempool/queue" 87 "github.com/onflow/flow-go/module/metrics" 88 edrequester "github.com/onflow/flow-go/module/state_synchronization/requester" 89 "github.com/onflow/flow-go/network" 90 "github.com/onflow/flow-go/network/channels" 91 "github.com/onflow/flow-go/network/p2p/blob" 92 "github.com/onflow/flow-go/network/underlay" 93 "github.com/onflow/flow-go/state/protocol" 94 badgerState "github.com/onflow/flow-go/state/protocol/badger" 95 "github.com/onflow/flow-go/state/protocol/blocktimer" 96 storageerr "github.com/onflow/flow-go/storage" 97 bstorage "github.com/onflow/flow-go/storage/badger" 98 storage "github.com/onflow/flow-go/storage/badger" 99 "github.com/onflow/flow-go/storage/badger/procedure" 100 storagepebble "github.com/onflow/flow-go/storage/pebble" 101 sutil "github.com/onflow/flow-go/storage/util" 102 ) 103 104 const ( 105 blockDataUploaderMaxRetry uint64 = 5 106 blockdataUploaderRetryTimeout = 1 * time.Second 107 ) 108 109 type ExecutionNodeBuilder struct { 110 *FlowNodeBuilder // the common configs as a node 111 exeConf *ExecutionConfig // the configs and flags specific for execution node 112 } 113 114 func NewExecutionNodeBuilder(nodeBuilder *FlowNodeBuilder) *ExecutionNodeBuilder { 115 return &ExecutionNodeBuilder{ 116 FlowNodeBuilder: nodeBuilder, 117 exeConf: &ExecutionConfig{}, 118 } 119 } 120 121 func (builder *ExecutionNodeBuilder) LoadFlags() { 122 builder.FlowNodeBuilder. 123 ExtraFlags(builder.exeConf.SetupFlags). 124 ValidateFlags(builder.exeConf.ValidateFlags) 125 } 126 127 // ExecutionNode contains the running modules and their loading code. 
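// The fields below are populated incrementally: each Load* method in this file is registered as a module or component in LoadComponentsAndModules and fills in its corresponding field when the node scaffold runs it.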
128 type ExecutionNode struct { 129 builder *FlowNodeBuilder // This is needed for accessing the ShutdownFunc 130 exeConf *ExecutionConfig 131 132 ingestionUnit *engine.Unit 133 134 collector module.ExecutionMetrics 135 executionState state.ExecutionState 136 followerState protocol.FollowerState 137 committee hotstuff.DynamicCommittee 138 ledgerStorage *ledger.Ledger 139 registerStore *storehouse.RegisterStore 140 events *storage.Events 141 serviceEvents *storage.ServiceEvents 142 txResults *storage.TransactionResults 143 results *storage.ExecutionResults 144 myReceipts *storage.MyExecutionReceipts 145 providerEngine exeprovider.ProviderEngine 146 checkerEng *checker.Engine 147 syncCore *chainsync.Core 148 syncEngine *synchronization.Engine 149 followerCore *hotstuff.FollowerLoop // follower hotstuff logic 150 followerEng *followereng.ComplianceEngine // to sync blocks from consensus nodes 151 computationManager *computation.Manager 152 collectionRequester *requester.Engine 153 ingestionEng *ingestion.Engine 154 scriptsEng *scripts.Engine 155 followerDistributor *pubsub.FollowerDistributor 156 checkAuthorizedAtBlock func(blockID flow.Identifier) (bool, error) 157 diskWAL *wal.DiskWAL 158 blockDataUploader *uploader.Manager 159 executionDataStore execution_data.ExecutionDataStore 160 toTriggerCheckpoint *atomic.Bool // create the checkpoint trigger to be controlled by admin tool, and listened by the compactor 161 stopControl *stop.StopControl // stop the node at given block height 162 executionDataDatastore *badger.Datastore 163 executionDataPruner *pruner.Pruner 164 executionDataBlobstore blobs.Blobstore 165 executionDataTracker tracker.Storage 166 blobService network.BlobService 167 blobserviceDependable *module.ProxiedReadyDoneAware 168 } 169 170 func (builder *ExecutionNodeBuilder) LoadComponentsAndModules() { 171 172 exeNode := &ExecutionNode{ 173 builder: builder.FlowNodeBuilder, 174 exeConf: builder.exeConf, 175 toTriggerCheckpoint: atomic.NewBool(false), 176 ingestionUnit: engine.NewUnit(), 177 } 178 179 builder.FlowNodeBuilder. 180 AdminCommand("read-execution-data", func(config *NodeConfig) commands.AdminCommand { 181 return stateSyncCommands.NewReadExecutionDataCommand(exeNode.executionDataStore) 182 }). 183 AdminCommand("trigger-checkpoint", func(config *NodeConfig) commands.AdminCommand { 184 return executionCommands.NewTriggerCheckpointCommand(exeNode.toTriggerCheckpoint) 185 }). 186 AdminCommand("stop-at-height", func(config *NodeConfig) commands.AdminCommand { 187 return executionCommands.NewStopAtHeightCommand(exeNode.stopControl) 188 }). 189 AdminCommand("set-uploader-enabled", func(config *NodeConfig) commands.AdminCommand { 190 return uploaderCommands.NewToggleUploaderCommand(exeNode.blockDataUploader) 191 }). 192 AdminCommand("get-transactions", func(conf *NodeConfig) commands.AdminCommand { 193 return storageCommands.NewGetTransactionsCommand(conf.State, conf.Storage.Payloads, conf.Storage.Collections) 194 }). 195 AdminCommand("protocol-snapshot", func(conf *NodeConfig) commands.AdminCommand { 196 return storageCommands.NewProtocolSnapshotCommand( 197 conf.Logger, 198 conf.State, 199 conf.Storage.Headers, 200 conf.Storage.Seals, 201 exeNode.exeConf.triedir, 202 ) 203 }). 204 Module("mutable follower state", exeNode.LoadMutableFollowerState). 205 Module("system specs", exeNode.LoadSystemSpecs). 206 Module("execution metrics", exeNode.LoadExecutionMetrics). 207 Module("sync core", exeNode.LoadSyncCore). 
208 Module("execution receipts storage", exeNode.LoadExecutionReceiptsStorage). 209 Module("follower distributor", exeNode.LoadFollowerDistributor). 210 Module("authorization checking function", exeNode.LoadAuthorizationCheckingFunction). 211 Module("execution data datastore", exeNode.LoadExecutionDataDatastore). 212 Module("execution data getter", exeNode.LoadExecutionDataGetter). 213 Module("blobservice peer manager dependencies", exeNode.LoadBlobservicePeerManagerDependencies). 214 Module("bootstrap", exeNode.LoadBootstrapper). 215 Module("register store", exeNode.LoadRegisterStore). 216 Component("execution state ledger", exeNode.LoadExecutionStateLedger). 217 218 // TODO: Modules should be able to depend on components. 219 // Because all modules are always bootstrapped first, before components, 220 // it's not possible to have a module depend on a Component. 221 // This is the case for the StopControl, which needs to query the ExecutionState, which in turn needs the execution state ledger. 222 // For now we use a dummy component and keep the bootstrapping steps properly separated, 223 // so it will be easier to follow and refactor later. 224 Component("execution state", exeNode.LoadExecutionState). 225 Component("stop control", exeNode.LoadStopControl). 226 Component("execution state ledger WAL compactor", exeNode.LoadExecutionStateLedgerWALCompactor). 227 // disable the execution data pruner for now, since storehouse is going to need the execution data 228 // for recovery. 229 // TODO: revisit this once storehouse has implemented the new WAL for the checkpoint file of the 230 // payloadless trie. 231 // Component("execution data pruner", exeNode.LoadExecutionDataPruner). 232 Component("blob service", exeNode.LoadBlobService). 233 Component("block data upload manager", exeNode.LoadBlockUploaderManager). 234 Component("GCP block data uploader", exeNode.LoadGCPBlockDataUploader). 235 Component("S3 block data uploader", exeNode.LoadS3BlockDataUploader). 236 Component("provider engine", exeNode.LoadProviderEngine). 237 Component("checker engine", exeNode.LoadCheckerEngine). 238 Component("ingestion engine", exeNode.LoadIngestionEngine). 239 Component("scripts engine", exeNode.LoadScriptsEngine). 240 Component("consensus committee", exeNode.LoadConsensusCommittee). 241 Component("follower core", exeNode.LoadFollowerCore). 242 Component("follower engine", exeNode.LoadFollowerEngine). 243 Component("collection requester engine", exeNode.LoadCollectionRequesterEngine). 244 Component("receipt provider engine", exeNode.LoadReceiptProviderEngine). 245 Component("synchronization engine", exeNode.LoadSynchronizationEngine). 246 Component("grpc server", exeNode.LoadGrpcServer). 247 Component("observer collection indexer", exeNode.LoadObserverCollectionIndexer) 248 } 249 250 func (exeNode *ExecutionNode) LoadMutableFollowerState(node *NodeConfig) error { 251 // For now, we only support state implementations from package badger.
252 // If we ever support different implementations, the following can be replaced by a type-aware factory 253 bState, ok := node.State.(*badgerState.State) 254 if !ok { 255 return fmt.Errorf("only implementations of type badger.State are currently supported but read-only state has type %T", node.State) 256 } 257 var err error 258 exeNode.followerState, err = badgerState.NewFollowerState( 259 node.Logger, 260 node.Tracer, 261 node.ProtocolEvents, 262 bState, 263 node.Storage.Index, 264 node.Storage.Payloads, 265 blocktimer.DefaultBlockTimer, 266 ) 267 return err 268 } 269 270 func (exeNode *ExecutionNode) LoadSystemSpecs(node *NodeConfig) error { 271 sysInfoLogger := node.Logger.With().Str("system", "specs").Logger() 272 err := logSysInfo(sysInfoLogger) 273 if err != nil { 274 sysInfoLogger.Error().Err(err) 275 } 276 return nil 277 } 278 279 func (exeNode *ExecutionNode) LoadExecutionMetrics(node *NodeConfig) error { 280 exeNode.collector = metrics.NewExecutionCollector(node.Tracer) 281 282 // report the highest executed block height as soon as possible 283 // this is guaranteed to exist because LoadBootstrapper has inserted 284 // the root block as executed block 285 var height uint64 286 var blockID flow.Identifier 287 err := node.DB.View(procedure.GetHighestExecutedBlock(&height, &blockID)) 288 if err != nil { 289 // database has not been bootstrapped yet 290 if errors.Is(err, storageerr.ErrNotFound) { 291 return nil 292 } 293 return fmt.Errorf("could not get highest executed block: %w", err) 294 } 295 296 exeNode.collector.ExecutionLastExecutedBlockHeight(height) 297 return nil 298 } 299 300 func (exeNode *ExecutionNode) LoadSyncCore(node *NodeConfig) error { 301 var err error 302 exeNode.syncCore, err = chainsync.New(node.Logger, node.SyncCoreConfig, metrics.NewChainSyncCollector(node.RootChainID), node.RootChainID) 303 return err 304 } 305 306 func (exeNode *ExecutionNode) LoadExecutionReceiptsStorage( 307 node *NodeConfig, 308 ) error { 309 exeNode.results = storage.NewExecutionResults(node.Metrics.Cache, node.DB) 310 exeNode.myReceipts = storage.NewMyExecutionReceipts(node.Metrics.Cache, node.DB, node.Storage.Receipts.(*storage.ExecutionReceipts)) 311 return nil 312 } 313 314 func (exeNode *ExecutionNode) LoadFollowerDistributor(node *NodeConfig) error { 315 exeNode.followerDistributor = pubsub.NewFollowerDistributor() 316 exeNode.followerDistributor.AddProposalViolationConsumer(notifications.NewSlashingViolationsConsumer(node.Logger)) 317 return nil 318 } 319 320 func (exeNode *ExecutionNode) LoadBlobService( 321 node *NodeConfig, 322 ) ( 323 module.ReadyDoneAware, 324 error, 325 ) { 326 // build list of Access nodes that are allowed to request execution data from this node 327 var allowedANs map[flow.Identifier]bool 328 if exeNode.exeConf.executionDataAllowedPeers != "" { 329 ids := strings.Split(exeNode.exeConf.executionDataAllowedPeers, ",") 330 allowedANs = make(map[flow.Identifier]bool, len(ids)) 331 for _, idHex := range ids { 332 anID, err := flow.HexStringToIdentifier(idHex) 333 if err != nil { 334 return nil, fmt.Errorf("invalid node ID %s: %w", idHex, err) 335 } 336 337 id, ok := exeNode.builder.IdentityProvider.ByNodeID(anID) 338 if !ok { 339 return nil, fmt.Errorf("allowed node ID %s is not in identity list", idHex) 340 } 341 342 if id.Role != flow.RoleAccess { 343 return nil, fmt.Errorf("allowed node ID %s is not an access node", id.NodeID.String()) 344 } 345 346 if id.Ejected { 347 return nil, fmt.Errorf("allowed node ID %s is ejected", id.NodeID.String()) 348 } 349 
350 allowedANs[anID] = true 351 } 352 } 353 354 opts := []network.BlobServiceOption{ 355 blob.WithBitswapOptions( 356 // Only allow block requests from staked ENs and ANs on the allowedANs list (if set) 357 bitswap.WithPeerBlockRequestFilter( 358 blob.AuthorizedRequester(allowedANs, exeNode.builder.IdentityProvider, exeNode.builder.Logger), 359 ), 360 bitswap.WithTracer( 361 blob.NewTracer(node.Logger.With().Str("blob_service", channels.ExecutionDataService.String()).Logger()), 362 ), 363 ), 364 } 365 366 if exeNode.exeConf.blobstoreRateLimit > 0 && exeNode.exeConf.blobstoreBurstLimit > 0 { 367 opts = append(opts, blob.WithRateLimit(float64(exeNode.exeConf.blobstoreRateLimit), exeNode.exeConf.blobstoreBurstLimit)) 368 } 369 370 edsChannel := channels.ExecutionDataService 371 if node.ObserverMode { 372 edsChannel = channels.PublicExecutionDataService 373 } 374 bs, err := node.EngineRegistry.RegisterBlobService(edsChannel, exeNode.executionDataDatastore, opts...) 375 if err != nil { 376 return nil, fmt.Errorf("failed to register blob service: %w", err) 377 } 378 exeNode.blobService = bs 379 380 // add blobservice into ReadyDoneAware dependency passed to peer manager 381 // this configures peer manager to wait for the blobservice to be ready before starting 382 exeNode.blobserviceDependable.Init(bs) 383 384 // blob service's lifecycle is managed by the network layer 385 return &module.NoopReadyDoneAware{}, nil 386 } 387 388 func (exeNode *ExecutionNode) LoadBlockUploaderManager( 389 node *NodeConfig, 390 ) ( 391 module.ReadyDoneAware, 392 error, 393 ) { 394 // blockDataUploader isn't a component, but needs to be initialized after the tracer, which is 395 // a component. 396 exeNode.blockDataUploader = uploader.NewManager(exeNode.builder.Tracer) 397 return &module.NoopReadyDoneAware{}, nil 398 } 399 400 func (exeNode *ExecutionNode) LoadGCPBlockDataUploader( 401 node *NodeConfig, 402 ) ( 403 module.ReadyDoneAware, 404 error, 405 ) { 406 // Since RetryableAsyncUploaderWrapper relies on executionDataService so we should create 407 // it after execution data service is fully setup. 408 if !exeNode.exeConf.enableBlockDataUpload || exeNode.exeConf.gcpBucketName == "" { 409 // Since we don't have conditional component creation, we just use Noop one. 
410 // Its functions will be called once per startup/shutdown - a non-measurable performance penalty. 411 // blockDataUploader will stay nil, which disables calling the uploader at all. 412 return &module.NoopReadyDoneAware{}, nil 413 } 414 415 logger := node.Logger.With().Str("component_name", "gcp_block_data_uploader").Logger() 416 gcpBucketUploader, err := uploader.NewGCPBucketUploader( 417 context.Background(), 418 exeNode.exeConf.gcpBucketName, 419 logger, 420 ) 421 if err != nil { 422 return nil, fmt.Errorf("cannot create GCP Bucket uploader: %w", err) 423 } 424 425 asyncUploader := uploader.NewAsyncUploader( 426 gcpBucketUploader, 427 blockdataUploaderRetryTimeout, 428 blockDataUploaderMaxRetry, 429 logger, 430 exeNode.collector, 431 ) 432 433 // Setting up RetryableUploader for GCP uploader 434 retryableUploader := uploader.NewBadgerRetryableUploaderWrapper( 435 asyncUploader, 436 node.Storage.Blocks, 437 node.Storage.Commits, 438 node.Storage.Collections, 439 exeNode.events, 440 exeNode.results, 441 exeNode.txResults, 442 storage.NewComputationResultUploadStatus(node.DB), 443 execution_data.NewDownloader(exeNode.blobService), 444 exeNode.collector) 445 if retryableUploader == nil { 446 return nil, errors.New("failed to create ComputationResult upload status store") 447 } 448 449 exeNode.blockDataUploader.AddUploader(retryableUploader) 450 451 return retryableUploader, nil 452 } 453 454 func (exeNode *ExecutionNode) LoadS3BlockDataUploader( 455 node *NodeConfig, 456 ) ( 457 module.ReadyDoneAware, 458 error, 459 ) { 460 if !exeNode.exeConf.enableBlockDataUpload || exeNode.exeConf.s3BucketName == "" { 461 // Since we don't have conditional component creation, we just use a Noop one. 462 // Its functions will be called once per startup/shutdown - a non-measurable performance penalty. 463 // blockDataUploader will stay nil, which disables calling the uploader at all. 464 return &module.NoopReadyDoneAware{}, nil 465 } 466 logger := node.Logger.With().Str("component_name", "s3_block_data_uploader").Logger() 467 468 ctx := context.Background() 469 config, err := awsconfig.LoadDefaultConfig(ctx) 470 if err != nil { 471 return nil, fmt.Errorf("failed to load AWS configuration: %w", err) 472 } 473 474 client := s3.NewFromConfig(config) 475 s3Uploader := uploader.NewS3Uploader( 476 ctx, 477 client, 478 exeNode.exeConf.s3BucketName, 479 logger, 480 ) 481 asyncUploader := uploader.NewAsyncUploader( 482 s3Uploader, 483 blockdataUploaderRetryTimeout, 484 blockDataUploaderMaxRetry, 485 logger, 486 exeNode.collector, 487 ) 488 489 // We are not enabling the RetryableUploader for the S3 uploader for now. When we need upload 490 // retry for multiple uploaders, we will need to use a different BadgerDB key prefix.
491 exeNode.blockDataUploader.AddUploader(asyncUploader) 492 493 return asyncUploader, nil 494 } 495 496 func (exeNode *ExecutionNode) LoadProviderEngine( 497 node *NodeConfig, 498 ) ( 499 module.ReadyDoneAware, 500 error, 501 ) { 502 if exeNode.blobService == nil { 503 return nil, errors.New("blob service is not initialized") 504 } 505 506 var providerMetrics module.ExecutionDataProviderMetrics = metrics.NewNoopCollector() 507 if node.MetricsEnabled { 508 providerMetrics = metrics.NewExecutionDataProviderCollector() 509 } 510 511 executionDataProvider := exedataprovider.NewProvider( 512 node.Logger, 513 providerMetrics, 514 execution_data.DefaultSerializer, 515 exeNode.blobService, 516 exeNode.executionDataTracker, 517 ) 518 519 // in case node.FvmOptions already set a logger, we don't want to override it 520 opts := append([]fvm.Option{ 521 fvm.WithLogger( 522 node.Logger.With().Str("module", "FVM").Logger(), 523 )}, 524 node.FvmOptions..., 525 ) 526 vmCtx := fvm.NewContext(opts...) 527 528 ledgerViewCommitter := committer.NewLedgerViewCommitter(exeNode.ledgerStorage, node.Tracer) 529 manager, err := computation.New( 530 node.Logger, 531 exeNode.collector, 532 node.Tracer, 533 node.Me, 534 node.State, 535 vmCtx, 536 ledgerViewCommitter, 537 executionDataProvider, 538 exeNode.exeConf.computationConfig, 539 ) 540 if err != nil { 541 return nil, err 542 } 543 exeNode.computationManager = manager 544 545 if node.ObserverMode { 546 exeNode.providerEngine = &exeprovider.NoopEngine{} 547 } else { 548 var chunkDataPackRequestQueueMetrics module.HeroCacheMetrics = metrics.NewNoopCollector() 549 if node.HeroCacheMetricsEnable { 550 chunkDataPackRequestQueueMetrics = metrics.ChunkDataPackRequestQueueMetricsFactory(node.MetricsRegisterer) 551 } 552 chdpReqQueue := queue.NewHeroStore(exeNode.exeConf.chunkDataPackRequestsCacheSize, node.Logger, chunkDataPackRequestQueueMetrics) 553 exeNode.providerEngine, err = exeprovider.New( 554 node.Logger, 555 node.Tracer, 556 node.EngineRegistry, 557 node.State, 558 exeNode.executionState, 559 exeNode.collector, 560 exeNode.checkAuthorizedAtBlock, 561 chdpReqQueue, 562 exeNode.exeConf.chunkDataPackRequestWorkers, 563 exeNode.exeConf.chunkDataPackQueryTimeout, 564 exeNode.exeConf.chunkDataPackDeliveryTimeout, 565 ) 566 if err != nil { 567 return nil, err 568 } 569 } 570 571 // Get latest executed block and a view at that block 572 ctx := context.Background() 573 height, blockID, err := exeNode.executionState.GetHighestExecutedBlockID(ctx) 574 if err != nil { 575 return nil, fmt.Errorf( 576 "cannot get the latest executed block id at height %v: %w", 577 height, err) 578 } 579 580 blockSnapshot, _, err := exeNode.executionState.CreateStorageSnapshot(blockID) 581 if err != nil { 582 tries, _ := exeNode.ledgerStorage.Tries() 583 trieInfo := "empty" 584 if len(tries) > 0 { 585 trieInfo = fmt.Sprintf("length: %v, 1st: %v, last: %v", len(tries), tries[0].RootHash(), tries[len(tries)-1].RootHash()) 586 } 587 588 return nil, fmt.Errorf("cannot create a storage snapshot at block %v at height %v, trie: %s: %w", blockID, 589 height, trieInfo, err) 590 } 591 592 // Get the epoch counter from the smart contract at the last executed block. 593 contractEpochCounter, err := getContractEpochCounter( 594 exeNode.computationManager.VM(), 595 vmCtx, 596 blockSnapshot) 597 // Failing to fetch the epoch counter from the smart contract is a fatal error. 
598 if err != nil { 599 return nil, fmt.Errorf("cannot get epoch counter from the smart contract at block %s at height %v: %w", 600 blockID.String(), height, err) 601 } 602 603 // Get the epoch counter form the protocol state, at the same block. 604 protocolStateEpochCounter, err := node.State. 605 AtBlockID(blockID). 606 Epochs(). 607 Current(). 608 Counter() 609 // Failing to fetch the epoch counter from the protocol state is a fatal error. 610 if err != nil { 611 return nil, fmt.Errorf("cannot get epoch counter from the protocol state at block %s: %w", blockID.String(), err) 612 } 613 614 l := node.Logger.With(). 615 Str("component", "provider engine"). 616 Uint64("contractEpochCounter", contractEpochCounter). 617 Uint64("protocolStateEpochCounter", protocolStateEpochCounter). 618 Str("blockID", blockID.String()). 619 Uint64("height", height). 620 Logger() 621 622 if contractEpochCounter != protocolStateEpochCounter { 623 // Do not error, because immediately following a spork they will be mismatching, 624 // until the resetEpoch transaction is submitted. 625 l.Warn(). 626 Msg("Epoch counter from the FlowEpoch smart contract and from the protocol state mismatch!") 627 } else { 628 l.Info(). 629 Msg("Epoch counter from the FlowEpoch smart contract and from the protocol state match.") 630 } 631 632 return exeNode.providerEngine, nil 633 } 634 635 func (exeNode *ExecutionNode) LoadAuthorizationCheckingFunction( 636 node *NodeConfig, 637 ) error { 638 639 exeNode.checkAuthorizedAtBlock = func(blockID flow.Identifier) (bool, error) { 640 return protocol.IsNodeAuthorizedAt(node.State.AtBlockID(blockID), node.Me.NodeID()) 641 } 642 return nil 643 } 644 645 func (exeNode *ExecutionNode) LoadExecutionDataDatastore( 646 node *NodeConfig, 647 ) error { 648 datastoreDir := filepath.Join(exeNode.exeConf.executionDataDir, "blobstore") 649 err := os.MkdirAll(datastoreDir, 0700) 650 if err != nil { 651 return err 652 } 653 dsOpts := &badger.DefaultOptions 654 ds, err := badger.NewDatastore(datastoreDir, dsOpts) 655 if err != nil { 656 return err 657 } 658 exeNode.executionDataDatastore = ds 659 exeNode.builder.ShutdownFunc(ds.Close) 660 return nil 661 } 662 663 func (exeNode *ExecutionNode) LoadBlobservicePeerManagerDependencies(node *NodeConfig) error { 664 exeNode.blobserviceDependable = module.NewProxiedReadyDoneAware() 665 exeNode.builder.PeerManagerDependencies.Add(exeNode.blobserviceDependable) 666 return nil 667 } 668 669 func (exeNode *ExecutionNode) LoadExecutionDataGetter(node *NodeConfig) error { 670 exeNode.executionDataBlobstore = blobs.NewBlobstore(exeNode.executionDataDatastore) 671 exeNode.executionDataStore = execution_data.NewExecutionDataStore(exeNode.executionDataBlobstore, execution_data.DefaultSerializer) 672 return nil 673 } 674 675 func openChunkDataPackDB(dbPath string, logger zerolog.Logger) (*badgerDB.DB, error) { 676 log := sutil.NewLogger(logger) 677 678 opts := badgerDB. 679 DefaultOptions(dbPath). 680 WithKeepL0InMemory(true). 681 WithLogger(log). 682 683 // the ValueLogFileSize option specifies how big the value of a 684 // key-value pair is allowed to be saved into badger. 685 // exceeding this limit, will fail with an error like this: 686 // could not store data: Value with size <xxxx> exceeded 1073741824 limit 687 // Maximum value size is 10G, needed by execution node 688 // TODO: finding a better max value for each node type 689 WithValueLogFileSize(256 << 23). 
690 WithValueLogMaxEntries(100000) // Default is 1000000 691 692 db, err := badgerDB.Open(opts) 693 if err != nil { 694 return nil, fmt.Errorf("could not open chunk data pack badger db at path %v: %w", dbPath, err) 695 } 696 return db, nil 697 } 698 699 func (exeNode *ExecutionNode) LoadExecutionState( 700 node *NodeConfig, 701 ) ( 702 module.ReadyDoneAware, 703 error, 704 ) { 705 706 chunkDataPackDB, err := openChunkDataPackDB(exeNode.exeConf.chunkDataPackDir, node.Logger) 707 if err != nil { 708 return nil, err 709 } 710 exeNode.builder.ShutdownFunc(func() error { 711 if err := chunkDataPackDB.Close(); err != nil { 712 return fmt.Errorf("error closing chunk data pack database: %w", err) 713 } 714 return nil 715 }) 716 chunkDataPacks := storage.NewChunkDataPacks(node.Metrics.Cache, chunkDataPackDB, node.Storage.Collections, exeNode.exeConf.chunkDataPackCacheSize) 717 718 // Needed for gRPC server, make sure to assign to main scoped vars 719 exeNode.events = storage.NewEvents(node.Metrics.Cache, node.DB) 720 exeNode.serviceEvents = storage.NewServiceEvents(node.Metrics.Cache, node.DB) 721 exeNode.txResults = storage.NewTransactionResults(node.Metrics.Cache, node.DB, exeNode.exeConf.transactionResultsCacheSize) 722 723 exeNode.executionState = state.NewExecutionState( 724 exeNode.ledgerStorage, 725 node.Storage.Commits, 726 node.Storage.Blocks, 727 node.Storage.Headers, 728 node.Storage.Collections, 729 chunkDataPacks, 730 exeNode.results, 731 exeNode.myReceipts, 732 exeNode.events, 733 exeNode.serviceEvents, 734 exeNode.txResults, 735 node.DB, 736 node.Tracer, 737 exeNode.registerStore, 738 exeNode.exeConf.enableStorehouse, 739 ) 740 741 height, _, err := exeNode.executionState.GetHighestExecutedBlockID(context.Background()) 742 if err != nil { 743 return nil, fmt.Errorf("could not get highest executed block: %w", err) 744 } 745 746 log.Info().Msgf("execution state highest executed block height: %v", height) 747 exeNode.collector.ExecutionLastExecutedBlockHeight(height) 748 749 return &module.NoopReadyDoneAware{}, nil 750 } 751 752 func (exeNode *ExecutionNode) LoadStopControl( 753 node *NodeConfig, 754 ) ( 755 module.ReadyDoneAware, 756 error, 757 ) { 758 ver, err := build.Semver() 759 if err != nil { 760 err = fmt.Errorf("could not set semver version for stop control. "+ 761 "version %s is not semver compliant: %w", build.Version(), err) 762 763 // The node would not know its own version. Without this the node would not know 764 // how to reach to version boundaries. 765 exeNode.builder.Logger. 766 Err(err). 767 Msg("error starting stop control") 768 769 return nil, err 770 } 771 772 latestFinalizedBlock, err := node.State.Final().Head() 773 if err != nil { 774 return nil, fmt.Errorf("could not get latest finalized block: %w", err) 775 } 776 777 stopControl := stop.NewStopControl( 778 exeNode.ingestionUnit, 779 exeNode.exeConf.maxGracefulStopDuration, 780 exeNode.builder.Logger, 781 exeNode.executionState, 782 node.Storage.Headers, 783 node.Storage.VersionBeacons, 784 ver, 785 latestFinalizedBlock, 786 // TODO: rename to exeNode.exeConf.executionStopped to make it more consistent 787 exeNode.exeConf.pauseExecution, 788 true, 789 ) 790 // stopControl needs to consume BlockFinalized events. 
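// Subscribing it to protocol events lets it observe finalized blocks, so it can stop execution at an admin-configured stop height or at a version-beacon boundary.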
791 node.ProtocolEvents.AddConsumer(stopControl) 792 793 exeNode.stopControl = stopControl 794 795 return stopControl, nil 796 } 797 798 func (exeNode *ExecutionNode) LoadRegisterStore( 799 node *NodeConfig, 800 ) error { 801 if !exeNode.exeConf.enableStorehouse { 802 node.Logger.Info().Msg("register store disabled") 803 return nil 804 } 805 806 node.Logger.Info(). 807 Str("pebble_db_path", exeNode.exeConf.registerDir). 808 Msg("register store enabled") 809 pebbledb, err := storagepebble.OpenRegisterPebbleDB(exeNode.exeConf.registerDir) 810 811 if err != nil { 812 return fmt.Errorf("could not create disk register store: %w", err) 813 } 814 815 // close pebble db on shut down 816 exeNode.builder.ShutdownFunc(func() error { 817 err := pebbledb.Close() 818 if err != nil { 819 return fmt.Errorf("could not close register store: %w", err) 820 } 821 return nil 822 }) 823 824 bootstrapped, err := storagepebble.IsBootstrapped(pebbledb) 825 if err != nil { 826 return fmt.Errorf("could not check if registers db is bootstrapped: %w", err) 827 } 828 829 node.Logger.Info().Msgf("register store bootstrapped: %v", bootstrapped) 830 831 if !bootstrapped { 832 checkpointFile := path.Join(exeNode.exeConf.triedir, modelbootstrap.FilenameWALRootCheckpoint) 833 sealedRoot, err := node.State.Params().SealedRoot() 834 if err != nil { 835 return fmt.Errorf("could not get sealed root: %w", err) 836 } 837 838 rootSeal, err := node.State.Params().Seal() 839 if err != nil { 840 return fmt.Errorf("could not get root seal: %w", err) 841 } 842 843 if sealedRoot.ID() != rootSeal.BlockID { 844 return fmt.Errorf("mismatching root seal and sealed root: %v != %v", sealedRoot.ID(), rootSeal.BlockID) 845 } 846 847 checkpointHeight := sealedRoot.Height 848 rootHash := ledgerpkg.RootHash(rootSeal.FinalState) 849 850 err = bootstrap.ImportRegistersFromCheckpoint(node.Logger, checkpointFile, checkpointHeight, rootHash, pebbledb, exeNode.exeConf.importCheckpointWorkerCount) 851 if err != nil { 852 return fmt.Errorf("could not import registers from checkpoint: %w", err) 853 } 854 } 855 diskStore, err := storagepebble.NewRegisters(pebbledb) 856 if err != nil { 857 return fmt.Errorf("could not create registers storage: %w", err) 858 } 859 860 reader := finalizedreader.NewFinalizedReader(node.Storage.Headers, node.LastFinalizedHeader.Height) 861 node.ProtocolEvents.AddConsumer(reader) 862 notifier := storehouse.NewRegisterStoreMetrics(exeNode.collector) 863 864 // report latest finalized and executed height as metrics 865 notifier.OnFinalizedAndExecutedHeightUpdated(diskStore.LatestHeight()) 866 867 registerStore, err := storehouse.NewRegisterStore( 868 diskStore, 869 nil, // TODO: replace with real WAL 870 reader, 871 node.Logger, 872 notifier, 873 ) 874 if err != nil { 875 return err 876 } 877 878 exeNode.registerStore = registerStore 879 return nil 880 } 881 882 func (exeNode *ExecutionNode) LoadExecutionStateLedger( 883 node *NodeConfig, 884 ) ( 885 module.ReadyDoneAware, 886 error, 887 ) { 888 // DiskWal is a dependent component because we need to ensure 889 // that all WAL updates are completed before closing opened WAL segment. 
890 var err error 891 exeNode.diskWAL, err = wal.NewDiskWAL(node.Logger.With().Str("subcomponent", "wal").Logger(), 892 node.MetricsRegisterer, exeNode.collector, exeNode.exeConf.triedir, int(exeNode.exeConf.mTrieCacheSize), pathfinder.PathByteSize, wal.SegmentSize) 893 if err != nil { 894 return nil, fmt.Errorf("failed to initialize wal: %w", err) 895 } 896 897 exeNode.ledgerStorage, err = ledger.NewLedger(exeNode.diskWAL, int(exeNode.exeConf.mTrieCacheSize), exeNode.collector, node.Logger.With().Str("subcomponent", 898 "ledger").Logger(), ledger.DefaultPathFinderVersion) 899 return exeNode.ledgerStorage, err 900 } 901 902 func (exeNode *ExecutionNode) LoadExecutionStateLedgerWALCompactor( 903 node *NodeConfig, 904 ) ( 905 module.ReadyDoneAware, 906 error, 907 ) { 908 return ledger.NewCompactor( 909 exeNode.ledgerStorage, 910 exeNode.diskWAL, 911 node.Logger.With().Str("subcomponent", "checkpointer").Logger(), 912 uint(exeNode.exeConf.mTrieCacheSize), 913 exeNode.exeConf.checkpointDistance, 914 exeNode.exeConf.checkpointsToKeep, 915 exeNode.toTriggerCheckpoint, // compactor will listen to the signal from admin tool for force triggering checkpointing 916 exeNode.collector, 917 ) 918 } 919 920 func (exeNode *ExecutionNode) LoadExecutionDataPruner( 921 node *NodeConfig, 922 ) ( 923 module.ReadyDoneAware, 924 error, 925 ) { 926 sealed, err := node.State.Sealed().Head() 927 if err != nil { 928 return nil, fmt.Errorf("cannot get the sealed block: %w", err) 929 } 930 931 trackerDir := filepath.Join(exeNode.exeConf.executionDataDir, "tracker") 932 exeNode.executionDataTracker, err = tracker.OpenStorage( 933 trackerDir, 934 sealed.Height, 935 node.Logger, 936 tracker.WithPruneCallback(func(c cid.Cid) error { 937 // TODO: use a proper context here 938 return exeNode.executionDataBlobstore.DeleteBlob(context.TODO(), c) 939 }), 940 ) 941 if err != nil { 942 return nil, err 943 } 944 945 // by default, pruning is disabled 946 if exeNode.exeConf.executionDataPrunerHeightRangeTarget == 0 { 947 return &module.NoopReadyDoneAware{}, nil 948 } 949 950 var prunerMetrics module.ExecutionDataPrunerMetrics = metrics.NewNoopCollector() 951 if node.MetricsEnabled { 952 prunerMetrics = metrics.NewExecutionDataPrunerCollector() 953 } 954 955 exeNode.executionDataPruner, err = pruner.NewPruner( 956 node.Logger, 957 prunerMetrics, 958 exeNode.executionDataTracker, 959 pruner.WithPruneCallback(func(ctx context.Context) error { 960 return exeNode.executionDataDatastore.CollectGarbage(ctx) 961 }), 962 pruner.WithHeightRangeTarget(exeNode.exeConf.executionDataPrunerHeightRangeTarget), 963 pruner.WithThreshold(exeNode.exeConf.executionDataPrunerThreshold), 964 ) 965 return exeNode.executionDataPruner, err 966 } 967 968 func (exeNode *ExecutionNode) LoadObserverCollectionIndexer( 969 node *NodeConfig, 970 ) ( 971 module.ReadyDoneAware, 972 error, 973 ) { 974 if !node.ObserverMode { 975 node.Logger.Info().Msg("execution data downloader is disabled") 976 return &module.NoopReadyDoneAware{}, nil 977 } 978 979 node.Logger.Info().Msg("observer-mode is enabled, creating execution data downloader") 980 981 execDataDistributor := edrequester.NewExecutionDataDistributor() 982 983 executionDataDownloader := execution_data.NewDownloader(exeNode.blobService) 984 985 var heroCacheCollector module.HeroCacheMetrics = metrics.NewNoopCollector() 986 execDataCacheBackend := herocache.NewBlockExecutionData(10, node.Logger, heroCacheCollector) 987 988 // Execution Data cache that a downloader as the backend 989 // If the execution data 
doesn't exist, it uses the downloader to fetch it 990 executionDataCache := execdatacache.NewExecutionDataCache( 991 executionDataDownloader, 992 node.Storage.Headers, 993 node.Storage.Seals, 994 node.Storage.Results, 995 execDataCacheBackend, 996 ) 997 998 processedBlockHeight := bstorage.NewConsumerProgress(node.DB, module.ConsumeProgressExecutionDataRequesterBlockHeight) 999 processedNotifications := bstorage.NewConsumerProgress(node.DB, module.ConsumeProgressExecutionDataRequesterNotification) 1000 1001 executionDataConfig := edrequester.ExecutionDataConfig{ 1002 InitialBlockHeight: node.SealedRootBlock.Header.Height, 1003 MaxSearchAhead: edrequester.DefaultMaxSearchAhead, 1004 FetchTimeout: edrequester.DefaultFetchTimeout, 1005 MaxFetchTimeout: edrequester.DefaultMaxFetchTimeout, 1006 RetryDelay: edrequester.DefaultRetryDelay, 1007 MaxRetryDelay: edrequester.DefaultMaxRetryDelay, 1008 } 1009 1010 r, err := edrequester.New( 1011 node.Logger, 1012 metrics.NewExecutionDataRequesterCollector(), 1013 executionDataDownloader, 1014 executionDataCache, 1015 processedBlockHeight, 1016 processedNotifications, 1017 node.State, 1018 node.Storage.Headers, 1019 executionDataConfig, 1020 execDataDistributor, 1021 ) 1022 1023 if err != nil { 1024 return &module.NoopReadyDoneAware{}, err 1025 } 1026 1027 // subscribe the block finalization event, and trigger workers to fetch execution data 1028 exeNode.followerDistributor.AddOnBlockFinalizedConsumer(r.OnBlockFinalized) 1029 1030 execDataDistributor.AddOnExecutionDataReceivedConsumer(func(data *execution_data.BlockExecutionDataEntity) { 1031 res := &messages.EntityResponse{} 1032 for _, chunk := range data.BlockExecutionData.ChunkExecutionDatas { 1033 col := chunk.Collection 1034 blob, _ := msgpack.Marshal(col) 1035 res.EntityIDs = append(res.EntityIDs, col.ID()) 1036 res.Blobs = append(res.Blobs, blob) 1037 } 1038 1039 // notify the collection requester that collections have been received 1040 err := exeNode.collectionRequester.ProcessLocal(res) 1041 if err != nil { 1042 node.Logger.Fatal().Err(err).Msgf("failed to process collection from local execution data for block %v", data.BlockExecutionData.BlockID) 1043 } 1044 }) 1045 1046 return r, nil 1047 } 1048 1049 func (exeNode *ExecutionNode) LoadCheckerEngine( 1050 node *NodeConfig, 1051 ) ( 1052 module.ReadyDoneAware, 1053 error, 1054 ) { 1055 if !exeNode.exeConf.enableChecker { 1056 node.Logger.Warn().Msgf("checker engine is disabled") 1057 return &module.NoopReadyDoneAware{}, nil 1058 } 1059 1060 node.Logger.Info().Msgf("checker engine is enabled") 1061 1062 core := checker.NewCore( 1063 node.Logger, 1064 node.State, 1065 exeNode.executionState, 1066 ) 1067 exeNode.checkerEng = checker.NewEngine(core) 1068 return exeNode.checkerEng, nil 1069 } 1070 1071 func (exeNode *ExecutionNode) LoadIngestionEngine( 1072 node *NodeConfig, 1073 ) ( 1074 module.ReadyDoneAware, 1075 error, 1076 ) { 1077 engineRegister := node.EngineRegistry 1078 if node.ObserverMode { 1079 engineRegister = &underlay.NoopEngineRegister{} 1080 } 1081 1082 var err error 1083 exeNode.collectionRequester, err = requester.New(node.Logger, node.Metrics.Engine, engineRegister, node.Me, node.State, 1084 channels.RequestCollections, 1085 filter.Any, 1086 func() flow.Entity { return &flow.Collection{} }, 1087 // we are manually triggering batches in execution, but lets still send off a batch once a minute, as a safety net for the sake of retries 1088 requester.WithBatchInterval(exeNode.exeConf.requestInterval), 1089 // consistency of 
collection can be checked by checking hash, and hash comes from trusted source (blocks from consensus follower) 1090 // hence we not need to check origin 1091 requester.WithValidateStaking(false), 1092 ) 1093 1094 if err != nil { 1095 return nil, fmt.Errorf("could not create requester engine: %w", err) 1096 } 1097 1098 fetcher := fetcher.NewCollectionFetcher(node.Logger, exeNode.collectionRequester, node.State, exeNode.exeConf.onflowOnlyLNs) 1099 var blockLoader ingestion.BlockLoader 1100 if exeNode.exeConf.enableStorehouse { 1101 blockLoader = loader.NewUnfinalizedLoader(node.Logger, node.State, node.Storage.Headers, exeNode.executionState) 1102 } else { 1103 blockLoader = loader.NewUnexecutedLoader(node.Logger, node.State, node.Storage.Headers, exeNode.executionState) 1104 } 1105 1106 exeNode.ingestionEng, err = ingestion.New( 1107 exeNode.ingestionUnit, 1108 node.Logger, 1109 node.EngineRegistry, 1110 node.Me, 1111 fetcher, 1112 node.Storage.Headers, 1113 node.Storage.Blocks, 1114 node.Storage.Collections, 1115 exeNode.computationManager, 1116 exeNode.providerEngine, 1117 exeNode.executionState, 1118 exeNode.collector, 1119 node.Tracer, 1120 exeNode.exeConf.extensiveLog, 1121 exeNode.executionDataPruner, 1122 exeNode.blockDataUploader, 1123 exeNode.stopControl, 1124 blockLoader, 1125 ) 1126 1127 // TODO: we should solve these mutual dependencies better 1128 // => https://github.com/dapperlabs/flow-go/issues/4360 1129 exeNode.collectionRequester = exeNode.collectionRequester.WithHandle(exeNode.ingestionEng.OnCollection) 1130 1131 node.ProtocolEvents.AddConsumer(exeNode.ingestionEng) 1132 1133 return exeNode.ingestionEng, err 1134 } 1135 1136 // create scripts engine for handling script execution 1137 func (exeNode *ExecutionNode) LoadScriptsEngine(node *NodeConfig) (module.ReadyDoneAware, error) { 1138 1139 exeNode.scriptsEng = scripts.New( 1140 node.Logger, 1141 exeNode.computationManager.QueryExecutor(), 1142 exeNode.executionState, 1143 ) 1144 1145 return exeNode.scriptsEng, nil 1146 } 1147 1148 func (exeNode *ExecutionNode) LoadConsensusCommittee( 1149 node *NodeConfig, 1150 ) ( 1151 module.ReadyDoneAware, 1152 error, 1153 ) { 1154 // initialize consensus committee's membership state 1155 // This committee state is for the HotStuff follower, which follows the MAIN CONSENSUS Committee 1156 // Note: node.Me.NodeID() is not part of the consensus exeNode.committee 1157 committee, err := committees.NewConsensusCommittee(node.State, node.Me.NodeID()) 1158 if err != nil { 1159 return nil, fmt.Errorf("could not create Committee state for main consensus: %w", err) 1160 } 1161 node.ProtocolEvents.AddConsumer(committee) 1162 exeNode.committee = committee 1163 1164 return committee, nil 1165 } 1166 1167 func (exeNode *ExecutionNode) LoadFollowerCore( 1168 node *NodeConfig, 1169 ) ( 1170 module.ReadyDoneAware, 1171 error, 1172 ) { 1173 // create a finalizer that handles updating the protocol 1174 // state when the follower detects newly finalized blocks 1175 final := finalizer.NewFinalizer(node.DB, node.Storage.Headers, exeNode.followerState, node.Tracer) 1176 1177 finalized, pending, err := recovery.FindLatest(node.State, node.Storage.Headers) 1178 if err != nil { 1179 return nil, fmt.Errorf("could not find latest finalized block and pending blocks to recover consensus follower: %w", err) 1180 } 1181 1182 // creates a consensus follower with ingestEngine as the notifier 1183 // so that it gets notified upon each new finalized block 1184 exeNode.followerCore, err = consensus.NewFollower( 1185 
node.Logger, 1186 node.Metrics.Mempool, 1187 node.Storage.Headers, 1188 final, 1189 exeNode.followerDistributor, 1190 node.FinalizedRootBlock.Header, 1191 node.RootQC, 1192 finalized, 1193 pending, 1194 ) 1195 if err != nil { 1196 return nil, fmt.Errorf("could not create follower core logic: %w", err) 1197 } 1198 1199 return exeNode.followerCore, nil 1200 } 1201 1202 func (exeNode *ExecutionNode) LoadFollowerEngine( 1203 node *NodeConfig, 1204 ) ( 1205 module.ReadyDoneAware, 1206 error, 1207 ) { 1208 packer := signature.NewConsensusSigDataPacker(exeNode.committee) 1209 // initialize the verifier for the protocol consensus 1210 verifier := verification.NewCombinedVerifier(exeNode.committee, packer) 1211 validator := validator.New(exeNode.committee, verifier) 1212 1213 var heroCacheCollector module.HeroCacheMetrics = metrics.NewNoopCollector() 1214 if node.HeroCacheMetricsEnable { 1215 heroCacheCollector = metrics.FollowerCacheMetrics(node.MetricsRegisterer) 1216 } 1217 1218 core, err := followereng.NewComplianceCore( 1219 node.Logger, 1220 node.Metrics.Mempool, 1221 heroCacheCollector, 1222 exeNode.followerDistributor, 1223 exeNode.followerState, 1224 exeNode.followerCore, 1225 validator, 1226 exeNode.syncCore, 1227 node.Tracer, 1228 ) 1229 if err != nil { 1230 return nil, fmt.Errorf("could not create follower core: %w", err) 1231 } 1232 1233 exeNode.followerEng, err = followereng.NewComplianceLayer( 1234 node.Logger, 1235 node.EngineRegistry, 1236 node.Me, 1237 node.Metrics.Engine, 1238 node.Storage.Headers, 1239 node.LastFinalizedHeader, 1240 core, 1241 node.ComplianceConfig, 1242 ) 1243 if err != nil { 1244 return nil, fmt.Errorf("could not create follower engine: %w", err) 1245 } 1246 exeNode.followerDistributor.AddOnBlockFinalizedConsumer(exeNode.followerEng.OnFinalizedBlock) 1247 1248 return exeNode.followerEng, nil 1249 } 1250 1251 func (exeNode *ExecutionNode) LoadCollectionRequesterEngine( 1252 node *NodeConfig, 1253 ) ( 1254 module.ReadyDoneAware, 1255 error, 1256 ) { 1257 // We initialize the requester engine inside the ingestion engine due to the mutual dependency. However, in 1258 // order for it to properly start and shut down, we should still return it as its own engine here, so it can 1259 // be handled by the scaffold. 
1260 return exeNode.collectionRequester, nil 1261 } 1262 1263 func (exeNode *ExecutionNode) LoadReceiptProviderEngine( 1264 node *NodeConfig, 1265 ) ( 1266 module.ReadyDoneAware, 1267 error, 1268 ) { 1269 retrieve := func(blockID flow.Identifier) (flow.Entity, error) { 1270 return exeNode.myReceipts.MyReceipt(blockID) 1271 } 1272 1273 var receiptRequestQueueMetric module.HeroCacheMetrics = metrics.NewNoopCollector() 1274 if node.HeroCacheMetricsEnable { 1275 receiptRequestQueueMetric = metrics.ReceiptRequestsQueueMetricFactory(node.MetricsRegisterer) 1276 } 1277 receiptRequestQueue := queue.NewHeroStore(exeNode.exeConf.receiptRequestsCacheSize, node.Logger, receiptRequestQueueMetric) 1278 1279 engineRegister := node.EngineRegistry 1280 if node.ObserverMode { 1281 engineRegister = &underlay.NoopEngineRegister{} 1282 } 1283 eng, err := provider.New( 1284 node.Logger.With().Str("engine", "receipt_provider").Logger(), 1285 node.Metrics.Engine, 1286 engineRegister, 1287 node.Me, 1288 node.State, 1289 receiptRequestQueue, 1290 exeNode.exeConf.receiptRequestWorkers, 1291 channels.ProvideReceiptsByBlockID, 1292 filter.And( 1293 filter.HasWeight(true), 1294 filter.HasRole(flow.RoleConsensus), 1295 ), 1296 retrieve, 1297 ) 1298 return eng, err 1299 } 1300 1301 func (exeNode *ExecutionNode) LoadSynchronizationEngine( 1302 node *NodeConfig, 1303 ) ( 1304 module.ReadyDoneAware, 1305 error, 1306 ) { 1307 // initialize the synchronization engine 1308 //var err error 1309 spamConfig, err := synchronization.NewSpamDetectionConfig() 1310 if err != nil { 1311 return nil, fmt.Errorf("could not initialize spam detection config: %w", err) 1312 } 1313 1314 exeNode.syncEngine, err = synchronization.New( 1315 node.Logger, 1316 node.Metrics.Engine, 1317 node.EngineRegistry, 1318 node.Me, 1319 node.State, 1320 node.Storage.Blocks, 1321 exeNode.followerEng, 1322 exeNode.syncCore, 1323 node.SyncEngineIdentifierProvider, 1324 spamConfig, 1325 ) 1326 if err != nil { 1327 return nil, fmt.Errorf("could not initialize synchronization engine: %w", err) 1328 } 1329 exeNode.followerDistributor.AddFinalizationConsumer(exeNode.syncEngine) 1330 1331 return exeNode.syncEngine, nil 1332 } 1333 1334 func (exeNode *ExecutionNode) LoadGrpcServer( 1335 node *NodeConfig, 1336 ) ( 1337 module.ReadyDoneAware, 1338 error, 1339 ) { 1340 return rpc.New( 1341 node.Logger, 1342 exeNode.exeConf.rpcConf, 1343 exeNode.scriptsEng, 1344 node.Storage.Headers, 1345 node.State, 1346 exeNode.events, 1347 exeNode.results, 1348 exeNode.txResults, 1349 node.Storage.Commits, 1350 node.RootChainID, 1351 signature.NewBlockSignerDecoder(exeNode.committee), 1352 exeNode.exeConf.apiRatelimits, 1353 exeNode.exeConf.apiBurstlimits, 1354 ), nil 1355 } 1356 1357 func (exeNode *ExecutionNode) LoadBootstrapper(node *NodeConfig) error { 1358 1359 // check if the execution database already exists 1360 bootstrapper := bootstrap.NewBootstrapper(node.Logger) 1361 1362 commit, bootstrapped, err := bootstrapper.IsBootstrapped(node.DB) 1363 if err != nil { 1364 return fmt.Errorf("could not query database to know whether database has been bootstrapped: %w", err) 1365 } 1366 1367 // if the execution database does not exist, then we need to bootstrap the execution database. 
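// Bootstrapping verifies that the root checkpoint's root hash matches the root seal's final state, copies the checkpoint from the bootstrap folder into the trie directory, and seeds the execution database from the root seal.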
1368 if !bootstrapped { 1369 err := wal.CheckpointHasRootHash( 1370 node.Logger, 1371 path.Join(node.BootstrapDir, bootstrapFilenames.DirnameExecutionState), 1372 bootstrapFilenames.FilenameWALRootCheckpoint, 1373 ledgerpkg.RootHash(node.RootSeal.FinalState), 1374 ) 1375 if err != nil { 1376 return err 1377 } 1378 1379 // when bootstrapping, the bootstrap folder must contain a checkpoint file 1380 // we need to copy this file to the trie folder so the trie, and with it the execution state, can be restored. 1381 err = copyBootstrapState(node.BootstrapDir, exeNode.exeConf.triedir) 1382 if err != nil { 1383 return fmt.Errorf("could not load bootstrap state from checkpoint file: %w", err) 1384 } 1385 1386 err = bootstrapper.BootstrapExecutionDatabase(node.DB, node.RootSeal) 1387 if err != nil { 1388 return fmt.Errorf("could not bootstrap execution database: %w", err) 1389 } 1390 } else { 1391 // if the execution database has been bootstrapped, then the root state commitment must equal the one 1392 // in the bootstrap folder 1393 if commit != node.RootSeal.FinalState { 1394 return fmt.Errorf("mismatching root state commitment. database has state commitment: %x, "+ 1395 "bootstrap has state commitment: %x", 1396 commit, node.RootSeal.FinalState) 1397 } 1398 } 1399 1400 return nil 1401 } 1402 1403 // getContractEpochCounter gets the epoch counter from the FlowEpoch smart 1404 // contract using the snapshot provided. 1405 func getContractEpochCounter( 1406 vm fvm.VM, 1407 vmCtx fvm.Context, 1408 snapshot snapshot.StorageSnapshot, 1409 ) ( 1410 uint64, 1411 error, 1412 ) { 1413 sc := systemcontracts.SystemContractsForChain(vmCtx.Chain.ChainID()) 1414 1415 // Generate the script to get the epoch counter from the FlowEpoch smart contract 1416 scriptCode := templates.GenerateGetCurrentEpochCounterScript(sc.AsTemplateEnv()) 1417 script := fvm.Script(scriptCode) 1418 1419 // execute the script 1420 _, output, err := vm.Run(vmCtx, script, snapshot) 1421 if err != nil { 1422 return 0, fmt.Errorf("could not read epoch counter, internal error while executing script: %w", err) 1423 } 1424 if output.Err != nil { 1425 return 0, fmt.Errorf("could not read epoch counter, script error: %w", output.Err) 1426 } 1427 if output.Value == nil { 1428 return 0, fmt.Errorf("could not read epoch counter, script returned no value") 1429 } 1430 1431 epochCounter := output.Value.ToGoValue().(uint64) 1432 return epochCounter, nil 1433 } 1434 1435 // copyBootstrapState copies the checkpoint file from the bootstrap folder to the execution state folder. 1436 // The checkpoint file is required to restore the trie, and has to be placed in the execution 1437 // state folder. 1438 // There are two ways to generate a checkpoint file: 1439 // 1. From a clean state. 1440 // Refer to the code in the testcase: TestGenerateExecutionState 1441 // 2. From a previous execution state 1442 // This is often used when sporking the network.
1443 // Use the execution-state-extract util commandline to generate a checkpoint file from 1444 // a previous checkpoint file 1445 func copyBootstrapState(dir, trie string) error { 1446 filename := "" 1447 firstCheckpointFilename := "00000000" 1448 1449 fileExists := func(fileName string) bool { 1450 _, err := os.Stat(filepath.Join(dir, bootstrapFilenames.DirnameExecutionState, fileName)) 1451 return err == nil 1452 } 1453 1454 // if there is a root checkpoint file, then copy that file over 1455 if fileExists(bootstrapFilenames.FilenameWALRootCheckpoint) { 1456 filename = bootstrapFilenames.FilenameWALRootCheckpoint 1457 } else if fileExists(firstCheckpointFilename) { 1458 // else if there is a checkpoint file, then copy that file over 1459 filename = firstCheckpointFilename 1460 } else { 1461 filePath := filepath.Join(dir, bootstrapFilenames.DirnameExecutionState, firstCheckpointFilename) 1462 1463 // include absolute path of the missing file in the error message 1464 absPath, err := filepath.Abs(filePath) 1465 if err != nil { 1466 absPath = filePath 1467 } 1468 1469 return fmt.Errorf("execution state file not found: %v", absPath) 1470 } 1471 1472 // copy from the bootstrap folder to the execution state folder 1473 from, to := path.Join(dir, bootstrapFilenames.DirnameExecutionState), trie 1474 1475 log.Info().Str("dir", dir).Str("trie", trie). 1476 Msgf("copying checkpoint file %v from directory: %v, to: %v", filename, from, to) 1477 1478 copiedFiles, err := wal.CopyCheckpointFile(filename, from, to) 1479 if err != nil { 1480 return fmt.Errorf("can not copy checkpoint file %s, from %s to %s", 1481 filename, from, to) 1482 } 1483 1484 for _, newPath := range copiedFiles { 1485 fmt.Printf("copied root checkpoint file from directory: %v, to: %v\n", from, newPath) 1486 } 1487 1488 return nil 1489 } 1490 1491 func logSysInfo(logger zerolog.Logger) error { 1492 1493 vmem, err := mem.VirtualMemory() 1494 if err != nil { 1495 return fmt.Errorf("failed to get virtual memory: %w", err) 1496 } 1497 1498 info, err := cpu.Info() 1499 if err != nil { 1500 return fmt.Errorf("failed to get cpu info: %w", err) 1501 } 1502 1503 logicalCores, err := cpu.Counts(true) 1504 if err != nil { 1505 return fmt.Errorf("failed to get logical cores: %w", err) 1506 } 1507 1508 physicalCores, err := cpu.Counts(false) 1509 if err != nil { 1510 return fmt.Errorf("failed to get physical cores: %w", err) 1511 } 1512 1513 if len(info) == 0 { 1514 return fmt.Errorf("cpu info length is 0") 1515 } 1516 1517 logger.Info().Msgf("CPU: ModelName=%s, MHz=%.0f, Family=%s, Model=%s, Stepping=%d, Microcode=%s, PhysicalCores=%d, LogicalCores=%d", 1518 info[0].ModelName, info[0].Mhz, info[0].Family, info[0].Model, info[0].Stepping, info[0].Microcode, physicalCores, logicalCores) 1519 1520 logger.Info().Msgf("RAM: Total=%d, Free=%d", vmem.Total, vmem.Free) 1521 1522 hostInfo, err := host.Info() 1523 if err != nil { 1524 return fmt.Errorf("failed to get platform info: %w", err) 1525 } 1526 logger.Info().Msgf("OS: OS=%s, Platform=%s, PlatformVersion=%s, KernelVersion=%s, Uptime: %d", 1527 hostInfo.OS, hostInfo.Platform, hostInfo.PlatformVersion, hostInfo.KernelVersion, hostInfo.Uptime) 1528 1529 // goruntime.GOMAXPROCS(0) doesn't modify any settings. 1530 logger.Info().Msgf("GO: GoVersion=%s, GOMAXPROCS=%d, NumCPU=%d", 1531 goruntime.Version(), goruntime.GOMAXPROCS(0), goruntime.NumCPU()) 1532 1533 return nil 1534 }
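// Note: this builder is typically driven from the execution node entry point (for example cmd/execution/main.go in this repository, assuming the standard command layout): the shared FlowNodeBuilder is wrapped with NewExecutionNodeBuilder, LoadFlags registers the execution-specific flags before the scaffold parses them, and LoadComponentsAndModules registers all of the modules and components above before the node is built and run.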