// Source: github.com/onflow/flow-go@v0.33.17/cmd/scaffold.go

package cmd

import (
	"context"
	"crypto/tls"
	"crypto/x509"
	"errors"
	"fmt"
	"os"
	"runtime"
	"strings"
	"time"

	gcemd "cloud.google.com/go/compute/metadata"
	"github.com/dgraph-io/badger/v2"
	"github.com/hashicorp/go-multierror"
	dht "github.com/libp2p/go-libp2p-kad-dht"
	"github.com/libp2p/go-libp2p/core/host"
	"github.com/libp2p/go-libp2p/core/peer"
	"github.com/libp2p/go-libp2p/core/routing"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/rs/zerolog"
	"github.com/spf13/pflag"
	"golang.org/x/time/rate"
	"google.golang.org/api/option"

	"github.com/onflow/flow-go/admin"
	"github.com/onflow/flow-go/admin/commands"
	"github.com/onflow/flow-go/admin/commands/common"
	storageCommands "github.com/onflow/flow-go/admin/commands/storage"
	"github.com/onflow/flow-go/cmd/build"
	"github.com/onflow/flow-go/config"
	"github.com/onflow/flow-go/consensus/hotstuff/persister"
	"github.com/onflow/flow-go/crypto"
	"github.com/onflow/flow-go/fvm"
	"github.com/onflow/flow-go/fvm/environment"
	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/model/flow/filter"
	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/module/chainsync"
	"github.com/onflow/flow-go/module/compliance"
	"github.com/onflow/flow-go/module/component"
	"github.com/onflow/flow-go/module/id"
	"github.com/onflow/flow-go/module/irrecoverable"
	"github.com/onflow/flow-go/module/local"
	"github.com/onflow/flow-go/module/mempool/herocache"
	"github.com/onflow/flow-go/module/metrics"
	"github.com/onflow/flow-go/module/profiler"
	"github.com/onflow/flow-go/module/trace"
	"github.com/onflow/flow-go/module/updatable_configs"
	"github.com/onflow/flow-go/module/util"
	"github.com/onflow/flow-go/network"
	alspmgr "github.com/onflow/flow-go/network/alsp/manager"
	netcache "github.com/onflow/flow-go/network/cache"
	"github.com/onflow/flow-go/network/channels"
	"github.com/onflow/flow-go/network/converter"
	"github.com/onflow/flow-go/network/p2p"
	p2pbuilder "github.com/onflow/flow-go/network/p2p/builder"
	p2pbuilderconfig "github.com/onflow/flow-go/network/p2p/builder/config"
	"github.com/onflow/flow-go/network/p2p/cache"
	"github.com/onflow/flow-go/network/p2p/conduit"
	"github.com/onflow/flow-go/network/p2p/connection"
	p2pdht "github.com/onflow/flow-go/network/p2p/dht"
	"github.com/onflow/flow-go/network/p2p/dns"
	"github.com/onflow/flow-go/network/p2p/keyutils"
	"github.com/onflow/flow-go/network/p2p/ping"
	"github.com/onflow/flow-go/network/p2p/subscription"
	"github.com/onflow/flow-go/network/p2p/translator"
	"github.com/onflow/flow-go/network/p2p/unicast/protocols"
	"github.com/onflow/flow-go/network/p2p/unicast/ratelimit"
	"github.com/onflow/flow-go/network/p2p/utils"
	"github.com/onflow/flow-go/network/p2p/utils/ratelimiter"
	"github.com/onflow/flow-go/network/slashing"
	"github.com/onflow/flow-go/network/topology"
	"github.com/onflow/flow-go/network/underlay"
	"github.com/onflow/flow-go/state/protocol"
	badgerState "github.com/onflow/flow-go/state/protocol/badger"
	"github.com/onflow/flow-go/state/protocol/events"
	"github.com/onflow/flow-go/state/protocol/events/gadgets"
	"github.com/onflow/flow-go/storage"
	bstorage "github.com/onflow/flow-go/storage/badger"
	"github.com/onflow/flow-go/storage/badger/operation"
	sutil "github.com/onflow/flow-go/storage/util"
	"github.com/onflow/flow-go/utils/logging"
)

// Component names. NetworkComponent and LibP2PNodeComponent are the names
// under which EnqueueNetworkInit registers the network and the libp2p node.
// ConduitFactoryComponent is presumably used the same way by code outside
// this file section — TODO confirm.
const (
	NetworkComponent        = "network"
	ConduitFactoryComponent = "conduit-factory"
	LibP2PNodeComponent     = "libp2p-node"
)

// Metrics groups the metrics collector interfaces that are handed to the
// components of a node, one field per subsystem.
type Metrics struct {
	Network        module.NetworkMetrics
	Engine         module.EngineMetrics
	Compliance     module.ComplianceMetrics
	Cache          module.CacheMetrics
	Mempool        module.MempoolMetrics
	CleanCollector module.CleanerMetrics
	Bitswap        module.BitswapMetrics
}
102 103 type Storage = storage.All 104 105 type namedModuleFunc struct { 106 fn BuilderFunc 107 name string 108 } 109 110 type namedComponentFunc struct { 111 fn ReadyDoneFactory 112 name string 113 114 errorHandler component.OnError 115 dependencies *DependencyList 116 } 117 118 // FlowNodeBuilder is the default builder struct used for all flow nodes 119 // It runs a node process with following structure, in sequential order 120 // Base inits (network, storage, state, logger) 121 // PostInit handlers, if any 122 // Components handlers, if any, wait sequentially 123 // Run() <- main loop 124 // Components destructors, if any 125 // The initialization can be proceeded and succeeded with PreInit and PostInit functions that allow customization 126 // of the process in case of nodes such as the unstaked access node where the NodeInfo is not part of the genesis data 127 type FlowNodeBuilder struct { 128 *NodeConfig 129 flags *pflag.FlagSet 130 modules []namedModuleFunc 131 components []namedComponentFunc 132 postShutdownFns []func() error 133 preInitFns []BuilderFunc 134 postInitFns []BuilderFunc 135 extraRootSnapshotCheck func(protocol.Snapshot) error 136 extraFlagCheck func() error 137 adminCommandBootstrapper *admin.CommandRunnerBootstrapper 138 adminCommands map[string]func(config *NodeConfig) commands.AdminCommand 139 componentBuilder component.ComponentManagerBuilder 140 bootstrapNodeAddresses []string 141 bootstrapNodePublicKeys []string 142 } 143 144 var _ NodeBuilder = (*FlowNodeBuilder)(nil) 145 146 func (fnb *FlowNodeBuilder) BaseFlags() { 147 defaultFlowConfig, err := config.DefaultConfig() 148 if err != nil { 149 fnb.Logger.Fatal().Err(err).Msg("failed to initialize flow config") 150 } 151 152 // initialize pflag set for Flow node 153 config.InitializePFlagSet(fnb.flags, defaultFlowConfig) 154 155 defaultConfig := DefaultBaseConfig() 156 157 // bind configuration parameters 158 fnb.flags.StringVar(&fnb.BaseConfig.nodeIDHex, "nodeid", 
defaultConfig.nodeIDHex, "identity of our node") 159 fnb.flags.StringVar(&fnb.BaseConfig.BindAddr, "bind", defaultConfig.BindAddr, "address to bind on") 160 fnb.flags.StringVarP(&fnb.BaseConfig.BootstrapDir, "bootstrapdir", "b", defaultConfig.BootstrapDir, "path to the bootstrap directory") 161 fnb.flags.StringVarP(&fnb.BaseConfig.datadir, "datadir", "d", defaultConfig.datadir, "directory to store the public database (protocol state)") 162 fnb.flags.StringVar(&fnb.BaseConfig.secretsdir, "secretsdir", defaultConfig.secretsdir, "directory to store private database (secrets)") 163 fnb.flags.StringVarP(&fnb.BaseConfig.level, "loglevel", "l", defaultConfig.level, "level for logging output") 164 fnb.flags.Uint32Var(&fnb.BaseConfig.debugLogLimit, "debug-log-limit", defaultConfig.debugLogLimit, "max number of debug/trace log events per second") 165 fnb.flags.UintVarP(&fnb.BaseConfig.metricsPort, "metricport", "m", defaultConfig.metricsPort, "port for /metrics endpoint") 166 fnb.flags.BoolVar(&fnb.BaseConfig.profilerConfig.Enabled, "profiler-enabled", defaultConfig.profilerConfig.Enabled, "whether to enable the auto-profiler") 167 fnb.flags.BoolVar(&fnb.BaseConfig.profilerConfig.UploaderEnabled, "profile-uploader-enabled", defaultConfig.profilerConfig.UploaderEnabled, 168 "whether to enable automatic profile upload to Google Cloud Profiler. "+ 169 "For autoupload to work forllowing should be true: "+ 170 "1) both -profiler-enabled=true and -profile-uploader-enabled=true need to be set. "+ 171 "2) node is running in GCE. "+ 172 "3) server or user has https://www.googleapis.com/auth/monitoring.write scope. 
") 173 fnb.flags.StringVar(&fnb.BaseConfig.profilerConfig.Dir, "profiler-dir", defaultConfig.profilerConfig.Dir, "directory to create auto-profiler profiles") 174 fnb.flags.DurationVar(&fnb.BaseConfig.profilerConfig.Interval, "profiler-interval", defaultConfig.profilerConfig.Interval, 175 "the interval between auto-profiler runs") 176 fnb.flags.DurationVar(&fnb.BaseConfig.profilerConfig.Duration, "profiler-duration", defaultConfig.profilerConfig.Duration, 177 "the duration to run the auto-profile for") 178 179 fnb.flags.BoolVar(&fnb.BaseConfig.tracerEnabled, "tracer-enabled", defaultConfig.tracerEnabled, 180 "whether to enable tracer") 181 fnb.flags.UintVar(&fnb.BaseConfig.tracerSensitivity, "tracer-sensitivity", defaultConfig.tracerSensitivity, 182 "adjusts the level of sampling when tracing is enabled. 0 means capture everything, higher value results in less samples") 183 184 fnb.flags.StringVar(&fnb.BaseConfig.AdminAddr, "admin-addr", defaultConfig.AdminAddr, "address to bind on for admin HTTP server") 185 fnb.flags.StringVar(&fnb.BaseConfig.AdminCert, "admin-cert", defaultConfig.AdminCert, "admin cert file (for TLS)") 186 fnb.flags.StringVar(&fnb.BaseConfig.AdminKey, "admin-key", defaultConfig.AdminKey, "admin key file (for TLS)") 187 fnb.flags.StringVar(&fnb.BaseConfig.AdminClientCAs, "admin-client-certs", defaultConfig.AdminClientCAs, "admin client certs (for mutual TLS)") 188 fnb.flags.UintVar(&fnb.BaseConfig.AdminMaxMsgSize, "admin-max-response-size", defaultConfig.AdminMaxMsgSize, "admin server max response size in bytes") 189 190 fnb.flags.UintVar(&fnb.BaseConfig.guaranteesCacheSize, "guarantees-cache-size", bstorage.DefaultCacheSize, "collection guarantees cache size") 191 fnb.flags.UintVar(&fnb.BaseConfig.receiptsCacheSize, "receipts-cache-size", bstorage.DefaultCacheSize, "receipts cache size") 192 193 // dynamic node startup flags 194 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupANPubkey, 195 "dynamic-startup-access-publickey", 196 "", 197 "the 
public key of the trusted secure access node to connect to when using dynamic-startup, this access node must be staked") 198 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupANAddress, 199 "dynamic-startup-access-address", 200 "", 201 "the access address of the trusted secure access node to connect to when using dynamic-startup, this access node must be staked") 202 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupEpochPhase, 203 "dynamic-startup-epoch-phase", 204 "EpochPhaseSetup", 205 "the target epoch phase for dynamic startup <EpochPhaseStaking|EpochPhaseSetup|EpochPhaseCommitted") 206 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupEpoch, 207 "dynamic-startup-epoch", 208 "current", 209 "the target epoch for dynamic-startup, use \"current\" to start node in the current epoch") 210 fnb.flags.DurationVar(&fnb.BaseConfig.DynamicStartupSleepInterval, 211 "dynamic-startup-sleep-interval", 212 time.Minute, 213 "the interval in which the node will check if it can start") 214 215 fnb.flags.BoolVar(&fnb.BaseConfig.InsecureSecretsDB, "insecure-secrets-db", false, "allow the node to start up without an secrets DB encryption key") 216 fnb.flags.BoolVar(&fnb.BaseConfig.HeroCacheMetricsEnable, "herocache-metrics-collector", false, "enables herocache metrics collection") 217 218 // sync core flags 219 fnb.flags.DurationVar(&fnb.BaseConfig.SyncCoreConfig.RetryInterval, 220 "sync-retry-interval", 221 defaultConfig.SyncCoreConfig.RetryInterval, 222 "the initial interval before we retry a sync request, uses exponential backoff") 223 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.Tolerance, 224 "sync-tolerance", 225 defaultConfig.SyncCoreConfig.Tolerance, 226 "determines how big of a difference in block heights we tolerate before actively syncing with range requests") 227 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.MaxAttempts, 228 "sync-max-attempts", 229 defaultConfig.SyncCoreConfig.MaxAttempts, 230 "the maximum number of attempts we make for each requested 
block/height before discarding") 231 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.MaxSize, 232 "sync-max-size", 233 defaultConfig.SyncCoreConfig.MaxSize, 234 "the maximum number of blocks we request in the same block request message") 235 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.MaxRequests, 236 "sync-max-requests", 237 defaultConfig.SyncCoreConfig.MaxRequests, 238 "the maximum number of requests we send during each scanning period") 239 240 fnb.flags.Uint64Var(&fnb.BaseConfig.ComplianceConfig.SkipNewProposalsThreshold, 241 "compliance-skip-proposals-threshold", 242 defaultConfig.ComplianceConfig.SkipNewProposalsThreshold, 243 "threshold at which new proposals are discarded rather than cached, if their height is this much above local finalized height") 244 245 // observer mode allows a unstaked execution node to fetch blocks from a public staked access node, and being able to execute blocks 246 fnb.flags.BoolVar(&fnb.BaseConfig.ObserverMode, "observer-mode", defaultConfig.ObserverMode, "whether the node is running in observer mode") 247 fnb.flags.StringSliceVar(&fnb.bootstrapNodePublicKeys, 248 "observer-mode-bootstrap-node-public-keys", 249 nil, 250 "the networking public key of the bootstrap access node if this is an observer (in the same order as the bootstrap node addresses) e.g. \"d57a5e9c5.....\",\"44ded42d....\"") 251 fnb.flags.StringSliceVar(&fnb.bootstrapNodeAddresses, 252 "observer-mode-bootstrap-node-addresses", 253 nil, 254 "the network addresses of the bootstrap access node if this is an observer e.g. 
access-001.mainnet.flow.org:9653,access-002.mainnet.flow.org:9653") 255 } 256 257 func (fnb *FlowNodeBuilder) EnqueuePingService() { 258 fnb.Component("ping service", func(node *NodeConfig) (module.ReadyDoneAware, error) { 259 pingLibP2PProtocolID := protocols.PingProtocolId(node.SporkID) 260 261 // setup the Ping provider to return the software version and the sealed block height 262 pingInfoProvider := &ping.InfoProvider{ 263 SoftwareVersionFun: func() string { 264 return build.Version() 265 }, 266 SealedBlockHeightFun: func() (uint64, error) { 267 head, err := node.State.Sealed().Head() 268 if err != nil { 269 return 0, err 270 } 271 return head.Height, nil 272 }, 273 HotstuffViewFun: func() (uint64, error) { 274 return 0, fmt.Errorf("hotstuff view reporting disabled") 275 }, 276 } 277 278 // only consensus roles will need to report hotstuff view 279 if fnb.BaseConfig.NodeRole == flow.RoleConsensus.String() { 280 // initialize the persister 281 persist := persister.New(node.DB, node.RootChainID) 282 283 pingInfoProvider.HotstuffViewFun = func() (uint64, error) { 284 livenessData, err := persist.GetLivenessData() 285 if err != nil { 286 return 0, err 287 } 288 289 return livenessData.CurrentView, nil 290 } 291 } 292 293 pingService, err := node.EngineRegistry.RegisterPingService(pingLibP2PProtocolID, pingInfoProvider) 294 295 node.PingService = pingService 296 297 return &module.NoopReadyDoneAware{}, err 298 }) 299 } 300 301 func (fnb *FlowNodeBuilder) EnqueueResolver() { 302 fnb.Component("resolver", func(node *NodeConfig) (module.ReadyDoneAware, error) { 303 var dnsIpCacheMetricsCollector module.HeroCacheMetrics = metrics.NewNoopCollector() 304 var dnsTxtCacheMetricsCollector module.HeroCacheMetrics = metrics.NewNoopCollector() 305 if fnb.HeroCacheMetricsEnable { 306 dnsIpCacheMetricsCollector = metrics.NetworkDnsIpCacheMetricsFactory(fnb.MetricsRegisterer) 307 dnsTxtCacheMetricsCollector = metrics.NetworkDnsTxtCacheMetricsFactory(fnb.MetricsRegisterer) 308 } 
309 310 cache := herocache.NewDNSCache( 311 dns.DefaultCacheSize, 312 node.Logger, 313 dnsIpCacheMetricsCollector, 314 dnsTxtCacheMetricsCollector, 315 ) 316 317 resolver := dns.NewResolver( 318 node.Logger, 319 fnb.Metrics.Network, 320 cache, 321 dns.WithTTL(fnb.BaseConfig.FlowConfig.NetworkConfig.DNSCacheTTL)) 322 323 fnb.Resolver = resolver 324 return resolver, nil 325 }) 326 } 327 328 func (fnb *FlowNodeBuilder) EnqueueNetworkInit() { 329 connGaterPeerDialFilters := make([]p2p.PeerFilter, 0) 330 connGaterInterceptSecureFilters := make([]p2p.PeerFilter, 0) 331 peerManagerFilters := make([]p2p.PeerFilter, 0) 332 333 fnb.UnicastRateLimiterDistributor = ratelimit.NewUnicastRateLimiterDistributor() 334 fnb.UnicastRateLimiterDistributor.AddConsumer(fnb.Metrics.Network) 335 336 // setup default rate limiter options 337 unicastRateLimiterOpts := []ratelimit.RateLimitersOption{ 338 ratelimit.WithDisabledRateLimiting(fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.DryRun), 339 ratelimit.WithNotifier(fnb.UnicastRateLimiterDistributor), 340 } 341 342 // override noop unicast message rate limiter 343 if fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.MessageRateLimit > 0 { 344 unicastMessageRateLimiter := ratelimiter.NewRateLimiter( 345 rate.Limit(fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.MessageRateLimit), 346 fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.MessageRateLimit, 347 fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.LockoutDuration, 348 ) 349 unicastRateLimiterOpts = append(unicastRateLimiterOpts, ratelimit.WithMessageRateLimiter(unicastMessageRateLimiter)) 350 351 // avoid connection gating and pruning during dry run 352 if !fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.DryRun { 353 f := rateLimiterPeerFilter(unicastMessageRateLimiter) 354 // add IsRateLimited peerFilters to conn gater intercept secure peer and peer manager filters list 355 // don't allow rate limited peers to 
establishing incoming connections 356 connGaterInterceptSecureFilters = append(connGaterInterceptSecureFilters, f) 357 // don't create outbound connections to rate limited peers 358 peerManagerFilters = append(peerManagerFilters, f) 359 } 360 } 361 362 // override noop unicast bandwidth rate limiter 363 if fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.BandwidthRateLimit > 0 && fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.BandwidthBurstLimit > 0 { 364 unicastBandwidthRateLimiter := ratelimit.NewBandWidthRateLimiter( 365 rate.Limit(fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.BandwidthRateLimit), 366 fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.BandwidthBurstLimit, 367 fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.LockoutDuration, 368 ) 369 unicastRateLimiterOpts = append(unicastRateLimiterOpts, ratelimit.WithBandwidthRateLimiter(unicastBandwidthRateLimiter)) 370 371 // avoid connection gating and pruning during dry run 372 if !fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.DryRun { 373 f := rateLimiterPeerFilter(unicastBandwidthRateLimiter) 374 // add IsRateLimited peerFilters to conn gater intercept secure peer and peer manager filters list 375 connGaterInterceptSecureFilters = append(connGaterInterceptSecureFilters, f) 376 peerManagerFilters = append(peerManagerFilters, f) 377 } 378 } 379 380 // setup unicast rate limiters 381 unicastRateLimiters := ratelimit.NewRateLimiters(unicastRateLimiterOpts...) 
382 383 uniCfg := &p2pbuilderconfig.UnicastConfig{ 384 Unicast: fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast, 385 RateLimiterDistributor: fnb.UnicastRateLimiterDistributor, 386 } 387 388 connGaterCfg := &p2pbuilderconfig.ConnectionGaterConfig{ 389 InterceptPeerDialFilters: connGaterPeerDialFilters, 390 InterceptSecuredFilters: connGaterInterceptSecureFilters, 391 } 392 393 peerManagerCfg := &p2pbuilderconfig.PeerManagerConfig{ 394 ConnectionPruning: fnb.FlowConfig.NetworkConfig.NetworkConnectionPruning, 395 UpdateInterval: fnb.FlowConfig.NetworkConfig.PeerUpdateInterval, 396 ConnectorFactory: connection.DefaultLibp2pBackoffConnectorFactory(), 397 } 398 399 fnb.Component(LibP2PNodeComponent, func(node *NodeConfig) (module.ReadyDoneAware, error) { 400 myAddr := fnb.NodeConfig.Me.Address() 401 if fnb.BaseConfig.BindAddr != NotSet { 402 myAddr = fnb.BaseConfig.BindAddr 403 } 404 405 if fnb.ObserverMode { 406 // observer mode only init pulbic libp2p node 407 publicLibp2pNode, err := fnb.BuildPublicLibp2pNode(myAddr) 408 if err != nil { 409 return nil, fmt.Errorf("could not build public libp2p node: %w", err) 410 } 411 fnb.LibP2PNode = publicLibp2pNode 412 413 return publicLibp2pNode, nil 414 } 415 416 dhtActivationStatus, err := DhtSystemActivationStatus(fnb.NodeRole) 417 if err != nil { 418 return nil, fmt.Errorf("could not determine dht activation status: %w", err) 419 } 420 builder, err := p2pbuilder.DefaultNodeBuilder(fnb.Logger, 421 myAddr, 422 network.PrivateNetwork, 423 fnb.NetworkKey, 424 fnb.SporkID, 425 fnb.IdentityProvider, 426 &p2pbuilderconfig.MetricsConfig{ 427 Metrics: fnb.Metrics.Network, 428 HeroCacheFactory: fnb.HeroCacheMetricsFactory(), 429 }, 430 fnb.Resolver, 431 fnb.BaseConfig.NodeRole, 432 connGaterCfg, 433 peerManagerCfg, 434 &fnb.FlowConfig.NetworkConfig.GossipSub, 435 &fnb.FlowConfig.NetworkConfig.ResourceManager, 436 uniCfg, 437 &fnb.FlowConfig.NetworkConfig.ConnectionManager, 438 &p2p.DisallowListCacheConfig{ 439 MaxSize: 
fnb.FlowConfig.NetworkConfig.DisallowListNotificationCacheSize, 440 Metrics: metrics.DisallowListCacheMetricsFactory(fnb.HeroCacheMetricsFactory(), network.PrivateNetwork), 441 }, 442 dhtActivationStatus) 443 if err != nil { 444 return nil, fmt.Errorf("could not create libp2p node builder: %w", err) 445 } 446 447 libp2pNode, err := builder.Build() 448 if err != nil { 449 return nil, fmt.Errorf("could not build libp2p node: %w", err) 450 } 451 452 fnb.LibP2PNode = libp2pNode 453 return libp2pNode, nil 454 }) 455 fnb.Component(NetworkComponent, func(node *NodeConfig) (module.ReadyDoneAware, error) { 456 fnb.Logger.Info().Hex("node_id", logging.ID(fnb.NodeID)).Msg("default conduit factory initiated") 457 return fnb.InitFlowNetworkWithConduitFactory( 458 node, 459 conduit.NewDefaultConduitFactory(), 460 unicastRateLimiters, 461 peerManagerFilters) 462 }) 463 464 fnb.Module("network underlay dependency", func(node *NodeConfig) error { 465 fnb.networkUnderlayDependable = module.NewProxiedReadyDoneAware() 466 fnb.PeerManagerDependencies.Add(fnb.networkUnderlayDependable) 467 return nil 468 }) 469 470 // peer manager won't be created until all PeerManagerDependencies are ready. 471 if !fnb.ObserverMode { 472 fnb.DependableComponent("peer manager", func(node *NodeConfig) (module.ReadyDoneAware, error) { 473 return fnb.LibP2PNode.PeerManagerComponent(), nil 474 }, fnb.PeerManagerDependencies) 475 } 476 } 477 478 // HeroCacheMetricsFactory returns a HeroCacheMetricsFactory based on the MetricsEnabled flag. 479 // If MetricsEnabled is true, it returns a HeroCacheMetricsFactory that will register metrics with the provided MetricsRegisterer. 480 // If MetricsEnabled is false, it returns a no-op HeroCacheMetricsFactory that will not register any metrics. 
481 func (fnb *FlowNodeBuilder) HeroCacheMetricsFactory() metrics.HeroCacheMetricsFactory { 482 if fnb.MetricsEnabled { 483 return metrics.NewHeroCacheMetricsFactory(fnb.MetricsRegisterer) 484 } 485 return metrics.NewNoopHeroCacheMetricsFactory() 486 } 487 488 // initPublicLibp2pNode creates a libp2p node for the observer service in the public (unstaked) network. 489 // The factory function is later passed into the initMiddleware function to eventually instantiate the p2p.LibP2PNode instance 490 // The LibP2P host is created with the following options: 491 // * DHT as client and seeded with the given bootstrap peers 492 // * The specified bind address as the listen address 493 // * The passed in private key as the libp2p key 494 // * No connection gater 495 // * No connection manager 496 // * No peer manager 497 // * Default libp2p pubsub options. 498 // Args: 499 // - networkKey: the private key to use for the libp2p node 500 // Returns: 501 // - p2p.LibP2PNode: the libp2p node 502 // - error: if any error occurs. Any error returned is considered irrecoverable. 
503 func (fnb *FlowNodeBuilder) BuildPublicLibp2pNode(address string) (p2p.LibP2PNode, error) { 504 var pis []peer.AddrInfo 505 506 ids, err := BootstrapIdentities(fnb.bootstrapNodeAddresses, fnb.bootstrapNodePublicKeys) 507 if err != nil { 508 return nil, fmt.Errorf("could not create bootstrap identities: %w", err) 509 } 510 511 for _, b := range ids { 512 pi, err := utils.PeerAddressInfo(*b) 513 if err != nil { 514 return nil, fmt.Errorf("could not extract peer address info from bootstrap identity %v: %w", b, err) 515 } 516 517 pis = append(pis, pi) 518 519 fnb.Logger.Info().Msgf("bootstrap peer address info %v: %v", b, pi) 520 } 521 522 for _, b := range ids { 523 pi, err := utils.PeerAddressInfo(*b) 524 if err != nil { 525 return nil, fmt.Errorf("could not extract peer address info from bootstrap identity %v: %w", b, err) 526 } 527 528 pis = append(pis, pi) 529 } 530 531 if len(pis) == 0 { 532 return nil, errors.New("no bootstrap peers provided") 533 } 534 535 node, err := p2pbuilder.NewNodeBuilder( 536 fnb.Logger, 537 &fnb.FlowConfig.NetworkConfig.GossipSub, 538 &p2pbuilderconfig.MetricsConfig{ 539 HeroCacheFactory: fnb.HeroCacheMetricsFactory(), 540 Metrics: fnb.Metrics.Network, 541 }, 542 network.PublicNetwork, 543 address, 544 fnb.NetworkKey, 545 fnb.SporkID, 546 fnb.IdentityProvider, 547 &fnb.FlowConfig.NetworkConfig.ResourceManager, 548 p2pbuilderconfig.PeerManagerDisableConfig(), // disable peer manager for observer node. 549 &p2p.DisallowListCacheConfig{ 550 MaxSize: fnb.FlowConfig.NetworkConfig.DisallowListNotificationCacheSize, 551 Metrics: metrics.DisallowListCacheMetricsFactory(fnb.HeroCacheMetricsFactory(), network.PublicNetwork), 552 }, 553 &p2pbuilderconfig.UnicastConfig{ 554 Unicast: fnb.FlowConfig.NetworkConfig.Unicast, 555 }). 556 SetSubscriptionFilter( 557 subscription.NewRoleBasedFilter( 558 subscription.UnstakedRole, fnb.IdentityProvider, 559 ), 560 ). 
561 SetRoutingSystem(func(ctx context.Context, h host.Host) (routing.Routing, error) { 562 return p2pdht.NewDHT(ctx, h, protocols.FlowPublicDHTProtocolID(fnb.SporkID), 563 fnb.Logger, 564 fnb.Metrics.Network, 565 p2pdht.AsClient(), 566 dht.BootstrapPeers(pis...), 567 ) 568 }). 569 Build() 570 571 if err != nil { 572 return nil, fmt.Errorf("could not initialize libp2p node for observer: %w", err) 573 } 574 return node, nil 575 } 576 577 func (fnb *FlowNodeBuilder) InitFlowNetworkWithConduitFactory( 578 node *NodeConfig, 579 cf network.ConduitFactory, 580 unicastRateLimiters *ratelimit.RateLimiters, 581 peerManagerFilters []p2p.PeerFilter) (network.EngineRegistry, error) { 582 583 var networkOptions []underlay.NetworkOption 584 if len(fnb.MsgValidators) > 0 { 585 networkOptions = append(networkOptions, underlay.WithMessageValidators(fnb.MsgValidators...)) 586 } 587 588 // by default if no rate limiter configuration was provided in the CLI args the default 589 // noop rate limiter will be used. 590 networkOptions = append(networkOptions, underlay.WithUnicastRateLimiters(unicastRateLimiters)) 591 592 networkOptions = append(networkOptions, 593 underlay.WithPreferredUnicastProtocols(protocols.ToProtocolNames(fnb.FlowConfig.NetworkConfig.PreferredUnicastProtocols)...), 594 ) 595 596 // peerManagerFilters are used by the peerManager via the network to filter peers from the topology. 
597 if len(peerManagerFilters) > 0 { 598 networkOptions = append(networkOptions, underlay.WithPeerManagerFilters(peerManagerFilters...)) 599 } 600 601 receiveCache := netcache.NewHeroReceiveCache(fnb.FlowConfig.NetworkConfig.NetworkReceivedMessageCacheSize, 602 fnb.Logger, 603 metrics.NetworkReceiveCacheMetricsFactory(fnb.HeroCacheMetricsFactory(), network.PrivateNetwork)) 604 605 err := node.Metrics.Mempool.Register(metrics.ResourceNetworkingReceiveCache, receiveCache.Size) 606 if err != nil { 607 return nil, fmt.Errorf("could not register networking receive cache metric: %w", err) 608 } 609 610 networkType := network.PrivateNetwork 611 if fnb.ObserverMode { 612 // observer mode uses public network 613 networkType = network.PublicNetwork 614 } 615 616 // creates network instance 617 net, err := underlay.NewNetwork(&underlay.NetworkConfig{ 618 Logger: fnb.Logger, 619 Libp2pNode: fnb.LibP2PNode, 620 Codec: fnb.CodecFactory(), 621 Me: fnb.Me, 622 SporkId: fnb.SporkID, 623 Topology: topology.NewFullyConnectedTopology(), 624 Metrics: fnb.Metrics.Network, 625 BitSwapMetrics: fnb.Metrics.Bitswap, 626 IdentityProvider: fnb.IdentityProvider, 627 ReceiveCache: receiveCache, 628 ConduitFactory: cf, 629 UnicastMessageTimeout: fnb.FlowConfig.NetworkConfig.Unicast.MessageTimeout, 630 IdentityTranslator: fnb.IDTranslator, 631 AlspCfg: &alspmgr.MisbehaviorReportManagerConfig{ 632 Logger: fnb.Logger, 633 SpamRecordCacheSize: fnb.FlowConfig.NetworkConfig.AlspConfig.SpamRecordCacheSize, 634 SpamReportQueueSize: fnb.FlowConfig.NetworkConfig.AlspConfig.SpamReportQueueSize, 635 DisablePenalty: fnb.FlowConfig.NetworkConfig.AlspConfig.DisablePenalty, 636 HeartBeatInterval: fnb.FlowConfig.NetworkConfig.AlspConfig.HearBeatInterval, 637 AlspMetrics: fnb.Metrics.Network, 638 HeroCacheMetricsFactory: fnb.HeroCacheMetricsFactory(), 639 NetworkType: networkType, 640 }, 641 SlashingViolationConsumerFactory: func(adapter network.ConduitAdapter) network.ViolationsConsumer { 642 return 
slashing.NewSlashingViolationsConsumer(fnb.Logger, fnb.Metrics.Network, adapter) 643 }, 644 }, networkOptions...) 645 if err != nil { 646 return nil, fmt.Errorf("could not initialize network: %w", err) 647 } 648 649 if node.ObserverMode { 650 fnb.EngineRegistry = converter.NewNetwork(net, channels.SyncCommittee, channels.PublicSyncCommittee) 651 } else { 652 fnb.EngineRegistry = net // setting network as the fnb.Network for the engine-level components 653 } 654 fnb.NetworkUnderlay = net // setting network as the fnb.Underlay for the lower-level components 655 656 // register network ReadyDoneAware interface so other components can depend on it for startup 657 if fnb.networkUnderlayDependable != nil { 658 fnb.networkUnderlayDependable.Init(fnb.NetworkUnderlay) 659 } 660 661 idEvents := gadgets.NewIdentityDeltas(net.UpdateNodeAddresses) 662 fnb.ProtocolEvents.AddConsumer(idEvents) 663 664 return net, nil 665 } 666 667 func (fnb *FlowNodeBuilder) EnqueueMetricsServerInit() { 668 fnb.Component("metrics server", func(node *NodeConfig) (module.ReadyDoneAware, error) { 669 server := metrics.NewServer(fnb.Logger, fnb.BaseConfig.metricsPort) 670 return server, nil 671 }) 672 } 673 674 func (fnb *FlowNodeBuilder) EnqueueAdminServerInit() error { 675 if fnb.AdminAddr == NotSet { 676 return nil 677 } 678 679 if (fnb.AdminCert != NotSet || fnb.AdminKey != NotSet || fnb.AdminClientCAs != NotSet) && 680 !(fnb.AdminCert != NotSet && fnb.AdminKey != NotSet && fnb.AdminClientCAs != NotSet) { 681 return fmt.Errorf("admin cert / key and client certs must all be provided to enable mutual TLS") 682 } 683 684 // create the updatable config manager 685 fnb.RegisterDefaultAdminCommands() 686 fnb.Component("admin server", func(node *NodeConfig) (module.ReadyDoneAware, error) { 687 // set up all admin commands 688 for commandName, commandFunc := range fnb.adminCommands { 689 command := commandFunc(fnb.NodeConfig) 690 fnb.adminCommandBootstrapper.RegisterHandler(commandName, command.Handler) 
691 fnb.adminCommandBootstrapper.RegisterValidator(commandName, command.Validator) 692 } 693 694 opts := []admin.CommandRunnerOption{ 695 admin.WithMaxMsgSize(int(fnb.AdminMaxMsgSize)), 696 } 697 698 if node.AdminCert != NotSet { 699 serverCert, err := tls.LoadX509KeyPair(node.AdminCert, node.AdminKey) 700 if err != nil { 701 return nil, err 702 } 703 clientCAs, err := os.ReadFile(node.AdminClientCAs) 704 if err != nil { 705 return nil, err 706 } 707 certPool := x509.NewCertPool() 708 certPool.AppendCertsFromPEM(clientCAs) 709 config := &tls.Config{ 710 MinVersion: tls.VersionTLS13, 711 Certificates: []tls.Certificate{serverCert}, 712 ClientAuth: tls.RequireAndVerifyClientCert, 713 ClientCAs: certPool, 714 } 715 716 opts = append(opts, admin.WithTLS(config)) 717 } 718 719 runner := fnb.adminCommandBootstrapper.Bootstrap(fnb.Logger, fnb.AdminAddr, opts...) 720 721 return runner, nil 722 }) 723 724 return nil 725 } 726 727 func (fnb *FlowNodeBuilder) RegisterBadgerMetrics() error { 728 return metrics.RegisterBadgerMetrics() 729 } 730 731 func (fnb *FlowNodeBuilder) EnqueueTracer() { 732 fnb.Component("tracer", func(node *NodeConfig) (module.ReadyDoneAware, error) { 733 return fnb.Tracer, nil 734 }) 735 } 736 737 func (fnb *FlowNodeBuilder) ParseAndPrintFlags() error { 738 // parse configuration parameters 739 pflag.Parse() 740 741 configOverride, err := config.BindPFlags(&fnb.BaseConfig.FlowConfig, fnb.flags) 742 if err != nil { 743 return err 744 } 745 746 if configOverride { 747 fnb.Logger.Info().Str("config-file", fnb.FlowConfig.ConfigFile).Msg("configuration file updated") 748 } 749 750 if err = fnb.BaseConfig.FlowConfig.Validate(); err != nil { 751 fnb.Logger.Fatal().Err(err).Msg("flow configuration validation failed") 752 } 753 754 info := fnb.Logger.Info() 755 756 noPrint := config.LogConfig(info, fnb.flags) 757 fnb.flags.VisitAll(func(flag *pflag.Flag) { 758 if _, ok := noPrint[flag.Name]; !ok { 759 info.Str(flag.Name, fmt.Sprintf("%v", flag.Value)) 760 } 761 
}) 762 info.Msg("configuration loaded") 763 return fnb.extraFlagsValidation() 764 } 765 766 func (fnb *FlowNodeBuilder) ValidateRootSnapshot(f func(protocol.Snapshot) error) NodeBuilder { 767 fnb.extraRootSnapshotCheck = f 768 return fnb 769 } 770 771 func (fnb *FlowNodeBuilder) ValidateFlags(f func() error) NodeBuilder { 772 fnb.extraFlagCheck = f 773 return fnb 774 } 775 776 func (fnb *FlowNodeBuilder) PrintBuildVersionDetails() { 777 fnb.Logger.Info().Str("version", build.Version()).Str("commit", build.Commit()).Msg("build details") 778 } 779 780 func (fnb *FlowNodeBuilder) initNodeInfo() error { 781 if fnb.BaseConfig.nodeIDHex == NotSet { 782 return fmt.Errorf("cannot start without node ID") 783 } 784 785 nodeID, err := flow.HexStringToIdentifier(fnb.BaseConfig.nodeIDHex) 786 if err != nil { 787 return fmt.Errorf("could not parse node ID from string (id: %v): %w", fnb.BaseConfig.nodeIDHex, err) 788 } 789 790 info, err := LoadPrivateNodeInfo(fnb.BaseConfig.BootstrapDir, nodeID) 791 if err != nil { 792 return fmt.Errorf("failed to load private node info: %w", err) 793 } 794 795 fnb.StakingKey = info.StakingPrivKey.PrivateKey 796 797 if fnb.ObserverMode { 798 // observer mode uses a network private key with different format than the staked node, 799 // so it has to load the network private key from a separate file 800 networkingPrivateKey, err := LoadNetworkPrivateKey(fnb.BaseConfig.BootstrapDir, nodeID) 801 if err != nil { 802 return fmt.Errorf("failed to load networking private key: %w", err) 803 } 804 805 peerID, err := peerIDFromNetworkKey(networkingPrivateKey) 806 if err != nil { 807 return fmt.Errorf("could not get peer ID from network key: %w", err) 808 } 809 810 // public node ID for observer is derived from peer ID which is derived from network key 811 pubNodeID, err := translator.NewPublicNetworkIDTranslator().GetFlowID(peerID) 812 if err != nil { 813 return fmt.Errorf("could not get flow node ID: %w", err) 814 } 815 816 fnb.NodeID = pubNodeID 817 
fnb.NetworkKey = networkingPrivateKey 818 819 return nil 820 } 821 822 fnb.NodeID = nodeID 823 fnb.NetworkKey = info.NetworkPrivKey.PrivateKey 824 825 return nil 826 } 827 828 func peerIDFromNetworkKey(privateKey crypto.PrivateKey) (peer.ID, error) { 829 pubKey, err := keyutils.LibP2PPublicKeyFromFlow(privateKey.PublicKey()) 830 if err != nil { 831 return "", fmt.Errorf("could not load libp2p public key: %w", err) 832 } 833 834 return peer.IDFromPublicKey(pubKey) 835 } 836 837 func (fnb *FlowNodeBuilder) initLogger() error { 838 // configure logger with standard level, node ID and UTC timestamp 839 zerolog.TimeFieldFormat = time.RFC3339Nano 840 zerolog.TimestampFunc = func() time.Time { return time.Now().UTC() } 841 842 // Drop all log events that exceed this rate limit 843 throttledSampler := logging.BurstSampler(fnb.BaseConfig.debugLogLimit, time.Second) 844 845 log := fnb.Logger.With(). 846 Timestamp(). 847 Str("node_role", fnb.BaseConfig.NodeRole). 848 Str("node_id", fnb.NodeID.String()). 849 Logger(). 850 Sample(zerolog.LevelSampler{ 851 TraceSampler: throttledSampler, 852 DebugSampler: throttledSampler, 853 }) 854 855 log.Info().Msgf("flow %s node starting up", fnb.BaseConfig.NodeRole) 856 857 // parse config log level and apply to logger 858 lvl, err := zerolog.ParseLevel(strings.ToLower(fnb.BaseConfig.level)) 859 if err != nil { 860 return fmt.Errorf("invalid log level: %w", err) 861 } 862 863 // Minimum log level is set to trace, then overridden by SetGlobalLevel. 
864 // this allows admin commands to modify the level to any value during runtime 865 log = log.Level(zerolog.TraceLevel) 866 zerolog.SetGlobalLevel(lvl) 867 868 fnb.Logger = log 869 870 return nil 871 } 872 873 func (fnb *FlowNodeBuilder) initMetrics() error { 874 875 fnb.Tracer = trace.NewNoopTracer() 876 if fnb.BaseConfig.tracerEnabled { 877 nodeIdHex := fnb.NodeID.String() 878 if len(nodeIdHex) > 8 { 879 nodeIdHex = nodeIdHex[:8] 880 } 881 882 serviceName := fnb.BaseConfig.NodeRole + "-" + nodeIdHex 883 tracer, err := trace.NewTracer( 884 fnb.Logger, 885 serviceName, 886 fnb.RootChainID.String(), 887 fnb.tracerSensitivity, 888 ) 889 if err != nil { 890 return fmt.Errorf("could not initialize tracer: %w", err) 891 } 892 893 fnb.Logger.Info().Msg("Tracer Started") 894 fnb.Tracer = tracer 895 } 896 897 fnb.Metrics = Metrics{ 898 Network: metrics.NewNoopCollector(), 899 Engine: metrics.NewNoopCollector(), 900 Compliance: metrics.NewNoopCollector(), 901 Cache: metrics.NewNoopCollector(), 902 Mempool: metrics.NewNoopCollector(), 903 CleanCollector: metrics.NewNoopCollector(), 904 Bitswap: metrics.NewNoopCollector(), 905 } 906 if fnb.BaseConfig.MetricsEnabled { 907 fnb.MetricsRegisterer = prometheus.DefaultRegisterer 908 909 mempools := metrics.NewMempoolCollector(5 * time.Second) 910 911 fnb.Metrics = Metrics{ 912 Network: metrics.NewNetworkCollector(fnb.Logger), 913 Engine: metrics.NewEngineCollector(), 914 Compliance: metrics.NewComplianceCollector(), 915 // CacheControl metrics has been causing memory abuse, disable for now 916 // Cache: metrics.NewCacheCollector(fnb.RootChainID), 917 Cache: metrics.NewNoopCollector(), 918 CleanCollector: metrics.NewCleanerCollector(), 919 Mempool: mempools, 920 Bitswap: metrics.NewBitswapCollector(), 921 } 922 923 // registers mempools as a Component so that its Ready method is invoked upon startup 924 fnb.Component("mempools metrics", func(node *NodeConfig) (module.ReadyDoneAware, error) { 925 return mempools, nil 926 }) 927 928 
// metrics enabled, report node info metrics as post init event 929 fnb.PostInit(func(nodeConfig *NodeConfig) error { 930 nodeInfoMetrics := metrics.NewNodeInfoCollector() 931 protocolVersion, err := fnb.RootSnapshot.Params().ProtocolVersion() 932 if err != nil { 933 return fmt.Errorf("could not query root snapshoot protocol version: %w", err) 934 } 935 nodeInfoMetrics.NodeInfo(build.Version(), build.Commit(), nodeConfig.SporkID.String(), protocolVersion) 936 return nil 937 }) 938 } 939 return nil 940 } 941 942 func (fnb *FlowNodeBuilder) createGCEProfileUploader(client *gcemd.Client, opts ...option.ClientOption) (profiler.Uploader, error) { 943 projectID, err := client.ProjectID() 944 if err != nil { 945 return &profiler.NoopUploader{}, fmt.Errorf("failed to get project ID: %w", err) 946 } 947 948 instance, err := client.InstanceID() 949 if err != nil { 950 return &profiler.NoopUploader{}, fmt.Errorf("failed to get instance ID: %w", err) 951 } 952 953 chainID := fnb.RootChainID.String() 954 if chainID == "" { 955 fnb.Logger.Warn().Msg("RootChainID is not set, using default value") 956 chainID = "unknown" 957 } 958 959 params := profiler.Params{ 960 ProjectID: projectID, 961 ChainID: chainID, 962 Role: fnb.NodeConfig.NodeRole, 963 Version: build.Version(), 964 Commit: build.Commit(), 965 Instance: instance, 966 } 967 fnb.Logger.Info().Msgf("creating pprof profile uploader with params: %+v", params) 968 969 return profiler.NewUploader(fnb.Logger, params, opts...) 
970 } 971 972 func (fnb *FlowNodeBuilder) createProfileUploader() (profiler.Uploader, error) { 973 switch { 974 case fnb.BaseConfig.profilerConfig.UploaderEnabled && gcemd.OnGCE(): 975 return fnb.createGCEProfileUploader(gcemd.NewClient(nil)) 976 default: 977 fnb.Logger.Info().Msg("not running on GCE, setting pprof uploader to noop") 978 return &profiler.NoopUploader{}, nil 979 } 980 } 981 982 func (fnb *FlowNodeBuilder) initProfiler() error { 983 uploader, err := fnb.createProfileUploader() 984 if err != nil { 985 fnb.Logger.Warn().Err(err).Msg("failed to create pprof uploader, falling back to noop") 986 uploader = &profiler.NoopUploader{} 987 } 988 989 profiler, err := profiler.New(fnb.Logger, uploader, fnb.BaseConfig.profilerConfig) 990 if err != nil { 991 return fmt.Errorf("could not initialize profiler: %w", err) 992 } 993 994 // register the enabled state of the profiler for dynamic configuring 995 err = fnb.ConfigManager.RegisterBoolConfig("profiler-enabled", profiler.Enabled, profiler.SetEnabled) 996 if err != nil { 997 return fmt.Errorf("could not register profiler-enabled config: %w", err) 998 } 999 1000 err = fnb.ConfigManager.RegisterDurationConfig( 1001 "profiler-trigger", 1002 func() time.Duration { return fnb.BaseConfig.profilerConfig.Duration }, 1003 func(d time.Duration) error { return profiler.TriggerRun(d) }, 1004 ) 1005 if err != nil { 1006 return fmt.Errorf("could not register profiler-trigger config: %w", err) 1007 } 1008 1009 err = fnb.ConfigManager.RegisterUintConfig( 1010 "profiler-set-mem-profile-rate", 1011 func() uint { return uint(runtime.MemProfileRate) }, 1012 func(r uint) error { runtime.MemProfileRate = int(r); return nil }, 1013 ) 1014 if err != nil { 1015 return fmt.Errorf("could not register profiler-set-mem-profile-rate setting: %w", err) 1016 } 1017 1018 // There is no way to get the current block profile rate so we keep track of it ourselves. 
1019 currentRate := new(uint) 1020 err = fnb.ConfigManager.RegisterUintConfig( 1021 "profiler-set-block-profile-rate", 1022 func() uint { return *currentRate }, 1023 func(r uint) error { currentRate = &r; runtime.SetBlockProfileRate(int(r)); return nil }, 1024 ) 1025 if err != nil { 1026 return fmt.Errorf("could not register profiler-set-block-profile-rate setting: %w", err) 1027 } 1028 1029 err = fnb.ConfigManager.RegisterUintConfig( 1030 "profiler-set-mutex-profile-fraction", 1031 func() uint { return uint(runtime.SetMutexProfileFraction(-1)) }, 1032 func(r uint) error { _ = runtime.SetMutexProfileFraction(int(r)); return nil }, 1033 ) 1034 if err != nil { 1035 return fmt.Errorf("could not register profiler-set-mutex-profile-fraction setting: %w", err) 1036 } 1037 1038 // registering as a DependableComponent with no dependencies so that it's started immediately on startup 1039 // without being blocked by other component's Ready() 1040 fnb.DependableComponent("profiler", func(node *NodeConfig) (module.ReadyDoneAware, error) { 1041 return profiler, nil 1042 }, NewDependencyList()) 1043 1044 return nil 1045 } 1046 1047 func (fnb *FlowNodeBuilder) initDB() error { 1048 1049 // if a db has been passed in, use that instead of creating one 1050 if fnb.BaseConfig.db != nil { 1051 fnb.DB = fnb.BaseConfig.db 1052 return nil 1053 } 1054 1055 // Pre-create DB path (Badger creates only one-level dirs) 1056 err := os.MkdirAll(fnb.BaseConfig.datadir, 0700) 1057 if err != nil { 1058 return fmt.Errorf("could not create datadir (path: %s): %w", fnb.BaseConfig.datadir, err) 1059 } 1060 1061 log := sutil.NewLogger(fnb.Logger) 1062 1063 // we initialize the database with options that allow us to keep the maximum 1064 // item size in the trie itself (up to 1MB) and where we keep all level zero 1065 // tables in-memory as well; this slows down compaction and increases memory 1066 // usage, but it improves overall performance and disk i/o 1067 opts := badger. 
1068 DefaultOptions(fnb.BaseConfig.datadir). 1069 WithKeepL0InMemory(true). 1070 WithLogger(log). 1071 1072 // the ValueLogFileSize option specifies how big the value of a 1073 // key-value pair is allowed to be saved into badger. 1074 // exceeding this limit, will fail with an error like this: 1075 // could not store data: Value with size <xxxx> exceeded 1073741824 limit 1076 // Maximum value size is 10G, needed by execution node 1077 // TODO: finding a better max value for each node type 1078 WithValueLogFileSize(128 << 23). 1079 WithValueLogMaxEntries(100000) // Default is 1000000 1080 1081 publicDB, err := bstorage.InitPublic(opts) 1082 if err != nil { 1083 return fmt.Errorf("could not open public db: %w", err) 1084 } 1085 fnb.DB = publicDB 1086 1087 fnb.ShutdownFunc(func() error { 1088 if err := fnb.DB.Close(); err != nil { 1089 return fmt.Errorf("error closing protocol database: %w", err) 1090 } 1091 return nil 1092 }) 1093 1094 fnb.Component("badger log cleaner", func(node *NodeConfig) (module.ReadyDoneAware, error) { 1095 return bstorage.NewCleaner(node.Logger, node.DB, node.Metrics.CleanCollector, flow.DefaultValueLogGCWaitDuration), nil 1096 }) 1097 1098 return nil 1099 } 1100 1101 func (fnb *FlowNodeBuilder) initSecretsDB() error { 1102 1103 // if the secrets DB is disabled (only applicable for Consensus Follower, 1104 // which makes use of this same logic), skip this initialization 1105 if !fnb.BaseConfig.secretsDBEnabled { 1106 return nil 1107 } 1108 1109 if fnb.BaseConfig.secretsdir == NotSet { 1110 return fmt.Errorf("missing required flag '--secretsdir'") 1111 } 1112 1113 err := os.MkdirAll(fnb.BaseConfig.secretsdir, 0700) 1114 if err != nil { 1115 return fmt.Errorf("could not create secrets db dir (path: %s): %w", fnb.BaseConfig.secretsdir, err) 1116 } 1117 1118 log := sutil.NewLogger(fnb.Logger) 1119 1120 opts := badger.DefaultOptions(fnb.BaseConfig.secretsdir).WithLogger(log) 1121 1122 // NOTE: SN nodes need to explicitly set --insecure-secrets-db 
to true in order to 1123 // disable secrets database encryption 1124 if fnb.NodeRole == flow.RoleConsensus.String() && fnb.InsecureSecretsDB { 1125 fnb.Logger.Warn().Msg("starting with secrets database encryption disabled") 1126 } else { 1127 encryptionKey, err := loadSecretsEncryptionKey(fnb.BootstrapDir, fnb.NodeID) 1128 if errors.Is(err, os.ErrNotExist) { 1129 if fnb.NodeRole == flow.RoleConsensus.String() { 1130 // missing key is a fatal error for SN nodes 1131 return fmt.Errorf("secrets db encryption key not found: %w", err) 1132 } 1133 fnb.Logger.Warn().Msg("starting with secrets database encryption disabled") 1134 } else if err != nil { 1135 return fmt.Errorf("failed to read secrets db encryption key: %w", err) 1136 } else { 1137 opts = opts.WithEncryptionKey(encryptionKey) 1138 } 1139 } 1140 1141 secretsDB, err := bstorage.InitSecret(opts) 1142 if err != nil { 1143 return fmt.Errorf("could not open secrets db: %w", err) 1144 } 1145 fnb.SecretsDB = secretsDB 1146 1147 fnb.ShutdownFunc(func() error { 1148 if err := fnb.SecretsDB.Close(); err != nil { 1149 return fmt.Errorf("error closing secrets database: %w", err) 1150 } 1151 return nil 1152 }) 1153 1154 return nil 1155 } 1156 1157 func (fnb *FlowNodeBuilder) initStorage() error { 1158 1159 // in order to void long iterations with big keys when initializing with an 1160 // already populated database, we bootstrap the initial maximum key size 1161 // upon starting 1162 err := operation.RetryOnConflict(fnb.DB.Update, func(tx *badger.Txn) error { 1163 return operation.InitMax(tx) 1164 }) 1165 if err != nil { 1166 return fmt.Errorf("could not initialize max tracker: %w", err) 1167 } 1168 1169 headers := bstorage.NewHeaders(fnb.Metrics.Cache, fnb.DB) 1170 guarantees := bstorage.NewGuarantees(fnb.Metrics.Cache, fnb.DB, fnb.BaseConfig.guaranteesCacheSize) 1171 seals := bstorage.NewSeals(fnb.Metrics.Cache, fnb.DB) 1172 results := bstorage.NewExecutionResults(fnb.Metrics.Cache, fnb.DB) 1173 receipts := 
bstorage.NewExecutionReceipts(fnb.Metrics.Cache, fnb.DB, results, fnb.BaseConfig.receiptsCacheSize) 1174 index := bstorage.NewIndex(fnb.Metrics.Cache, fnb.DB) 1175 payloads := bstorage.NewPayloads(fnb.DB, index, guarantees, seals, receipts, results) 1176 blocks := bstorage.NewBlocks(fnb.DB, headers, payloads) 1177 qcs := bstorage.NewQuorumCertificates(fnb.Metrics.Cache, fnb.DB, bstorage.DefaultCacheSize) 1178 transactions := bstorage.NewTransactions(fnb.Metrics.Cache, fnb.DB) 1179 collections := bstorage.NewCollections(fnb.DB, transactions) 1180 setups := bstorage.NewEpochSetups(fnb.Metrics.Cache, fnb.DB) 1181 epochCommits := bstorage.NewEpochCommits(fnb.Metrics.Cache, fnb.DB) 1182 statuses := bstorage.NewEpochStatuses(fnb.Metrics.Cache, fnb.DB) 1183 commits := bstorage.NewCommits(fnb.Metrics.Cache, fnb.DB) 1184 versionBeacons := bstorage.NewVersionBeacons(fnb.DB) 1185 1186 fnb.Storage = Storage{ 1187 Headers: headers, 1188 Guarantees: guarantees, 1189 Receipts: receipts, 1190 Results: results, 1191 Seals: seals, 1192 Index: index, 1193 Payloads: payloads, 1194 Blocks: blocks, 1195 QuorumCertificates: qcs, 1196 Transactions: transactions, 1197 Collections: collections, 1198 Setups: setups, 1199 EpochCommits: epochCommits, 1200 VersionBeacons: versionBeacons, 1201 Statuses: statuses, 1202 Commits: commits, 1203 } 1204 1205 return nil 1206 } 1207 1208 func (fnb *FlowNodeBuilder) InitIDProviders() { 1209 fnb.Module("id providers", func(node *NodeConfig) error { 1210 idCache, err := cache.NewProtocolStateIDCache(node.Logger, node.State, node.ProtocolEvents) 1211 if err != nil { 1212 return fmt.Errorf("could not initialize ProtocolStateIDCache: %w", err) 1213 } 1214 1215 // The following wrapper allows to disallow-list byzantine nodes via an admin command: 1216 // the wrapper overrides the 'Ejected' flag of disallow-listed nodes to true 1217 disallowListWrapper, err := cache.NewNodeDisallowListWrapper(idCache, node.DB, func() network.DisallowListNotificationConsumer { 
1218 return fnb.NetworkUnderlay 1219 }) 1220 if err != nil { 1221 return fmt.Errorf("could not initialize NodeBlockListWrapper: %w", err) 1222 } 1223 node.IdentityProvider = disallowListWrapper 1224 1225 if node.ObserverMode { 1226 // identifier providers decides which node to connect to when syncing blocks, 1227 // in observer mode, the peer nodes have to be specific public access node, 1228 // rather than the staked consensus nodes. 1229 idTranslator, factory, err := CreatePublicIDTranslatorAndIdentifierProvider( 1230 fnb.Logger, 1231 fnb.NetworkKey, 1232 fnb.SporkID, 1233 // fnb.LibP2PNode is not created yet, until EnqueueNetworkInit is called. 1234 // so we pass a function that will return the LibP2PNode when called. 1235 func() p2p.LibP2PNode { 1236 return fnb.LibP2PNode 1237 }, 1238 idCache, 1239 ) 1240 if err != nil { 1241 return fmt.Errorf("could not initialize public ID translator and identifier provider: %w", err) 1242 } 1243 1244 fnb.IDTranslator = idTranslator 1245 fnb.SyncEngineIdentifierProvider = factory() 1246 1247 return nil 1248 } 1249 1250 node.IDTranslator = idCache 1251 1252 // register the disallow list wrapper for dynamic configuration via admin command 1253 err = node.ConfigManager.RegisterIdentifierListConfig("network-id-provider-blocklist", 1254 disallowListWrapper.GetDisallowList, disallowListWrapper.Update) 1255 if err != nil { 1256 return fmt.Errorf("failed to register disallow-list wrapper with config manager: %w", err) 1257 } 1258 1259 node.SyncEngineIdentifierProvider = id.NewIdentityFilterIdentifierProvider( 1260 filter.And( 1261 filter.HasRole(flow.RoleConsensus), 1262 filter.Not(filter.HasNodeID(node.Me.NodeID())), 1263 underlay.NotEjectedFilter, 1264 ), 1265 node.IdentityProvider, 1266 ) 1267 return nil 1268 }) 1269 } 1270 1271 func (fnb *FlowNodeBuilder) initState() error { 1272 fnb.ProtocolEvents = events.NewDistributor() 1273 1274 isBootStrapped, err := badgerState.IsBootstrapped(fnb.DB) 1275 if err != nil { 1276 return 
fmt.Errorf("failed to determine whether database contains bootstrapped state: %w", err) 1277 } 1278 1279 if isBootStrapped { 1280 fnb.Logger.Info().Msg("opening already bootstrapped protocol state") 1281 state, err := badgerState.OpenState( 1282 fnb.Metrics.Compliance, 1283 fnb.DB, 1284 fnb.Storage.Headers, 1285 fnb.Storage.Seals, 1286 fnb.Storage.Results, 1287 fnb.Storage.Blocks, 1288 fnb.Storage.QuorumCertificates, 1289 fnb.Storage.Setups, 1290 fnb.Storage.EpochCommits, 1291 fnb.Storage.Statuses, 1292 fnb.Storage.VersionBeacons, 1293 ) 1294 if err != nil { 1295 return fmt.Errorf("could not open protocol state: %w", err) 1296 } 1297 fnb.State = state 1298 1299 // set root snapshot field 1300 rootBlock, err := state.Params().FinalizedRoot() 1301 if err != nil { 1302 return fmt.Errorf("could not get root block from protocol state: %w", err) 1303 } 1304 1305 rootSnapshot := state.AtBlockID(rootBlock.ID()) 1306 if err := fnb.setRootSnapshot(rootSnapshot); err != nil { 1307 return err 1308 } 1309 } else { 1310 // Bootstrap! 
1311 fnb.Logger.Info().Msg("bootstrapping empty protocol state") 1312 1313 // if no root snapshot is configured, attempt to load the file from disk 1314 var rootSnapshot = fnb.RootSnapshot 1315 if rootSnapshot == nil { 1316 fnb.Logger.Info().Msgf("loading root protocol state snapshot from disk") 1317 rootSnapshot, err = loadRootProtocolSnapshot(fnb.BaseConfig.BootstrapDir) 1318 if err != nil { 1319 return fmt.Errorf("failed to read protocol snapshot from disk: %w", err) 1320 } 1321 } 1322 // set root snapshot fields 1323 if err := fnb.setRootSnapshot(rootSnapshot); err != nil { 1324 return err 1325 } 1326 1327 // generate bootstrap config options as per NodeConfig 1328 var options []badgerState.BootstrapConfigOptions 1329 if fnb.SkipNwAddressBasedValidations { 1330 options = append(options, badgerState.SkipNetworkAddressValidation) 1331 } 1332 1333 fnb.State, err = badgerState.Bootstrap( 1334 fnb.Metrics.Compliance, 1335 fnb.DB, 1336 fnb.Storage.Headers, 1337 fnb.Storage.Seals, 1338 fnb.Storage.Results, 1339 fnb.Storage.Blocks, 1340 fnb.Storage.QuorumCertificates, 1341 fnb.Storage.Setups, 1342 fnb.Storage.EpochCommits, 1343 fnb.Storage.Statuses, 1344 fnb.Storage.VersionBeacons, 1345 fnb.RootSnapshot, 1346 options..., 1347 ) 1348 if err != nil { 1349 return fmt.Errorf("could not bootstrap protocol state: %w", err) 1350 } 1351 1352 fnb.Logger.Info(). 1353 Hex("root_result_id", logging.Entity(fnb.RootResult)). 1354 Hex("root_state_commitment", fnb.RootSeal.FinalState[:]). 1355 Hex("finalized_root_block_id", logging.Entity(fnb.FinalizedRootBlock)). 1356 Uint64("finalized_root_block_height", fnb.FinalizedRootBlock.Header.Height). 1357 Hex("sealed_root_block_id", logging.Entity(fnb.SealedRootBlock)). 1358 Uint64("sealed_root_block_height", fnb.SealedRootBlock.Header.Height). 
1359 Msg("protocol state bootstrapped") 1360 } 1361 1362 // initialize local if it hasn't been initialized yet 1363 if fnb.Me == nil { 1364 if err := fnb.initLocal(); err != nil { 1365 return err 1366 } 1367 } 1368 1369 lastFinalized, err := fnb.State.Final().Head() 1370 if err != nil { 1371 return fmt.Errorf("could not get last finalized block header: %w", err) 1372 } 1373 fnb.NodeConfig.LastFinalizedHeader = lastFinalized 1374 1375 lastSealed, err := fnb.State.Sealed().Head() 1376 if err != nil { 1377 return fmt.Errorf("could not get last sealed block header: %w", err) 1378 } 1379 1380 fnb.Logger.Info(). 1381 Hex("last_finalized_block_id", logging.Entity(lastFinalized)). 1382 Uint64("last_finalized_block_height", lastFinalized.Height). 1383 Hex("last_sealed_block_id", logging.Entity(lastSealed)). 1384 Uint64("last_sealed_block_height", lastSealed.Height). 1385 Hex("finalized_root_block_id", logging.Entity(fnb.FinalizedRootBlock)). 1386 Uint64("finalized_root_block_height", fnb.FinalizedRootBlock.Header.Height). 1387 Hex("sealed_root_block_id", logging.Entity(fnb.SealedRootBlock)). 1388 Uint64("sealed_root_block_height", fnb.SealedRootBlock.Header.Height). 1389 Msg("successfully opened protocol state") 1390 1391 return nil 1392 } 1393 1394 // setRootSnapshot sets the root snapshot field and all related fields in the NodeConfig. 
1395 func (fnb *FlowNodeBuilder) setRootSnapshot(rootSnapshot protocol.Snapshot) error { 1396 var err error 1397 1398 // validate the root snapshot QCs 1399 err = badgerState.IsValidRootSnapshotQCs(rootSnapshot) 1400 if err != nil { 1401 return fmt.Errorf("failed to validate root snapshot QCs: %w", err) 1402 } 1403 1404 // perform extra checks requested by specific node types 1405 if fnb.extraRootSnapshotCheck != nil { 1406 err = fnb.extraRootSnapshotCheck(rootSnapshot) 1407 if err != nil { 1408 return fmt.Errorf("failed to perform extra checks on root snapshot: %w", err) 1409 } 1410 } 1411 1412 fnb.RootSnapshot = rootSnapshot 1413 // cache properties of the root snapshot, for convenience 1414 fnb.RootResult, fnb.RootSeal, err = fnb.RootSnapshot.SealedResult() 1415 if err != nil { 1416 return fmt.Errorf("failed to read root sealed result: %w", err) 1417 } 1418 1419 sealingSegment, err := fnb.RootSnapshot.SealingSegment() 1420 if err != nil { 1421 return fmt.Errorf("failed to read root sealing segment: %w", err) 1422 } 1423 1424 fnb.FinalizedRootBlock = sealingSegment.Highest() 1425 fnb.SealedRootBlock = sealingSegment.Sealed() 1426 fnb.RootQC, err = fnb.RootSnapshot.QuorumCertificate() 1427 if err != nil { 1428 return fmt.Errorf("failed to read root QC: %w", err) 1429 } 1430 1431 fnb.RootChainID = fnb.FinalizedRootBlock.Header.ChainID 1432 fnb.SporkID, err = fnb.RootSnapshot.Params().SporkID() 1433 if err != nil { 1434 return fmt.Errorf("failed to read spork ID: %w", err) 1435 } 1436 1437 return nil 1438 } 1439 1440 func (fnb *FlowNodeBuilder) initLocal() error { 1441 // NodeID has been set in initNodeInfo 1442 myID := fnb.NodeID 1443 if fnb.ObserverMode { 1444 nodeID, err := flow.HexStringToIdentifier(fnb.BaseConfig.nodeIDHex) 1445 if err != nil { 1446 return fmt.Errorf("could not parse node ID from string (id: %v): %w", fnb.BaseConfig.nodeIDHex, err) 1447 } 1448 info, err := LoadPrivateNodeInfo(fnb.BaseConfig.BootstrapDir, nodeID) 1449 if err != nil { 1450 return 
fmt.Errorf("could not load private node info: %w", err) 1451 } 1452 1453 if info.Role != flow.RoleExecution { 1454 return fmt.Errorf("observer mode is only available for execution nodes") 1455 } 1456 1457 id := &flow.Identity{ 1458 // observer mode uses the node id derived from the network key, 1459 // rather than the node id from the node info file 1460 NodeID: myID, 1461 Address: info.Address, 1462 Role: info.Role, 1463 Weight: 0, 1464 Ejected: false, 1465 StakingPubKey: fnb.StakingKey.PublicKey(), 1466 NetworkPubKey: fnb.NetworkKey.PublicKey(), 1467 } 1468 fnb.Me, err = local.New(id, fnb.StakingKey) 1469 if err != nil { 1470 return fmt.Errorf("could not initialize local: %w", err) 1471 } 1472 1473 return nil 1474 } 1475 1476 // Verify that my ID (as given in the configuration) is known to the network 1477 // (i.e. protocol state). There are two cases that will cause the following error: 1478 // 1) used the wrong node id, which is not part of the identity list of the finalized state 1479 // 2) the node id is a new one for a new spork, but the bootstrap data has not been updated. 1480 self, err := fnb.State.Final().Identity(myID) 1481 if err != nil { 1482 return fmt.Errorf("node identity not found in the identity list of the finalized state (id: %v): %w", myID, err) 1483 } 1484 1485 // Verify that my role (as given in the configuration) is consistent with the protocol state. 1486 // We enforce this strictly for MainNet. For other networks (e.g. TestNet or BenchNet), we 1487 // are lenient, to allow ghost node to run as any role. 
1488 if self.Role.String() != fnb.BaseConfig.NodeRole { 1489 rootBlockHeader, err := fnb.State.Params().FinalizedRoot() 1490 if err != nil { 1491 return fmt.Errorf("could not get root block from protocol state: %w", err) 1492 } 1493 1494 if rootBlockHeader.ChainID == flow.Mainnet { 1495 return fmt.Errorf("running as incorrect role, expected: %v, actual: %v, exiting", 1496 self.Role.String(), 1497 fnb.BaseConfig.NodeRole, 1498 ) 1499 } 1500 1501 fnb.Logger.Warn().Msgf("running as incorrect role, expected: %v, actual: %v, continuing", 1502 self.Role.String(), 1503 fnb.BaseConfig.NodeRole) 1504 } 1505 1506 // ensure that the configured staking/network keys are consistent with the protocol state 1507 if !self.NetworkPubKey.Equals(fnb.NetworkKey.PublicKey()) { 1508 return fmt.Errorf("configured networking key does not match protocol state") 1509 } 1510 if !self.StakingPubKey.Equals(fnb.StakingKey.PublicKey()) { 1511 return fmt.Errorf("configured staking key does not match protocol state") 1512 } 1513 1514 fnb.Me, err = local.New(self, fnb.StakingKey) 1515 if err != nil { 1516 return fmt.Errorf("could not initialize local: %w", err) 1517 } 1518 1519 return nil 1520 } 1521 1522 func (fnb *FlowNodeBuilder) initFvmOptions() { 1523 blockFinder := environment.NewBlockFinder(fnb.Storage.Headers) 1524 vmOpts := []fvm.Option{ 1525 fvm.WithChain(fnb.RootChainID.Chain()), 1526 fvm.WithBlocks(blockFinder), 1527 fvm.WithAccountStorageLimit(true), 1528 } 1529 if fnb.RootChainID == flow.Testnet || fnb.RootChainID == flow.Sandboxnet || fnb.RootChainID == flow.Mainnet { 1530 vmOpts = append(vmOpts, 1531 fvm.WithTransactionFeesEnabled(true), 1532 ) 1533 } 1534 if fnb.RootChainID == flow.Testnet || fnb.RootChainID == flow.Sandboxnet || fnb.RootChainID == flow.Localnet || fnb.RootChainID == flow.Benchnet { 1535 vmOpts = append(vmOpts, 1536 fvm.WithContractDeploymentRestricted(false), 1537 ) 1538 } 1539 fnb.FvmOptions = vmOpts 1540 } 1541 1542 // handleModules initializes the given module. 
1543 func (fnb *FlowNodeBuilder) handleModule(v namedModuleFunc) error { 1544 fnb.Logger.Info().Str("module", v.name).Msg("module initialization started") 1545 err := v.fn(fnb.NodeConfig) 1546 if err != nil { 1547 return fmt.Errorf("module %s initialization failed: %w", v.name, err) 1548 } 1549 1550 fnb.Logger.Info().Str("module", v.name).Msg("module initialization complete") 1551 return nil 1552 } 1553 1554 // handleModules initializes all modules that have been enqueued on this node builder. 1555 func (fnb *FlowNodeBuilder) handleModules() error { 1556 for _, f := range fnb.modules { 1557 if err := fnb.handleModule(f); err != nil { 1558 return err 1559 } 1560 } 1561 1562 return nil 1563 } 1564 1565 // handleComponents registers the component's factory method with the ComponentManager to be run 1566 // when the node starts. 1567 // It uses signal channels to ensure that components are started serially. 1568 func (fnb *FlowNodeBuilder) handleComponents() error { 1569 // The parent/started channels are used to enforce serial startup. 1570 // - parent is the started channel of the previous component. 1571 // - when a component is ready, it closes its started channel by calling the provided callback. 1572 // Components wait for their parent channel to close before starting, this ensures they start 1573 // up serially, even though the ComponentManager will launch the goroutines in parallel. 
1574 1575 // The first component is always started immediately 1576 parent := make(chan struct{}) 1577 close(parent) 1578 1579 var err error 1580 asyncComponents := []namedComponentFunc{} 1581 1582 // Run all components 1583 for _, f := range fnb.components { 1584 // Components with explicit dependencies are not started serially 1585 if f.dependencies != nil { 1586 asyncComponents = append(asyncComponents, f) 1587 continue 1588 } 1589 1590 started := make(chan struct{}) 1591 1592 if f.errorHandler != nil { 1593 err = fnb.handleRestartableComponent(f, parent, func() { close(started) }) 1594 } else { 1595 err = fnb.handleComponent(f, parent, func() { close(started) }) 1596 } 1597 1598 if err != nil { 1599 return fmt.Errorf("could not handle component %s: %w", f.name, err) 1600 } 1601 1602 parent = started 1603 } 1604 1605 // Components with explicit dependencies are run asynchronously, which means dependencies in 1606 // the dependency list must be initialized outside of the component factory. 1607 for _, f := range asyncComponents { 1608 fnb.Logger.Debug().Str("component", f.name).Int("dependencies", len(f.dependencies.components)).Msg("handling component asynchronously") 1609 err = fnb.handleComponent(f, util.AllReady(f.dependencies.components...), func() {}) 1610 if err != nil { 1611 return fmt.Errorf("could not handle dependable component %s: %w", f.name, err) 1612 } 1613 } 1614 1615 return nil 1616 } 1617 1618 // handleComponent constructs a component using the provided ReadyDoneFactory, and registers a 1619 // worker with the ComponentManager to be run when the node is started. 1620 // 1621 // The ComponentManager starts all workers in parallel. Since some components have non-idempotent 1622 // ReadyDoneAware interfaces, we need to ensure that they are started serially. This is accomplished 1623 // using the parentReady channel and the started closure. 
Components wait for the parentReady channel 1624 // to close before starting, and then call the started callback after they are ready(). The started 1625 // callback closes the parentReady channel of the next component, and so on. 1626 // 1627 // TODO: Instead of this serial startup, components should wait for their dependencies to be ready 1628 // using their ReadyDoneAware interface. After components are updated to use the idempotent 1629 // ReadyDoneAware interface and explicitly wait for their dependencies to be ready, we can remove 1630 // this channel chaining. 1631 func (fnb *FlowNodeBuilder) handleComponent(v namedComponentFunc, dependencies <-chan struct{}, started func()) error { 1632 // Add a closure that starts the component when the node is started, and then waits for it to exit 1633 // gracefully. 1634 // Startup for all components will happen in parallel, and components can use their dependencies' 1635 // ReadyDoneAware interface to wait until they are ready. 1636 fnb.componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 1637 // wait for the dependencies to be ready before starting 1638 if err := util.WaitClosed(ctx, dependencies); err != nil { 1639 return 1640 } 1641 1642 logger := fnb.Logger.With().Str("component", v.name).Logger() 1643 1644 logger.Info().Msg("component initialization started") 1645 // First, build the component using the factory method. 1646 readyAware, err := v.fn(fnb.NodeConfig) 1647 if err != nil { 1648 ctx.Throw(fmt.Errorf("component %s initialization failed: %w", v.name, err)) 1649 } 1650 if readyAware == nil { 1651 ctx.Throw(fmt.Errorf("component %s initialization failed: nil component", v.name)) 1652 } 1653 logger.Info().Msg("component initialization complete") 1654 1655 // if this is a Component, use the Startable interface to start the component, otherwise 1656 // Ready() will launch it. 
1657 cmp, isComponent := readyAware.(component.Component) 1658 if isComponent { 1659 cmp.Start(ctx) 1660 } 1661 1662 // Wait until the component is ready 1663 if err := util.WaitClosed(ctx, readyAware.Ready()); err != nil { 1664 // The context was cancelled. Continue to shutdown logic. 1665 logger.Warn().Msg("component startup aborted") 1666 1667 // Non-idempotent ReadyDoneAware components trigger shutdown by calling Done(). Don't 1668 // do that here since it may not be safe if the component is not Ready(). 1669 if !isComponent { 1670 return 1671 } 1672 } else { 1673 logger.Info().Msg("component startup complete") 1674 ready() 1675 1676 // Signal to the next component that we're ready. 1677 started() 1678 } 1679 1680 // Component shutdown is signaled by cancelling its context. 1681 <-ctx.Done() 1682 logger.Info().Msg("component shutdown started") 1683 1684 // Finally, wait until component has finished shutting down. 1685 <-readyAware.Done() 1686 logger.Info().Msg("component shutdown complete") 1687 }) 1688 1689 return nil 1690 } 1691 1692 // handleRestartableComponent constructs a component using the provided ReadyDoneFactory, and 1693 // registers a worker with the ComponentManager to be run when the node is started. 1694 // 1695 // Restartable Components are components that can be restarted after successfully handling 1696 // an irrecoverable error. 1697 // 1698 // Any irrecoverable errors thrown by the component will be passed to the provided error handler. 1699 func (fnb *FlowNodeBuilder) handleRestartableComponent(v namedComponentFunc, parentReady <-chan struct{}, started func()) error { 1700 fnb.componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 1701 // wait for the previous component to be ready before starting 1702 if err := util.WaitClosed(ctx, parentReady); err != nil { 1703 return 1704 } 1705 1706 // Note: we're marking the worker routine ready before we even attempt to start the 1707 // component. 
the idea behind a restartable component is that the node should not depend 1708 // on it for safe operation, so the node does not need to wait for it to be ready. 1709 ready() 1710 1711 // do not block serial startup. started can only be called once, so it cannot be called 1712 // from within the componentFactory 1713 started() 1714 1715 log := fnb.Logger.With().Str("component", v.name).Logger() 1716 1717 // This may be called multiple times if the component is restarted 1718 componentFactory := func() (component.Component, error) { 1719 log.Info().Msg("component initialization started") 1720 c, err := v.fn(fnb.NodeConfig) 1721 if err != nil { 1722 return nil, err 1723 } 1724 log.Info().Msg("component initialization complete") 1725 1726 go func() { 1727 if err := util.WaitClosed(ctx, c.Ready()); err != nil { 1728 log.Info().Msg("component startup aborted") 1729 } else { 1730 log.Info().Msg("component startup complete") 1731 } 1732 1733 <-ctx.Done() 1734 log.Info().Msg("component shutdown started") 1735 }() 1736 return c.(component.Component), nil 1737 } 1738 1739 err := component.RunComponent(ctx, componentFactory, v.errorHandler) 1740 if err != nil && !errors.Is(err, ctx.Err()) { 1741 ctx.Throw(fmt.Errorf("component %s encountered an unhandled irrecoverable error: %w", v.name, err)) 1742 } 1743 1744 log.Info().Msg("component shutdown complete") 1745 }) 1746 1747 return nil 1748 } 1749 1750 // ExtraFlags enables binding additional flags beyond those defined in BaseConfig. 1751 func (fnb *FlowNodeBuilder) ExtraFlags(f func(*pflag.FlagSet)) NodeBuilder { 1752 f(fnb.flags) 1753 return fnb 1754 } 1755 1756 // Module enables setting up dependencies of the engine with the builder context. 
1757 func (fnb *FlowNodeBuilder) Module(name string, f BuilderFunc) NodeBuilder { 1758 fnb.modules = append(fnb.modules, namedModuleFunc{ 1759 fn: f, 1760 name: name, 1761 }) 1762 return fnb 1763 } 1764 1765 // ShutdownFunc adds a callback function that is called after all components have exited. 1766 func (fnb *FlowNodeBuilder) ShutdownFunc(fn func() error) NodeBuilder { 1767 fnb.postShutdownFns = append(fnb.postShutdownFns, fn) 1768 return fnb 1769 } 1770 1771 func (fnb *FlowNodeBuilder) AdminCommand(command string, f func(config *NodeConfig) commands.AdminCommand) NodeBuilder { 1772 fnb.adminCommands[command] = f 1773 return fnb 1774 } 1775 1776 // Component adds a new component to the node that conforms to the ReadyDoneAware 1777 // interface. 1778 // 1779 // The ReadyDoneFactory may return either a `Component` or `ReadyDoneAware` instance. 1780 // In both cases, the object is started when the node is run, and the node will wait for the 1781 // component to exit gracefully. 1782 func (fnb *FlowNodeBuilder) Component(name string, f ReadyDoneFactory) NodeBuilder { 1783 fnb.components = append(fnb.components, namedComponentFunc{ 1784 fn: f, 1785 name: name, 1786 }) 1787 return fnb 1788 } 1789 1790 // DependableComponent adds a new component to the node that conforms to the ReadyDoneAware 1791 // interface. The builder will wait until all of the components in the dependencies list are ready 1792 // before constructing the component. 1793 // 1794 // The ReadyDoneFactory may return either a `Component` or `ReadyDoneAware` instance. 1795 // In both cases, the object is started when the node is run, and the node will wait for the 1796 // component to exit gracefully. 1797 // 1798 // IMPORTANT: Dependable components are started in parallel with no guaranteed run order, so all 1799 // dependencies must be initialized outside of the ReadyDoneFactory, and their `Ready()` method 1800 // MUST be idempotent. 
1801 func (fnb *FlowNodeBuilder) DependableComponent(name string, f ReadyDoneFactory, dependencies *DependencyList) NodeBuilder { 1802 // Note: dependencies are passed as a struct to allow updating the list after calling this method. 1803 // Passing a slice instead would result in out of sync metadata since slices are passed by reference 1804 fnb.components = append(fnb.components, namedComponentFunc{ 1805 fn: f, 1806 name: name, 1807 dependencies: dependencies, 1808 }) 1809 return fnb 1810 } 1811 1812 // OverrideComponent adds given builder function to the components set of the node builder. If a builder function with that name 1813 // already exists, it will be overridden. 1814 func (fnb *FlowNodeBuilder) OverrideComponent(name string, f ReadyDoneFactory) NodeBuilder { 1815 for i := 0; i < len(fnb.components); i++ { 1816 if fnb.components[i].name == name { 1817 // found component with the name, override it. 1818 fnb.components[i] = namedComponentFunc{ 1819 fn: f, 1820 name: name, 1821 } 1822 1823 return fnb 1824 } 1825 } 1826 1827 // no component found with the same name, hence just adding it. 1828 return fnb.Component(name, f) 1829 } 1830 1831 // RestartableComponent adds a new component to the node that conforms to the ReadyDoneAware 1832 // interface, and calls the provided error handler when an irrecoverable error is encountered. 1833 // Use RestartableComponent if the component is not critical to the node's safe operation and 1834 // can/should be independently restarted when an irrecoverable error is encountered. 1835 // 1836 // IMPORTANT: Since a RestartableComponent can be restarted independently of the node, the node and 1837 // other components must not rely on it for safe operation, and failures must be handled gracefully. 1838 // As such, RestartableComponents do not block the node from becoming ready, and do not block 1839 // subsequent components from starting serially. They do start in serial order. 
1840 // 1841 // Note: The ReadyDoneFactory method may be called multiple times if the component is restarted. 1842 // 1843 // Any irrecoverable errors thrown by the component will be passed to the provided error handler. 1844 func (fnb *FlowNodeBuilder) RestartableComponent(name string, f ReadyDoneFactory, errorHandler component.OnError) NodeBuilder { 1845 fnb.components = append(fnb.components, namedComponentFunc{ 1846 fn: f, 1847 name: name, 1848 errorHandler: errorHandler, 1849 }) 1850 return fnb 1851 } 1852 1853 // OverrideModule adds given builder function to the modules set of the node builder. If a builder function with that name 1854 // already exists, it will be overridden. 1855 func (fnb *FlowNodeBuilder) OverrideModule(name string, f BuilderFunc) NodeBuilder { 1856 for i := 0; i < len(fnb.modules); i++ { 1857 if fnb.modules[i].name == name { 1858 // found module with the name, override it. 1859 fnb.modules[i] = namedModuleFunc{ 1860 fn: f, 1861 name: name, 1862 } 1863 1864 return fnb 1865 } 1866 } 1867 1868 // no module found with the same name, hence just adding it. 
1869 return fnb.Module(name, f) 1870 } 1871 1872 func (fnb *FlowNodeBuilder) PreInit(f BuilderFunc) NodeBuilder { 1873 fnb.preInitFns = append(fnb.preInitFns, f) 1874 return fnb 1875 } 1876 1877 func (fnb *FlowNodeBuilder) PostInit(f BuilderFunc) NodeBuilder { 1878 fnb.postInitFns = append(fnb.postInitFns, f) 1879 return fnb 1880 } 1881 1882 type Option func(*BaseConfig) 1883 1884 func WithBootstrapDir(bootstrapDir string) Option { 1885 return func(config *BaseConfig) { 1886 config.BootstrapDir = bootstrapDir 1887 } 1888 } 1889 1890 func WithBindAddress(bindAddress string) Option { 1891 return func(config *BaseConfig) { 1892 config.BindAddr = bindAddress 1893 } 1894 } 1895 1896 func WithDataDir(dataDir string) Option { 1897 return func(config *BaseConfig) { 1898 if config.db == nil { 1899 config.datadir = dataDir 1900 } 1901 } 1902 } 1903 1904 func WithSecretsDBEnabled(enabled bool) Option { 1905 return func(config *BaseConfig) { 1906 config.secretsDBEnabled = enabled 1907 } 1908 } 1909 1910 func WithMetricsEnabled(enabled bool) Option { 1911 return func(config *BaseConfig) { 1912 config.MetricsEnabled = enabled 1913 } 1914 } 1915 1916 func WithSyncCoreConfig(syncConfig chainsync.Config) Option { 1917 return func(config *BaseConfig) { 1918 config.SyncCoreConfig = syncConfig 1919 } 1920 } 1921 1922 func WithComplianceConfig(complianceConfig compliance.Config) Option { 1923 return func(config *BaseConfig) { 1924 config.ComplianceConfig = complianceConfig 1925 } 1926 } 1927 1928 func WithLogLevel(level string) Option { 1929 return func(config *BaseConfig) { 1930 config.level = level 1931 } 1932 } 1933 1934 // WithDB takes precedence over WithDataDir and datadir will be set to empty if DB is set using this option 1935 func WithDB(db *badger.DB) Option { 1936 return func(config *BaseConfig) { 1937 config.db = db 1938 config.datadir = "" 1939 } 1940 } 1941 1942 // FlowNode creates a new Flow node builder with the given name. 
1943 func FlowNode(role string, opts ...Option) *FlowNodeBuilder { 1944 config := DefaultBaseConfig() 1945 config.NodeRole = role 1946 for _, opt := range opts { 1947 opt(config) 1948 } 1949 1950 builder := &FlowNodeBuilder{ 1951 NodeConfig: &NodeConfig{ 1952 BaseConfig: *config, 1953 Logger: zerolog.New(os.Stderr), 1954 PeerManagerDependencies: NewDependencyList(), 1955 ConfigManager: updatable_configs.NewManager(), 1956 }, 1957 flags: pflag.CommandLine, 1958 adminCommandBootstrapper: admin.NewCommandRunnerBootstrapper(), 1959 adminCommands: make(map[string]func(*NodeConfig) commands.AdminCommand), 1960 componentBuilder: component.NewComponentManagerBuilder(), 1961 } 1962 return builder 1963 } 1964 1965 func (fnb *FlowNodeBuilder) Initialize() error { 1966 fnb.PrintBuildVersionDetails() 1967 1968 fnb.BaseFlags() 1969 1970 if err := fnb.ParseAndPrintFlags(); err != nil { 1971 return err 1972 } 1973 1974 // ID providers must be initialized before the network 1975 fnb.InitIDProviders() 1976 1977 fnb.EnqueueResolver() 1978 1979 fnb.EnqueueNetworkInit() 1980 1981 fnb.EnqueuePingService() 1982 1983 if fnb.MetricsEnabled { 1984 fnb.EnqueueMetricsServerInit() 1985 if err := fnb.RegisterBadgerMetrics(); err != nil { 1986 return err 1987 } 1988 } 1989 1990 fnb.EnqueueTracer() 1991 1992 return nil 1993 } 1994 1995 func (fnb *FlowNodeBuilder) RegisterDefaultAdminCommands() { 1996 fnb.AdminCommand("set-log-level", func(config *NodeConfig) commands.AdminCommand { 1997 return &common.SetLogLevelCommand{} 1998 }).AdminCommand("set-golog-level", func(config *NodeConfig) commands.AdminCommand { 1999 return &common.SetGologLevelCommand{} 2000 }).AdminCommand("get-config", func(config *NodeConfig) commands.AdminCommand { 2001 return common.NewGetConfigCommand(config.ConfigManager) 2002 }).AdminCommand("set-config", func(config *NodeConfig) commands.AdminCommand { 2003 return common.NewSetConfigCommand(config.ConfigManager) 2004 }).AdminCommand("list-configs", func(config *NodeConfig) 
commands.AdminCommand { 2005 return common.NewListConfigCommand(config.ConfigManager) 2006 }).AdminCommand("read-blocks", func(config *NodeConfig) commands.AdminCommand { 2007 return storageCommands.NewReadBlocksCommand(config.State, config.Storage.Blocks) 2008 }).AdminCommand("read-range-blocks", func(conf *NodeConfig) commands.AdminCommand { 2009 return storageCommands.NewReadRangeBlocksCommand(conf.Storage.Blocks) 2010 }).AdminCommand("read-results", func(config *NodeConfig) commands.AdminCommand { 2011 return storageCommands.NewReadResultsCommand(config.State, config.Storage.Results) 2012 }).AdminCommand("read-seals", func(config *NodeConfig) commands.AdminCommand { 2013 return storageCommands.NewReadSealsCommand(config.State, config.Storage.Seals, config.Storage.Index) 2014 }).AdminCommand("get-latest-identity", func(config *NodeConfig) commands.AdminCommand { 2015 return common.NewGetIdentityCommand(config.IdentityProvider) 2016 }) 2017 } 2018 2019 func (fnb *FlowNodeBuilder) Build() (Node, error) { 2020 // Run the prestart initialization. This includes anything that should be done before 2021 // starting the components. 
2022 if err := fnb.onStart(); err != nil { 2023 return nil, err 2024 } 2025 2026 return NewNode( 2027 fnb.componentBuilder.Build(), 2028 fnb.NodeConfig, 2029 fnb.Logger, 2030 fnb.postShutdown, 2031 fnb.handleFatal, 2032 ), nil 2033 } 2034 2035 func (fnb *FlowNodeBuilder) onStart() error { 2036 // init nodeinfo by reading the private bootstrap file if not already set 2037 if fnb.NodeID == flow.ZeroID { 2038 if err := fnb.initNodeInfo(); err != nil { 2039 return err 2040 } 2041 } 2042 2043 if err := fnb.initLogger(); err != nil { 2044 return err 2045 } 2046 2047 if err := fnb.initDB(); err != nil { 2048 return err 2049 } 2050 2051 if err := fnb.initSecretsDB(); err != nil { 2052 return err 2053 } 2054 2055 if err := fnb.initMetrics(); err != nil { 2056 return err 2057 } 2058 2059 if err := fnb.initStorage(); err != nil { 2060 return err 2061 } 2062 2063 for _, f := range fnb.preInitFns { 2064 if err := fnb.handlePreInit(f); err != nil { 2065 return err 2066 } 2067 } 2068 2069 if err := fnb.initState(); err != nil { 2070 return err 2071 } 2072 2073 if err := fnb.initProfiler(); err != nil { 2074 return err 2075 } 2076 2077 fnb.initFvmOptions() 2078 2079 for _, f := range fnb.postInitFns { 2080 if err := fnb.handlePostInit(f); err != nil { 2081 return err 2082 } 2083 } 2084 2085 if err := fnb.EnqueueAdminServerInit(); err != nil { 2086 return err 2087 } 2088 2089 // run all modules 2090 if err := fnb.handleModules(); err != nil { 2091 return fmt.Errorf("could not handle modules: %w", err) 2092 } 2093 2094 // run all components 2095 return fnb.handleComponents() 2096 } 2097 2098 // postShutdown is called by the node before exiting 2099 // put any cleanup code here that should be run after all components have stopped 2100 func (fnb *FlowNodeBuilder) postShutdown() error { 2101 var errs *multierror.Error 2102 2103 for _, fn := range fnb.postShutdownFns { 2104 err := fn() 2105 if err != nil { 2106 errs = multierror.Append(errs, err) 2107 } 2108 } 2109 
fnb.Logger.Info().Msg("database has been closed") 2110 return errs.ErrorOrNil() 2111 } 2112 2113 // handleFatal handles irrecoverable errors by logging them and exiting the process. 2114 func (fnb *FlowNodeBuilder) handleFatal(err error) { 2115 fnb.Logger.Fatal().Err(err).Msg("unhandled irrecoverable error") 2116 } 2117 2118 func (fnb *FlowNodeBuilder) handlePreInit(f BuilderFunc) error { 2119 return f(fnb.NodeConfig) 2120 } 2121 2122 func (fnb *FlowNodeBuilder) handlePostInit(f BuilderFunc) error { 2123 return f(fnb.NodeConfig) 2124 } 2125 2126 func (fnb *FlowNodeBuilder) extraFlagsValidation() error { 2127 if fnb.extraFlagCheck != nil { 2128 err := fnb.extraFlagCheck() 2129 if err != nil { 2130 return fmt.Errorf("invalid flags: %w", err) 2131 } 2132 } 2133 return nil 2134 } 2135 2136 // DhtSystemActivationStatus parses the given role string and returns the corresponding DHT system activation status. 2137 // Args: 2138 // - roleStr: the role string to parse. 2139 // Returns: 2140 // - DhtSystemActivation: the corresponding DHT system activation status. 2141 // - error: if the role string is invalid, returns an error. 2142 func DhtSystemActivationStatus(roleStr string) (p2pbuilder.DhtSystemActivation, error) { 2143 if roleStr == "ghost" { 2144 // ghost node is not a valid role, so we don't need to parse it 2145 return p2pbuilder.DhtSystemDisabled, nil 2146 } 2147 2148 role, err := flow.ParseRole(roleStr) 2149 if err != nil && roleStr != "ghost" { 2150 // ghost role is not a valid role, so we don't need to parse it 2151 return p2pbuilder.DhtSystemDisabled, fmt.Errorf("could not parse node role: %w", err) 2152 } 2153 if role == flow.RoleAccess || role == flow.RoleExecution { 2154 // Only access and execution nodes need to run DHT; 2155 // Access nodes and execution nodes need DHT to run a blob service. 2156 // Moreover, access nodes run a DHT to let un-staked (public) access nodes find each other on the public network. 
2157 return p2pbuilder.DhtSystemEnabled, nil 2158 } 2159 2160 return p2pbuilder.DhtSystemDisabled, nil 2161 }