github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/cmd/scaffold.go (about) 1 package cmd 2 3 import ( 4 "context" 5 "crypto/tls" 6 "crypto/x509" 7 "errors" 8 "fmt" 9 "os" 10 "runtime" 11 "strings" 12 "time" 13 14 gcemd "cloud.google.com/go/compute/metadata" 15 "github.com/dgraph-io/badger/v2" 16 "github.com/hashicorp/go-multierror" 17 dht "github.com/libp2p/go-libp2p-kad-dht" 18 "github.com/libp2p/go-libp2p/core/host" 19 "github.com/libp2p/go-libp2p/core/peer" 20 "github.com/libp2p/go-libp2p/core/routing" 21 "github.com/prometheus/client_golang/prometheus" 22 "github.com/rs/zerolog" 23 "github.com/spf13/pflag" 24 "golang.org/x/time/rate" 25 "google.golang.org/api/option" 26 27 "github.com/onflow/crypto" 28 29 "github.com/onflow/flow-go/admin" 30 "github.com/onflow/flow-go/admin/commands" 31 "github.com/onflow/flow-go/admin/commands/common" 32 storageCommands "github.com/onflow/flow-go/admin/commands/storage" 33 "github.com/onflow/flow-go/cmd/build" 34 "github.com/onflow/flow-go/config" 35 "github.com/onflow/flow-go/consensus/hotstuff/persister" 36 "github.com/onflow/flow-go/fvm" 37 "github.com/onflow/flow-go/fvm/environment" 38 "github.com/onflow/flow-go/model/flow" 39 "github.com/onflow/flow-go/model/flow/filter" 40 "github.com/onflow/flow-go/module" 41 "github.com/onflow/flow-go/module/chainsync" 42 "github.com/onflow/flow-go/module/compliance" 43 "github.com/onflow/flow-go/module/component" 44 "github.com/onflow/flow-go/module/id" 45 "github.com/onflow/flow-go/module/irrecoverable" 46 "github.com/onflow/flow-go/module/local" 47 "github.com/onflow/flow-go/module/mempool/herocache" 48 "github.com/onflow/flow-go/module/metrics" 49 "github.com/onflow/flow-go/module/profiler" 50 "github.com/onflow/flow-go/module/trace" 51 "github.com/onflow/flow-go/module/updatable_configs" 52 "github.com/onflow/flow-go/module/util" 53 "github.com/onflow/flow-go/network" 54 alspmgr "github.com/onflow/flow-go/network/alsp/manager" 55 netcache 
"github.com/onflow/flow-go/network/cache" 56 "github.com/onflow/flow-go/network/channels" 57 "github.com/onflow/flow-go/network/converter" 58 "github.com/onflow/flow-go/network/p2p" 59 p2pbuilder "github.com/onflow/flow-go/network/p2p/builder" 60 p2pbuilderconfig "github.com/onflow/flow-go/network/p2p/builder/config" 61 "github.com/onflow/flow-go/network/p2p/cache" 62 "github.com/onflow/flow-go/network/p2p/conduit" 63 "github.com/onflow/flow-go/network/p2p/connection" 64 p2pdht "github.com/onflow/flow-go/network/p2p/dht" 65 "github.com/onflow/flow-go/network/p2p/dns" 66 "github.com/onflow/flow-go/network/p2p/keyutils" 67 "github.com/onflow/flow-go/network/p2p/ping" 68 "github.com/onflow/flow-go/network/p2p/subscription" 69 "github.com/onflow/flow-go/network/p2p/translator" 70 "github.com/onflow/flow-go/network/p2p/unicast/protocols" 71 "github.com/onflow/flow-go/network/p2p/unicast/ratelimit" 72 "github.com/onflow/flow-go/network/p2p/utils" 73 "github.com/onflow/flow-go/network/p2p/utils/ratelimiter" 74 "github.com/onflow/flow-go/network/slashing" 75 "github.com/onflow/flow-go/network/topology" 76 "github.com/onflow/flow-go/network/underlay" 77 "github.com/onflow/flow-go/state/protocol" 78 badgerState "github.com/onflow/flow-go/state/protocol/badger" 79 "github.com/onflow/flow-go/state/protocol/events" 80 "github.com/onflow/flow-go/state/protocol/events/gadgets" 81 "github.com/onflow/flow-go/storage" 82 bstorage "github.com/onflow/flow-go/storage/badger" 83 "github.com/onflow/flow-go/storage/badger/operation" 84 sutil "github.com/onflow/flow-go/storage/util" 85 "github.com/onflow/flow-go/utils/logging" 86 ) 87 88 const ( 89 NetworkComponent = "network" 90 ConduitFactoryComponent = "conduit-factory" 91 LibP2PNodeComponent = "libp2p-node" 92 ) 93 94 type Metrics struct { 95 Network module.NetworkMetrics 96 Engine module.EngineMetrics 97 Compliance module.ComplianceMetrics 98 Cache module.CacheMetrics 99 Mempool module.MempoolMetrics 100 CleanCollector 
module.CleanerMetrics 101 Bitswap module.BitswapMetrics 102 } 103 104 type Storage = storage.All 105 106 type namedModuleFunc struct { 107 fn BuilderFunc 108 name string 109 } 110 111 type namedComponentFunc struct { 112 fn ReadyDoneFactory 113 name string 114 115 errorHandler component.OnError 116 dependencies *DependencyList 117 } 118 119 // FlowNodeBuilder is the default builder struct used for all flow nodes 120 // It runs a node process with following structure, in sequential order 121 // Base inits (network, storage, state, logger) 122 // PostInit handlers, if any 123 // Components handlers, if any, wait sequentially 124 // Run() <- main loop 125 // Components destructors, if any 126 // The initialization can be proceeded and succeeded with PreInit and PostInit functions that allow customization 127 // of the process in case of nodes such as the unstaked access node where the NodeInfo is not part of the genesis data 128 type FlowNodeBuilder struct { 129 *NodeConfig 130 flags *pflag.FlagSet 131 modules []namedModuleFunc 132 components []namedComponentFunc 133 postShutdownFns []func() error 134 preInitFns []BuilderFunc 135 postInitFns []BuilderFunc 136 extraRootSnapshotCheck func(protocol.Snapshot) error 137 extraFlagCheck func() error 138 adminCommandBootstrapper *admin.CommandRunnerBootstrapper 139 adminCommands map[string]func(config *NodeConfig) commands.AdminCommand 140 componentBuilder component.ComponentManagerBuilder 141 bootstrapNodeAddresses []string 142 bootstrapNodePublicKeys []string 143 } 144 145 var _ NodeBuilder = (*FlowNodeBuilder)(nil) 146 147 func (fnb *FlowNodeBuilder) BaseFlags() { 148 defaultFlowConfig, err := config.DefaultConfig() 149 if err != nil { 150 fnb.Logger.Fatal().Err(err).Msg("failed to initialize flow config") 151 } 152 153 // initialize pflag set for Flow node 154 config.InitializePFlagSet(fnb.flags, defaultFlowConfig) 155 156 defaultConfig := DefaultBaseConfig() 157 158 // bind configuration parameters 159 
fnb.flags.StringVar(&fnb.BaseConfig.nodeIDHex, "nodeid", defaultConfig.nodeIDHex, "identity of our node") 160 fnb.flags.StringVar(&fnb.BaseConfig.BindAddr, "bind", defaultConfig.BindAddr, "address to bind on") 161 fnb.flags.StringVarP(&fnb.BaseConfig.BootstrapDir, "bootstrapdir", "b", defaultConfig.BootstrapDir, "path to the bootstrap directory") 162 fnb.flags.StringVarP(&fnb.BaseConfig.datadir, "datadir", "d", defaultConfig.datadir, "directory to store the public database (protocol state)") 163 fnb.flags.StringVar(&fnb.BaseConfig.secretsdir, "secretsdir", defaultConfig.secretsdir, "directory to store private database (secrets)") 164 fnb.flags.StringVarP(&fnb.BaseConfig.level, "loglevel", "l", defaultConfig.level, "level for logging output") 165 fnb.flags.Uint32Var(&fnb.BaseConfig.debugLogLimit, "debug-log-limit", defaultConfig.debugLogLimit, "max number of debug/trace log events per second") 166 fnb.flags.UintVarP(&fnb.BaseConfig.metricsPort, "metricport", "m", defaultConfig.metricsPort, "port for /metrics endpoint") 167 fnb.flags.BoolVar(&fnb.BaseConfig.profilerConfig.Enabled, "profiler-enabled", defaultConfig.profilerConfig.Enabled, "whether to enable the auto-profiler") 168 fnb.flags.BoolVar(&fnb.BaseConfig.profilerConfig.UploaderEnabled, "profile-uploader-enabled", defaultConfig.profilerConfig.UploaderEnabled, 169 "whether to enable automatic profile upload to Google Cloud Profiler. "+ 170 "For autoupload to work forllowing should be true: "+ 171 "1) both -profiler-enabled=true and -profile-uploader-enabled=true need to be set. "+ 172 "2) node is running in GCE. "+ 173 "3) server or user has https://www.googleapis.com/auth/monitoring.write scope. 
") 174 fnb.flags.StringVar(&fnb.BaseConfig.profilerConfig.Dir, "profiler-dir", defaultConfig.profilerConfig.Dir, "directory to create auto-profiler profiles") 175 fnb.flags.DurationVar(&fnb.BaseConfig.profilerConfig.Interval, "profiler-interval", defaultConfig.profilerConfig.Interval, 176 "the interval between auto-profiler runs") 177 fnb.flags.DurationVar(&fnb.BaseConfig.profilerConfig.Duration, "profiler-duration", defaultConfig.profilerConfig.Duration, 178 "the duration to run the auto-profile for") 179 180 fnb.flags.BoolVar(&fnb.BaseConfig.tracerEnabled, "tracer-enabled", defaultConfig.tracerEnabled, 181 "whether to enable tracer") 182 fnb.flags.UintVar(&fnb.BaseConfig.tracerSensitivity, "tracer-sensitivity", defaultConfig.tracerSensitivity, 183 "adjusts the level of sampling when tracing is enabled. 0 means capture everything, higher value results in less samples") 184 185 fnb.flags.StringVar(&fnb.BaseConfig.AdminAddr, "admin-addr", defaultConfig.AdminAddr, "address to bind on for admin HTTP server") 186 fnb.flags.StringVar(&fnb.BaseConfig.AdminCert, "admin-cert", defaultConfig.AdminCert, "admin cert file (for TLS)") 187 fnb.flags.StringVar(&fnb.BaseConfig.AdminKey, "admin-key", defaultConfig.AdminKey, "admin key file (for TLS)") 188 fnb.flags.StringVar(&fnb.BaseConfig.AdminClientCAs, "admin-client-certs", defaultConfig.AdminClientCAs, "admin client certs (for mutual TLS)") 189 fnb.flags.UintVar(&fnb.BaseConfig.AdminMaxMsgSize, "admin-max-response-size", defaultConfig.AdminMaxMsgSize, "admin server max response size in bytes") 190 191 fnb.flags.UintVar(&fnb.BaseConfig.guaranteesCacheSize, "guarantees-cache-size", bstorage.DefaultCacheSize, "collection guarantees cache size") 192 fnb.flags.UintVar(&fnb.BaseConfig.receiptsCacheSize, "receipts-cache-size", bstorage.DefaultCacheSize, "receipts cache size") 193 194 fnb.flags.BoolVar(&fnb.BaseConfig.DhtSystemEnabled, 195 "dht-enabled", 196 defaultConfig.DhtSystemEnabled, 197 "[experimental] whether to enable dht 
system. This is an experimental feature. Use with caution.") 198 fnb.flags.BoolVar(&fnb.BaseConfig.BitswapReprovideEnabled, 199 "bitswap-reprovide-enabled", 200 defaultConfig.BitswapReprovideEnabled, 201 "[experimental] whether to enable bitswap reproviding. This is an experimental feature. Use with caution.") 202 203 // dynamic node startup flags 204 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupANPubkey, 205 "dynamic-startup-access-publickey", 206 "", 207 "the public key of the trusted secure access node to connect to when using dynamic-startup, this access node must be staked") 208 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupANAddress, 209 "dynamic-startup-access-address", 210 "", 211 "the access address of the trusted secure access node to connect to when using dynamic-startup, this access node must be staked") 212 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupEpochPhase, 213 "dynamic-startup-epoch-phase", 214 "EpochPhaseSetup", 215 "the target epoch phase for dynamic startup <EpochPhaseStaking|EpochPhaseSetup|EpochPhaseCommitted") 216 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupEpoch, 217 "dynamic-startup-epoch", 218 "current", 219 "the target epoch for dynamic-startup, use \"current\" to start node in the current epoch") 220 fnb.flags.DurationVar(&fnb.BaseConfig.DynamicStartupSleepInterval, 221 "dynamic-startup-sleep-interval", 222 time.Minute, 223 "the interval in which the node will check if it can start") 224 225 fnb.flags.BoolVar(&fnb.BaseConfig.InsecureSecretsDB, "insecure-secrets-db", false, "allow the node to start up without an secrets DB encryption key") 226 fnb.flags.BoolVar(&fnb.BaseConfig.HeroCacheMetricsEnable, "herocache-metrics-collector", false, "enables herocache metrics collection") 227 228 // sync core flags 229 fnb.flags.DurationVar(&fnb.BaseConfig.SyncCoreConfig.RetryInterval, 230 "sync-retry-interval", 231 defaultConfig.SyncCoreConfig.RetryInterval, 232 "the initial interval before we retry a sync request, uses 
exponential backoff") 233 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.Tolerance, 234 "sync-tolerance", 235 defaultConfig.SyncCoreConfig.Tolerance, 236 "determines how big of a difference in block heights we tolerate before actively syncing with range requests") 237 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.MaxAttempts, 238 "sync-max-attempts", 239 defaultConfig.SyncCoreConfig.MaxAttempts, 240 "the maximum number of attempts we make for each requested block/height before discarding") 241 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.MaxSize, 242 "sync-max-size", 243 defaultConfig.SyncCoreConfig.MaxSize, 244 "the maximum number of blocks we request in the same block request message") 245 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.MaxRequests, 246 "sync-max-requests", 247 defaultConfig.SyncCoreConfig.MaxRequests, 248 "the maximum number of requests we send during each scanning period") 249 250 fnb.flags.Uint64Var(&fnb.BaseConfig.ComplianceConfig.SkipNewProposalsThreshold, 251 "compliance-skip-proposals-threshold", 252 defaultConfig.ComplianceConfig.SkipNewProposalsThreshold, 253 "threshold at which new proposals are discarded rather than cached, if their height is this much above local finalized height") 254 255 // observer mode allows a unstaked execution node to fetch blocks from a public staked access node, and being able to execute blocks 256 fnb.flags.BoolVar(&fnb.BaseConfig.ObserverMode, "observer-mode", defaultConfig.ObserverMode, "whether the node is running in observer mode") 257 fnb.flags.StringSliceVar(&fnb.bootstrapNodePublicKeys, 258 "observer-mode-bootstrap-node-public-keys", 259 nil, 260 "the networking public key of the bootstrap access node if this is an observer (in the same order as the bootstrap node addresses) e.g. 
\"d57a5e9c5.....\",\"44ded42d....\"") 261 fnb.flags.StringSliceVar(&fnb.bootstrapNodeAddresses, 262 "observer-mode-bootstrap-node-addresses", 263 nil, 264 "the network addresses of the bootstrap access node if this is an observer e.g. access-001.mainnet.flow.org:9653,access-002.mainnet.flow.org:9653") 265 } 266 267 func (fnb *FlowNodeBuilder) EnqueuePingService() { 268 fnb.Component("ping service", func(node *NodeConfig) (module.ReadyDoneAware, error) { 269 pingLibP2PProtocolID := protocols.PingProtocolId(node.SporkID) 270 271 // setup the Ping provider to return the software version and the sealed block height 272 pingInfoProvider := &ping.InfoProvider{ 273 SoftwareVersionFun: func() string { 274 return build.Version() 275 }, 276 SealedBlockHeightFun: func() (uint64, error) { 277 head, err := node.State.Sealed().Head() 278 if err != nil { 279 return 0, err 280 } 281 return head.Height, nil 282 }, 283 HotstuffViewFun: func() (uint64, error) { 284 return 0, fmt.Errorf("hotstuff view reporting disabled") 285 }, 286 } 287 288 // only consensus roles will need to report hotstuff view 289 if fnb.BaseConfig.NodeRole == flow.RoleConsensus.String() { 290 // initialize the persister 291 persist := persister.New(node.DB, node.RootChainID) 292 293 pingInfoProvider.HotstuffViewFun = func() (uint64, error) { 294 livenessData, err := persist.GetLivenessData() 295 if err != nil { 296 return 0, err 297 } 298 299 return livenessData.CurrentView, nil 300 } 301 } 302 303 pingService, err := node.EngineRegistry.RegisterPingService(pingLibP2PProtocolID, pingInfoProvider) 304 305 node.PingService = pingService 306 307 return &module.NoopReadyDoneAware{}, err 308 }) 309 } 310 311 func (fnb *FlowNodeBuilder) EnqueueResolver() { 312 fnb.Component("resolver", func(node *NodeConfig) (module.ReadyDoneAware, error) { 313 var dnsIpCacheMetricsCollector module.HeroCacheMetrics = metrics.NewNoopCollector() 314 var dnsTxtCacheMetricsCollector module.HeroCacheMetrics = metrics.NewNoopCollector() 
315 if fnb.HeroCacheMetricsEnable { 316 dnsIpCacheMetricsCollector = metrics.NetworkDnsIpCacheMetricsFactory(fnb.MetricsRegisterer) 317 dnsTxtCacheMetricsCollector = metrics.NetworkDnsTxtCacheMetricsFactory(fnb.MetricsRegisterer) 318 } 319 320 cache := herocache.NewDNSCache( 321 dns.DefaultCacheSize, 322 node.Logger, 323 dnsIpCacheMetricsCollector, 324 dnsTxtCacheMetricsCollector, 325 ) 326 327 resolver := dns.NewResolver( 328 node.Logger, 329 fnb.Metrics.Network, 330 cache, 331 dns.WithTTL(fnb.BaseConfig.FlowConfig.NetworkConfig.DNSCacheTTL)) 332 333 fnb.Resolver = resolver 334 return resolver, nil 335 }) 336 } 337 338 func (fnb *FlowNodeBuilder) EnqueueNetworkInit() { 339 connGaterPeerDialFilters := make([]p2p.PeerFilter, 0) 340 connGaterInterceptSecureFilters := make([]p2p.PeerFilter, 0) 341 peerManagerFilters := make([]p2p.PeerFilter, 0) 342 343 fnb.UnicastRateLimiterDistributor = ratelimit.NewUnicastRateLimiterDistributor() 344 fnb.UnicastRateLimiterDistributor.AddConsumer(fnb.Metrics.Network) 345 346 // setup default rate limiter options 347 unicastRateLimiterOpts := []ratelimit.RateLimitersOption{ 348 ratelimit.WithDisabledRateLimiting(fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.DryRun), 349 ratelimit.WithNotifier(fnb.UnicastRateLimiterDistributor), 350 } 351 352 // override noop unicast message rate limiter 353 if fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.MessageRateLimit > 0 { 354 unicastMessageRateLimiter := ratelimiter.NewRateLimiter( 355 rate.Limit(fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.MessageRateLimit), 356 fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.MessageRateLimit, 357 fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.LockoutDuration, 358 ) 359 unicastRateLimiterOpts = append(unicastRateLimiterOpts, ratelimit.WithMessageRateLimiter(unicastMessageRateLimiter)) 360 361 // avoid connection gating and pruning during dry run 362 if 
!fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.DryRun { 363 f := rateLimiterPeerFilter(unicastMessageRateLimiter) 364 // add IsRateLimited peerFilters to conn gater intercept secure peer and peer manager filters list 365 // don't allow rate limited peers to establishing incoming connections 366 connGaterInterceptSecureFilters = append(connGaterInterceptSecureFilters, f) 367 // don't create outbound connections to rate limited peers 368 peerManagerFilters = append(peerManagerFilters, f) 369 } 370 } 371 372 // override noop unicast bandwidth rate limiter 373 if fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.BandwidthRateLimit > 0 && fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.BandwidthBurstLimit > 0 { 374 unicastBandwidthRateLimiter := ratelimit.NewBandWidthRateLimiter( 375 rate.Limit(fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.BandwidthRateLimit), 376 fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.BandwidthBurstLimit, 377 fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.LockoutDuration, 378 ) 379 unicastRateLimiterOpts = append(unicastRateLimiterOpts, ratelimit.WithBandwidthRateLimiter(unicastBandwidthRateLimiter)) 380 381 // avoid connection gating and pruning during dry run 382 if !fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast.RateLimiter.DryRun { 383 f := rateLimiterPeerFilter(unicastBandwidthRateLimiter) 384 // add IsRateLimited peerFilters to conn gater intercept secure peer and peer manager filters list 385 connGaterInterceptSecureFilters = append(connGaterInterceptSecureFilters, f) 386 peerManagerFilters = append(peerManagerFilters, f) 387 } 388 } 389 390 // setup unicast rate limiters 391 unicastRateLimiters := ratelimit.NewRateLimiters(unicastRateLimiterOpts...) 
392 393 uniCfg := &p2pbuilderconfig.UnicastConfig{ 394 Unicast: fnb.BaseConfig.FlowConfig.NetworkConfig.Unicast, 395 RateLimiterDistributor: fnb.UnicastRateLimiterDistributor, 396 } 397 398 connGaterCfg := &p2pbuilderconfig.ConnectionGaterConfig{ 399 InterceptPeerDialFilters: connGaterPeerDialFilters, 400 InterceptSecuredFilters: connGaterInterceptSecureFilters, 401 } 402 403 peerManagerCfg := &p2pbuilderconfig.PeerManagerConfig{ 404 ConnectionPruning: fnb.FlowConfig.NetworkConfig.NetworkConnectionPruning, 405 UpdateInterval: fnb.FlowConfig.NetworkConfig.PeerUpdateInterval, 406 ConnectorFactory: connection.DefaultLibp2pBackoffConnectorFactory(), 407 } 408 409 fnb.Component(LibP2PNodeComponent, func(node *NodeConfig) (module.ReadyDoneAware, error) { 410 myAddr := fnb.NodeConfig.Me.Address() 411 if fnb.BaseConfig.BindAddr != NotSet { 412 myAddr = fnb.BaseConfig.BindAddr 413 } 414 415 if fnb.ObserverMode { 416 // observer mode only init pulbic libp2p node 417 publicLibp2pNode, err := fnb.BuildPublicLibp2pNode(myAddr) 418 if err != nil { 419 return nil, fmt.Errorf("could not build public libp2p node: %w", err) 420 } 421 fnb.LibP2PNode = publicLibp2pNode 422 423 return publicLibp2pNode, nil 424 } 425 426 dhtActivationStatus, err := DhtSystemActivationStatus(fnb.NodeRole, fnb.DhtSystemEnabled) 427 if err != nil { 428 return nil, fmt.Errorf("could not determine dht activation status: %w", err) 429 } 430 builder, err := p2pbuilder.DefaultNodeBuilder(fnb.Logger, 431 myAddr, 432 network.PrivateNetwork, 433 fnb.NetworkKey, 434 fnb.SporkID, 435 fnb.IdentityProvider, 436 &p2pbuilderconfig.MetricsConfig{ 437 Metrics: fnb.Metrics.Network, 438 HeroCacheFactory: fnb.HeroCacheMetricsFactory(), 439 }, 440 fnb.Resolver, 441 fnb.BaseConfig.NodeRole, 442 connGaterCfg, 443 peerManagerCfg, 444 &fnb.FlowConfig.NetworkConfig.GossipSub, 445 &fnb.FlowConfig.NetworkConfig.ResourceManager, 446 uniCfg, 447 &fnb.FlowConfig.NetworkConfig.ConnectionManager, 448 &p2p.DisallowListCacheConfig{ 449 
MaxSize: fnb.FlowConfig.NetworkConfig.DisallowListNotificationCacheSize, 450 Metrics: metrics.DisallowListCacheMetricsFactory(fnb.HeroCacheMetricsFactory(), network.PrivateNetwork), 451 }, 452 dhtActivationStatus) 453 if err != nil { 454 return nil, fmt.Errorf("could not create libp2p node builder: %w", err) 455 } 456 457 libp2pNode, err := builder.Build() 458 if err != nil { 459 return nil, fmt.Errorf("could not build libp2p node: %w", err) 460 } 461 462 fnb.LibP2PNode = libp2pNode 463 return libp2pNode, nil 464 }) 465 fnb.Component(NetworkComponent, func(node *NodeConfig) (module.ReadyDoneAware, error) { 466 fnb.Logger.Info().Hex("node_id", logging.ID(fnb.NodeID)).Msg("default conduit factory initiated") 467 return fnb.InitFlowNetworkWithConduitFactory( 468 node, 469 conduit.NewDefaultConduitFactory(), 470 unicastRateLimiters, 471 peerManagerFilters) 472 }) 473 474 fnb.Module("network underlay dependency", func(node *NodeConfig) error { 475 fnb.networkUnderlayDependable = module.NewProxiedReadyDoneAware() 476 fnb.PeerManagerDependencies.Add(fnb.networkUnderlayDependable) 477 return nil 478 }) 479 480 // peer manager won't be created until all PeerManagerDependencies are ready. 481 if !fnb.ObserverMode { 482 fnb.DependableComponent("peer manager", func(node *NodeConfig) (module.ReadyDoneAware, error) { 483 return fnb.LibP2PNode.PeerManagerComponent(), nil 484 }, fnb.PeerManagerDependencies) 485 } 486 } 487 488 // HeroCacheMetricsFactory returns a HeroCacheMetricsFactory based on the MetricsEnabled flag. 489 // If MetricsEnabled is true, it returns a HeroCacheMetricsFactory that will register metrics with the provided MetricsRegisterer. 490 // If MetricsEnabled is false, it returns a no-op HeroCacheMetricsFactory that will not register any metrics. 
491 func (fnb *FlowNodeBuilder) HeroCacheMetricsFactory() metrics.HeroCacheMetricsFactory { 492 if fnb.MetricsEnabled { 493 return metrics.NewHeroCacheMetricsFactory(fnb.MetricsRegisterer) 494 } 495 return metrics.NewNoopHeroCacheMetricsFactory() 496 } 497 498 // initPublicLibp2pNode creates a libp2p node for the observer service in the public (unstaked) network. 499 // The factory function is later passed into the initMiddleware function to eventually instantiate the p2p.LibP2PNode instance 500 // The LibP2P host is created with the following options: 501 // * DHT as client and seeded with the given bootstrap peers 502 // * The specified bind address as the listen address 503 // * The passed in private key as the libp2p key 504 // * No connection gater 505 // * No connection manager 506 // * No peer manager 507 // * Default libp2p pubsub options. 508 // Args: 509 // - networkKey: the private key to use for the libp2p node 510 // Returns: 511 // - p2p.LibP2PNode: the libp2p node 512 // - error: if any error occurs. Any error returned is considered irrecoverable. 
513 func (fnb *FlowNodeBuilder) BuildPublicLibp2pNode(address string) (p2p.LibP2PNode, error) { 514 var pis []peer.AddrInfo 515 516 ids, err := BootstrapIdentities(fnb.bootstrapNodeAddresses, fnb.bootstrapNodePublicKeys) 517 if err != nil { 518 return nil, fmt.Errorf("could not create bootstrap identities: %w", err) 519 } 520 521 for _, b := range ids { 522 pi, err := utils.PeerAddressInfo(*b) 523 if err != nil { 524 return nil, fmt.Errorf("could not extract peer address info from bootstrap identity %v: %w", b, err) 525 } 526 527 pis = append(pis, pi) 528 } 529 530 for _, b := range ids { 531 pi, err := utils.PeerAddressInfo(*b) 532 if err != nil { 533 return nil, fmt.Errorf("could not extract peer address info from bootstrap identity %v: %w", b, err) 534 } 535 536 pis = append(pis, pi) 537 } 538 539 node, err := p2pbuilder.NewNodeBuilder( 540 fnb.Logger, 541 &fnb.FlowConfig.NetworkConfig.GossipSub, 542 &p2pbuilderconfig.MetricsConfig{ 543 HeroCacheFactory: fnb.HeroCacheMetricsFactory(), 544 Metrics: fnb.Metrics.Network, 545 }, 546 network.PublicNetwork, 547 address, 548 fnb.NetworkKey, 549 fnb.SporkID, 550 fnb.IdentityProvider, 551 &fnb.FlowConfig.NetworkConfig.ResourceManager, 552 p2pbuilderconfig.PeerManagerDisableConfig(), // disable peer manager for observer node. 553 &p2p.DisallowListCacheConfig{ 554 MaxSize: fnb.FlowConfig.NetworkConfig.DisallowListNotificationCacheSize, 555 Metrics: metrics.DisallowListCacheMetricsFactory(fnb.HeroCacheMetricsFactory(), network.PublicNetwork), 556 }, 557 &p2pbuilderconfig.UnicastConfig{ 558 Unicast: fnb.FlowConfig.NetworkConfig.Unicast, 559 }). 560 SetSubscriptionFilter( 561 subscription.NewRoleBasedFilter( 562 subscription.UnstakedRole, fnb.IdentityProvider, 563 ), 564 ). 
565 SetRoutingSystem(func(ctx context.Context, h host.Host) (routing.Routing, error) { 566 return p2pdht.NewDHT(ctx, h, protocols.FlowPublicDHTProtocolID(fnb.SporkID), 567 fnb.Logger, 568 fnb.Metrics.Network, 569 p2pdht.AsClient(), 570 dht.BootstrapPeers(pis...), 571 ) 572 }). 573 Build() 574 575 if err != nil { 576 return nil, fmt.Errorf("could not initialize libp2p node for observer: %w", err) 577 } 578 return node, nil 579 } 580 581 func (fnb *FlowNodeBuilder) InitFlowNetworkWithConduitFactory( 582 node *NodeConfig, 583 cf network.ConduitFactory, 584 unicastRateLimiters *ratelimit.RateLimiters, 585 peerManagerFilters []p2p.PeerFilter) (network.EngineRegistry, error) { 586 587 var networkOptions []underlay.NetworkOption 588 if len(fnb.MsgValidators) > 0 { 589 networkOptions = append(networkOptions, underlay.WithMessageValidators(fnb.MsgValidators...)) 590 } 591 592 // by default if no rate limiter configuration was provided in the CLI args the default 593 // noop rate limiter will be used. 594 networkOptions = append(networkOptions, underlay.WithUnicastRateLimiters(unicastRateLimiters)) 595 596 networkOptions = append(networkOptions, 597 underlay.WithPreferredUnicastProtocols(protocols.ToProtocolNames(fnb.FlowConfig.NetworkConfig.PreferredUnicastProtocols)...), 598 ) 599 600 // peerManagerFilters are used by the peerManager via the network to filter peers from the topology. 
601 if len(peerManagerFilters) > 0 { 602 networkOptions = append(networkOptions, underlay.WithPeerManagerFilters(peerManagerFilters...)) 603 } 604 605 receiveCache := netcache.NewHeroReceiveCache(fnb.FlowConfig.NetworkConfig.NetworkReceivedMessageCacheSize, 606 fnb.Logger, 607 metrics.NetworkReceiveCacheMetricsFactory(fnb.HeroCacheMetricsFactory(), network.PrivateNetwork)) 608 609 err := node.Metrics.Mempool.Register(metrics.ResourceNetworkingReceiveCache, receiveCache.Size) 610 if err != nil { 611 return nil, fmt.Errorf("could not register networking receive cache metric: %w", err) 612 } 613 614 networkType := network.PrivateNetwork 615 if fnb.ObserverMode { 616 // observer mode uses public network 617 networkType = network.PublicNetwork 618 } 619 620 // creates network instance 621 net, err := underlay.NewNetwork(&underlay.NetworkConfig{ 622 Logger: fnb.Logger, 623 Libp2pNode: fnb.LibP2PNode, 624 Codec: fnb.CodecFactory(), 625 Me: fnb.Me, 626 SporkId: fnb.SporkID, 627 Topology: topology.NewFullyConnectedTopology(), 628 Metrics: fnb.Metrics.Network, 629 BitSwapMetrics: fnb.Metrics.Bitswap, 630 IdentityProvider: fnb.IdentityProvider, 631 ReceiveCache: receiveCache, 632 ConduitFactory: cf, 633 UnicastMessageTimeout: fnb.FlowConfig.NetworkConfig.Unicast.MessageTimeout, 634 IdentityTranslator: fnb.IDTranslator, 635 AlspCfg: &alspmgr.MisbehaviorReportManagerConfig{ 636 Logger: fnb.Logger, 637 SpamRecordCacheSize: fnb.FlowConfig.NetworkConfig.AlspConfig.SpamRecordCacheSize, 638 SpamReportQueueSize: fnb.FlowConfig.NetworkConfig.AlspConfig.SpamReportQueueSize, 639 DisablePenalty: fnb.FlowConfig.NetworkConfig.AlspConfig.DisablePenalty, 640 HeartBeatInterval: fnb.FlowConfig.NetworkConfig.AlspConfig.HearBeatInterval, 641 AlspMetrics: fnb.Metrics.Network, 642 HeroCacheMetricsFactory: fnb.HeroCacheMetricsFactory(), 643 NetworkType: networkType, 644 }, 645 SlashingViolationConsumerFactory: func(adapter network.ConduitAdapter) network.ViolationsConsumer { 646 return 
slashing.NewSlashingViolationsConsumer(fnb.Logger, fnb.Metrics.Network, adapter)
		},
	}, networkOptions...)
	if err != nil {
		return nil, fmt.Errorf("could not initialize network: %w", err)
	}

	if node.ObserverMode {
		fnb.EngineRegistry = converter.NewNetwork(net, channels.SyncCommittee, channels.PublicSyncCommittee)
	} else {
		fnb.EngineRegistry = net // setting network as the fnb.Network for the engine-level components
	}
	fnb.NetworkUnderlay = net // setting network as the fnb.Underlay for the lower-level components

	// register network ReadyDoneAware interface so other components can depend on it for startup
	if fnb.networkUnderlayDependable != nil {
		fnb.networkUnderlayDependable.Init(fnb.NetworkUnderlay)
	}

	idEvents := gadgets.NewIdentityDeltas(net.UpdateNodeAddresses)
	fnb.ProtocolEvents.AddConsumer(idEvents)

	return net, nil
}

// EnqueueMetricsServerInit enqueues a component that constructs the metrics server
// on the configured metrics port.
func (fnb *FlowNodeBuilder) EnqueueMetricsServerInit() {
	fnb.Component("metrics server", func(node *NodeConfig) (module.ReadyDoneAware, error) {
		server := metrics.NewServer(fnb.Logger, fnb.BaseConfig.metricsPort)
		return server, nil
	})
}

// EnqueueAdminServerInit enqueues the admin server component.
//
// It is a no-op when no admin address is configured. Mutual TLS is enabled only when
// the admin cert, key and client CAs are all set; setting only some of them is an error.
// The component factory fails if the key pair cannot be loaded, the client CA file cannot
// be read, or the client CA file contains no parsable PEM certificate.
func (fnb *FlowNodeBuilder) EnqueueAdminServerInit() error {
	if fnb.AdminAddr == NotSet {
		return nil
	}

	if (fnb.AdminCert != NotSet || fnb.AdminKey != NotSet || fnb.AdminClientCAs != NotSet) &&
		!(fnb.AdminCert != NotSet && fnb.AdminKey != NotSet && fnb.AdminClientCAs != NotSet) {
		return fmt.Errorf("admin cert / key and client certs must all be provided to enable mutual TLS")
	}

	// register the default admin commands before the admin server component is enqueued
	fnb.RegisterDefaultAdminCommands()
	fnb.Component("admin server", func(node *NodeConfig) (module.ReadyDoneAware, error) {
		// set up all admin commands
		for commandName, commandFunc := range fnb.adminCommands {
			command := commandFunc(fnb.NodeConfig)
			fnb.adminCommandBootstrapper.RegisterHandler(commandName, command.Handler)
			fnb.adminCommandBootstrapper.RegisterValidator(commandName, command.Validator)
		}

		opts := []admin.CommandRunnerOption{
			admin.WithMaxMsgSize(int(fnb.AdminMaxMsgSize)),
		}

		if node.AdminCert != NotSet {
			serverCert, err := tls.LoadX509KeyPair(node.AdminCert, node.AdminKey)
			if err != nil {
				return nil, fmt.Errorf("could not load admin server key pair: %w", err)
			}
			clientCAs, err := os.ReadFile(node.AdminClientCAs)
			if err != nil {
				return nil, fmt.Errorf("could not read admin client CAs (path: %s): %w", node.AdminClientCAs, err)
			}
			certPool := x509.NewCertPool()
			// AppendCertsFromPEM reports whether any certificate was parsed. With an empty
			// pool and RequireAndVerifyClientCert no client could ever authenticate, so fail
			// at startup instead of serving an unusable endpoint.
			if !certPool.AppendCertsFromPEM(clientCAs) {
				return nil, fmt.Errorf("no valid PEM certificates found in admin client CAs (path: %s)", node.AdminClientCAs)
			}
			// named tlsConfig to avoid shadowing the imported config package
			tlsConfig := &tls.Config{
				MinVersion:   tls.VersionTLS13,
				Certificates: []tls.Certificate{serverCert},
				ClientAuth:   tls.RequireAndVerifyClientCert,
				ClientCAs:    certPool,
			}

			opts = append(opts, admin.WithTLS(tlsConfig))
		}

		runner := fnb.adminCommandBootstrapper.Bootstrap(fnb.Logger, fnb.AdminAddr, opts...)

		return runner, nil
	})

	return nil
}

// RegisterBadgerMetrics delegates to metrics.RegisterBadgerMetrics and returns its error.
func (fnb *FlowNodeBuilder) RegisterBadgerMetrics() error {
	return metrics.RegisterBadgerMetrics()
}

// EnqueueTracer enqueues the builder's tracer as a component.
func (fnb *FlowNodeBuilder) EnqueueTracer() {
	fnb.Component("tracer", func(node *NodeConfig) (module.ReadyDoneAware, error) {
		return fnb.Tracer, nil
	})
}

// ParseAndPrintFlags parses the command line flags, binds them to the flow config,
// validates the config and logs the resulting configuration. Flags contained in the set
// returned by config.LogConfig are not logged again. Finally, the extra flag validation
// is run and its error returned.
func (fnb *FlowNodeBuilder) ParseAndPrintFlags() error {
	// parse configuration parameters
	pflag.Parse()

	configOverride, err := config.BindPFlags(&fnb.BaseConfig.FlowConfig, fnb.flags)
	if err != nil {
		return err
	}

	if configOverride {
		fnb.Logger.Info().Str("config-file", fnb.FlowConfig.ConfigFile).Msg("configuration file updated")
	}

	if err = fnb.BaseConfig.FlowConfig.Validate(); err != nil {
		fnb.Logger.Fatal().Err(err).Msg("flow configuration validation failed")
	}

	// the configuration is deliberately logged at error level so it is always visible
	info := fnb.Logger.Error()

	noPrint := config.LogConfig(info, fnb.flags)
	fnb.flags.VisitAll(func(flag *pflag.Flag) {
		if _, ok := noPrint[flag.Name]; !ok {
			info.Str(flag.Name, fmt.Sprintf("%v", flag.Value))
		}
	})
	info.Msg("configuration loaded (logged as error for visibility)")
	return fnb.extraFlagsValidation()
}

// ValidateRootSnapshot stores an additional check that is applied to the root snapshot
// when it is set on the builder.
func (fnb *FlowNodeBuilder) ValidateRootSnapshot(f func(protocol.Snapshot) error) NodeBuilder {
	fnb.extraRootSnapshotCheck = f
	return fnb
}

// ValidateFlags stores an additional flag validation function on the builder.
func (fnb *FlowNodeBuilder) ValidateFlags(f func() error) NodeBuilder {
	fnb.extraFlagCheck = f
	return fnb
}

// PrintBuildVersionDetails logs the build version and commit.
func (fnb *FlowNodeBuilder) PrintBuildVersionDetails() {
	fnb.Logger.Info().Str("version", build.Version()).Str("commit", build.Commit()).Msg("build details")
}

// initNodeInfo loads the node's private info from the bootstrap directory and sets the
// staking key, network key and node ID on the builder. In observer mode the network key
// is loaded from a separate file and the node ID is derived from that key.
func (fnb *FlowNodeBuilder) initNodeInfo() error {
	if fnb.BaseConfig.nodeIDHex == NotSet {
		return fmt.Errorf("cannot start without node ID")
	}

	nodeID, err := flow.HexStringToIdentifier(fnb.BaseConfig.nodeIDHex)
	if err != nil {
		return fmt.Errorf("could not parse node ID from string (id: %v): %w", fnb.BaseConfig.nodeIDHex, err)
	}

	info, err := LoadPrivateNodeInfo(fnb.BaseConfig.BootstrapDir, nodeID)
	if err != nil {
		return fmt.Errorf("failed to load private node info: %w", err)
	}

	fnb.StakingKey = info.StakingPrivKey.PrivateKey

	if fnb.ObserverMode {
		// observer mode uses a network private key with different format than the staked node,
		// so it has to load the network private key from a separate file
		networkingPrivateKey, err := LoadNetworkPrivateKey(fnb.BaseConfig.BootstrapDir, nodeID)
		if err != nil {
			return fmt.Errorf("failed to load networking private key: %w", err)
		}

		peerID, err := peerIDFromNetworkKey(networkingPrivateKey)
		if err != nil {
			return fmt.Errorf("could not get peer ID from network key: %w", err)
		}

		// public node ID for observer is derived from peer ID which is derived from network key
		pubNodeID, err := translator.NewPublicNetworkIDTranslator().GetFlowID(peerID)
		if err != nil {
			return fmt.Errorf("could not get flow node ID: %w", err)
		}

		fnb.NodeID = pubNodeID
		fnb.NetworkKey = networkingPrivateKey

		return nil
	}

	fnb.NodeID = nodeID
	fnb.NetworkKey = info.NetworkPrivKey.PrivateKey

	return nil
}

// peerIDFromNetworkKey derives the libp2p peer ID from the public part of the given
// network private key.
func peerIDFromNetworkKey(privateKey crypto.PrivateKey) (peer.ID, error) {
	pubKey, err := keyutils.LibP2PPublicKeyFromFlow(privateKey.PublicKey())
	if err != nil {
		return "", fmt.Errorf("could not load libp2p public key: %w", err)
	}

	return peer.IDFromPublicKey(pubKey)
}

// initLogger configures the builder's logger with timestamp, node role and node ID fields,
// applies burst sampling to trace and debug events, and sets the global log level from
// the configured level. It returns an error if the configured level cannot be parsed.
func (fnb *FlowNodeBuilder) initLogger() error {
	// configure logger with standard level, node ID and UTC timestamp
	zerolog.TimeFieldFormat = time.RFC3339Nano
	zerolog.TimestampFunc = func() time.Time { return time.Now().UTC() }

	// Drop all log events that exceed this rate limit
	throttledSampler := logging.BurstSampler(fnb.BaseConfig.debugLogLimit, time.Second)

	log := fnb.Logger.With().
		Timestamp().
		Str("node_role", fnb.BaseConfig.NodeRole).
		Str("node_id", fnb.NodeID.String()).
		Logger().
		Sample(zerolog.LevelSampler{
			TraceSampler: throttledSampler,
			DebugSampler: throttledSampler,
		})

	log.Info().Msgf("flow %s node starting up", fnb.BaseConfig.NodeRole)

	// parse config log level and apply to logger
	lvl, err := zerolog.ParseLevel(strings.ToLower(fnb.BaseConfig.level))
	if err != nil {
		return fmt.Errorf("invalid log level: %w", err)
	}

	// Minimum log level is set to trace, then overridden by SetGlobalLevel.
	// this allows admin commands to modify the level to any value during runtime
	log = log.Level(zerolog.TraceLevel)
	zerolog.SetGlobalLevel(lvl)

	fnb.Logger = log

	return nil
}

// initMetrics sets up the tracer and the metrics collectors. Both default to no-op
// implementations; a real tracer is created when tracing is enabled and real collectors
// are created when metrics are enabled.
func (fnb *FlowNodeBuilder) initMetrics() error {

	fnb.Tracer = trace.NewNoopTracer()
	if fnb.BaseConfig.tracerEnabled {
		// the service name uses at most the first 8 characters of the node ID
		nodeIdHex := fnb.NodeID.String()
		if len(nodeIdHex) > 8 {
			nodeIdHex = nodeIdHex[:8]
		}

		serviceName := fnb.BaseConfig.NodeRole + "-" + nodeIdHex
		tracer, err := trace.NewTracer(
			fnb.Logger,
			serviceName,
			fnb.RootChainID.String(),
			fnb.tracerSensitivity,
		)
		if err != nil {
			return fmt.Errorf("could not initialize tracer: %w", err)
		}

		fnb.Logger.Info().Msg("Tracer Started")
		fnb.Tracer = tracer
	}

	fnb.Metrics = Metrics{
		Network:        metrics.NewNoopCollector(),
		Engine:         metrics.NewNoopCollector(),
		Compliance:     metrics.NewNoopCollector(),
		Cache:          metrics.NewNoopCollector(),
		Mempool:        metrics.NewNoopCollector(),
		CleanCollector: metrics.NewNoopCollector(),
		Bitswap:        metrics.NewNoopCollector(),
	}
	if fnb.BaseConfig.MetricsEnabled {
		fnb.MetricsRegisterer = prometheus.DefaultRegisterer

		mempools := metrics.NewMempoolCollector(5 * time.Second)

		fnb.Metrics = Metrics{
			Network:    metrics.NewNetworkCollector(fnb.Logger),
			Engine:     metrics.NewEngineCollector(),
			Compliance: metrics.NewComplianceCollector(),
			// CacheControl metrics has been causing memory abuse, disable for now
			// Cache:          metrics.NewCacheCollector(fnb.RootChainID),
			Cache:          metrics.NewNoopCollector(),
			CleanCollector: metrics.NewCleanerCollector(),
			Mempool:        mempools,
			Bitswap:        metrics.NewBitswapCollector(),
		}

		// registers mempools as a Component so that its Ready method is invoked upon startup
		fnb.Component("mempools metrics", func(node *NodeConfig) (module.ReadyDoneAware, error) {
			return mempools, nil
		})

		// metrics enabled, report node info metrics as post init event
		fnb.PostInit(func(nodeConfig *NodeConfig) error {
			nodeInfoMetrics := metrics.NewNodeInfoCollector()
			protocolVersion := fnb.RootSnapshot.Params().ProtocolVersion()
			nodeInfoMetrics.NodeInfo(build.Version(), build.Commit(), nodeConfig.SporkID.String(), protocolVersion)
			return nil
		})
	}
	return nil
}

// createGCEProfileUploader creates a pprof profile uploader using the project and instance
// IDs obtained from the given GCE metadata client. If either lookup fails, a no-op
// uploader is returned together with the error.
func (fnb *FlowNodeBuilder) createGCEProfileUploader(client *gcemd.Client, opts ...option.ClientOption) (profiler.Uploader, error) {
	projectID, err := client.ProjectID()
	if err != nil {
		return &profiler.NoopUploader{}, fmt.Errorf("failed to get project ID: %w", err)
	}

	instance, err := client.InstanceID()
	if err != nil {
		return &profiler.NoopUploader{}, fmt.Errorf("failed to get instance ID: %w", err)
	}

	chainID := fnb.RootChainID.String()
	if chainID == "" {
		fnb.Logger.Warn().Msg("RootChainID is not set, using default value")
		chainID = "unknown"
	}

	params := profiler.Params{
		ProjectID: projectID,
		ChainID:   chainID,
		Role:      fnb.NodeConfig.NodeRole,
		Version:   build.Version(),
		Commit:    build.Commit(),
		Instance:  instance,
	}
	fnb.Logger.Info().Msgf("creating pprof profile uploader with params: %+v", params)

	return profiler.NewUploader(fnb.Logger, params, opts...)
}

// createProfileUploader returns a GCE-backed profile uploader when the uploader is enabled
// and the process runs on GCE, and a no-op uploader otherwise. The log message states
// which of the two conditions caused the fallback.
func (fnb *FlowNodeBuilder) createProfileUploader() (profiler.Uploader, error) {
	switch {
	case !fnb.BaseConfig.profilerConfig.UploaderEnabled:
		fnb.Logger.Info().Msg("pprof uploader is disabled, setting pprof uploader to noop")
	case !gcemd.OnGCE(): // only probed when the uploader is enabled
		fnb.Logger.Info().Msg("not running on GCE, setting pprof uploader to noop")
	default:
		return fnb.createGCEProfileUploader(gcemd.NewClient(nil))
	}
	return &profiler.NoopUploader{}, nil
}

// initProfiler creates the profiler, registers its runtime-adjustable settings with the
// config manager and enqueues it as a dependable component without dependencies.
// A failure to create the uploader is logged and a no-op uploader is used instead.
func (fnb *FlowNodeBuilder) initProfiler() error {
	uploader, err := fnb.createProfileUploader()
	if err != nil {
		fnb.Logger.Warn().Err(err).Msg("failed to create pprof uploader, falling back to noop")
		uploader = &profiler.NoopUploader{}
	}

	// named prof to avoid shadowing the imported profiler package
	prof, err := profiler.New(fnb.Logger, uploader, fnb.BaseConfig.profilerConfig)
	if err != nil {
		return fmt.Errorf("could not initialize profiler: %w", err)
	}

	// register the enabled state of the profiler for dynamic configuring
	err = fnb.ConfigManager.RegisterBoolConfig("profiler-enabled", prof.Enabled, prof.SetEnabled)
	if err != nil {
		return fmt.Errorf("could not register profiler-enabled config: %w", err)
	}

	err = fnb.ConfigManager.RegisterDurationConfig(
		"profiler-trigger",
		func() time.Duration { return fnb.BaseConfig.profilerConfig.Duration },
		func(d time.Duration) error { return prof.TriggerRun(d) },
	)
	if err != nil {
		return fmt.Errorf("could not register profiler-trigger config: %w", err)
	}

	err = fnb.ConfigManager.RegisterUintConfig(
		"profiler-set-mem-profile-rate",
		func() uint { return uint(runtime.MemProfileRate) },
		func(r uint) error { runtime.MemProfileRate = int(r); return nil },
	)
	if err != nil {
		return fmt.Errorf("could not register profiler-set-mem-profile-rate setting: %w", err)
	}

	// There is no way to get the current block profile rate so we keep track of it ourselves.
	// The value is shared by the getter and setter closures below.
	var currentRate uint
	err = fnb.ConfigManager.RegisterUintConfig(
		"profiler-set-block-profile-rate",
		func() uint { return currentRate },
		func(r uint) error { currentRate = r; runtime.SetBlockProfileRate(int(r)); return nil },
	)
	if err != nil {
		return fmt.Errorf("could not register profiler-set-block-profile-rate setting: %w", err)
	}

	err = fnb.ConfigManager.RegisterUintConfig(
		"profiler-set-mutex-profile-fraction",
		// SetMutexProfileFraction(-1) reads the current value without changing it
		func() uint { return uint(runtime.SetMutexProfileFraction(-1)) },
		func(r uint) error { _ = runtime.SetMutexProfileFraction(int(r)); return nil },
	)
	if err != nil {
		return fmt.Errorf("could not register profiler-set-mutex-profile-fraction setting: %w", err)
	}

	// registering as a DependableComponent with no dependencies so that it's started immediately on startup
	// without being blocked by other component's Ready()
	fnb.DependableComponent("profiler", func(node *NodeConfig) (module.ReadyDoneAware, error) {
		return prof, nil
	}, NewDependencyList())

	return nil
}

// initDB opens the public badger database in the configured data directory, unless a
// database was passed in via the base config. It also registers a shutdown function that
// closes the database and enqueues the badger log cleaner component.
func (fnb *FlowNodeBuilder) initDB() error {

	// if a db has been passed in, use that instead of creating one
	if fnb.BaseConfig.db != nil {
		fnb.DB = fnb.BaseConfig.db
		return nil
	}

	// Pre-create DB path (Badger creates only one-level dirs)
	err := os.MkdirAll(fnb.BaseConfig.datadir, 0700)
	if err != nil {
		return fmt.Errorf("could not create datadir (path: %s): %w", fnb.BaseConfig.datadir, err)
	}

	log := sutil.NewLogger(fnb.Logger)

	// we initialize the database with options that allow us to keep the maximum
	// item size in the trie itself (up to 1MB) and where we keep all level zero
	// tables in-memory as well; this slows down compaction and increases memory
	// usage, but it improves overall performance and disk i/o
	opts := badger.
		DefaultOptions(fnb.BaseConfig.datadir).
		WithKeepL0InMemory(true).
		WithLogger(log).

		// the ValueLogFileSize option specifies how big the value of a
		// key-value pair is allowed to be saved into badger.
		// exceeding this limit, will fail with an error like this:
		//      could not store data: Value with size <xxxx> exceeded 1073741824 limit
		// The limit configured here is 128 << 23 = 1073741824 bytes (1 GiB), needed by execution node
		// TODO: finding a better max value for each node type
		WithValueLogFileSize(128 << 23).
		WithValueLogMaxEntries(100000) // Default is 1000000

	publicDB, err := bstorage.InitPublic(opts)
	if err != nil {
		return fmt.Errorf("could not open public db: %w", err)
	}
	fnb.DB = publicDB

	fnb.ShutdownFunc(func() error {
		if err := fnb.DB.Close(); err != nil {
			return fmt.Errorf("error closing protocol database: %w", err)
		}
		return nil
	})

	fnb.Component("badger log cleaner", func(node *NodeConfig) (module.ReadyDoneAware, error) {
		return bstorage.NewCleaner(node.Logger, node.DB, node.Metrics.CleanCollector, flow.DefaultValueLogGCWaitDuration), nil
	})

	return nil
}

// initSecretsDB opens the secrets database in the configured secrets directory and
// registers a shutdown function that closes it. It is a no-op when the secrets database
// is disabled. The database is encrypted when an encryption key can be loaded; a missing
// key is an error for consensus nodes and only a warning for other roles.
func (fnb *FlowNodeBuilder) initSecretsDB() error {

	// if the secrets DB is disabled (only applicable for Consensus Follower,
	// which makes use of this same logic), skip this initialization
	if !fnb.BaseConfig.secretsDBEnabled {
		return nil
	}

	if fnb.BaseConfig.secretsdir == NotSet {
		return fmt.Errorf("missing required flag '--secretsdir'")
	}

	err := os.MkdirAll(fnb.BaseConfig.secretsdir, 0700)
	if err != nil {
		return fmt.Errorf("could not create secrets db dir (path: %s): %w", fnb.BaseConfig.secretsdir, err)
	}

	log := sutil.NewLogger(fnb.Logger)

	opts := badger.DefaultOptions(fnb.BaseConfig.secretsdir).WithLogger(log)

	// NOTE: SN nodes need to explicitly set --insecure-secrets-db to true in order to
	// disable secrets database encryption
	if fnb.NodeRole == flow.RoleConsensus.String() && fnb.InsecureSecretsDB {
		fnb.Logger.Warn().Msg("starting with secrets database encryption disabled")
	} else {
		encryptionKey, err := loadSecretsEncryptionKey(fnb.BootstrapDir, fnb.NodeID)
		if errors.Is(err, os.ErrNotExist) {
			if fnb.NodeRole == flow.RoleConsensus.String() {
				// missing key is a fatal error for SN nodes
				return fmt.Errorf("secrets db encryption key not found: %w", err)
			}
			fnb.Logger.Warn().Msg("starting with secrets database encryption disabled")
		} else if err != nil {
			return fmt.Errorf("failed to read secrets db encryption key: %w", err)
		} else {
			opts = opts.WithEncryptionKey(encryptionKey)
		}
	}

	secretsDB, err := bstorage.InitSecret(opts)
	if err != nil {
		return fmt.Errorf("could not open secrets db: %w", err)
	}
	fnb.SecretsDB = secretsDB

	fnb.ShutdownFunc(func() error {
		if err := fnb.SecretsDB.Close(); err != nil {
			return fmt.Errorf("error closing secrets database: %w", err)
		}
		return nil
	})

	return nil
}

// initStorage initializes the max tracker in the database and constructs the badger-backed
// storage abstractions, which are stored in fnb.Storage.
func (fnb *FlowNodeBuilder) initStorage() error {

	// in order to avoid long iterations with big keys when initializing with an
	// already populated database, we bootstrap the initial maximum key size
	// upon starting
	err := operation.RetryOnConflict(fnb.DB.Update, func(tx *badger.Txn) error {
		return operation.InitMax(tx)
	})
	if err != nil {
		return fmt.Errorf("could not initialize max tracker: %w", err)
	}

	headers := bstorage.NewHeaders(fnb.Metrics.Cache, fnb.DB)
	guarantees := bstorage.NewGuarantees(fnb.Metrics.Cache, fnb.DB, fnb.BaseConfig.guaranteesCacheSize)
	seals := bstorage.NewSeals(fnb.Metrics.Cache, fnb.DB)
	results := bstorage.NewExecutionResults(fnb.Metrics.Cache, fnb.DB)
	receipts := bstorage.NewExecutionReceipts(fnb.Metrics.Cache, fnb.DB, results, fnb.BaseConfig.receiptsCacheSize)
	index := bstorage.NewIndex(fnb.Metrics.Cache, fnb.DB)
	payloads := bstorage.NewPayloads(fnb.DB, index, guarantees, seals, receipts, results)
	blocks := bstorage.NewBlocks(fnb.DB, headers, payloads)
	qcs := bstorage.NewQuorumCertificates(fnb.Metrics.Cache, fnb.DB, bstorage.DefaultCacheSize)
	transactions := bstorage.NewTransactions(fnb.Metrics.Cache, fnb.DB)
	collections := bstorage.NewCollections(fnb.DB, transactions)
	setups := bstorage.NewEpochSetups(fnb.Metrics.Cache, fnb.DB)
	epochCommits := bstorage.NewEpochCommits(fnb.Metrics.Cache, fnb.DB)
	commits := bstorage.NewCommits(fnb.Metrics.Cache, fnb.DB)
	protocolState := bstorage.NewProtocolState(fnb.Metrics.Cache, setups, epochCommits, fnb.DB,
		bstorage.DefaultProtocolStateCacheSize, bstorage.DefaultProtocolStateByBlockIDCacheSize)
	protocolKVStores := bstorage.NewProtocolKVStore(fnb.Metrics.Cache, fnb.DB,
		bstorage.DefaultProtocolKVStoreCacheSize, bstorage.DefaultProtocolKVStoreByBlockIDCacheSize)
	versionBeacons := bstorage.NewVersionBeacons(fnb.DB)

	fnb.Storage = Storage{
		Headers:            headers,
		Guarantees:         guarantees,
		Receipts:           receipts,
		Results:            results,
		Seals:              seals,
		Index:              index,
		Payloads:           payloads,
		Blocks:             blocks,
		QuorumCertificates: qcs,
		Transactions:       transactions,
		Collections:        collections,
		Setups:             setups,
		EpochCommits:       epochCommits,
		VersionBeacons:     versionBeacons,
		EpochProtocolState: protocolState,
		ProtocolKVStore:    protocolKVStores,
		Commits:            commits,
	}

	return nil
}

// InitIDProviders enqueues a module that sets up the identity provider, the ID translator
// and the identifier provider used by the sync engine. In observer mode the translator and
// identifier provider come from CreatePublicIDTranslatorAndIdentifierProvider; otherwise
// they are based on the protocol state ID cache, and the disallow list is registered with
// the config manager.
func (fnb *FlowNodeBuilder) InitIDProviders() {
	fnb.Module("id providers", func(node *NodeConfig) error {
		idCache, err := cache.NewProtocolStateIDCache(node.Logger, node.State, node.ProtocolEvents)
		if err != nil {
			return fmt.Errorf("could not initialize ProtocolStateIDCache: %w", err)
		}

		// The following wrapper allows to disallow-list byzantine nodes via an admin command:
		// the wrapper overrides the 'Ejected' flag of disallow-listed nodes to true
		disallowListWrapper, err := cache.NewNodeDisallowListWrapper(idCache, node.DB, func() network.DisallowListNotificationConsumer {
			return fnb.NetworkUnderlay
		})
		if err != nil {
			return fmt.Errorf("could not initialize NodeBlockListWrapper: %w", err)
		}
		node.IdentityProvider = disallowListWrapper

		if node.ObserverMode {
			// identifier providers decides which node to connect to when syncing blocks,
			// in observer mode, the peer nodes have to be specific public access node,
			// rather than the staked consensus nodes.
			idTranslator, factory, err := CreatePublicIDTranslatorAndIdentifierProvider(
				fnb.Logger,
				fnb.NetworkKey,
				fnb.SporkID,
				// fnb.LibP2PNode is not created yet, until EnqueueNetworkInit is called.
				// so we pass a function that will return the LibP2PNode when called.
				func() p2p.LibP2PNode {
					return fnb.LibP2PNode
				},
				idCache,
			)
			if err != nil {
				return fmt.Errorf("could not initialize public ID translator and identifier provider: %w", err)
			}

			fnb.IDTranslator = idTranslator
			fnb.SyncEngineIdentifierProvider = factory()

			return nil
		}

		node.IDTranslator = idCache

		// register the disallow list wrapper for dynamic configuration via admin command
		err = node.ConfigManager.RegisterIdentifierListConfig("network-id-provider-blocklist",
			disallowListWrapper.GetDisallowList, disallowListWrapper.Update)
		if err != nil {
			return fmt.Errorf("failed to register disallow-list wrapper with config manager: %w", err)
		}

		node.SyncEngineIdentifierProvider = id.NewIdentityFilterIdentifierProvider(
			filter.And(
				filter.HasRole[flow.Identity](flow.RoleConsensus),
				filter.Not(filter.HasNodeID[flow.Identity](node.Me.NodeID())),
				filter.NotEjectedFilter,
			),
			node.IdentityProvider,
		)
		return nil
	})
}

// initState opens the protocol state if the database is already bootstrapped, or
// bootstraps it from the root snapshot otherwise (loading the snapshot from the bootstrap
// directory if none is configured). It then initializes fnb.Me if it is not yet set and
// records the last finalized header on the node config.
func (fnb *FlowNodeBuilder) initState() error {
	fnb.ProtocolEvents = events.NewDistributor()

	isBootStrapped, err := badgerState.IsBootstrapped(fnb.DB)
	if err != nil {
		return fmt.Errorf("failed to determine whether database contains bootstrapped state: %w", err)
	}

	if isBootStrapped {
		fnb.Logger.Info().Msg("opening already bootstrapped protocol state")
		state, err := badgerState.OpenState(
			fnb.Metrics.Compliance,
			fnb.DB,
			fnb.Storage.Headers,
			fnb.Storage.Seals,
			fnb.Storage.Results,
			fnb.Storage.Blocks,
			fnb.Storage.QuorumCertificates,
			fnb.Storage.Setups,
			fnb.Storage.EpochCommits,
			fnb.Storage.EpochProtocolState,
			fnb.Storage.ProtocolKVStore,
			fnb.Storage.VersionBeacons,
		)
		if err != nil {
			return fmt.Errorf("could not open protocol state: %w", err)
		}
		fnb.State = state

		// set root snapshot field
		rootBlock := state.Params().FinalizedRoot()
		rootSnapshot := state.AtBlockID(rootBlock.ID())
		if err := fnb.setRootSnapshot(rootSnapshot); err != nil {
			return err
		}
	} else {
		// Bootstrap!
		fnb.Logger.Info().Msg("bootstrapping empty protocol state")

		// if no root snapshot is configured, attempt to load the file from disk
		var rootSnapshot = fnb.RootSnapshot
		if rootSnapshot == nil {
			fnb.Logger.Info().Msg("loading root protocol state snapshot from disk")
			rootSnapshot, err = loadRootProtocolSnapshot(fnb.BaseConfig.BootstrapDir)
			if err != nil {
				return fmt.Errorf("failed to read protocol snapshot from disk: %w", err)
			}
		}
		// set root snapshot fields
		if err := fnb.setRootSnapshot(rootSnapshot); err != nil {
			return err
		}

		// generate bootstrap config options as per NodeConfig
		var options []badgerState.BootstrapConfigOptions
		if fnb.SkipNwAddressBasedValidations {
			options = append(options, badgerState.SkipNetworkAddressValidation)
		}

		fnb.State, err = badgerState.Bootstrap(
			fnb.Metrics.Compliance,
			fnb.DB,
			fnb.Storage.Headers,
			fnb.Storage.Seals,
			fnb.Storage.Results,
			fnb.Storage.Blocks,
			fnb.Storage.QuorumCertificates,
			fnb.Storage.Setups,
			fnb.Storage.EpochCommits,
			fnb.Storage.EpochProtocolState,
			fnb.Storage.ProtocolKVStore,
			fnb.Storage.VersionBeacons,
			fnb.RootSnapshot,
			options...,
		)
		if err != nil {
			return fmt.Errorf("could not bootstrap protocol state: %w", err)
		}

		fnb.Logger.Info().
			Hex("root_result_id", logging.Entity(fnb.RootResult)).
			Hex("root_state_commitment", fnb.RootSeal.FinalState[:]).
			Hex("finalized_root_block_id", logging.Entity(fnb.FinalizedRootBlock)).
			Uint64("finalized_root_block_height", fnb.FinalizedRootBlock.Header.Height).
			Hex("sealed_root_block_id", logging.Entity(fnb.SealedRootBlock)).
			Uint64("sealed_root_block_height", fnb.SealedRootBlock.Header.Height).
			Msg("protocol state bootstrapped")
	}

	// initialize local if it hasn't been initialized yet
	if fnb.Me == nil {
		if err := fnb.initLocal(); err != nil {
			return err
		}
	}

	lastFinalized, err := fnb.State.Final().Head()
	if err != nil {
		return fmt.Errorf("could not get last finalized block header: %w", err)
	}
	fnb.NodeConfig.LastFinalizedHeader = lastFinalized

	lastSealed, err := fnb.State.Sealed().Head()
	if err != nil {
		return fmt.Errorf("could not get last sealed block header: %w", err)
	}

	fnb.Logger.Info().
		Hex("last_finalized_block_id", logging.Entity(lastFinalized)).
		Uint64("last_finalized_block_height", lastFinalized.Height).
		Hex("last_sealed_block_id", logging.Entity(lastSealed)).
		Uint64("last_sealed_block_height", lastSealed.Height).
		Hex("finalized_root_block_id", logging.Entity(fnb.FinalizedRootBlock)).
		Uint64("finalized_root_block_height", fnb.FinalizedRootBlock.Header.Height).
		Hex("sealed_root_block_id", logging.Entity(fnb.SealedRootBlock)).
		Uint64("sealed_root_block_height", fnb.SealedRootBlock.Header.Height).
		Msg("successfully opened protocol state")

	return nil
}

// setRootSnapshot sets the root snapshot field and all related fields in the NodeConfig.
1398 func (fnb *FlowNodeBuilder) setRootSnapshot(rootSnapshot protocol.Snapshot) error { 1399 var err error 1400 1401 // validate the root snapshot QCs 1402 err = badgerState.IsValidRootSnapshotQCs(rootSnapshot) 1403 if err != nil { 1404 return fmt.Errorf("failed to validate root snapshot QCs: %w", err) 1405 } 1406 1407 // perform extra checks requested by specific node types 1408 if fnb.extraRootSnapshotCheck != nil { 1409 err = fnb.extraRootSnapshotCheck(rootSnapshot) 1410 if err != nil { 1411 return fmt.Errorf("failed to perform extra checks on root snapshot: %w", err) 1412 } 1413 } 1414 1415 fnb.RootSnapshot = rootSnapshot 1416 // cache properties of the root snapshot, for convenience 1417 fnb.RootResult, fnb.RootSeal, err = fnb.RootSnapshot.SealedResult() 1418 if err != nil { 1419 return fmt.Errorf("failed to read root sealed result: %w", err) 1420 } 1421 1422 sealingSegment, err := fnb.RootSnapshot.SealingSegment() 1423 if err != nil { 1424 return fmt.Errorf("failed to read root sealing segment: %w", err) 1425 } 1426 1427 fnb.FinalizedRootBlock = sealingSegment.Highest() 1428 fnb.SealedRootBlock = sealingSegment.Sealed() 1429 fnb.RootQC, err = fnb.RootSnapshot.QuorumCertificate() 1430 if err != nil { 1431 return fmt.Errorf("failed to read root QC: %w", err) 1432 } 1433 1434 fnb.RootChainID = fnb.FinalizedRootBlock.Header.ChainID 1435 fnb.SporkID = fnb.RootSnapshot.Params().SporkID() 1436 1437 return nil 1438 } 1439 1440 func (fnb *FlowNodeBuilder) initLocal() error { 1441 // NodeID has been set in initNodeInfo 1442 myID := fnb.NodeID 1443 if fnb.ObserverMode { 1444 nodeID, err := flow.HexStringToIdentifier(fnb.BaseConfig.nodeIDHex) 1445 if err != nil { 1446 return fmt.Errorf("could not parse node ID from string (id: %v): %w", fnb.BaseConfig.nodeIDHex, err) 1447 } 1448 info, err := LoadPrivateNodeInfo(fnb.BaseConfig.BootstrapDir, nodeID) 1449 if err != nil { 1450 return fmt.Errorf("could not load private node info: %w", err) 1451 } 1452 1453 if info.Role != 
flow.RoleExecution { 1454 return fmt.Errorf("observer mode is only available for execution nodes") 1455 } 1456 1457 id := flow.IdentitySkeleton{ 1458 // observer mode uses the node id derived from the network key, 1459 // rather than the node id from the node info file 1460 NodeID: myID, 1461 Address: info.Address, 1462 Role: info.Role, 1463 InitialWeight: 0, 1464 NetworkPubKey: fnb.NetworkKey.PublicKey(), 1465 StakingPubKey: fnb.StakingKey.PublicKey(), 1466 } 1467 fnb.Me, err = local.New(id, fnb.StakingKey) 1468 if err != nil { 1469 return fmt.Errorf("could not initialize local: %w", err) 1470 } 1471 1472 return nil 1473 } 1474 1475 // Verify that my ID (as given in the configuration) is known to the network 1476 // (i.e. protocol state). There are two cases that will cause the following error: 1477 // 1) used the wrong node id, which is not part of the identity list of the finalized state 1478 // 2) the node id is a new one for a new spork, but the bootstrap data has not been updated. 1479 self, err := fnb.State.Final().Identity(myID) 1480 if err != nil { 1481 return fmt.Errorf("node identity not found in the identity list of the finalized state (id: %v): %w", myID, err) 1482 } 1483 1484 // Verify that my role (as given in the configuration) is consistent with the protocol state. 1485 // We enforce this strictly for MainNet. For other networks (e.g. TestNet or BenchNet), we 1486 // are lenient, to allow ghost node to run as any role. 
1487 if self.Role.String() != fnb.BaseConfig.NodeRole { 1488 rootBlockHeader := fnb.State.Params().FinalizedRoot() 1489 if rootBlockHeader.ChainID == flow.Mainnet { 1490 return fmt.Errorf("running as incorrect role, expected: %v, actual: %v, exiting", 1491 self.Role.String(), 1492 fnb.BaseConfig.NodeRole, 1493 ) 1494 } 1495 1496 fnb.Logger.Warn().Msgf("running as incorrect role, expected: %v, actual: %v, continuing", 1497 self.Role.String(), 1498 fnb.BaseConfig.NodeRole) 1499 } 1500 1501 // ensure that the configured staking/network keys are consistent with the protocol state 1502 if !self.NetworkPubKey.Equals(fnb.NetworkKey.PublicKey()) { 1503 return fmt.Errorf("configured networking key does not match protocol state") 1504 } 1505 if !self.StakingPubKey.Equals(fnb.StakingKey.PublicKey()) { 1506 return fmt.Errorf("configured staking key does not match protocol state") 1507 } 1508 1509 fnb.Me, err = local.New(self.IdentitySkeleton, fnb.StakingKey) 1510 if err != nil { 1511 return fmt.Errorf("could not initialize local: %w", err) 1512 } 1513 1514 return nil 1515 } 1516 1517 func (fnb *FlowNodeBuilder) initFvmOptions() { 1518 blockFinder := environment.NewBlockFinder(fnb.Storage.Headers) 1519 vmOpts := []fvm.Option{ 1520 fvm.WithChain(fnb.RootChainID.Chain()), 1521 fvm.WithBlocks(blockFinder), 1522 fvm.WithAccountStorageLimit(true), 1523 } 1524 switch fnb.RootChainID { 1525 case flow.Testnet, 1526 flow.Sandboxnet, 1527 flow.Previewnet, 1528 flow.Mainnet: 1529 vmOpts = append(vmOpts, 1530 fvm.WithTransactionFeesEnabled(true), 1531 ) 1532 } 1533 switch fnb.RootChainID { 1534 case flow.Testnet, 1535 flow.Sandboxnet, 1536 flow.Previewnet, 1537 flow.Localnet, 1538 flow.Benchnet: 1539 vmOpts = append(vmOpts, 1540 fvm.WithContractDeploymentRestricted(false), 1541 ) 1542 } 1543 fnb.FvmOptions = vmOpts 1544 } 1545 1546 // handleModules initializes the given module. 
1547 func (fnb *FlowNodeBuilder) handleModule(v namedModuleFunc) error { 1548 fnb.Logger.Info().Str("module", v.name).Msg("module initialization started") 1549 err := v.fn(fnb.NodeConfig) 1550 if err != nil { 1551 return fmt.Errorf("module %s initialization failed: %w", v.name, err) 1552 } 1553 1554 fnb.Logger.Info().Str("module", v.name).Msg("module initialization complete") 1555 return nil 1556 } 1557 1558 // handleModules initializes all modules that have been enqueued on this node builder. 1559 func (fnb *FlowNodeBuilder) handleModules() error { 1560 for _, f := range fnb.modules { 1561 if err := fnb.handleModule(f); err != nil { 1562 return err 1563 } 1564 } 1565 1566 return nil 1567 } 1568 1569 // handleComponents registers the component's factory method with the ComponentManager to be run 1570 // when the node starts. 1571 // It uses signal channels to ensure that components are started serially. 1572 func (fnb *FlowNodeBuilder) handleComponents() error { 1573 // The parent/started channels are used to enforce serial startup. 1574 // - parent is the started channel of the previous component. 1575 // - when a component is ready, it closes its started channel by calling the provided callback. 1576 // Components wait for their parent channel to close before starting, this ensures they start 1577 // up serially, even though the ComponentManager will launch the goroutines in parallel. 
1578 1579 // The first component is always started immediately 1580 parent := make(chan struct{}) 1581 close(parent) 1582 1583 var err error 1584 asyncComponents := []namedComponentFunc{} 1585 1586 // Run all components 1587 for _, f := range fnb.components { 1588 // Components with explicit dependencies are not started serially 1589 if f.dependencies != nil { 1590 asyncComponents = append(asyncComponents, f) 1591 continue 1592 } 1593 1594 started := make(chan struct{}) 1595 1596 if f.errorHandler != nil { 1597 err = fnb.handleRestartableComponent(f, parent, func() { close(started) }) 1598 } else { 1599 err = fnb.handleComponent(f, parent, func() { close(started) }) 1600 } 1601 1602 if err != nil { 1603 return fmt.Errorf("could not handle component %s: %w", f.name, err) 1604 } 1605 1606 parent = started 1607 } 1608 1609 // Components with explicit dependencies are run asynchronously, which means dependencies in 1610 // the dependency list must be initialized outside of the component factory. 1611 for _, f := range asyncComponents { 1612 fnb.Logger.Debug().Str("component", f.name).Int("dependencies", len(f.dependencies.components)).Msg("handling component asynchronously") 1613 err = fnb.handleComponent(f, util.AllReady(f.dependencies.components...), func() {}) 1614 if err != nil { 1615 return fmt.Errorf("could not handle dependable component %s: %w", f.name, err) 1616 } 1617 } 1618 1619 return nil 1620 } 1621 1622 // handleComponent constructs a component using the provided ReadyDoneFactory, and registers a 1623 // worker with the ComponentManager to be run when the node is started. 1624 // 1625 // The ComponentManager starts all workers in parallel. Since some components have non-idempotent 1626 // ReadyDoneAware interfaces, we need to ensure that they are started serially. This is accomplished 1627 // using the parentReady channel and the started closure. 
Components wait for the parentReady channel 1628 // to close before starting, and then call the started callback after they are ready(). The started 1629 // callback closes the parentReady channel of the next component, and so on. 1630 // 1631 // TODO: Instead of this serial startup, components should wait for their dependencies to be ready 1632 // using their ReadyDoneAware interface. After components are updated to use the idempotent 1633 // ReadyDoneAware interface and explicitly wait for their dependencies to be ready, we can remove 1634 // this channel chaining. 1635 func (fnb *FlowNodeBuilder) handleComponent(v namedComponentFunc, dependencies <-chan struct{}, started func()) error { 1636 // Add a closure that starts the component when the node is started, and then waits for it to exit 1637 // gracefully. 1638 // Startup for all components will happen in parallel, and components can use their dependencies' 1639 // ReadyDoneAware interface to wait until they are ready. 1640 fnb.componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 1641 // wait for the dependencies to be ready before starting 1642 if err := util.WaitClosed(ctx, dependencies); err != nil { 1643 return 1644 } 1645 1646 logger := fnb.Logger.With().Str("component", v.name).Logger() 1647 1648 logger.Info().Msg("component initialization started") 1649 // First, build the component using the factory method. 1650 readyAware, err := v.fn(fnb.NodeConfig) 1651 if err != nil { 1652 ctx.Throw(fmt.Errorf("component %s initialization failed: %w", v.name, err)) 1653 } 1654 if readyAware == nil { 1655 ctx.Throw(fmt.Errorf("component %s initialization failed: nil component", v.name)) 1656 } 1657 logger.Info().Msg("component initialization complete") 1658 1659 // if this is a Component, use the Startable interface to start the component, otherwise 1660 // Ready() will launch it. 
1661 cmp, isComponent := readyAware.(component.Component) 1662 if isComponent { 1663 cmp.Start(ctx) 1664 } 1665 1666 // Wait until the component is ready 1667 if err := util.WaitClosed(ctx, readyAware.Ready()); err != nil { 1668 // The context was cancelled. Continue to shutdown logic. 1669 logger.Warn().Msg("component startup aborted") 1670 1671 // Non-idempotent ReadyDoneAware components trigger shutdown by calling Done(). Don't 1672 // do that here since it may not be safe if the component is not Ready(). 1673 if !isComponent { 1674 return 1675 } 1676 } else { 1677 logger.Info().Msg("component startup complete") 1678 ready() 1679 1680 // Signal to the next component that we're ready. 1681 started() 1682 } 1683 1684 // Component shutdown is signaled by cancelling its context. 1685 <-ctx.Done() 1686 logger.Info().Msg("component shutdown started") 1687 1688 // Finally, wait until component has finished shutting down. 1689 <-readyAware.Done() 1690 logger.Info().Msg("component shutdown complete") 1691 }) 1692 1693 return nil 1694 } 1695 1696 // handleRestartableComponent constructs a component using the provided ReadyDoneFactory, and 1697 // registers a worker with the ComponentManager to be run when the node is started. 1698 // 1699 // Restartable Components are components that can be restarted after successfully handling 1700 // an irrecoverable error. 1701 // 1702 // Any irrecoverable errors thrown by the component will be passed to the provided error handler. 1703 func (fnb *FlowNodeBuilder) handleRestartableComponent(v namedComponentFunc, parentReady <-chan struct{}, started func()) error { 1704 fnb.componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 1705 // wait for the previous component to be ready before starting 1706 if err := util.WaitClosed(ctx, parentReady); err != nil { 1707 return 1708 } 1709 1710 // Note: we're marking the worker routine ready before we even attempt to start the 1711 // component. 
the idea behind a restartable component is that the node should not depend 1712 // on it for safe operation, so the node does not need to wait for it to be ready. 1713 ready() 1714 1715 // do not block serial startup. started can only be called once, so it cannot be called 1716 // from within the componentFactory 1717 started() 1718 1719 log := fnb.Logger.With().Str("component", v.name).Logger() 1720 1721 // This may be called multiple times if the component is restarted 1722 componentFactory := func() (component.Component, error) { 1723 log.Info().Msg("component initialization started") 1724 c, err := v.fn(fnb.NodeConfig) 1725 if err != nil { 1726 return nil, err 1727 } 1728 log.Info().Msg("component initialization complete") 1729 1730 go func() { 1731 if err := util.WaitClosed(ctx, c.Ready()); err != nil { 1732 log.Info().Msg("component startup aborted") 1733 } else { 1734 log.Info().Msg("component startup complete") 1735 } 1736 1737 <-ctx.Done() 1738 log.Info().Msg("component shutdown started") 1739 }() 1740 return c.(component.Component), nil 1741 } 1742 1743 err := component.RunComponent(ctx, componentFactory, v.errorHandler) 1744 if err != nil && !errors.Is(err, ctx.Err()) { 1745 ctx.Throw(fmt.Errorf("component %s encountered an unhandled irrecoverable error: %w", v.name, err)) 1746 } 1747 1748 log.Info().Msg("component shutdown complete") 1749 }) 1750 1751 return nil 1752 } 1753 1754 // ExtraFlags enables binding additional flags beyond those defined in BaseConfig. 1755 func (fnb *FlowNodeBuilder) ExtraFlags(f func(*pflag.FlagSet)) NodeBuilder { 1756 f(fnb.flags) 1757 return fnb 1758 } 1759 1760 // Module enables setting up dependencies of the engine with the builder context. 
1761 func (fnb *FlowNodeBuilder) Module(name string, f BuilderFunc) NodeBuilder { 1762 fnb.modules = append(fnb.modules, namedModuleFunc{ 1763 fn: f, 1764 name: name, 1765 }) 1766 return fnb 1767 } 1768 1769 // ShutdownFunc adds a callback function that is called after all components have exited. 1770 func (fnb *FlowNodeBuilder) ShutdownFunc(fn func() error) NodeBuilder { 1771 fnb.postShutdownFns = append(fnb.postShutdownFns, fn) 1772 return fnb 1773 } 1774 1775 func (fnb *FlowNodeBuilder) AdminCommand(command string, f func(config *NodeConfig) commands.AdminCommand) NodeBuilder { 1776 fnb.adminCommands[command] = f 1777 return fnb 1778 } 1779 1780 // Component adds a new component to the node that conforms to the ReadyDoneAware 1781 // interface. 1782 // 1783 // The ReadyDoneFactory may return either a `Component` or `ReadyDoneAware` instance. 1784 // In both cases, the object is started when the node is run, and the node will wait for the 1785 // component to exit gracefully. 1786 func (fnb *FlowNodeBuilder) Component(name string, f ReadyDoneFactory) NodeBuilder { 1787 fnb.components = append(fnb.components, namedComponentFunc{ 1788 fn: f, 1789 name: name, 1790 }) 1791 return fnb 1792 } 1793 1794 // DependableComponent adds a new component to the node that conforms to the ReadyDoneAware 1795 // interface. The builder will wait until all of the components in the dependencies list are ready 1796 // before constructing the component. 1797 // 1798 // The ReadyDoneFactory may return either a `Component` or `ReadyDoneAware` instance. 1799 // In both cases, the object is started when the node is run, and the node will wait for the 1800 // component to exit gracefully. 1801 // 1802 // IMPORTANT: Dependable components are started in parallel with no guaranteed run order, so all 1803 // dependencies must be initialized outside of the ReadyDoneFactory, and their `Ready()` method 1804 // MUST be idempotent. 
1805 func (fnb *FlowNodeBuilder) DependableComponent(name string, f ReadyDoneFactory, dependencies *DependencyList) NodeBuilder { 1806 // Note: dependencies are passed as a struct to allow updating the list after calling this method. 1807 // Passing a slice instead would result in out of sync metadata since slices are passed by reference 1808 fnb.components = append(fnb.components, namedComponentFunc{ 1809 fn: f, 1810 name: name, 1811 dependencies: dependencies, 1812 }) 1813 return fnb 1814 } 1815 1816 // OverrideComponent adds given builder function to the components set of the node builder. If a builder function with that name 1817 // already exists, it will be overridden. 1818 func (fnb *FlowNodeBuilder) OverrideComponent(name string, f ReadyDoneFactory) NodeBuilder { 1819 for i := 0; i < len(fnb.components); i++ { 1820 if fnb.components[i].name == name { 1821 // found component with the name, override it. 1822 fnb.components[i] = namedComponentFunc{ 1823 fn: f, 1824 name: name, 1825 } 1826 1827 return fnb 1828 } 1829 } 1830 1831 // no component found with the same name, hence just adding it. 1832 return fnb.Component(name, f) 1833 } 1834 1835 // RestartableComponent adds a new component to the node that conforms to the ReadyDoneAware 1836 // interface, and calls the provided error handler when an irrecoverable error is encountered. 1837 // Use RestartableComponent if the component is not critical to the node's safe operation and 1838 // can/should be independently restarted when an irrecoverable error is encountered. 1839 // 1840 // IMPORTANT: Since a RestartableComponent can be restarted independently of the node, the node and 1841 // other components must not rely on it for safe operation, and failures must be handled gracefully. 1842 // As such, RestartableComponents do not block the node from becoming ready, and do not block 1843 // subsequent components from starting serially. They do start in serial order. 
1844 // 1845 // Note: The ReadyDoneFactory method may be called multiple times if the component is restarted. 1846 // 1847 // Any irrecoverable errors thrown by the component will be passed to the provided error handler. 1848 func (fnb *FlowNodeBuilder) RestartableComponent(name string, f ReadyDoneFactory, errorHandler component.OnError) NodeBuilder { 1849 fnb.components = append(fnb.components, namedComponentFunc{ 1850 fn: f, 1851 name: name, 1852 errorHandler: errorHandler, 1853 }) 1854 return fnb 1855 } 1856 1857 // OverrideModule adds given builder function to the modules set of the node builder. If a builder function with that name 1858 // already exists, it will be overridden. 1859 func (fnb *FlowNodeBuilder) OverrideModule(name string, f BuilderFunc) NodeBuilder { 1860 for i := 0; i < len(fnb.modules); i++ { 1861 if fnb.modules[i].name == name { 1862 // found module with the name, override it. 1863 fnb.modules[i] = namedModuleFunc{ 1864 fn: f, 1865 name: name, 1866 } 1867 1868 return fnb 1869 } 1870 } 1871 1872 // no module found with the same name, hence just adding it. 
1873 return fnb.Module(name, f) 1874 } 1875 1876 func (fnb *FlowNodeBuilder) PreInit(f BuilderFunc) NodeBuilder { 1877 fnb.preInitFns = append(fnb.preInitFns, f) 1878 return fnb 1879 } 1880 1881 func (fnb *FlowNodeBuilder) PostInit(f BuilderFunc) NodeBuilder { 1882 fnb.postInitFns = append(fnb.postInitFns, f) 1883 return fnb 1884 } 1885 1886 type Option func(*BaseConfig) 1887 1888 func WithBootstrapDir(bootstrapDir string) Option { 1889 return func(config *BaseConfig) { 1890 config.BootstrapDir = bootstrapDir 1891 } 1892 } 1893 1894 func WithBindAddress(bindAddress string) Option { 1895 return func(config *BaseConfig) { 1896 config.BindAddr = bindAddress 1897 } 1898 } 1899 1900 func WithDataDir(dataDir string) Option { 1901 return func(config *BaseConfig) { 1902 if config.db == nil { 1903 config.datadir = dataDir 1904 } 1905 } 1906 } 1907 1908 func WithSecretsDBEnabled(enabled bool) Option { 1909 return func(config *BaseConfig) { 1910 config.secretsDBEnabled = enabled 1911 } 1912 } 1913 1914 func WithMetricsEnabled(enabled bool) Option { 1915 return func(config *BaseConfig) { 1916 config.MetricsEnabled = enabled 1917 } 1918 } 1919 1920 func WithSyncCoreConfig(syncConfig chainsync.Config) Option { 1921 return func(config *BaseConfig) { 1922 config.SyncCoreConfig = syncConfig 1923 } 1924 } 1925 1926 func WithComplianceConfig(complianceConfig compliance.Config) Option { 1927 return func(config *BaseConfig) { 1928 config.ComplianceConfig = complianceConfig 1929 } 1930 } 1931 1932 func WithLogLevel(level string) Option { 1933 return func(config *BaseConfig) { 1934 config.level = level 1935 } 1936 } 1937 1938 // WithDB takes precedence over WithDataDir and datadir will be set to empty if DB is set using this option 1939 func WithDB(db *badger.DB) Option { 1940 return func(config *BaseConfig) { 1941 config.db = db 1942 config.datadir = "" 1943 } 1944 } 1945 1946 // FlowNode creates a new Flow node builder with the given name. 
1947 func FlowNode(role string, opts ...Option) *FlowNodeBuilder { 1948 config := DefaultBaseConfig() 1949 config.NodeRole = role 1950 for _, opt := range opts { 1951 opt(config) 1952 } 1953 1954 builder := &FlowNodeBuilder{ 1955 NodeConfig: &NodeConfig{ 1956 BaseConfig: *config, 1957 Logger: zerolog.New(os.Stderr), 1958 PeerManagerDependencies: NewDependencyList(), 1959 ConfigManager: updatable_configs.NewManager(), 1960 }, 1961 flags: pflag.CommandLine, 1962 adminCommandBootstrapper: admin.NewCommandRunnerBootstrapper(), 1963 adminCommands: make(map[string]func(*NodeConfig) commands.AdminCommand), 1964 componentBuilder: component.NewComponentManagerBuilder(), 1965 } 1966 return builder 1967 } 1968 1969 func (fnb *FlowNodeBuilder) Initialize() error { 1970 fnb.PrintBuildVersionDetails() 1971 1972 fnb.BaseFlags() 1973 1974 if err := fnb.ParseAndPrintFlags(); err != nil { 1975 return err 1976 } 1977 1978 // ID providers must be initialized before the network 1979 fnb.InitIDProviders() 1980 1981 fnb.EnqueueResolver() 1982 1983 fnb.EnqueueNetworkInit() 1984 1985 fnb.EnqueuePingService() 1986 1987 if fnb.MetricsEnabled { 1988 fnb.EnqueueMetricsServerInit() 1989 if err := fnb.RegisterBadgerMetrics(); err != nil { 1990 return err 1991 } 1992 } 1993 1994 fnb.EnqueueTracer() 1995 1996 return nil 1997 } 1998 1999 func (fnb *FlowNodeBuilder) RegisterDefaultAdminCommands() { 2000 fnb.AdminCommand("set-log-level", func(config *NodeConfig) commands.AdminCommand { 2001 return &common.SetLogLevelCommand{} 2002 }).AdminCommand("set-golog-level", func(config *NodeConfig) commands.AdminCommand { 2003 return &common.SetGologLevelCommand{} 2004 }).AdminCommand("get-config", func(config *NodeConfig) commands.AdminCommand { 2005 return common.NewGetConfigCommand(config.ConfigManager) 2006 }).AdminCommand("set-config", func(config *NodeConfig) commands.AdminCommand { 2007 return common.NewSetConfigCommand(config.ConfigManager) 2008 }).AdminCommand("list-configs", func(config *NodeConfig) 
commands.AdminCommand { 2009 return common.NewListConfigCommand(config.ConfigManager) 2010 }).AdminCommand("read-blocks", func(config *NodeConfig) commands.AdminCommand { 2011 return storageCommands.NewReadBlocksCommand(config.State, config.Storage.Blocks) 2012 }).AdminCommand("read-range-blocks", func(conf *NodeConfig) commands.AdminCommand { 2013 return storageCommands.NewReadRangeBlocksCommand(conf.Storage.Blocks) 2014 }).AdminCommand("read-results", func(config *NodeConfig) commands.AdminCommand { 2015 return storageCommands.NewReadResultsCommand(config.State, config.Storage.Results) 2016 }).AdminCommand("read-seals", func(config *NodeConfig) commands.AdminCommand { 2017 return storageCommands.NewReadSealsCommand(config.State, config.Storage.Seals, config.Storage.Index) 2018 }).AdminCommand("get-latest-identity", func(config *NodeConfig) commands.AdminCommand { 2019 return common.NewGetIdentityCommand(config.IdentityProvider) 2020 }) 2021 } 2022 2023 func (fnb *FlowNodeBuilder) Build() (Node, error) { 2024 // Run the prestart initialization. This includes anything that should be done before 2025 // starting the components. 
2026 if err := fnb.onStart(); err != nil { 2027 return nil, err 2028 } 2029 2030 return NewNode( 2031 fnb.componentBuilder.Build(), 2032 fnb.NodeConfig, 2033 fnb.Logger, 2034 fnb.postShutdown, 2035 fnb.handleFatal, 2036 ), nil 2037 } 2038 2039 func (fnb *FlowNodeBuilder) onStart() error { 2040 // init nodeinfo by reading the private bootstrap file if not already set 2041 if fnb.NodeID == flow.ZeroID { 2042 if err := fnb.initNodeInfo(); err != nil { 2043 return err 2044 } 2045 } 2046 2047 if err := fnb.initLogger(); err != nil { 2048 return err 2049 } 2050 2051 if err := fnb.initDB(); err != nil { 2052 return err 2053 } 2054 2055 if err := fnb.initSecretsDB(); err != nil { 2056 return err 2057 } 2058 2059 if err := fnb.initMetrics(); err != nil { 2060 return err 2061 } 2062 2063 if err := fnb.initStorage(); err != nil { 2064 return err 2065 } 2066 2067 for _, f := range fnb.preInitFns { 2068 if err := fnb.handlePreInit(f); err != nil { 2069 return err 2070 } 2071 } 2072 2073 if err := fnb.initState(); err != nil { 2074 return err 2075 } 2076 2077 if err := fnb.initProfiler(); err != nil { 2078 return err 2079 } 2080 2081 fnb.initFvmOptions() 2082 2083 for _, f := range fnb.postInitFns { 2084 if err := fnb.handlePostInit(f); err != nil { 2085 return err 2086 } 2087 } 2088 2089 if err := fnb.EnqueueAdminServerInit(); err != nil { 2090 return err 2091 } 2092 2093 // run all modules 2094 if err := fnb.handleModules(); err != nil { 2095 return fmt.Errorf("could not handle modules: %w", err) 2096 } 2097 2098 // run all components 2099 return fnb.handleComponents() 2100 } 2101 2102 // postShutdown is called by the node before exiting 2103 // put any cleanup code here that should be run after all components have stopped 2104 func (fnb *FlowNodeBuilder) postShutdown() error { 2105 var errs *multierror.Error 2106 2107 for _, fn := range fnb.postShutdownFns { 2108 err := fn() 2109 if err != nil { 2110 errs = multierror.Append(errs, err) 2111 } 2112 } 2113 
fnb.Logger.Info().Msg("database has been closed") 2114 return errs.ErrorOrNil() 2115 } 2116 2117 // handleFatal handles irrecoverable errors by logging them and exiting the process. 2118 func (fnb *FlowNodeBuilder) handleFatal(err error) { 2119 fnb.Logger.Fatal().Err(err).Msg("unhandled irrecoverable error") 2120 } 2121 2122 func (fnb *FlowNodeBuilder) handlePreInit(f BuilderFunc) error { 2123 return f(fnb.NodeConfig) 2124 } 2125 2126 func (fnb *FlowNodeBuilder) handlePostInit(f BuilderFunc) error { 2127 return f(fnb.NodeConfig) 2128 } 2129 2130 func (fnb *FlowNodeBuilder) extraFlagsValidation() error { 2131 if fnb.extraFlagCheck != nil { 2132 err := fnb.extraFlagCheck() 2133 if err != nil { 2134 return fmt.Errorf("invalid flags: %w", err) 2135 } 2136 } 2137 return nil 2138 } 2139 2140 // DhtSystemActivationStatus parses the given role string and returns the corresponding DHT system activation status. 2141 // Args: 2142 // - roleStr: the role string to parse. 2143 // - enabled: whether the DHT system is configured to be enabled. Only meaningful for access and execution nodes. 2144 // Returns: 2145 // - DhtSystemActivation: the corresponding DHT system activation status. 2146 // - error: if the role string is invalid, returns an error. 2147 func DhtSystemActivationStatus(roleStr string, enabled bool) (p2pbuilder.DhtSystemActivation, error) { 2148 if roleStr == "ghost" { 2149 // ghost node is not a valid role, so we don't need to parse it 2150 return p2pbuilder.DhtSystemDisabled, nil 2151 } 2152 2153 role, err := flow.ParseRole(roleStr) 2154 if err != nil && roleStr != "ghost" { 2155 // ghost role is not a valid role, so we don't need to parse it 2156 return p2pbuilder.DhtSystemDisabled, fmt.Errorf("could not parse node role: %w", err) 2157 } 2158 2159 // Only access and execution nodes need to run DHT; which is used by bitswap. 2160 // Access nodes also run a DHT on the public network for peer discovery of un-staked nodes. 
2161 if role != flow.RoleAccess && role != flow.RoleExecution { 2162 return p2pbuilder.DhtSystemDisabled, nil 2163 } 2164 2165 return p2pbuilder.DhtSystemActivation(enabled), nil 2166 }