github.com/koko1123/flow-go-1@v0.29.6/cmd/scaffold.go (about) 1 package cmd 2 3 import ( 4 "crypto/tls" 5 "crypto/x509" 6 "encoding/json" 7 "errors" 8 "fmt" 9 "math/rand" 10 "os" 11 "path/filepath" 12 "runtime" 13 "strings" 14 "time" 15 16 gcemd "cloud.google.com/go/compute/metadata" 17 "github.com/dgraph-io/badger/v3" 18 "github.com/hashicorp/go-multierror" 19 "github.com/libp2p/go-libp2p/core/peer" 20 "github.com/prometheus/client_golang/prometheus" 21 "github.com/rs/zerolog" 22 "github.com/spf13/pflag" 23 "golang.org/x/time/rate" 24 "google.golang.org/api/option" 25 26 "github.com/koko1123/flow-go-1/admin" 27 "github.com/koko1123/flow-go-1/admin/commands" 28 "github.com/koko1123/flow-go-1/admin/commands/common" 29 storageCommands "github.com/koko1123/flow-go-1/admin/commands/storage" 30 "github.com/koko1123/flow-go-1/cmd/build" 31 "github.com/koko1123/flow-go-1/consensus/hotstuff/persister" 32 "github.com/koko1123/flow-go-1/fvm" 33 "github.com/koko1123/flow-go-1/fvm/environment" 34 "github.com/koko1123/flow-go-1/model/bootstrap" 35 "github.com/koko1123/flow-go-1/model/flow" 36 "github.com/koko1123/flow-go-1/model/flow/filter" 37 "github.com/koko1123/flow-go-1/module" 38 "github.com/koko1123/flow-go-1/module/chainsync" 39 "github.com/koko1123/flow-go-1/module/compliance" 40 "github.com/koko1123/flow-go-1/module/component" 41 "github.com/koko1123/flow-go-1/module/id" 42 "github.com/koko1123/flow-go-1/module/irrecoverable" 43 "github.com/koko1123/flow-go-1/module/local" 44 "github.com/koko1123/flow-go-1/module/mempool/herocache" 45 "github.com/koko1123/flow-go-1/module/metrics" 46 "github.com/koko1123/flow-go-1/module/profiler" 47 "github.com/koko1123/flow-go-1/module/trace" 48 "github.com/koko1123/flow-go-1/module/updatable_configs" 49 "github.com/koko1123/flow-go-1/module/util" 50 "github.com/koko1123/flow-go-1/network" 51 netcache "github.com/koko1123/flow-go-1/network/cache" 52 "github.com/koko1123/flow-go-1/network/channels" 53 "github.com/koko1123/flow-go-1/network/p2p" 54 "github.com/koko1123/flow-go-1/network/p2p/cache" 55 "github.com/koko1123/flow-go-1/network/p2p/conduit" 56 "github.com/koko1123/flow-go-1/network/p2p/dns" 57 "github.com/koko1123/flow-go-1/network/p2p/middleware" 58 "github.com/koko1123/flow-go-1/network/p2p/p2pbuilder" 59 "github.com/koko1123/flow-go-1/network/p2p/ping" 60 "github.com/koko1123/flow-go-1/network/p2p/subscription" 61 "github.com/koko1123/flow-go-1/network/p2p/unicast" 62 "github.com/koko1123/flow-go-1/network/p2p/unicast/ratelimit" 63 "github.com/koko1123/flow-go-1/network/slashing" 64 "github.com/koko1123/flow-go-1/network/topology" 65 "github.com/koko1123/flow-go-1/state/protocol" 66 badgerState "github.com/koko1123/flow-go-1/state/protocol/badger" 67 "github.com/koko1123/flow-go-1/state/protocol/events" 68 "github.com/koko1123/flow-go-1/state/protocol/events/gadgets" 69 "github.com/koko1123/flow-go-1/state/protocol/inmem" 70 "github.com/koko1123/flow-go-1/storage" 71 bstorage "github.com/koko1123/flow-go-1/storage/badger" 72 "github.com/koko1123/flow-go-1/storage/badger/operation" 73 sutil "github.com/koko1123/flow-go-1/storage/util" 74 "github.com/koko1123/flow-go-1/utils/io" 75 "github.com/koko1123/flow-go-1/utils/logging" 76 ) 77 78 const ( 79 NetworkComponent = "network" 80 ConduitFactoryComponent = "conduit-factory" 81 LibP2PNodeComponent = "libp2p-node" 82 ) 83 84 type Metrics struct { 85 Network module.NetworkMetrics 86 Engine module.EngineMetrics 87 Compliance module.ComplianceMetrics 88 Cache module.CacheMetrics 89 Mempool module.MempoolMetrics 90 CleanCollector module.CleanerMetrics 91 Bitswap module.BitswapMetrics 92 } 93 94 type Storage = storage.All 95 96 type namedModuleFunc struct { 97 fn BuilderFunc 98 name string 99 } 100 101 type namedComponentFunc struct { 102 fn ReadyDoneFactory 103 name string 104 105 errorHandler component.OnError 106 dependencies *DependencyList 107 } 108 109 // FlowNodeBuilder is the default builder struct used for all flow nodes 110 // It runs a node process with following structure, in sequential order 111 // Base inits (network, storage, state, logger) 112 // PostInit handlers, if any 113 // Components handlers, if any, wait sequentially 114 // Run() <- main loop 115 // Components destructors, if any 116 // The initialization can be proceeded and succeeded with PreInit and PostInit functions that allow customization 117 // of the process in case of nodes such as the unstaked access node where the NodeInfo is not part of the genesis data 118 type FlowNodeBuilder struct { 119 *NodeConfig 120 flags *pflag.FlagSet 121 modules []namedModuleFunc 122 components []namedComponentFunc 123 postShutdownFns []func() error 124 preInitFns []BuilderFunc 125 postInitFns []BuilderFunc 126 extraFlagCheck func() error 127 adminCommandBootstrapper *admin.CommandRunnerBootstrapper 128 adminCommands map[string]func(config *NodeConfig) commands.AdminCommand 129 componentBuilder component.ComponentManagerBuilder 130 } 131 132 func (fnb *FlowNodeBuilder) BaseFlags() { 133 defaultConfig := DefaultBaseConfig() 134 135 // bind configuration parameters 136 fnb.flags.StringVar(&fnb.BaseConfig.nodeIDHex, "nodeid", defaultConfig.nodeIDHex, "identity of our node") 137 fnb.flags.StringVar(&fnb.BaseConfig.BindAddr, "bind", defaultConfig.BindAddr, "address to bind on") 138 fnb.flags.StringVarP(&fnb.BaseConfig.BootstrapDir, "bootstrapdir", "b", defaultConfig.BootstrapDir, "path to the bootstrap directory") 139 fnb.flags.StringVarP(&fnb.BaseConfig.datadir, "datadir", "d", defaultConfig.datadir, "directory to store the public database (protocol state)") 140 fnb.flags.StringVar(&fnb.BaseConfig.secretsdir, "secretsdir", defaultConfig.secretsdir, "directory to store private database (secrets)") 141 fnb.flags.StringVarP(&fnb.BaseConfig.level, "loglevel", "l", defaultConfig.level, "level for logging output") 142 fnb.flags.Uint32Var(&fnb.BaseConfig.debugLogLimit, "debug-log-limit", defaultConfig.debugLogLimit, "max number of debug/trace log events per second") 143 fnb.flags.DurationVar(&fnb.BaseConfig.PeerUpdateInterval, "peerupdate-interval", defaultConfig.PeerUpdateInterval, "how often to refresh the peer connections for the node") 144 fnb.flags.DurationVar(&fnb.BaseConfig.UnicastMessageTimeout, "unicast-timeout", defaultConfig.UnicastMessageTimeout, "how long a unicast transmission can take to complete") 145 fnb.flags.UintVarP(&fnb.BaseConfig.metricsPort, "metricport", "m", defaultConfig.metricsPort, "port for /metrics endpoint") 146 fnb.flags.BoolVar(&fnb.BaseConfig.profilerConfig.Enabled, "profiler-enabled", defaultConfig.profilerConfig.Enabled, "whether to enable the auto-profiler") 147 fnb.flags.BoolVar(&fnb.BaseConfig.profilerConfig.UploaderEnabled, "profile-uploader-enabled", defaultConfig.profilerConfig.UploaderEnabled, 148 "whether to enable automatic profile upload to Google Cloud Profiler. "+ 149 "For autoupload to work forllowing should be true: "+ 150 "1) both -profiler-enabled=true and -profile-uploader-enabled=true need to be set. "+ 151 "2) node is running in GCE. "+ 152 "3) server or user has https://www.googleapis.com/auth/monitoring.write scope. ") 153 fnb.flags.StringVar(&fnb.BaseConfig.profilerConfig.Dir, "profiler-dir", defaultConfig.profilerConfig.Dir, "directory to create auto-profiler profiles") 154 fnb.flags.DurationVar(&fnb.BaseConfig.profilerConfig.Interval, "profiler-interval", defaultConfig.profilerConfig.Interval, 155 "the interval between auto-profiler runs") 156 fnb.flags.DurationVar(&fnb.BaseConfig.profilerConfig.Duration, "profiler-duration", defaultConfig.profilerConfig.Duration, 157 "the duration to run the auto-profile for") 158 159 fnb.flags.BoolVar(&fnb.BaseConfig.tracerEnabled, "tracer-enabled", defaultConfig.tracerEnabled, 160 "whether to enable tracer") 161 fnb.flags.UintVar(&fnb.BaseConfig.tracerSensitivity, "tracer-sensitivity", defaultConfig.tracerSensitivity, 162 "adjusts the level of sampling when tracing is enabled. 0 means capture everything, higher value results in less samples") 163 164 fnb.flags.StringVar(&fnb.BaseConfig.AdminAddr, "admin-addr", defaultConfig.AdminAddr, "address to bind on for admin HTTP server") 165 fnb.flags.StringVar(&fnb.BaseConfig.AdminCert, "admin-cert", defaultConfig.AdminCert, "admin cert file (for TLS)") 166 fnb.flags.StringVar(&fnb.BaseConfig.AdminKey, "admin-key", defaultConfig.AdminKey, "admin key file (for TLS)") 167 fnb.flags.StringVar(&fnb.BaseConfig.AdminClientCAs, "admin-client-certs", defaultConfig.AdminClientCAs, "admin client certs (for mutual TLS)") 168 fnb.flags.UintVar(&fnb.BaseConfig.AdminMaxMsgSize, "admin-max-response-size", defaultConfig.AdminMaxMsgSize, "admin server max response size in bytes") 169 170 fnb.flags.Float64Var(&fnb.BaseConfig.LibP2PResourceManagerConfig.FileDescriptorsRatio, "libp2p-fd-ratio", defaultConfig.LibP2PResourceManagerConfig.FileDescriptorsRatio, "ratio of available file descriptors to be used by libp2p (in (0,1])") 171 fnb.flags.Float64Var(&fnb.BaseConfig.LibP2PResourceManagerConfig.MemoryLimitRatio, "libp2p-memory-limit", defaultConfig.LibP2PResourceManagerConfig.MemoryLimitRatio, "ratio of available memory to be used by libp2p (in (0,1])") 172 fnb.flags.DurationVar(&fnb.BaseConfig.DNSCacheTTL, "dns-cache-ttl", defaultConfig.DNSCacheTTL, "time-to-live for dns cache") 173 fnb.flags.StringSliceVar(&fnb.BaseConfig.PreferredUnicastProtocols, "preferred-unicast-protocols", nil, "preferred unicast protocols in ascending order of preference") 174 fnb.flags.Uint32Var(&fnb.BaseConfig.NetworkReceivedMessageCacheSize, "networking-receive-cache-size", p2p.DefaultReceiveCacheSize, 175 "incoming message cache size at networking layer") 176 fnb.flags.BoolVar(&fnb.BaseConfig.NetworkConnectionPruning, "networking-connection-pruning", defaultConfig.NetworkConnectionPruning, "enabling connection trimming") 177 fnb.flags.BoolVar(&fnb.BaseConfig.PeerScoringEnabled, "peer-scoring-enabled", defaultConfig.PeerScoringEnabled, "enabling peer scoring on pubsub network") 178 fnb.flags.UintVar(&fnb.BaseConfig.guaranteesCacheSize, "guarantees-cache-size", bstorage.DefaultCacheSize, "collection guarantees cache size") 179 fnb.flags.UintVar(&fnb.BaseConfig.receiptsCacheSize, "receipts-cache-size", bstorage.DefaultCacheSize, "receipts cache size") 180 181 // dynamic node startup flags 182 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupANPubkey, "dynamic-startup-access-publickey", "", "the public key of the trusted secure access node to connect to when using dynamic-startup, this access node must be staked") 183 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupANAddress, "dynamic-startup-access-address", "", "the access address of the trusted secure access node to connect to when using dynamic-startup, this access node must be staked") 184 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupEpochPhase, "dynamic-startup-epoch-phase", "EpochPhaseSetup", "the target epoch phase for dynamic startup <EpochPhaseStaking|EpochPhaseSetup|EpochPhaseCommitted") 185 fnb.flags.StringVar(&fnb.BaseConfig.DynamicStartupEpoch, "dynamic-startup-epoch", "current", "the target epoch for dynamic-startup, use \"current\" to start node in the current epoch") 186 fnb.flags.DurationVar(&fnb.BaseConfig.DynamicStartupSleepInterval, "dynamic-startup-sleep-interval", time.Minute, "the interval in which the node will check if it can start") 187 188 fnb.flags.BoolVar(&fnb.BaseConfig.InsecureSecretsDB, "insecure-secrets-db", false, "allow the node to start up without an secrets DB encryption key") 189 fnb.flags.BoolVar(&fnb.BaseConfig.HeroCacheMetricsEnable, "herocache-metrics-collector", false, "enables herocache metrics collection") 190 191 // sync core flags 192 fnb.flags.DurationVar(&fnb.BaseConfig.SyncCoreConfig.RetryInterval, "sync-retry-interval", defaultConfig.SyncCoreConfig.RetryInterval, "the initial interval before we retry a sync request, uses exponential backoff") 193 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.Tolerance, "sync-tolerance", defaultConfig.SyncCoreConfig.Tolerance, "determines how big of a difference in block heights we tolerate before actively syncing with range requests") 194 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.MaxAttempts, "sync-max-attempts", defaultConfig.SyncCoreConfig.MaxAttempts, "the maximum number of attempts we make for each requested block/height before discarding") 195 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.MaxSize, "sync-max-size", defaultConfig.SyncCoreConfig.MaxSize, "the maximum number of blocks we request in the same block request message") 196 fnb.flags.UintVar(&fnb.BaseConfig.SyncCoreConfig.MaxRequests, "sync-max-requests", defaultConfig.SyncCoreConfig.MaxRequests, "the maximum number of requests we send during each scanning period") 197 198 fnb.flags.Uint64Var(&fnb.BaseConfig.ComplianceConfig.SkipNewProposalsThreshold, "compliance-skip-proposals-threshold", defaultConfig.ComplianceConfig.SkipNewProposalsThreshold, "threshold at which new proposals are discarded rather than cached, if their height is this much above local finalized height") 199 200 // unicast stream handler rate limits 201 fnb.flags.IntVar(&fnb.BaseConfig.UnicastMessageRateLimit, "unicast-message-rate-limit", defaultConfig.NetworkConfig.UnicastMessageRateLimit, "maximum number of unicast messages that a peer can send per second") 202 fnb.flags.IntVar(&fnb.BaseConfig.UnicastBandwidthRateLimit, "unicast-bandwidth-rate-limit", defaultConfig.NetworkConfig.UnicastBandwidthRateLimit, "bandwidth size in bytes a peer is allowed to send via unicast streams per second") 203 fnb.flags.IntVar(&fnb.BaseConfig.UnicastBandwidthBurstLimit, "unicast-bandwidth-burst-limit", defaultConfig.NetworkConfig.UnicastBandwidthBurstLimit, "bandwidth size in bytes a peer is allowed to send at one time") 204 fnb.flags.DurationVar(&fnb.BaseConfig.UnicastRateLimitLockoutDuration, "unicast-rate-limit-lockout-duration", defaultConfig.NetworkConfig.UnicastRateLimitLockoutDuration, "the number of seconds a peer will be forced to wait before being allowed to successful reconnect to the node after being rate limited") 205 fnb.flags.BoolVar(&fnb.BaseConfig.UnicastRateLimitDryRun, "unicast-rate-limit-dry-run", defaultConfig.NetworkConfig.UnicastRateLimitDryRun, "disable peer disconnects and connections gating when rate limiting peers") 206 } 207 208 func (fnb *FlowNodeBuilder) EnqueuePingService() { 209 fnb.Component("ping service", func(node *NodeConfig) (module.ReadyDoneAware, error) { 210 pingLibP2PProtocolID := unicast.PingProtocolId(node.SporkID) 211 212 // setup the Ping provider to return the software version and the sealed block height 213 pingInfoProvider := &ping.InfoProvider{ 214 SoftwareVersionFun: func() string { 215 return build.Semver() 216 }, 217 SealedBlockHeightFun: func() (uint64, error) { 218 head, err := node.State.Sealed().Head() 219 if err != nil { 220 return 0, err 221 } 222 return head.Height, nil 223 }, 224 HotstuffViewFun: func() (uint64, error) { 225 return 0, fmt.Errorf("hotstuff view reporting disabled") 226 }, 227 } 228 229 // only consensus roles will need to report hotstuff view 230 if fnb.BaseConfig.NodeRole == flow.RoleConsensus.String() { 231 // initialize the persister 232 persist := persister.New(node.DB, node.RootChainID) 233 234 pingInfoProvider.HotstuffViewFun = func() (uint64, error) { 235 curView, err := persist.GetStarted() 236 if err != nil { 237 return 0, err 238 } 239 240 return curView, nil 241 } 242 } 243 244 pingService, err := node.Network.RegisterPingService(pingLibP2PProtocolID, pingInfoProvider) 245 246 node.PingService = pingService 247 248 return &module.NoopReadyDoneAware{}, err 249 }) 250 } 251 252 func (fnb *FlowNodeBuilder) EnqueueResolver() { 253 fnb.Component("resolver", func(node *NodeConfig) (module.ReadyDoneAware, error) { 254 var dnsIpCacheMetricsCollector module.HeroCacheMetrics = metrics.NewNoopCollector() 255 var dnsTxtCacheMetricsCollector module.HeroCacheMetrics = metrics.NewNoopCollector() 256 if fnb.HeroCacheMetricsEnable { 257 dnsIpCacheMetricsCollector = metrics.NetworkDnsIpCacheMetricsFactory(fnb.MetricsRegisterer) 258 dnsTxtCacheMetricsCollector = metrics.NetworkDnsTxtCacheMetricsFactory(fnb.MetricsRegisterer) 259 } 260 261 cache := herocache.NewDNSCache( 262 dns.DefaultCacheSize, 263 node.Logger, 264 dnsIpCacheMetricsCollector, 265 dnsTxtCacheMetricsCollector, 266 ) 267 268 resolver := dns.NewResolver( 269 node.Logger, 270 fnb.Metrics.Network, 271 cache, 272 dns.WithTTL(fnb.BaseConfig.DNSCacheTTL)) 273 274 fnb.Resolver = resolver 275 return resolver, nil 276 }) 277 } 278 279 func (fnb *FlowNodeBuilder) EnqueueNetworkInit() { 280 connGaterPeerDialFilters := make([]p2p.PeerFilter, 0) 281 connGaterInterceptSecureFilters := make([]p2p.PeerFilter, 0) 282 peerManagerFilters := make([]p2p.PeerFilter, 0) 283 284 // log and collect metrics for unicast messages that are rate limited 285 onUnicastRateLimit := func(peerID peer.ID, role, msgType string, topic channels.Topic, reason ratelimit.RateLimitReason) { 286 fnb.Logger.Warn(). 287 Str("peer_id", peerID.String()). 288 Str("role", role). 289 Str("message_type", msgType). 290 Str("topic", topic.String()). 291 Str("reason", reason.String()). 292 Bool(logging.KeySuspicious, true). 293 Msg("unicast peer rate limited") 294 fnb.Metrics.Network.OnRateLimitedUnicastMessage(role, msgType, topic.String(), reason.String()) 295 } 296 297 // setup default noop unicast rate limiters 298 unicastRateLimiters := ratelimit.NewRateLimiters(ratelimit.NewNoopRateLimiter(), ratelimit.NewNoopRateLimiter(), onUnicastRateLimit, ratelimit.WithDisabledRateLimiting(fnb.BaseConfig.UnicastRateLimitDryRun)) 299 300 // override noop unicast message rate limiter 301 if fnb.BaseConfig.UnicastMessageRateLimit > 0 { 302 unicastMessageRateLimiter := ratelimit.NewMessageRateLimiter( 303 rate.Limit(fnb.BaseConfig.UnicastMessageRateLimit), 304 fnb.BaseConfig.UnicastMessageRateLimit, 305 fnb.BaseConfig.UnicastRateLimitLockoutDuration, 306 ) 307 unicastRateLimiters.MessageRateLimiter = unicastMessageRateLimiter 308 309 // avoid connection gating and pruning during dry run 310 if !fnb.BaseConfig.UnicastRateLimitDryRun { 311 f := rateLimiterPeerFilter(unicastMessageRateLimiter) 312 // add IsRateLimited peerFilters to conn gater intercept secure peer and peer manager filters list 313 // don't allow rate limited peers to establishing incoming connections 314 connGaterInterceptSecureFilters = append(connGaterInterceptSecureFilters, f) 315 // don't create outbound connections to rate limited peers 316 peerManagerFilters = append(peerManagerFilters, f) 317 } 318 } 319 320 // override noop unicast bandwidth rate limiter 321 if fnb.BaseConfig.UnicastBandwidthRateLimit > 0 && fnb.BaseConfig.UnicastBandwidthBurstLimit > 0 { 322 unicastBandwidthRateLimiter := ratelimit.NewBandWidthRateLimiter( 323 rate.Limit(fnb.BaseConfig.UnicastBandwidthRateLimit), 324 fnb.BaseConfig.UnicastBandwidthBurstLimit, 325 fnb.BaseConfig.UnicastRateLimitLockoutDuration, 326 ) 327 unicastRateLimiters.BandWidthRateLimiter = unicastBandwidthRateLimiter 328 329 // avoid connection gating and pruning during dry run 330 if !fnb.BaseConfig.UnicastRateLimitDryRun { 331 f := rateLimiterPeerFilter(unicastBandwidthRateLimiter) 332 // add IsRateLimited peerFilters to conn gater intercept secure peer and peer manager filters list 333 connGaterInterceptSecureFilters = append(connGaterInterceptSecureFilters, f) 334 peerManagerFilters = append(peerManagerFilters, f) 335 } 336 } 337 338 fnb.Component(LibP2PNodeComponent, func(node *NodeConfig) (module.ReadyDoneAware, error) { 339 myAddr := fnb.NodeConfig.Me.Address() 340 if fnb.BaseConfig.BindAddr != NotSet { 341 myAddr = fnb.BaseConfig.BindAddr 342 } 343 344 libP2PNodeFactory := p2pbuilder.DefaultLibP2PNodeFactory( 345 fnb.Logger, 346 myAddr, 347 fnb.NetworkKey, 348 fnb.SporkID, 349 fnb.IdentityProvider, 350 fnb.Metrics.Network, 351 fnb.Resolver, 352 fnb.PeerScoringEnabled, 353 fnb.BaseConfig.NodeRole, 354 connGaterPeerDialFilters, 355 connGaterInterceptSecureFilters, 356 // run peer manager with the specified interval and let it also prune connections 357 fnb.NetworkConnectionPruning, 358 fnb.PeerUpdateInterval, 359 fnb.LibP2PResourceManagerConfig, 360 ) 361 362 libp2pNode, err := libP2PNodeFactory() 363 if err != nil { 364 return nil, fmt.Errorf("failed to create libp2p node: %w", err) 365 } 366 fnb.LibP2PNode = libp2pNode 367 368 return libp2pNode, nil 369 }) 370 371 fnb.Component(NetworkComponent, func(node *NodeConfig) (module.ReadyDoneAware, error) { 372 cf := conduit.NewDefaultConduitFactory() 373 fnb.Logger.Info().Hex("node_id", logging.ID(fnb.NodeID)).Msg("default conduit factory initiated") 374 return fnb.InitFlowNetworkWithConduitFactory(node, cf, unicastRateLimiters, peerManagerFilters) 375 }) 376 377 fnb.Module("middleware dependency", func(node *NodeConfig) error { 378 fnb.middlewareDependable = module.NewProxiedReadyDoneAware() 379 fnb.PeerManagerDependencies.Add(fnb.middlewareDependable) 380 return nil 381 }) 382 383 // peer manager won't be created until all PeerManagerDependencies are ready. 384 fnb.DependableComponent("peer manager", func(node *NodeConfig) (module.ReadyDoneAware, error) { 385 return fnb.LibP2PNode.PeerManagerComponent(), nil 386 }, fnb.PeerManagerDependencies) 387 } 388 389 func (fnb *FlowNodeBuilder) InitFlowNetworkWithConduitFactory(node *NodeConfig, cf network.ConduitFactory, unicastRateLimiters *ratelimit.RateLimiters, peerManagerFilters []p2p.PeerFilter) (network.Network, error) { 390 var mwOpts []middleware.MiddlewareOption 391 if len(fnb.MsgValidators) > 0 { 392 mwOpts = append(mwOpts, middleware.WithMessageValidators(fnb.MsgValidators...)) 393 } 394 395 // by default if no rate limiter configuration was provided in the CLI args the default 396 // noop rate limiter will be used. 397 mwOpts = append(mwOpts, middleware.WithUnicastRateLimiters(unicastRateLimiters)) 398 399 mwOpts = append(mwOpts, 400 middleware.WithPreferredUnicastProtocols(unicast.ToProtocolNames(fnb.PreferredUnicastProtocols)), 401 ) 402 403 // peerManagerFilters are used by the peerManager via the middleware to filter peers from the topology. 404 if len(peerManagerFilters) > 0 { 405 mwOpts = append(mwOpts, middleware.WithPeerManagerFilters(peerManagerFilters)) 406 } 407 408 slashingViolationsConsumer := slashing.NewSlashingViolationsConsumer(fnb.Logger, fnb.Metrics.Network) 409 410 fnb.Middleware = middleware.NewMiddleware( 411 fnb.Logger, 412 fnb.LibP2PNode, 413 fnb.Me.NodeID(), 414 fnb.Metrics.Bitswap, 415 fnb.SporkID, 416 fnb.BaseConfig.UnicastMessageTimeout, 417 fnb.IDTranslator, 418 fnb.CodecFactory(), 419 slashingViolationsConsumer, 420 mwOpts...) 421 422 subscriptionManager := subscription.NewChannelSubscriptionManager(fnb.Middleware) 423 var heroCacheCollector module.HeroCacheMetrics = metrics.NewNoopCollector() 424 if fnb.HeroCacheMetricsEnable { 425 heroCacheCollector = metrics.NetworkReceiveCacheMetricsFactory(fnb.MetricsRegisterer) 426 } 427 428 receiveCache := netcache.NewHeroReceiveCache(fnb.NetworkReceivedMessageCacheSize, 429 fnb.Logger, 430 heroCacheCollector) 431 432 err := node.Metrics.Mempool.Register(metrics.ResourceNetworkingReceiveCache, receiveCache.Size) 433 if err != nil { 434 return nil, fmt.Errorf("could not register networking receive cache metric: %w", err) 435 } 436 437 // creates network instance 438 net, err := p2p.NewNetwork(&p2p.NetworkParameters{ 439 Logger: fnb.Logger, 440 Codec: fnb.CodecFactory(), 441 Me: fnb.Me, 442 MiddlewareFactory: func() (network.Middleware, error) { return fnb.Middleware, nil }, 443 Topology: topology.NewFullyConnectedTopology(), 444 SubscriptionManager: subscriptionManager, 445 Metrics: fnb.Metrics.Network, 446 IdentityProvider: fnb.IdentityProvider, 447 ReceiveCache: receiveCache, 448 Options: []p2p.NetworkOptFunction{p2p.WithConduitFactory(cf)}, 449 }) 450 if err != nil { 451 return nil, fmt.Errorf("could not initialize network: %w", err) 452 } 453 454 fnb.Network = net 455 456 // register middleware's ReadyDoneAware interface so other components can depend on it for startup 457 if fnb.middlewareDependable != nil { 458 fnb.middlewareDependable.Init(fnb.Middleware) 459 } 460 461 idEvents := gadgets.NewIdentityDeltas(fnb.Middleware.UpdateNodeAddresses) 462 fnb.ProtocolEvents.AddConsumer(idEvents) 463 464 return net, nil 465 } 466 467 func (fnb *FlowNodeBuilder) EnqueueMetricsServerInit() { 468 fnb.Component("metrics server", func(node *NodeConfig) (module.ReadyDoneAware, error) { 469 server := metrics.NewServer(fnb.Logger, fnb.BaseConfig.metricsPort) 470 return server, nil 471 }) 472 } 473 474 func (fnb *FlowNodeBuilder) EnqueueAdminServerInit() error { 475 if fnb.AdminAddr == NotSet { 476 return nil 477 } 478 479 if (fnb.AdminCert != NotSet || fnb.AdminKey != NotSet || fnb.AdminClientCAs != NotSet) && 480 !(fnb.AdminCert != NotSet && fnb.AdminKey != NotSet && fnb.AdminClientCAs != NotSet) { 481 return fmt.Errorf("admin cert / key and client certs must all be provided to enable mutual TLS") 482 } 483 484 // create the updatable config manager 485 fnb.RegisterDefaultAdminCommands() 486 fnb.Component("admin server", func(node *NodeConfig) (module.ReadyDoneAware, error) { 487 // set up all admin commands 488 for commandName, commandFunc := range fnb.adminCommands { 489 command := commandFunc(fnb.NodeConfig) 490 fnb.adminCommandBootstrapper.RegisterHandler(commandName, command.Handler) 491 fnb.adminCommandBootstrapper.RegisterValidator(commandName, command.Validator) 492 } 493 494 opts := []admin.CommandRunnerOption{ 495 admin.WithMaxMsgSize(int(fnb.AdminMaxMsgSize)), 496 } 497 498 if node.AdminCert != NotSet { 499 serverCert, err := tls.LoadX509KeyPair(node.AdminCert, node.AdminKey) 500 if err != nil { 501 return nil, err 502 } 503 clientCAs, err := os.ReadFile(node.AdminClientCAs) 504 if err != nil { 505 return nil, err 506 } 507 certPool := x509.NewCertPool() 508 certPool.AppendCertsFromPEM(clientCAs) 509 config := &tls.Config{ 510 MinVersion: tls.VersionTLS13, 511 Certificates: []tls.Certificate{serverCert}, 512 ClientAuth: tls.RequireAndVerifyClientCert, 513 ClientCAs: certPool, 514 } 515 516 opts = append(opts, admin.WithTLS(config)) 517 } 518 519 runner := fnb.adminCommandBootstrapper.Bootstrap(fnb.Logger, fnb.AdminAddr, opts...) 520 521 return runner, nil 522 }) 523 524 return nil 525 } 526 527 func (fnb *FlowNodeBuilder) RegisterBadgerMetrics() error { 528 return metrics.RegisterBadgerMetrics() 529 } 530 531 func (fnb *FlowNodeBuilder) EnqueueTracer() { 532 fnb.Component("tracer", func(node *NodeConfig) (module.ReadyDoneAware, error) { 533 return fnb.Tracer, nil 534 }) 535 } 536 537 func (fnb *FlowNodeBuilder) ParseAndPrintFlags() error { 538 // parse configuration parameters 539 pflag.Parse() 540 541 // print all flags 542 log := fnb.Logger.Info() 543 544 pflag.VisitAll(func(flag *pflag.Flag) { 545 log = log.Str(flag.Name, flag.Value.String()) 546 }) 547 548 log.Msg("flags loaded") 549 550 return fnb.extraFlagsValidation() 551 } 552 553 func (fnb *FlowNodeBuilder) ValidateFlags(f func() error) NodeBuilder { 554 fnb.extraFlagCheck = f 555 return fnb 556 } 557 558 func (fnb *FlowNodeBuilder) PrintBuildVersionDetails() { 559 fnb.Logger.Info().Str("version", build.Semver()).Str("commit", build.Commit()).Msg("build details") 560 } 561 562 func (fnb *FlowNodeBuilder) initNodeInfo() error { 563 if fnb.BaseConfig.nodeIDHex == NotSet { 564 return fmt.Errorf("cannot start without node ID") 565 } 566 567 nodeID, err := flow.HexStringToIdentifier(fnb.BaseConfig.nodeIDHex) 568 if err != nil { 569 return fmt.Errorf("could not parse node ID from string (id: %v): %w", fnb.BaseConfig.nodeIDHex, err) 570 } 571 572 info, err := LoadPrivateNodeInfo(fnb.BaseConfig.BootstrapDir, nodeID) 573 if err != nil { 574 return fmt.Errorf("failed to load private node info: %w", err) 575 } 576 577 fnb.NodeID = nodeID 578 fnb.NetworkKey = info.NetworkPrivKey.PrivateKey 579 fnb.StakingKey = info.StakingPrivKey.PrivateKey 580 581 return nil 582 } 583 584 func (fnb *FlowNodeBuilder) initLogger() error { 585 // configure logger with standard level, node ID and UTC timestamp 586 zerolog.TimeFieldFormat = time.RFC3339Nano 587 zerolog.TimestampFunc = func() time.Time { return time.Now().UTC() } 588 589 // Drop all log events that exceed this rate limit 590 throttledSampler := logging.BurstSampler(fnb.BaseConfig.debugLogLimit, time.Second) 591 592 log := fnb.Logger.With(). 593 Timestamp(). 594 Str("node_role", fnb.BaseConfig.NodeRole). 595 Str("node_id", fnb.NodeID.String()). 596 Logger(). 597 Sample(zerolog.LevelSampler{ 598 TraceSampler: throttledSampler, 599 DebugSampler: throttledSampler, 600 }) 601 602 log.Info().Msgf("flow %s node starting up", fnb.BaseConfig.NodeRole) 603 604 // parse config log level and apply to logger 605 lvl, err := zerolog.ParseLevel(strings.ToLower(fnb.BaseConfig.level)) 606 if err != nil { 607 return fmt.Errorf("invalid log level: %w", err) 608 } 609 610 // Minimum log level is set to trace, then overridden by SetGlobalLevel. 611 // this allows admin commands to modify the level to any value during runtime 612 log = log.Level(zerolog.TraceLevel) 613 zerolog.SetGlobalLevel(lvl) 614 615 fnb.Logger = log 616 617 return nil 618 } 619 620 func (fnb *FlowNodeBuilder) initMetrics() error { 621 622 fnb.Tracer = trace.NewNoopTracer() 623 if fnb.BaseConfig.tracerEnabled { 624 nodeIdHex := fnb.NodeID.String() 625 if len(nodeIdHex) > 8 { 626 nodeIdHex = nodeIdHex[:8] 627 } 628 629 serviceName := fnb.BaseConfig.NodeRole + "-" + nodeIdHex 630 tracer, err := trace.NewTracer( 631 fnb.Logger, 632 serviceName, 633 fnb.RootChainID.String(), 634 fnb.tracerSensitivity, 635 ) 636 if err != nil { 637 return fmt.Errorf("could not initialize tracer: %w", err) 638 } 639 640 fnb.Logger.Info().Msg("Tracer Started") 641 fnb.Tracer = tracer 642 } 643 644 fnb.Metrics = Metrics{ 645 Network: metrics.NewNoopCollector(), 646 Engine: metrics.NewNoopCollector(), 647 Compliance: metrics.NewNoopCollector(), 648 Cache: metrics.NewNoopCollector(), 649 Mempool: metrics.NewNoopCollector(), 650 CleanCollector: metrics.NewNoopCollector(), 651 Bitswap: metrics.NewNoopCollector(), 652 } 653 if fnb.BaseConfig.MetricsEnabled { 654 fnb.MetricsRegisterer = prometheus.DefaultRegisterer 655 656 mempools := metrics.NewMempoolCollector(5 * time.Second) 657 658 fnb.Metrics = Metrics{ 659 Network: metrics.NewNetworkCollector(fnb.Logger), 660 Engine: metrics.NewEngineCollector(), 661 Compliance: metrics.NewComplianceCollector(), 662 // CacheControl metrics has been causing memory abuse, disable for now 663 // Cache: metrics.NewCacheCollector(fnb.RootChainID), 664 Cache: metrics.NewNoopCollector(), 665 CleanCollector: metrics.NewCleanerCollector(), 666 Mempool: mempools, 667 Bitswap: metrics.NewBitswapCollector(), 668 } 669 670 // registers mempools as a Component so that its Ready method is invoked upon startup 671 fnb.Component("mempools metrics", func(node *NodeConfig) (module.ReadyDoneAware, error) { 672 return mempools, nil 673 }) 674 675 // metrics enabled, report node info metrics as post init event 676 fnb.PostInit(func(nodeConfig *NodeConfig) error { 677 nodeInfoMetrics := metrics.NewNodeInfoCollector() 678 protocolVersion, err := fnb.RootSnapshot.Params().ProtocolVersion() 679 if err != nil { 680 return fmt.Errorf("could not query root snapshoot protocol version: %w", err) 681 } 682 nodeInfoMetrics.NodeInfo(build.Semver(), build.Commit(), nodeConfig.SporkID.String(), protocolVersion) 683 return nil 684 }) 685 } 686 return nil 687 } 688 689 func (fnb *FlowNodeBuilder) createGCEProfileUploader(client *gcemd.Client, opts ...option.ClientOption) (profiler.Uploader, error) { 690 projectID, err := client.ProjectID() 691 if err != nil { 692 return &profiler.NoopUploader{}, fmt.Errorf("failed to get project ID: %w", err) 693 } 694 695 instance, err := client.InstanceID() 696 if err != nil { 697 return &profiler.NoopUploader{}, fmt.Errorf("failed to get instance ID: %w", err) 698 } 699 700 chainID := fnb.RootChainID.String() 701 if chainID == "" { 702 fnb.Logger.Warn().Msg("RootChainID is not set, using default value") 703 chainID = "unknown" 704 } 705 706 params := profiler.Params{ 707 ProjectID: projectID, 708 ChainID: chainID, 709 Role: fnb.NodeConfig.NodeRole, 710 Version: build.Semver(), 711 Commit: build.Commit(), 712 Instance: instance, 713 } 714 fnb.Logger.Info().Msgf("creating pprof profile uploader with params: %+v", params) 715 716 return profiler.NewUploader(fnb.Logger, params, opts...) 717 } 718 719 func (fnb *FlowNodeBuilder) createProfileUploader() (profiler.Uploader, error) { 720 switch { 721 case fnb.BaseConfig.profilerConfig.UploaderEnabled && gcemd.OnGCE(): 722 return fnb.createGCEProfileUploader(gcemd.NewClient(nil)) 723 default: 724 fnb.Logger.Info().Msg("not running on GCE, setting pprof uploader to noop") 725 return &profiler.NoopUploader{}, nil 726 } 727 } 728 729 func (fnb *FlowNodeBuilder) initProfiler() error { 730 uploader, err := fnb.createProfileUploader() 731 if err != nil { 732 fnb.Logger.Warn().Err(err).Msg("failed to create pprof uploader, falling back to noop") 733 uploader = &profiler.NoopUploader{} 734 } 735 736 profiler, err := profiler.New(fnb.Logger, uploader, fnb.BaseConfig.profilerConfig) 737 if err != nil { 738 return fmt.Errorf("could not initialize profiler: %w", err) 739 } 740 741 // register the enabled state of the profiler for dynamic configuring 742 err = fnb.ConfigManager.RegisterBoolConfig("profiler-enabled", profiler.Enabled, profiler.SetEnabled) 743 if err != nil { 744 return fmt.Errorf("could not register profiler-enabled config: %w", err) 745 } 746 747 err = fnb.ConfigManager.RegisterDurationConfig( 748 "profiler-trigger", 749 func() time.Duration { return fnb.BaseConfig.profilerConfig.Duration }, 750 func(d time.Duration) error { return profiler.TriggerRun(d) }, 751 ) 752 if err != nil { 753 return fmt.Errorf("could not register profiler-trigger config: %w", err) 754 } 755 756 err = fnb.ConfigManager.RegisterUintConfig( 757 "profiler-set-mem-profile-rate", 758 func() uint { return uint(runtime.MemProfileRate) }, 759 func(r uint) error { runtime.MemProfileRate = int(r); return nil }, 760 ) 761 if err != nil { 762 return fmt.Errorf("could not register profiler-set-mem-profile-rate setting: %w", err) 763 } 764 765 // There is no way to get the current block profile rate so we keep track of it ourselves. 766 currentRate := new(uint) 767 err = fnb.ConfigManager.RegisterUintConfig( 768 "profiler-set-block-profile-rate", 769 func() uint { return *currentRate }, 770 func(r uint) error { currentRate = &r; runtime.SetBlockProfileRate(int(r)); return nil }, 771 ) 772 if err != nil { 773 return fmt.Errorf("could not register profiler-set-block-profile-rate setting: %w", err) 774 } 775 776 err = fnb.ConfigManager.RegisterUintConfig( 777 "profiler-set-mutex-profile-fraction", 778 func() uint { return uint(runtime.SetMutexProfileFraction(-1)) }, 779 func(r uint) error { _ = runtime.SetMutexProfileFraction(int(r)); return nil }, 780 ) 781 if err != nil { 782 return fmt.Errorf("could not register profiler-set-mutex-profile-fraction setting: %w", err) 783 } 784 785 // registering as a DependableComponent with no dependencies so that it's started immediately on startup 786 // without being blocked by other component's Ready() 787 fnb.DependableComponent("profiler", func(node *NodeConfig) (module.ReadyDoneAware, error) { 788 return profiler, nil 789 }, NewDependencyList()) 790 791 return nil 792 } 793 794 func (fnb *FlowNodeBuilder) initDB() error { 795 796 // if a db has been passed in, use that instead of creating one 797 if fnb.BaseConfig.db != nil { 798 fnb.DB = fnb.BaseConfig.db 799 return nil 800 } 801 802 // Pre-create DB path (Badger creates only one-level dirs) 803 err := os.MkdirAll(fnb.BaseConfig.datadir, 0700) 804 if err != nil { 805 return fmt.Errorf("could not create datadir (path: %s): %w", fnb.BaseConfig.datadir, err) 806 } 807 808 log := sutil.NewLogger(fnb.Logger) 809 810 // we initialize the database with options that allow us to keep the maximum 811 // item size in the trie itself (up to 1MB) and where we keep all level zero 812 // tables in-memory as well; this slows down compaction and increases memory 813 // usage, but it improves overall performance and disk i/o 814 opts := badger. 815 DefaultOptions(fnb.BaseConfig.datadir). 816 WithKeepL0InMemory(true). 817 WithLogger(log). 818 819 // the ValueLogFileSize option specifies how big the value of a 820 // key-value pair is allowed to be saved into badger. 821 // exceeding this limit, will fail with an error like this: 822 // could not store data: Value with size <xxxx> exceeded 1073741824 limit 823 // Maximum value size is 10G, needed by execution node 824 // TODO: finding a better max value for each node type 825 WithValueLogFileSize(128 << 23). 826 WithValueLogMaxEntries(100000) // Default is 1000000 827 828 publicDB, err := bstorage.InitPublic(opts) 829 if err != nil { 830 return fmt.Errorf("could not open public db: %w", err) 831 } 832 fnb.DB = publicDB 833 834 fnb.ShutdownFunc(func() error { 835 if err := fnb.DB.Close(); err != nil { 836 return fmt.Errorf("error closing protocol database: %w", err) 837 } 838 return nil 839 }) 840 841 return nil 842 } 843 844 func (fnb *FlowNodeBuilder) initSecretsDB() error { 845 846 // if the secrets DB is disabled (only applicable for Consensus Follower, 847 // which makes use of this same logic), skip this initialization 848 if !fnb.BaseConfig.secretsDBEnabled { 849 return nil 850 } 851 852 if fnb.BaseConfig.secretsdir == NotSet { 853 return fmt.Errorf("missing required flag '--secretsdir'") 854 } 855 856 err := os.MkdirAll(fnb.BaseConfig.secretsdir, 0700) 857 if err != nil { 858 return fmt.Errorf("could not create secrets db dir (path: %s): %w", fnb.BaseConfig.secretsdir, err) 859 } 860 861 log := sutil.NewLogger(fnb.Logger) 862 863 opts := badger.DefaultOptions(fnb.BaseConfig.secretsdir).WithLogger(log) 864 865 // NOTE: SN nodes need to explicitly set --insecure-secrets-db to true in order to 866 // disable secrets database encryption 867 if fnb.NodeRole == flow.RoleConsensus.String() && fnb.InsecureSecretsDB { 868 fnb.Logger.Warn().Msg("starting with secrets database encryption disabled") 869 } else { 870 encryptionKey, err := loadSecretsEncryptionKey(fnb.BootstrapDir, fnb.NodeID) 871 if errors.Is(err, os.ErrNotExist) { 872 if fnb.NodeRole == flow.RoleConsensus.String() { 873 // missing key is a fatal error for SN nodes 874 return fmt.Errorf("secrets db encryption key not found: %w", err) 875 } 876 fnb.Logger.Warn().Msg("starting with secrets database encryption disabled") 877 } else if err != nil { 878 return fmt.Errorf("failed to read secrets db encryption key: %w", err) 879 } else { 880 opts = opts.WithEncryptionKey(encryptionKey) 881 } 882 } 883 884 secretsDB, err := bstorage.InitSecret(opts) 885 if err != nil { 886 return fmt.Errorf("could not open secrets db: %w", err) 887 } 888 fnb.SecretsDB = secretsDB 889 890 fnb.ShutdownFunc(func() error { 891 if err := fnb.SecretsDB.Close(); err != nil { 892 return fmt.Errorf("error closing secrets database: %w", err) 893 } 894 return nil 895 }) 896 897 return nil 898 } 899 900 func (fnb *FlowNodeBuilder) initStorage() error { 901 902 // in order to void long iterations with big keys when initializing with an 903 // already populated database, we bootstrap the initial maximum key size 904 // upon starting 905 err := operation.RetryOnConflict(fnb.DB.Update, func(tx *badger.Txn) error { 906 return operation.InitMax(tx) 907 }) 908 if err != nil { 909 return fmt.Errorf("could not initialize max tracker: %w", err) 910 } 911 912 headers := bstorage.NewHeaders(fnb.Metrics.Cache, fnb.DB) 913 guarantees := bstorage.NewGuarantees(fnb.Metrics.Cache, fnb.DB, fnb.BaseConfig.guaranteesCacheSize) 914 seals := bstorage.NewSeals(fnb.Metrics.Cache, fnb.DB) 915 results := bstorage.NewExecutionResults(fnb.Metrics.Cache, fnb.DB) 916 receipts := bstorage.NewExecutionReceipts(fnb.Metrics.Cache, fnb.DB, results, fnb.BaseConfig.receiptsCacheSize) 917 index := bstorage.NewIndex(fnb.Metrics.Cache, fnb.DB) 918 payloads := bstorage.NewPayloads(fnb.DB, index, guarantees, seals, receipts, results) 919 blocks := bstorage.NewBlocks(fnb.DB, headers, payloads) 920 transactions := bstorage.NewTransactions(fnb.Metrics.Cache, fnb.DB) 921 collections := bstorage.NewCollections(fnb.DB, transactions) 922 setups := bstorage.NewEpochSetups(fnb.Metrics.Cache, fnb.DB) 923 epochCommits := bstorage.NewEpochCommits(fnb.Metrics.Cache, fnb.DB) 924 statuses := bstorage.NewEpochStatuses(fnb.Metrics.Cache, fnb.DB) 925 commits := bstorage.NewCommits(fnb.Metrics.Cache, fnb.DB) 926 927 fnb.Storage = Storage{ 928 Headers: headers, 929 Guarantees: guarantees, 930 Receipts: receipts, 931 Results: results, 932 Seals: seals, 933 Index: index, 934 Payloads: payloads, 935 Blocks: blocks, 936 Transactions: transactions, 937 Collections: collections, 938 Setups: setups, 939 EpochCommits: epochCommits, 940 Statuses: statuses, 941 Commits: commits, 942 } 943 944 return nil 945 } 946 947 func (fnb *FlowNodeBuilder) InitIDProviders() { 948 fnb.Module("id providers", func(node *NodeConfig) error { 949 idCache, err := cache.NewProtocolStateIDCache(node.Logger, node.State, node.ProtocolEvents) 950 if err != nil { 951 return fmt.Errorf("could not initialize ProtocolStateIDCache: %w", err) 952 } 953 node.IDTranslator = idCache 954 955 // The following wrapper allows to black-list byzantine nodes via an admin command: 956 // the wrapper overrides the 'Ejected' flag of blocked nodes to true 957 blocklistWrapper, err := cache.NewNodeBlocklistWrapper(idCache, node.DB) 958 if err != nil { 959 return fmt.Errorf("could not initialize NodeBlocklistWrapper: %w", err) 960 } 961 node.IdentityProvider = blocklistWrapper 962 963 // register the blocklist for dynamic configuration via admin command 964 err = node.ConfigManager.RegisterIdentifierListConfig("network-id-provider-blocklist", 965 blocklistWrapper.GetBlocklist, blocklistWrapper.Update) 966 if err != nil { 967 return fmt.Errorf("failed to register blocklist with config manager: %w", err) 968 } 969 970 node.SyncEngineIdentifierProvider = id.NewIdentityFilterIdentifierProvider( 971 filter.And( 972 filter.HasRole(flow.RoleConsensus), 973 filter.Not(filter.HasNodeID(node.Me.NodeID())), 974 p2p.NotEjectedFilter, 975 ), 976 node.IdentityProvider, 977 ) 978 return nil 979 }) 980 } 981 982 func (fnb *FlowNodeBuilder) initState() error { 983 fnb.ProtocolEvents = events.NewDistributor() 984 985 isBootStrapped, err := badgerState.IsBootstrapped(fnb.DB) 986 if err != nil { 987 return fmt.Errorf("failed to determine whether database contains bootstrapped state: %w", err) 988 } 989 990 if isBootStrapped { 991 fnb.Logger.Info().Msg("opening already bootstrapped protocol state") 992 state, err := badgerState.OpenState( 993 fnb.Metrics.Compliance, 994 fnb.DB, 995 fnb.Storage.Headers, 996 fnb.Storage.Seals, 997 fnb.Storage.Results, 998 fnb.Storage.Blocks, 999 fnb.Storage.Setups, 1000 fnb.Storage.EpochCommits, 1001 fnb.Storage.Statuses, 1002 ) 1003 if err != nil { 1004 return fmt.Errorf("could not open protocol state: %w", err) 1005 } 1006 fnb.State = state 1007 1008 // set root snapshot field 1009 rootBlock, err := state.Params().Root() 1010 if err != nil { 1011 return fmt.Errorf("could not get root block from protocol state: %w", err) 1012 } 1013 1014 rootSnapshot := state.AtBlockID(rootBlock.ID()) 1015 if err := fnb.setRootSnapshot(rootSnapshot); err != nil { 1016 return err 1017 } 1018 } else { 1019 // Bootstrap! 1020 fnb.Logger.Info().Msg("bootstrapping empty protocol state") 1021 1022 // if no root snapshot is configured, attempt to load the file from disk 1023 var rootSnapshot = fnb.RootSnapshot 1024 if rootSnapshot == nil { 1025 fnb.Logger.Info().Msgf("loading root protocol state snapshot from disk") 1026 rootSnapshot, err = loadRootProtocolSnapshot(fnb.BaseConfig.BootstrapDir) 1027 if err != nil { 1028 return fmt.Errorf("failed to read protocol snapshot from disk: %w", err) 1029 } 1030 } 1031 // set root snapshot fields 1032 if err := fnb.setRootSnapshot(rootSnapshot); err != nil { 1033 return err 1034 } 1035 1036 // generate bootstrap config options as per NodeConfig 1037 var options []badgerState.BootstrapConfigOptions 1038 if fnb.SkipNwAddressBasedValidations { 1039 options = append(options, badgerState.SkipNetworkAddressValidation) 1040 } 1041 1042 fnb.State, err = badgerState.Bootstrap( 1043 fnb.Metrics.Compliance, 1044 fnb.DB, 1045 fnb.Storage.Headers, 1046 fnb.Storage.Seals, 1047 fnb.Storage.Results, 1048 fnb.Storage.Blocks, 1049 fnb.Storage.Setups, 1050 fnb.Storage.EpochCommits, 1051 fnb.Storage.Statuses, 1052 fnb.RootSnapshot, 1053 options..., 1054 ) 1055 if err != nil { 1056 return fmt.Errorf("could not bootstrap protocol state: %w", err) 1057 } 1058 1059 fnb.Logger.Info(). 1060 Hex("root_result_id", logging.Entity(fnb.RootResult)). 1061 Hex("root_state_commitment", fnb.RootSeal.FinalState[:]). 1062 Hex("root_block_id", logging.Entity(fnb.RootBlock)). 1063 Uint64("root_block_height", fnb.RootBlock.Header.Height). 1064 Msg("protocol state bootstrapped") 1065 } 1066 1067 // initialize local if it hasn't been initialized yet 1068 if fnb.Me == nil { 1069 if err := fnb.initLocal(); err != nil { 1070 return err 1071 } 1072 } 1073 1074 lastFinalized, err := fnb.State.Final().Head() 1075 if err != nil { 1076 return fmt.Errorf("could not get last finalized block header: %w", err) 1077 } 1078 1079 fnb.Logger.Info(). 1080 Hex("root_block_id", logging.Entity(fnb.RootBlock)). 1081 Uint64("root_block_height", fnb.RootBlock.Header.Height). 1082 Hex("finalized_block_id", logging.Entity(lastFinalized)). 1083 Uint64("finalized_block_height", lastFinalized.Height). 1084 Msg("successfully opened protocol state") 1085 1086 return nil 1087 } 1088 1089 // setRootSnapshot sets the root snapshot field and all related fields in the NodeConfig. 1090 func (fnb *FlowNodeBuilder) setRootSnapshot(rootSnapshot protocol.Snapshot) error { 1091 var err error 1092 1093 // validate the root snapshot QCs 1094 err = badgerState.IsValidRootSnapshotQCs(rootSnapshot) 1095 if err != nil { 1096 return fmt.Errorf("failed to validate root snapshot QCs: %w", err) 1097 } 1098 1099 fnb.RootSnapshot = rootSnapshot 1100 // cache properties of the root snapshot, for convenience 1101 fnb.RootResult, fnb.RootSeal, err = fnb.RootSnapshot.SealedResult() 1102 if err != nil { 1103 return fmt.Errorf("failed to read root sealed result: %w", err) 1104 } 1105 1106 sealingSegment, err := fnb.RootSnapshot.SealingSegment() 1107 if err != nil { 1108 return fmt.Errorf("failed to read root sealing segment: %w", err) 1109 } 1110 1111 fnb.RootBlock = sealingSegment.Highest() 1112 fnb.RootQC, err = fnb.RootSnapshot.QuorumCertificate() 1113 if err != nil { 1114 return fmt.Errorf("failed to read root QC: %w", err) 1115 } 1116 1117 fnb.RootChainID = fnb.RootBlock.Header.ChainID 1118 fnb.SporkID, err = fnb.RootSnapshot.Params().SporkID() 1119 if err != nil { 1120 return fmt.Errorf("failed to read spork ID: %w", err) 1121 } 1122 1123 return nil 1124 } 1125 1126 func (fnb *FlowNodeBuilder) initLocal() error { 1127 // Verify that my ID (as given in the configuration) is known to the network 1128 // (i.e. protocol state). There are two cases that will cause the following error: 1129 // 1) used the wrong node id, which is not part of the identity list of the finalized state 1130 // 2) the node id is a new one for a new spork, but the bootstrap data has not been updated. 1131 myID, err := flow.HexStringToIdentifier(fnb.BaseConfig.nodeIDHex) 1132 if err != nil { 1133 return fmt.Errorf("could not parse node identifier: %w", err) 1134 } 1135 1136 self, err := fnb.State.Final().Identity(myID) 1137 if err != nil { 1138 return fmt.Errorf("node identity not found in the identity list of the finalized state (id: %v): %w", myID, err) 1139 } 1140 1141 // Verify that my role (as given in the configuration) is consistent with the protocol state. 1142 // We enforce this strictly for MainNet. For other networks (e.g. TestNet or BenchNet), we 1143 // are lenient, to allow ghost node to run as any role. 1144 if self.Role.String() != fnb.BaseConfig.NodeRole { 1145 rootBlockHeader, err := fnb.State.Params().Root() 1146 if err != nil { 1147 return fmt.Errorf("could not get root block from protocol state: %w", err) 1148 } 1149 1150 if rootBlockHeader.ChainID == flow.Mainnet { 1151 return fmt.Errorf("running as incorrect role, expected: %v, actual: %v, exiting", 1152 self.Role.String(), 1153 fnb.BaseConfig.NodeRole, 1154 ) 1155 } 1156 1157 fnb.Logger.Warn().Msgf("running as incorrect role, expected: %v, actual: %v, continuing", 1158 self.Role.String(), 1159 fnb.BaseConfig.NodeRole) 1160 } 1161 1162 // ensure that the configured staking/network keys are consistent with the protocol state 1163 if !self.NetworkPubKey.Equals(fnb.NetworkKey.PublicKey()) { 1164 return fmt.Errorf("configured networking key does not match protocol state") 1165 } 1166 if !self.StakingPubKey.Equals(fnb.StakingKey.PublicKey()) { 1167 return fmt.Errorf("configured staking key does not match protocol state") 1168 } 1169 1170 fnb.Me, err = local.New(self, fnb.StakingKey) 1171 if err != nil { 1172 return fmt.Errorf("could not initialize local: %w", err) 1173 } 1174 1175 return nil 1176 } 1177 1178 func (fnb *FlowNodeBuilder) initFvmOptions() { 1179 blockFinder := environment.NewBlockFinder(fnb.Storage.Headers) 1180 vmOpts := []fvm.Option{ 1181 fvm.WithChain(fnb.RootChainID.Chain()), 1182 fvm.WithBlocks(blockFinder), 1183 fvm.WithAccountStorageLimit(true), 1184 } 1185 if fnb.RootChainID == flow.Testnet || fnb.RootChainID == flow.Sandboxnet || fnb.RootChainID == flow.Mainnet { 1186 vmOpts = append(vmOpts, 1187 fvm.WithTransactionFeesEnabled(true), 1188 ) 1189 } 1190 if fnb.RootChainID == flow.Testnet || fnb.RootChainID == flow.Sandboxnet || fnb.RootChainID == flow.Localnet || fnb.RootChainID == flow.Benchnet { 1191 vmOpts = append(vmOpts, 1192 fvm.WithContractDeploymentRestricted(false), 1193 ) 1194 } 1195 fnb.FvmOptions = vmOpts 1196 } 1197 1198 // handleModules initializes the given module. 1199 func (fnb *FlowNodeBuilder) handleModule(v namedModuleFunc) error { 1200 err := v.fn(fnb.NodeConfig) 1201 if err != nil { 1202 return fmt.Errorf("module %s initialization failed: %w", v.name, err) 1203 } 1204 1205 fnb.Logger.Info().Str("module", v.name).Msg("module initialization complete") 1206 return nil 1207 } 1208 1209 // handleModules initializes all modules that have been enqueued on this node builder. 1210 func (fnb *FlowNodeBuilder) handleModules() error { 1211 for _, f := range fnb.modules { 1212 if err := fnb.handleModule(f); err != nil { 1213 return err 1214 } 1215 } 1216 1217 return nil 1218 } 1219 1220 // handleComponents registers the component's factory method with the ComponentManager to be run 1221 // when the node starts. 1222 // It uses signal channels to ensure that components are started serially. 1223 func (fnb *FlowNodeBuilder) handleComponents() error { 1224 // The parent/started channels are used to enforce serial startup. 1225 // - parent is the started channel of the previous component. 1226 // - when a component is ready, it closes its started channel by calling the provided callback. 1227 // Components wait for their parent channel to close before starting, this ensures they start 1228 // up serially, even though the ComponentManager will launch the goroutines in parallel. 1229 1230 // The first component is always started immediately 1231 parent := make(chan struct{}) 1232 close(parent) 1233 1234 var err error 1235 asyncComponents := []namedComponentFunc{} 1236 1237 // Run all components 1238 for _, f := range fnb.components { 1239 // Components with explicit dependencies are not started serially 1240 if f.dependencies != nil { 1241 asyncComponents = append(asyncComponents, f) 1242 continue 1243 } 1244 1245 started := make(chan struct{}) 1246 1247 if f.errorHandler != nil { 1248 err = fnb.handleRestartableComponent(f, parent, func() { close(started) }) 1249 } else { 1250 err = fnb.handleComponent(f, parent, func() { close(started) }) 1251 } 1252 1253 if err != nil { 1254 return fmt.Errorf("could not handle component %s: %w", f.name, err) 1255 } 1256 1257 parent = started 1258 } 1259 1260 // Components with explicit dependencies are run asynchronously, which means dependencies in 1261 // the dependency list must be initialized outside of the component factory. 1262 for _, f := range asyncComponents { 1263 fnb.Logger.Debug().Str("component", f.name).Int("dependencies", len(f.dependencies.components)).Msg("handling component asynchronously") 1264 err = fnb.handleComponent(f, util.AllReady(f.dependencies.components...), func() {}) 1265 if err != nil { 1266 return fmt.Errorf("could not handle dependable component %s: %w", f.name, err) 1267 } 1268 } 1269 1270 return nil 1271 } 1272 1273 // handleComponent constructs a component using the provided ReadyDoneFactory, and registers a 1274 // worker with the ComponentManager to be run when the node is started. 1275 // 1276 // The ComponentManager starts all workers in parallel. Since some components have non-idempotent 1277 // ReadyDoneAware interfaces, we need to ensure that they are started serially. This is accomplished 1278 // using the parentReady channel and the started closure. Components wait for the parentReady channel 1279 // to close before starting, and then call the started callback after they are ready(). The started 1280 // callback closes the parentReady channel of the next component, and so on. 1281 // 1282 // TODO: Instead of this serial startup, components should wait for their dependencies to be ready 1283 // using their ReadyDoneAware interface. After components are updated to use the idempotent 1284 // ReadyDoneAware interface and explicitly wait for their dependencies to be ready, we can remove 1285 // this channel chaining. 1286 func (fnb *FlowNodeBuilder) handleComponent(v namedComponentFunc, dependencies <-chan struct{}, started func()) error { 1287 // Add a closure that starts the component when the node is started, and then waits for it to exit 1288 // gracefully. 1289 // Startup for all components will happen in parallel, and components can use their dependencies' 1290 // ReadyDoneAware interface to wait until they are ready. 1291 fnb.componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 1292 // wait for the dependencies to be ready before starting 1293 if err := util.WaitClosed(ctx, dependencies); err != nil { 1294 return 1295 } 1296 1297 logger := fnb.Logger.With().Str("component", v.name).Logger() 1298 1299 // First, build the component using the factory method. 1300 readyAware, err := v.fn(fnb.NodeConfig) 1301 if err != nil { 1302 ctx.Throw(fmt.Errorf("component %s initialization failed: %w", v.name, err)) 1303 } 1304 logger.Info().Msg("component initialization complete") 1305 1306 // if this is a Component, use the Startable interface to start the component, otherwise 1307 // Ready() will launch it. 1308 cmp, isComponent := readyAware.(component.Component) 1309 if isComponent { 1310 cmp.Start(ctx) 1311 } 1312 1313 // Wait until the component is ready 1314 if err := util.WaitClosed(ctx, readyAware.Ready()); err != nil { 1315 // The context was cancelled. Continue to shutdown logic. 1316 logger.Warn().Msg("component startup aborted") 1317 1318 // Non-idempotent ReadyDoneAware components trigger shutdown by calling Done(). Don't 1319 // do that here since it may not be safe if the component is not Ready(). 1320 if !isComponent { 1321 return 1322 } 1323 } else { 1324 logger.Info().Msg("component startup complete") 1325 ready() 1326 1327 // Signal to the next component that we're ready. 1328 started() 1329 } 1330 1331 // Component shutdown is signaled by cancelling its context. 1332 <-ctx.Done() 1333 logger.Info().Msg("component shutdown started") 1334 1335 // Finally, wait until component has finished shutting down. 1336 <-readyAware.Done() 1337 logger.Info().Msg("component shutdown complete") 1338 }) 1339 1340 return nil 1341 } 1342 1343 // handleRestartableComponent constructs a component using the provided ReadyDoneFactory, and 1344 // registers a worker with the ComponentManager to be run when the node is started. 1345 // 1346 // Restartable Components are components that can be restarted after successfully handling 1347 // an irrecoverable error. 1348 // 1349 // Any irrecoverable errors thrown by the component will be passed to the provided error handler. 1350 func (fnb *FlowNodeBuilder) handleRestartableComponent(v namedComponentFunc, parentReady <-chan struct{}, started func()) error { 1351 fnb.componentBuilder.AddWorker(func(ctx irrecoverable.SignalerContext, ready component.ReadyFunc) { 1352 // wait for the previous component to be ready before starting 1353 if err := util.WaitClosed(ctx, parentReady); err != nil { 1354 return 1355 } 1356 1357 // Note: we're marking the worker routine ready before we even attempt to start the 1358 // component. the idea behind a restartable component is that the node should not depend 1359 // on it for safe operation, so the node does not need to wait for it to be ready. 1360 ready() 1361 1362 // do not block serial startup. started can only be called once, so it cannot be called 1363 // from within the componentFactory 1364 started() 1365 1366 log := fnb.Logger.With().Str("component", v.name).Logger() 1367 1368 // This may be called multiple times if the component is restarted 1369 componentFactory := func() (component.Component, error) { 1370 c, err := v.fn(fnb.NodeConfig) 1371 if err != nil { 1372 return nil, err 1373 } 1374 log.Info().Msg("component initialization complete") 1375 1376 go func() { 1377 if err := util.WaitClosed(ctx, c.Ready()); err != nil { 1378 log.Info().Msg("component startup aborted") 1379 } else { 1380 log.Info().Msg("component startup complete") 1381 } 1382 1383 <-ctx.Done() 1384 log.Info().Msg("component shutdown started") 1385 }() 1386 return c.(component.Component), nil 1387 } 1388 1389 err := component.RunComponent(ctx, componentFactory, v.errorHandler) 1390 if err != nil && !errors.Is(err, ctx.Err()) { 1391 ctx.Throw(fmt.Errorf("component %s encountered an unhandled irrecoverable error: %w", v.name, err)) 1392 } 1393 1394 log.Info().Msg("component shutdown complete") 1395 }) 1396 1397 return nil 1398 } 1399 1400 // ExtraFlags enables binding additional flags beyond those defined in BaseConfig. 1401 func (fnb *FlowNodeBuilder) ExtraFlags(f func(*pflag.FlagSet)) NodeBuilder { 1402 f(fnb.flags) 1403 return fnb 1404 } 1405 1406 // Module enables setting up dependencies of the engine with the builder context. 1407 func (fnb *FlowNodeBuilder) Module(name string, f BuilderFunc) NodeBuilder { 1408 fnb.modules = append(fnb.modules, namedModuleFunc{ 1409 fn: f, 1410 name: name, 1411 }) 1412 return fnb 1413 } 1414 1415 // ShutdownFunc adds a callback function that is called after all components have exited. 1416 func (fnb *FlowNodeBuilder) ShutdownFunc(fn func() error) NodeBuilder { 1417 fnb.postShutdownFns = append(fnb.postShutdownFns, fn) 1418 return fnb 1419 } 1420 1421 func (fnb *FlowNodeBuilder) AdminCommand(command string, f func(config *NodeConfig) commands.AdminCommand) NodeBuilder { 1422 fnb.adminCommands[command] = f 1423 return fnb 1424 } 1425 1426 // Component adds a new component to the node that conforms to the ReadyDoneAware 1427 // interface. 1428 // 1429 // The ReadyDoneFactory may return either a `Component` or `ReadyDoneAware` instance. 1430 // In both cases, the object is started when the node is run, and the node will wait for the 1431 // component to exit gracefully. 1432 func (fnb *FlowNodeBuilder) Component(name string, f ReadyDoneFactory) NodeBuilder { 1433 fnb.components = append(fnb.components, namedComponentFunc{ 1434 fn: f, 1435 name: name, 1436 }) 1437 return fnb 1438 } 1439 1440 // DependableComponent adds a new component to the node that conforms to the ReadyDoneAware 1441 // interface. The builder will wait until all of the components in the dependencies list are ready 1442 // before constructing the component. 1443 // 1444 // The ReadyDoneFactory may return either a `Component` or `ReadyDoneAware` instance. 1445 // In both cases, the object is started when the node is run, and the node will wait for the 1446 // component to exit gracefully. 1447 // 1448 // IMPORTANT: Dependable components are started in parallel with no guaranteed run order, so all 1449 // dependencies must be initialized outside of the ReadyDoneFactory, and their `Ready()` method 1450 // MUST be idempotent. 1451 func (fnb *FlowNodeBuilder) DependableComponent(name string, f ReadyDoneFactory, dependencies *DependencyList) NodeBuilder { 1452 // Note: dependencies are passed as a struct to allow updating the list after calling this method. 1453 // Passing a slice instead would result in out of sync metadata since slices are passed by reference 1454 fnb.components = append(fnb.components, namedComponentFunc{ 1455 fn: f, 1456 name: name, 1457 dependencies: dependencies, 1458 }) 1459 return fnb 1460 } 1461 1462 // OverrideComponent adds given builder function to the components set of the node builder. If a builder function with that name 1463 // already exists, it will be overridden. 1464 func (fnb *FlowNodeBuilder) OverrideComponent(name string, f ReadyDoneFactory) NodeBuilder { 1465 for i := 0; i < len(fnb.components); i++ { 1466 if fnb.components[i].name == name { 1467 // found component with the name, override it. 1468 fnb.components[i] = namedComponentFunc{ 1469 fn: f, 1470 name: name, 1471 } 1472 1473 return fnb 1474 } 1475 } 1476 1477 // no component found with the same name, hence just adding it. 1478 return fnb.Component(name, f) 1479 } 1480 1481 // OverrideModule adds given builder function to the modules set of the node builder. If a builder function with that name 1482 // already exists, it will be overridden. 1483 func (fnb *FlowNodeBuilder) OverrideModule(name string, f BuilderFunc) NodeBuilder { 1484 for i := 0; i < len(fnb.modules); i++ { 1485 if fnb.modules[i].name == name { 1486 // found module with the name, override it. 1487 fnb.modules[i] = namedModuleFunc{ 1488 fn: f, 1489 name: name, 1490 } 1491 1492 return fnb 1493 } 1494 } 1495 1496 // no module found with the same name, hence just adding it. 1497 return fnb.Module(name, f) 1498 } 1499 1500 // RestartableComponent adds a new component to the node that conforms to the ReadyDoneAware 1501 // interface, and calls the provided error handler when an irrecoverable error is encountered. 1502 // Use RestartableComponent if the component is not critical to the node's safe operation and 1503 // can/should be independently restarted when an irrecoverable error is encountered. 1504 // 1505 // IMPORTANT: Since a RestartableComponent can be restarted independently of the node, the node and 1506 // other components must not rely on it for safe operation, and failures must be handled gracefully. 1507 // As such, RestartableComponents do not block the node from becoming ready, and do not block 1508 // subsequent components from starting serially. They do start in serial order. 1509 // 1510 // Note: The ReadyDoneFactory method may be called multiple times if the component is restarted. 1511 // 1512 // Any irrecoverable errors thrown by the component will be passed to the provided error handler. 1513 func (fnb *FlowNodeBuilder) RestartableComponent(name string, f ReadyDoneFactory, errorHandler component.OnError) NodeBuilder { 1514 fnb.components = append(fnb.components, namedComponentFunc{ 1515 fn: f, 1516 name: name, 1517 errorHandler: errorHandler, 1518 }) 1519 return fnb 1520 } 1521 1522 func (fnb *FlowNodeBuilder) PreInit(f BuilderFunc) NodeBuilder { 1523 fnb.preInitFns = append(fnb.preInitFns, f) 1524 return fnb 1525 } 1526 1527 func (fnb *FlowNodeBuilder) PostInit(f BuilderFunc) NodeBuilder { 1528 fnb.postInitFns = append(fnb.postInitFns, f) 1529 return fnb 1530 } 1531 1532 type Option func(*BaseConfig) 1533 1534 func WithBootstrapDir(bootstrapDir string) Option { 1535 return func(config *BaseConfig) { 1536 config.BootstrapDir = bootstrapDir 1537 } 1538 } 1539 1540 func WithBindAddress(bindAddress string) Option { 1541 return func(config *BaseConfig) { 1542 config.BindAddr = bindAddress 1543 } 1544 } 1545 1546 func WithDataDir(dataDir string) Option { 1547 return func(config *BaseConfig) { 1548 if config.db == nil { 1549 config.datadir = dataDir 1550 } 1551 } 1552 } 1553 1554 func WithSecretsDBEnabled(enabled bool) Option { 1555 return func(config *BaseConfig) { 1556 config.secretsDBEnabled = enabled 1557 } 1558 } 1559 1560 func WithMetricsEnabled(enabled bool) Option { 1561 return func(config *BaseConfig) { 1562 config.MetricsEnabled = enabled 1563 } 1564 } 1565 1566 func WithSyncCoreConfig(syncConfig chainsync.Config) Option { 1567 return func(config *BaseConfig) { 1568 config.SyncCoreConfig = syncConfig 1569 } 1570 } 1571 1572 func WithComplianceConfig(complianceConfig compliance.Config) Option { 1573 return func(config *BaseConfig) { 1574 config.ComplianceConfig = complianceConfig 1575 } 1576 } 1577 1578 func WithLogLevel(level string) Option { 1579 return func(config *BaseConfig) { 1580 config.level = level 1581 } 1582 } 1583 1584 // WithDB takes precedence over WithDataDir and datadir will be set to empty if DB is set using this option 1585 func WithDB(db *badger.DB) Option { 1586 return func(config *BaseConfig) { 1587 config.db = db 1588 config.datadir = "" 1589 } 1590 } 1591 1592 // FlowNode creates a new Flow node builder with the given name. 1593 func FlowNode(role string, opts ...Option) *FlowNodeBuilder { 1594 config := DefaultBaseConfig() 1595 config.NodeRole = role 1596 for _, opt := range opts { 1597 opt(config) 1598 } 1599 1600 builder := &FlowNodeBuilder{ 1601 NodeConfig: &NodeConfig{ 1602 BaseConfig: *config, 1603 Logger: zerolog.New(os.Stderr), 1604 PeerManagerDependencies: NewDependencyList(), 1605 ConfigManager: updatable_configs.NewManager(), 1606 }, 1607 flags: pflag.CommandLine, 1608 adminCommandBootstrapper: admin.NewCommandRunnerBootstrapper(), 1609 adminCommands: make(map[string]func(*NodeConfig) commands.AdminCommand), 1610 componentBuilder: component.NewComponentManagerBuilder(), 1611 } 1612 return builder 1613 } 1614 1615 func (fnb *FlowNodeBuilder) Initialize() error { 1616 fnb.PrintBuildVersionDetails() 1617 1618 fnb.BaseFlags() 1619 1620 if err := fnb.ParseAndPrintFlags(); err != nil { 1621 return err 1622 } 1623 1624 // ID providers must be initialized before the network 1625 fnb.InitIDProviders() 1626 1627 fnb.EnqueueResolver() 1628 1629 fnb.EnqueueNetworkInit() 1630 1631 fnb.EnqueuePingService() 1632 1633 if fnb.MetricsEnabled { 1634 fnb.EnqueueMetricsServerInit() 1635 if err := fnb.RegisterBadgerMetrics(); err != nil { 1636 return err 1637 } 1638 } 1639 1640 fnb.EnqueueTracer() 1641 1642 return nil 1643 } 1644 1645 func (fnb *FlowNodeBuilder) RegisterDefaultAdminCommands() { 1646 fnb.AdminCommand("set-log-level", func(config *NodeConfig) commands.AdminCommand { 1647 return &common.SetLogLevelCommand{} 1648 }).AdminCommand("set-golog-level", func(config *NodeConfig) commands.AdminCommand { 1649 return &common.SetGologLevelCommand{} 1650 }).AdminCommand("get-config", func(config *NodeConfig) commands.AdminCommand { 1651 return common.NewGetConfigCommand(config.ConfigManager) 1652 }).AdminCommand("set-config", func(config *NodeConfig) commands.AdminCommand { 1653 return common.NewSetConfigCommand(config.ConfigManager) 1654 }).AdminCommand("list-configs", func(config *NodeConfig) commands.AdminCommand { 1655 return common.NewListConfigCommand(config.ConfigManager) 1656 }).AdminCommand("read-blocks", func(config *NodeConfig) commands.AdminCommand { 1657 return storageCommands.NewReadBlocksCommand(config.State, config.Storage.Blocks) 1658 }).AdminCommand("read-results", func(config *NodeConfig) commands.AdminCommand { 1659 return storageCommands.NewReadResultsCommand(config.State, config.Storage.Results) 1660 }).AdminCommand("read-seals", func(config *NodeConfig) commands.AdminCommand { 1661 return storageCommands.NewReadSealsCommand(config.State, config.Storage.Seals, config.Storage.Index) 1662 }).AdminCommand("get-latest-identity", func(config *NodeConfig) commands.AdminCommand { 1663 return common.NewGetIdentityCommand(config.IdentityProvider) 1664 }) 1665 } 1666 1667 func (fnb *FlowNodeBuilder) Build() (Node, error) { 1668 // Run the prestart initialization. This includes anything that should be done before 1669 // starting the components. 1670 if err := fnb.onStart(); err != nil { 1671 return nil, err 1672 } 1673 1674 return NewNode( 1675 fnb.componentBuilder.Build(), 1676 fnb.NodeConfig, 1677 fnb.Logger, 1678 fnb.postShutdown, 1679 fnb.handleFatal, 1680 ), nil 1681 } 1682 1683 func (fnb *FlowNodeBuilder) onStart() error { 1684 1685 // seed random generator 1686 rand.Seed(time.Now().UnixNano()) 1687 1688 // init nodeinfo by reading the private bootstrap file if not already set 1689 if fnb.NodeID == flow.ZeroID { 1690 if err := fnb.initNodeInfo(); err != nil { 1691 return err 1692 } 1693 } 1694 1695 if err := fnb.initLogger(); err != nil { 1696 return err 1697 } 1698 1699 if err := fnb.initDB(); err != nil { 1700 return err 1701 } 1702 1703 if err := fnb.initSecretsDB(); err != nil { 1704 return err 1705 } 1706 1707 if err := fnb.initMetrics(); err != nil { 1708 return err 1709 } 1710 1711 if err := fnb.initStorage(); err != nil { 1712 return err 1713 } 1714 1715 for _, f := range fnb.preInitFns { 1716 if err := fnb.handlePreInit(f); err != nil { 1717 return err 1718 } 1719 } 1720 1721 if err := fnb.initState(); err != nil { 1722 return err 1723 } 1724 1725 if err := fnb.initProfiler(); err != nil { 1726 return err 1727 } 1728 1729 fnb.initFvmOptions() 1730 1731 for _, f := range fnb.postInitFns { 1732 if err := fnb.handlePostInit(f); err != nil { 1733 return err 1734 } 1735 } 1736 1737 if err := fnb.EnqueueAdminServerInit(); err != nil { 1738 return err 1739 } 1740 1741 // run all modules 1742 if err := fnb.handleModules(); err != nil { 1743 return fmt.Errorf("could not handle modules: %w", err) 1744 } 1745 1746 // run all components 1747 return fnb.handleComponents() 1748 } 1749 1750 // postShutdown is called by the node before exiting 1751 // put any cleanup code here that should be run after all components have stopped 1752 func (fnb *FlowNodeBuilder) postShutdown() error { 1753 var errs *multierror.Error 1754 1755 for _, fn := range fnb.postShutdownFns { 1756 err := fn() 1757 if err != nil { 1758 errs = multierror.Append(errs, err) 1759 } 1760 } 1761 fnb.Logger.Info().Msg("database has been closed") 1762 return errs.ErrorOrNil() 1763 } 1764 1765 // handleFatal handles irrecoverable errors by logging them and exiting the process. 1766 func (fnb *FlowNodeBuilder) handleFatal(err error) { 1767 fnb.Logger.Fatal().Err(err).Msg("unhandled irrecoverable error") 1768 } 1769 1770 func (fnb *FlowNodeBuilder) handlePreInit(f BuilderFunc) error { 1771 return f(fnb.NodeConfig) 1772 } 1773 1774 func (fnb *FlowNodeBuilder) handlePostInit(f BuilderFunc) error { 1775 return f(fnb.NodeConfig) 1776 } 1777 1778 func (fnb *FlowNodeBuilder) extraFlagsValidation() error { 1779 if fnb.extraFlagCheck != nil { 1780 err := fnb.extraFlagCheck() 1781 if err != nil { 1782 return fmt.Errorf("invalid flags: %w", err) 1783 } 1784 } 1785 return nil 1786 } 1787 1788 // loadRootProtocolSnapshot loads the root protocol snapshot from disk 1789 func loadRootProtocolSnapshot(dir string) (*inmem.Snapshot, error) { 1790 path := filepath.Join(dir, bootstrap.PathRootProtocolStateSnapshot) 1791 data, err := io.ReadFile(path) 1792 if err != nil { 1793 return nil, fmt.Errorf("could not read root snapshot (path=%s): %w", path, err) 1794 } 1795 1796 var snapshot inmem.EncodableSnapshot 1797 err = json.Unmarshal(data, &snapshot) 1798 if err != nil { 1799 return nil, err 1800 } 1801 1802 return inmem.SnapshotFromEncodable(snapshot), nil 1803 } 1804 1805 // LoadPrivateNodeInfo the private info for this node from disk (e.g., private staking/network keys). 1806 func LoadPrivateNodeInfo(dir string, myID flow.Identifier) (*bootstrap.NodeInfoPriv, error) { 1807 path := filepath.Join(dir, fmt.Sprintf(bootstrap.PathNodeInfoPriv, myID)) 1808 data, err := io.ReadFile(path) 1809 if err != nil { 1810 return nil, fmt.Errorf("could not read private node info (path=%s): %w", path, err) 1811 } 1812 var info bootstrap.NodeInfoPriv 1813 err = json.Unmarshal(data, &info) 1814 return &info, err 1815 } 1816 1817 // loadSecretsEncryptionKey loads the encryption key for the secrets database. 1818 // If the file does not exist, returns os.ErrNotExist. 1819 func loadSecretsEncryptionKey(dir string, myID flow.Identifier) ([]byte, error) { 1820 path := filepath.Join(dir, fmt.Sprintf(bootstrap.PathSecretsEncryptionKey, myID)) 1821 data, err := io.ReadFile(path) 1822 if err != nil { 1823 return nil, fmt.Errorf("could not read secrets db encryption key (path=%s): %w", path, err) 1824 } 1825 return data, nil 1826 } 1827 1828 func rateLimiterPeerFilter(rateLimiter p2p.RateLimiter) p2p.PeerFilter { 1829 return func(p peer.ID) error { 1830 if rateLimiter.IsRateLimited(p) { 1831 return fmt.Errorf("peer is rate limited") 1832 } 1833 1834 return nil 1835 } 1836 }