github.com/MetalBlockchain/metalgo@v1.11.9/node/node.go

// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.

package node

import (
	"context"
	"crypto"
	"crypto/tls"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"net"
	"net/netip"
	"os"
	"path/filepath"
	"strconv"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/collectors"
	"github.com/prometheus/client_golang/prometheus/promhttp"
	"go.uber.org/zap"

	"github.com/MetalBlockchain/metalgo/api/admin"
	"github.com/MetalBlockchain/metalgo/api/health"
	"github.com/MetalBlockchain/metalgo/api/info"
	"github.com/MetalBlockchain/metalgo/api/keystore"
	"github.com/MetalBlockchain/metalgo/api/metrics"
	"github.com/MetalBlockchain/metalgo/api/server"
	"github.com/MetalBlockchain/metalgo/chains"
	"github.com/MetalBlockchain/metalgo/chains/atomic"
	"github.com/MetalBlockchain/metalgo/database"
	"github.com/MetalBlockchain/metalgo/database/leveldb"
	"github.com/MetalBlockchain/metalgo/database/memdb"
	"github.com/MetalBlockchain/metalgo/database/meterdb"
	"github.com/MetalBlockchain/metalgo/database/pebbledb"
	"github.com/MetalBlockchain/metalgo/database/prefixdb"
	"github.com/MetalBlockchain/metalgo/database/versiondb"
	"github.com/MetalBlockchain/metalgo/genesis"
	"github.com/MetalBlockchain/metalgo/ids"
	"github.com/MetalBlockchain/metalgo/indexer"
	"github.com/MetalBlockchain/metalgo/message"
	"github.com/MetalBlockchain/metalgo/nat"
	"github.com/MetalBlockchain/metalgo/network"
	"github.com/MetalBlockchain/metalgo/network/dialer"
	"github.com/MetalBlockchain/metalgo/network/peer"
	"github.com/MetalBlockchain/metalgo/network/throttling"
	"github.com/MetalBlockchain/metalgo/snow"
	"github.com/MetalBlockchain/metalgo/snow/networking/benchlist"
	"github.com/MetalBlockchain/metalgo/snow/networking/router"
	"github.com/MetalBlockchain/metalgo/snow/networking/timeout"
	"github.com/MetalBlockchain/metalgo/snow/networking/tracker"
	"github.com/MetalBlockchain/metalgo/snow/uptime"
	"github.com/MetalBlockchain/metalgo/snow/validators"
	"github.com/MetalBlockchain/metalgo/staking"
	"github.com/MetalBlockchain/metalgo/trace"
	"github.com/MetalBlockchain/metalgo/utils"
	"github.com/MetalBlockchain/metalgo/utils/constants"
	"github.com/MetalBlockchain/metalgo/utils/crypto/bls"
	"github.com/MetalBlockchain/metalgo/utils/dynamicip"
	"github.com/MetalBlockchain/metalgo/utils/filesystem"
	"github.com/MetalBlockchain/metalgo/utils/hashing"
	"github.com/MetalBlockchain/metalgo/utils/ips"
	"github.com/MetalBlockchain/metalgo/utils/logging"
	"github.com/MetalBlockchain/metalgo/utils/math/meter"
	"github.com/MetalBlockchain/metalgo/utils/metric"
	"github.com/MetalBlockchain/metalgo/utils/perms"
	"github.com/MetalBlockchain/metalgo/utils/profiler"
	"github.com/MetalBlockchain/metalgo/utils/resource"
	"github.com/MetalBlockchain/metalgo/utils/set"
	"github.com/MetalBlockchain/metalgo/version"
	"github.com/MetalBlockchain/metalgo/vms"
	"github.com/MetalBlockchain/metalgo/vms/avm"
	"github.com/MetalBlockchain/metalgo/vms/platformvm"
	"github.com/MetalBlockchain/metalgo/vms/platformvm/signer"
	"github.com/MetalBlockchain/metalgo/vms/platformvm/upgrade"
	"github.com/MetalBlockchain/metalgo/vms/registry"
	"github.com/MetalBlockchain/metalgo/vms/rpcchainvm/runtime"

	avmconfig "github.com/MetalBlockchain/metalgo/vms/avm/config"
"github.com/MetalBlockchain/metalgo/vms/avm/config" 85 platformconfig "github.com/MetalBlockchain/metalgo/vms/platformvm/config" 86 coreth "github.com/MetalBlockchain/coreth/plugin/evm" 87 ) 88 89 const ( 90 stakingPortName = constants.AppName + "-staking" 91 httpPortName = constants.AppName + "-http" 92 93 ipResolutionTimeout = 30 * time.Second 94 95 apiNamespace = constants.PlatformName + metric.NamespaceSeparator + "api" 96 benchlistNamespace = constants.PlatformName + metric.NamespaceSeparator + "benchlist" 97 dbNamespace = constants.PlatformName + metric.NamespaceSeparator + "db" 98 healthNamespace = constants.PlatformName + metric.NamespaceSeparator + "health" 99 meterDBNamespace = constants.PlatformName + metric.NamespaceSeparator + "meterdb" 100 networkNamespace = constants.PlatformName + metric.NamespaceSeparator + "network" 101 processNamespace = constants.PlatformName + metric.NamespaceSeparator + "process" 102 requestsNamespace = constants.PlatformName + metric.NamespaceSeparator + "requests" 103 resourceTrackerNamespace = constants.PlatformName + metric.NamespaceSeparator + "resource_tracker" 104 responsesNamespace = constants.PlatformName + metric.NamespaceSeparator + "responses" 105 systemResourcesNamespace = constants.PlatformName + metric.NamespaceSeparator + "system_resources" 106 ) 107 108 var ( 109 genesisHashKey = []byte("genesisID") 110 ungracefulShutdown = []byte("ungracefulShutdown") 111 112 indexerDBPrefix = []byte{0x00} 113 keystoreDBPrefix = []byte("keystore") 114 115 errInvalidTLSKey = errors.New("invalid TLS key") 116 errShuttingDown = errors.New("server shutting down") 117 ) 118 119 // New returns an instance of Node 120 func New( 121 config *Config, 122 logFactory logging.Factory, 123 logger logging.Logger, 124 ) (*Node, error) { 125 tlsCert := config.StakingTLSCert.Leaf 126 stakingCert, err := staking.ParseCertificate(tlsCert.Raw) 127 if err != nil { 128 return nil, fmt.Errorf("invalid staking certificate: %w", err) 129 } 130 131 n := &Node{ 132 Log: logger, 133 LogFactory: logFactory, 134 StakingTLSSigner: config.StakingTLSCert.PrivateKey.(crypto.Signer), 135 StakingTLSCert: stakingCert, 136 ID: ids.NodeIDFromCert(stakingCert), 137 Config: config, 138 } 139 140 n.DoneShuttingDown.Add(1) 141 142 pop := signer.NewProofOfPossession(n.Config.StakingSigningKey) 143 logger.Info("initializing node", 144 zap.Stringer("version", version.CurrentApp), 145 zap.Stringer("nodeID", n.ID), 146 zap.Stringer("stakingKeyType", tlsCert.PublicKeyAlgorithm), 147 zap.Reflect("nodePOP", pop), 148 zap.Reflect("providedFlags", n.Config.ProvidedFlags), 149 zap.Reflect("config", n.Config), 150 ) 151 152 n.VMFactoryLog, err = logFactory.Make("vm-factory") 153 if err != nil { 154 return nil, fmt.Errorf("problem creating vm logger: %w", err) 155 } 156 157 n.VMAliaser = ids.NewAliaser() 158 for vmID, aliases := range config.VMAliases { 159 for _, alias := range aliases { 160 if err := n.VMAliaser.Alias(vmID, alias); err != nil { 161 return nil, err 162 } 163 } 164 } 165 n.VMManager = vms.NewManager(n.VMFactoryLog, n.VMAliaser) 166 167 if err := n.initBootstrappers(); err != nil { // Configure the bootstrappers 168 return nil, fmt.Errorf("problem initializing node beacons: %w", err) 169 } 170 171 // Set up tracer 172 n.tracer, err = trace.New(n.Config.TraceConfig) 173 if err != nil { 174 return nil, fmt.Errorf("couldn't initialize tracer: %w", err) 175 } 176 177 if err := n.initMetrics(); err != nil { 178 return nil, fmt.Errorf("couldn't initialize metrics: %w", err) 179 } 180 181 
	n.initNAT()
	if err := n.initAPIServer(); err != nil { // Start the API Server
		return nil, fmt.Errorf("couldn't initialize API server: %w", err)
	}

	if err := n.initMetricsAPI(); err != nil { // Start the Metrics API
		return nil, fmt.Errorf("couldn't initialize metrics API: %w", err)
	}

	if err := n.initDatabase(); err != nil { // Set up the node's database
		return nil, fmt.Errorf("problem initializing database: %w", err)
	}

	if err := n.initKeystoreAPI(); err != nil { // Start the Keystore API
		return nil, fmt.Errorf("couldn't initialize keystore API: %w", err)
	}

	n.initSharedMemory() // Initialize shared memory

	// message.Creator is shared between networking, chainManager and the engine.
	// It must be initialized before networking (initNetworking), the chain
	// manager (initChainManager) and the engine (initChains), but after the
	// metrics (initMetricsAPI), because message.Creator currently records
	// metrics under the network namespace.

	networkRegisterer, err := metrics.MakeAndRegister(
		n.MetricsGatherer,
		networkNamespace,
	)
	if err != nil {
		return nil, err
	}

	n.msgCreator, err = message.NewCreator(
		n.Log,
		networkRegisterer,
		n.Config.NetworkConfig.CompressionType,
		n.Config.NetworkConfig.MaximumInboundMessageTimeout,
	)
	if err != nil {
		return nil, fmt.Errorf("problem initializing message creator: %w", err)
	}

	n.vdrs = validators.NewManager()
	if !n.Config.SybilProtectionEnabled {
		logger.Warn("sybil control is not enforced")
		n.vdrs = newOverriddenManager(constants.PrimaryNetworkID, n.vdrs)
	}
	if err := n.initResourceManager(); err != nil {
		return nil, fmt.Errorf("problem initializing resource manager: %w", err)
	}
	n.initCPUTargeter(&config.CPUTargeterConfig)
	n.initDiskTargeter(&config.DiskTargeterConfig)
	if err := n.initNetworking(networkRegisterer); err != nil { // Set up networking layer.
		return nil, fmt.Errorf("problem initializing networking: %w", err)
	}

	n.initEventDispatchers()

	// Start the Health API
	// Has to be initialized before chain manager
	// [n.Net] must already be set
	if err := n.initHealthAPI(); err != nil {
		return nil, fmt.Errorf("couldn't initialize health API: %w", err)
	}
	if err := n.addDefaultVMAliases(); err != nil {
		return nil, fmt.Errorf("couldn't initialize VM aliases: %w", err)
	}
	if err := n.initChainManager(n.Config.AvaxAssetID); err != nil { // Set up the chain manager
		return nil, fmt.Errorf("couldn't initialize chain manager: %w", err)
	}
	if err := n.initVMs(); err != nil { // Initialize the VM registry.
		return nil, fmt.Errorf("couldn't initialize VM registry: %w", err)
	}
	if err := n.initAdminAPI(); err != nil { // Start the Admin API
		return nil, fmt.Errorf("couldn't initialize admin API: %w", err)
	}
	if err := n.initInfoAPI(); err != nil { // Start the Info API
		return nil, fmt.Errorf("couldn't initialize info API: %w", err)
	}
	if err := n.initChainAliases(n.Config.GenesisBytes); err != nil {
		return nil, fmt.Errorf("couldn't initialize chain aliases: %w", err)
	}
	if err := n.initAPIAliases(n.Config.GenesisBytes); err != nil {
		return nil, fmt.Errorf("couldn't initialize API aliases: %w", err)
	}
	if err := n.initIndexer(); err != nil {
		return nil, fmt.Errorf("couldn't initialize indexer: %w", err)
	}

	n.health.Start(context.TODO(), n.Config.HealthCheckFreq)
	n.initProfiler()

	// Start the Platform chain
	if err := n.initChains(n.Config.GenesisBytes); err != nil {
		return nil, fmt.Errorf("couldn't initialize chains: %w", err)
	}
	return n, nil
}

// Node is an instance of an Avalanche node.
type Node struct {
	Log          logging.Logger
	VMFactoryLog logging.Logger
	LogFactory   logging.Factory

	// This node's unique ID used when communicating with other nodes
	// (in consensus, for example)
	ID ids.NodeID

	StakingTLSSigner crypto.Signer
	StakingTLSCert   *staking.Certificate

	// Storage for this node
	DB database.Database

	router     nat.Router
	portMapper *nat.Mapper
	ipUpdater  dynamicip.Updater

	chainRouter router.Router

	// Profiles the process. Nil if continuous profiling is disabled.
	profiler profiler.ContinuousProfiler

	// Indexes blocks, transactions and vertices
	indexer indexer.Indexer

	// Handles calls to Keystore API
	keystore keystore.Keystore

	// Manages shared memory
	sharedMemory *atomic.Memory

	// Monitors node health and runs health checks
	health health.Health

	// Build and parse messages, for both network layer and chain manager
	msgCreator message.Creator

	// Manages network timeouts
	timeoutManager timeout.Manager

	// Manages creation of blockchains and routing messages to them
	chainManager chains.Manager

	// Manages validator benching
	benchlistManager benchlist.Manager

	uptimeCalculator uptime.LockedCalculator

	// dispatchers for events as they happen in consensus
	BlockAcceptorGroup  snow.AcceptorGroup
	TxAcceptorGroup     snow.AcceptorGroup
	VertexAcceptorGroup snow.AcceptorGroup

	// Net runs the networking stack
	Net network.Network

	// The staking address will optionally be written to a process context
	// file to enable other nodes to be configured to use this node as a
	// beacon.
	stakingAddress string

	// tlsKeyLogWriterCloser is a debug file handle that writes all the TLS
	// session keys. This value should only be non-nil during debugging.
	tlsKeyLogWriterCloser io.WriteCloser

	// this node's initial connections to the network
	bootstrappers validators.Manager

	// current validators of the network
	vdrs validators.Manager

	apiURI string

	// Handles HTTP API calls
	APIServer server.Server

	// This node's configuration
	Config *Config

	tracer trace.Tracer

	// ensures that we only close the node once.
	shutdownOnce sync.Once

	// True if node is shutting down or is done shutting down
	shuttingDown utils.Atomic[bool]

	// Sets the exit code
	shuttingDownExitCode utils.Atomic[int]

	// Incremented only once on initialization.
	// Decremented when node is done shutting down.
	DoneShuttingDown sync.WaitGroup

	// Metrics Registerer
	MetricsGatherer        metrics.MultiGatherer
	MeterDBMetricsGatherer metrics.MultiGatherer

	VMAliaser ids.Aliaser
	VMManager vms.Manager

	// VM endpoint registry
	VMRegistry registry.VMRegistry

	// Manages shutdown of a VM process
	runtimeManager runtime.Manager

	resourceManager resource.Manager

	// Tracks the CPU/disk usage caused by processing
	// messages of each peer.
	resourceTracker tracker.ResourceTracker

	// Specifies how much CPU usage each peer can cause before
	// we rate-limit them.
	cpuTargeter tracker.Targeter

	// Specifies how much disk usage each peer can cause before
	// we rate-limit them.
	diskTargeter tracker.Targeter

	// Closed when a sufficient number of bootstrap nodes are connected
	onSufficientlyConnected chan struct{}
}

/*
 ******************************************************************************
 *************************** P2P Networking Section ***************************
 ******************************************************************************
 */

// Initialize the networking layer.
// Assumes [n.vdrs], [n.CPUTracker], and [n.CPUTargeter] have been initialized.
func (n *Node) initNetworking(reg prometheus.Registerer) error {
	// Providing either loopback address - `::1` for ipv6 and `127.0.0.1` for ipv4 - as the listen
	// host will avoid the need for a firewall exception on recent MacOS:
	//
	//  - MacOS requires a manually-approved firewall exception [1] for each version of a given
	//    binary that wants to bind to all interfaces (i.e. with an address of `:[port]`). Each
	//    compiled version of avalanchego requires a separate exception to be allowed to bind to
	//    all interfaces.
	//
	//  - A firewall exception is not required to bind to a loopback interface, but the only way
	//    for Listen() to bind to loopback for both ipv4 and ipv6 is to bind to all interfaces [2]
	//    which requires an exception.
	//
	//  - Thus, the only way to start a node on MacOS without approving a firewall exception for
	//    the avalanchego binary is to bind to loopback by specifying the host to be `::1` or
	//    `127.0.0.1`.
	//
	// 1: https://apple.stackexchange.com/questions/393715/do-you-want-the-application-main-to-accept-incoming-network-connections-pop
	// 2: https://github.com/golang/go/issues/56998
	listenAddress := net.JoinHostPort(n.Config.ListenHost, strconv.FormatUint(uint64(n.Config.ListenPort), 10))
	listener, err := net.Listen(constants.NetworkType, listenAddress)
	if err != nil {
		return err
	}
	// Wrap listener so it will only accept a certain number of incoming connections per second
	listener = throttling.NewThrottledListener(listener, n.Config.NetworkConfig.ThrottlerConfig.MaxInboundConnsPerSec)

	// Record the bound address to enable inclusion in process context file.
	n.stakingAddress = listener.Addr().String()
	stakingAddrPort, err := ips.ParseAddrPort(n.stakingAddress)
	if err != nil {
		return err
	}

	var (
		publicAddr netip.Addr
		atomicIP   *utils.Atomic[netip.AddrPort]
	)
	switch {
	case n.Config.PublicIP != "":
		// Use the specified public IP.
		publicAddr, err = ips.ParseAddr(n.Config.PublicIP)
		if err != nil {
			return fmt.Errorf("invalid public IP address %q: %w", n.Config.PublicIP, err)
		}
		atomicIP = utils.NewAtomic(netip.AddrPortFrom(
			publicAddr,
			stakingAddrPort.Port(),
		))
		n.ipUpdater = dynamicip.NewNoUpdater()
	case n.Config.PublicIPResolutionService != "":
		// Use dynamic IP resolution.
		resolver, err := dynamicip.NewResolver(n.Config.PublicIPResolutionService)
		if err != nil {
			return fmt.Errorf("couldn't create IP resolver: %w", err)
		}

		// Use that to resolve our public IP.
		ctx, cancel := context.WithTimeout(context.Background(), ipResolutionTimeout)
		publicAddr, err = resolver.Resolve(ctx)
		cancel()
		if err != nil {
			return fmt.Errorf("couldn't resolve public IP: %w", err)
		}
		atomicIP = utils.NewAtomic(netip.AddrPortFrom(
			publicAddr,
			stakingAddrPort.Port(),
		))
		n.ipUpdater = dynamicip.NewUpdater(atomicIP, resolver, n.Config.PublicIPResolutionFreq)
	default:
		publicAddr, err = n.router.ExternalIP()
		if err != nil {
			return fmt.Errorf("public IP / IP resolution service not given and failed to resolve IP with NAT: %w", err)
		}
		atomicIP = utils.NewAtomic(netip.AddrPortFrom(
			publicAddr,
			stakingAddrPort.Port(),
		))
		n.ipUpdater = dynamicip.NewNoUpdater()
	}

	if !ips.IsPublic(publicAddr) {
		n.Log.Warn("P2P IP is private, you will not be publicly discoverable",
			zap.Stringer("ip", publicAddr),
		)
	}

	// Regularly update our public IP and port mappings.
	n.portMapper.Map(
		stakingAddrPort.Port(),
		stakingAddrPort.Port(),
		stakingPortName,
		atomicIP,
		n.Config.PublicIPResolutionFreq,
	)
	go n.ipUpdater.Dispatch(n.Log)

	n.Log.Info("initializing networking",
		zap.Stringer("ip", atomicIP.Get()),
	)

	tlsKey, ok := n.Config.StakingTLSCert.PrivateKey.(crypto.Signer)
	if !ok {
		return errInvalidTLSKey
	}

	if n.Config.NetworkConfig.TLSKeyLogFile != "" {
		n.tlsKeyLogWriterCloser, err = perms.Create(n.Config.NetworkConfig.TLSKeyLogFile, perms.ReadWrite)
		if err != nil {
			return err
		}
		n.Log.Warn("TLS key logging is enabled",
			zap.String("filename", n.Config.NetworkConfig.TLSKeyLogFile),
		)
	}

	// We allow nodes to gossip unknown ACPs in case the current ACPs constant
	// becomes out of date.
	var unknownACPs set.Set[uint32]
	for acp := range n.Config.NetworkConfig.SupportedACPs {
		if !constants.CurrentACPs.Contains(acp) {
			unknownACPs.Add(acp)
		}
	}
	for acp := range n.Config.NetworkConfig.ObjectedACPs {
		if !constants.CurrentACPs.Contains(acp) {
			unknownACPs.Add(acp)
		}
	}
	if unknownACPs.Len() > 0 {
		n.Log.Warn("gossiping unknown ACPs",
			zap.Reflect("acps", unknownACPs),
		)
	}

	tlsConfig := peer.TLSConfig(n.Config.StakingTLSCert, n.tlsKeyLogWriterCloser)

	// Create chain router
	n.chainRouter = &router.ChainRouter{}
	if n.Config.TraceConfig.Enabled {
		n.chainRouter = router.Trace(n.chainRouter, n.tracer)
	}

	// Configure benchlist
	n.Config.BenchlistConfig.Validators = n.vdrs
	n.Config.BenchlistConfig.Benchable = n.chainRouter
	n.Config.BenchlistConfig.BenchlistRegisterer = metrics.NewLabelGatherer(chains.ChainLabel)

	err = n.MetricsGatherer.Register(
		benchlistNamespace,
		n.Config.BenchlistConfig.BenchlistRegisterer,
	)
	if err != nil {
		return err
	}

	n.benchlistManager = benchlist.NewManager(&n.Config.BenchlistConfig)

	n.uptimeCalculator = uptime.NewLockedCalculator()

	consensusRouter := n.chainRouter
	if !n.Config.SybilProtectionEnabled {
		// Sybil protection is disabled so we don't have a txID that added us as
		// a validator. Because each validator needs a txID associated with it,
		// we hack one together by just padding our nodeID with zeroes.
		dummyTxID := ids.Empty
		copy(dummyTxID[:], n.ID.Bytes())

		err := n.vdrs.AddStaker(
			constants.PrimaryNetworkID,
			n.ID,
			bls.PublicFromSecretKey(n.Config.StakingSigningKey),
			dummyTxID,
			n.Config.SybilProtectionDisabledWeight,
		)
		if err != nil {
			return err
		}

		consensusRouter = &insecureValidatorManager{
			log:    n.Log,
			Router: consensusRouter,
			vdrs:   n.vdrs,
			weight: n.Config.SybilProtectionDisabledWeight,
		}
	}

	n.onSufficientlyConnected = make(chan struct{})
	numBootstrappers := n.bootstrappers.Count(constants.PrimaryNetworkID)
	// (3n+3)/4 is the integer ceiling of 3n/4: e.g. 4 of 5 bootstrappers,
	// 3 of 4, and 0 of 0 (in which case the channel is closed immediately).
	requiredConns := (3*numBootstrappers + 3) / 4

	if requiredConns > 0 {
		consensusRouter = &beaconManager{
			Router:                  consensusRouter,
			beacons:                 n.bootstrappers,
			requiredConns:           int64(requiredConns),
			onSufficientlyConnected: n.onSufficientlyConnected,
		}
	} else {
		close(n.onSufficientlyConnected)
	}

	// add node configs to network config
	n.Config.NetworkConfig.MyNodeID = n.ID
	n.Config.NetworkConfig.MyIPPort = atomicIP
	n.Config.NetworkConfig.NetworkID = n.Config.NetworkID
	n.Config.NetworkConfig.Validators = n.vdrs
	n.Config.NetworkConfig.Beacons = n.bootstrappers
	n.Config.NetworkConfig.TLSConfig = tlsConfig
	n.Config.NetworkConfig.TLSKey = tlsKey
	n.Config.NetworkConfig.BLSKey = n.Config.StakingSigningKey
	n.Config.NetworkConfig.TrackedSubnets = n.Config.TrackedSubnets
	n.Config.NetworkConfig.UptimeCalculator = n.uptimeCalculator
	n.Config.NetworkConfig.UptimeRequirement = n.Config.UptimeRequirement
	n.Config.NetworkConfig.ResourceTracker = n.resourceTracker
	n.Config.NetworkConfig.CPUTargeter = n.cpuTargeter
	n.Config.NetworkConfig.DiskTargeter = n.diskTargeter

	n.Net, err = network.NewNetwork(
		&n.Config.NetworkConfig,
		n.msgCreator,
		reg,
		n.Log,
		listener,
		dialer.NewDialer(constants.NetworkType, n.Config.NetworkConfig.DialerConfig, n.Log),
		consensusRouter,
	)

	return err
}
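
// Illustrative example (added note): the process context file written by
// writeProcessContext below looks roughly like this; the values shown are
// hypothetical:
//
//	{
//	    "pid": 12345,
//	    "uri": "http://127.0.0.1:9650",
//	    "stakingAddress": "127.0.0.1:9651"
//	}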
type NodeProcessContext struct {
	// The process id of the node
	PID int `json:"pid"`
	// URI to access the node API
	// Format: [https|http]://[host]:[port]
	URI string `json:"uri"`
	// Address other nodes can use to communicate with this node
	// Format: [host]:[port]
	StakingAddress string `json:"stakingAddress"`
}

// Write process context to the configured path. Supports the use of
// dynamically chosen network ports with local network orchestration.
func (n *Node) writeProcessContext() error {
	n.Log.Info("writing process context", zap.String("path", n.Config.ProcessContextFilePath))

	// Write the process context to disk
	processContext := &NodeProcessContext{
		PID:            os.Getpid(),
		URI:            n.apiURI,
		StakingAddress: n.stakingAddress, // Set by network initialization
	}
	bytes, err := json.MarshalIndent(processContext, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal process context: %w", err)
	}
	if err := perms.WriteFile(n.Config.ProcessContextFilePath, bytes, perms.ReadWrite); err != nil {
		return fmt.Errorf("failed to write process context: %w", err)
	}
	return nil
}

// Dispatch starts the node's servers.
// Returns when the node exits.
func (n *Node) Dispatch() error {
	if err := n.writeProcessContext(); err != nil {
		return err
	}

	// Start the HTTP API server
	go n.Log.RecoverAndPanic(func() {
		n.Log.Info("API server listening",
			zap.String("uri", n.apiURI),
		)
		err := n.APIServer.Dispatch()
		// When [n].Shutdown() is called, [n.APIServer].Close() is called.
		// This causes [n.APIServer].Dispatch() to return an error.
		// If that happened, don't log/return an error here.
		if !n.shuttingDown.Get() {
			n.Log.Fatal("API server dispatch failed",
				zap.Error(err),
			)
		}
		// If the API server isn't running, shut down the node.
		// If node is already shutting down, this does nothing.
		n.Shutdown(1)
	})

	// Log a warning if we aren't able to connect to a sufficient portion of
	// nodes.
	go func() {
		timer := time.NewTimer(n.Config.BootstrapBeaconConnectionTimeout)
		defer timer.Stop()

		select {
		case <-timer.C:
			if n.shuttingDown.Get() {
				return
			}
			n.Log.Warn("failed to connect to bootstrap nodes",
				zap.Stringer("bootstrappers", n.bootstrappers),
				zap.Duration("duration", n.Config.BootstrapBeaconConnectionTimeout),
			)
		case <-n.onSufficientlyConnected:
		}
	}()

	// Add state sync nodes to the peer network
	for i, peerIP := range n.Config.StateSyncIPs {
		n.Net.ManuallyTrack(n.Config.StateSyncIDs[i], peerIP)
	}

	// Add bootstrap nodes to the peer network
	for _, bootstrapper := range n.Config.Bootstrappers {
		n.Net.ManuallyTrack(bootstrapper.ID, bootstrapper.IP)
	}

	// Start P2P connections
	err := n.Net.Dispatch()

	// If the P2P server isn't running, shut down the node.
	// If node is already shutting down, this does nothing.
	n.Shutdown(1)

	if n.tlsKeyLogWriterCloser != nil {
		err := n.tlsKeyLogWriterCloser.Close()
		if err != nil {
			n.Log.Error("closing TLS key log file failed",
				zap.String("filename", n.Config.NetworkConfig.TLSKeyLogFile),
				zap.Error(err),
			)
		}
	}

	// Wait until the node is done shutting down before returning
	n.DoneShuttingDown.Wait()

	// Remove the process context file to communicate to an orchestrator
	// that the node is no longer running.
	if err := os.Remove(n.Config.ProcessContextFilePath); err != nil && !errors.Is(err, fs.ErrNotExist) {
		n.Log.Error("removal of process context file failed",
			zap.String("path", n.Config.ProcessContextFilePath),
			zap.Error(err),
		)
	}

	return err
}

/*
 ******************************************************************************
 *********************** End P2P Networking Section ***************************
 ******************************************************************************
 */

func (n *Node) initDatabase() error {
	dbRegisterer, err := metrics.MakeAndRegister(
		n.MetricsGatherer,
		dbNamespace,
	)
	if err != nil {
		return err
	}

	// start the db
	switch n.Config.DatabaseConfig.Name {
	case leveldb.Name:
		// Prior to v1.10.15, the only on-disk database was leveldb, and its
		// files went to [dbPath]/[networkID]/v1.4.5.
		dbPath := filepath.Join(n.Config.DatabaseConfig.Path, version.CurrentDatabase.String())
		n.DB, err = leveldb.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, dbRegisterer)
		if err != nil {
			return fmt.Errorf("couldn't create %s at %s: %w", leveldb.Name, dbPath, err)
		}
	case memdb.Name:
		n.DB = memdb.New()
	case pebbledb.Name:
		dbPath := filepath.Join(n.Config.DatabaseConfig.Path, "pebble")
		n.DB, err = pebbledb.New(dbPath, n.Config.DatabaseConfig.Config, n.Log, dbRegisterer)
		if err != nil {
			return fmt.Errorf("couldn't create %s at %s: %w", pebbledb.Name, dbPath, err)
		}
	default:
		return fmt.Errorf(
			"db-type was %q but should have been one of {%s, %s, %s}",
			n.Config.DatabaseConfig.Name,
			leveldb.Name,
			memdb.Name,
			pebbledb.Name,
		)
	}

	if n.Config.ReadOnly && n.Config.DatabaseConfig.Name != memdb.Name {
		n.DB = versiondb.New(n.DB)
	}

	meterDBReg, err := metrics.MakeAndRegister(
		n.MeterDBMetricsGatherer,
		"all",
	)
	if err != nil {
		return err
	}

	n.DB, err = meterdb.New(meterDBReg, n.DB)
	if err != nil {
		return err
	}

	rawExpectedGenesisHash := hashing.ComputeHash256(n.Config.GenesisBytes)

	rawGenesisHash, err := n.DB.Get(genesisHashKey)
	if err == database.ErrNotFound {
		rawGenesisHash = rawExpectedGenesisHash
		err = n.DB.Put(genesisHashKey, rawGenesisHash)
	}
	if err != nil {
		return err
	}

	genesisHash, err := ids.ToID(rawGenesisHash)
	if err != nil {
		return err
	}
	expectedGenesisHash, err := ids.ToID(rawExpectedGenesisHash)
	if err != nil {
		return err
	}

	if genesisHash != expectedGenesisHash {
		return fmt.Errorf("db contains invalid genesis hash. DB Genesis: %s Generated Genesis: %s", genesisHash, expectedGenesisHash)
	}

	n.Log.Info("initializing database",
		zap.Stringer("genesisHash", genesisHash),
	)

	ok, err := n.DB.Has(ungracefulShutdown)
	if err != nil {
		return fmt.Errorf("failed to read ungraceful shutdown key: %w", err)
	}

	if ok {
		n.Log.Warn("detected previous ungraceful shutdown")
	}

	if err := n.DB.Put(ungracefulShutdown, nil); err != nil {
		return fmt.Errorf(
			"failed to write ungraceful shutdown key: %w",
			err,
		)
	}

	return nil
}

// Set the node IDs of the peers this node should first connect to
func (n *Node) initBootstrappers() error {
	n.bootstrappers = validators.NewManager()
	for _, bootstrapper := range n.Config.Bootstrappers {
		// Note: The beacon connection manager will treat all beaconIDs as
		// equal.
		// Invariant: We never use the TxID or BLS keys populated here.
		if err := n.bootstrappers.AddStaker(constants.PrimaryNetworkID, bootstrapper.ID, nil, ids.Empty, 1); err != nil {
			return err
		}
	}
	return nil
}

// Create the EventDispatcher used for hooking events
// into the general process flow.
func (n *Node) initEventDispatchers() {
	n.BlockAcceptorGroup = snow.NewAcceptorGroup(n.Log)
	n.TxAcceptorGroup = snow.NewAcceptorGroup(n.Log)
	n.VertexAcceptorGroup = snow.NewAcceptorGroup(n.Log)
}

// Initialize [n.indexer].
// Should only be called after [n.DB], [n.DecisionAcceptorGroup],
// [n.ConsensusAcceptorGroup], [n.Log], [n.APIServer], [n.chainManager] are
// initialized
func (n *Node) initIndexer() error {
	txIndexerDB := prefixdb.New(indexerDBPrefix, n.DB)
	var err error
	n.indexer, err = indexer.NewIndexer(indexer.Config{
		IndexingEnabled:      n.Config.IndexAPIEnabled,
		AllowIncompleteIndex: n.Config.IndexAllowIncomplete,
		DB:                   txIndexerDB,
		Log:                  n.Log,
		BlockAcceptorGroup:   n.BlockAcceptorGroup,
		TxAcceptorGroup:      n.TxAcceptorGroup,
		VertexAcceptorGroup:  n.VertexAcceptorGroup,
		APIServer:            n.APIServer,
		ShutdownF: func() {
			n.Shutdown(0) // TODO put exit code here
		},
	})
	if err != nil {
		return fmt.Errorf("couldn't create index for txs: %w", err)
	}

	// Chain manager will notify indexer when a chain is created
	n.chainManager.AddRegistrant(n.indexer)

	return nil
}
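
// Added note: the X and C chains are not started directly here; their
// CreateChainTxs are embedded in the Platform chain's genesis (see
// genesis.VMGenesis in initChainManager), so starting the P-chain below
// transitively creates every other primary-network chain.
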
// Initializes the Platform chain.
// Its genesis data specifies the other chains that should be created.
func (n *Node) initChains(genesisBytes []byte) error {
	n.Log.Info("initializing chains")

	platformChain := chains.ChainParameters{
		ID:            constants.PlatformChainID,
		SubnetID:      constants.PrimaryNetworkID,
		GenesisData:   genesisBytes, // Specifies other chains to create
		VMID:          constants.PlatformVMID,
		CustomBeacons: n.bootstrappers,
	}

	// Start the chain creator with the Platform Chain
	return n.chainManager.StartChainCreator(platformChain)
}

func (n *Node) initMetrics() error {
	n.MetricsGatherer = metrics.NewPrefixGatherer()
	n.MeterDBMetricsGatherer = metrics.NewLabelGatherer(chains.ChainLabel)
	return n.MetricsGatherer.Register(
		meterDBNamespace,
		n.MeterDBMetricsGatherer,
	)
}

func (n *Node) initNAT() {
	n.Log.Info("initializing NAT")

	if n.Config.PublicIP == "" && n.Config.PublicIPResolutionService == "" {
		n.router = nat.GetRouter()
		if !n.router.SupportsNAT() {
			n.Log.Warn("UPnP and NAT-PMP router attach failed, " +
				"you may not be listening publicly. " +
				"Please confirm the settings in your router")
		}
	} else {
		n.router = nat.NewNoRouter()
	}

	n.portMapper = nat.NewPortMapper(n.Log, n.router)
}

// initAPIServer initializes the server that handles HTTP calls
func (n *Node) initAPIServer() error {
	n.Log.Info("initializing API server")

	// An empty host is treated as a wildcard to match all addresses, so it is
	// considered public.
	hostIsPublic := n.Config.HTTPHost == ""
	if !hostIsPublic {
		ip, err := ips.Lookup(n.Config.HTTPHost)
		if err != nil {
			n.Log.Fatal("failed to lookup HTTP host",
				zap.String("host", n.Config.HTTPHost),
				zap.Error(err),
			)
			return err
		}
		hostIsPublic = ips.IsPublic(ip)

		n.Log.Debug("finished HTTP host lookup",
			zap.String("host", n.Config.HTTPHost),
			zap.Stringer("ip", ip),
			zap.Bool("isPublic", hostIsPublic),
		)
	}

	listenAddress := net.JoinHostPort(n.Config.HTTPHost, strconv.FormatUint(uint64(n.Config.HTTPPort), 10))
	listener, err := net.Listen("tcp", listenAddress)
	if err != nil {
		return err
	}

	addrStr := listener.Addr().String()
	addrPort, err := ips.ParseAddrPort(addrStr)
	if err != nil {
		return err
	}

	// Don't open the HTTP port if the HTTP server is private
	if hostIsPublic {
		n.Log.Warn("HTTP server is binding to a potentially public host. "+
			"You may be vulnerable to a DoS attack if your HTTP port is publicly accessible",
			zap.String("host", n.Config.HTTPHost),
		)
"+ 1007 "You may be vulnerable to a DoS attack if your HTTP port is publicly accessible", 1008 zap.String("host", n.Config.HTTPHost), 1009 ) 1010 1011 n.portMapper.Map( 1012 addrPort.Port(), 1013 addrPort.Port(), 1014 httpPortName, 1015 nil, 1016 n.Config.PublicIPResolutionFreq, 1017 ) 1018 } 1019 1020 protocol := "http" 1021 if n.Config.HTTPSEnabled { 1022 cert, err := tls.X509KeyPair(n.Config.HTTPSCert, n.Config.HTTPSKey) 1023 if err != nil { 1024 return err 1025 } 1026 config := &tls.Config{ 1027 MinVersion: tls.VersionTLS12, 1028 Certificates: []tls.Certificate{cert}, 1029 } 1030 listener = tls.NewListener(listener, config) 1031 1032 protocol = "https" 1033 } 1034 n.apiURI = fmt.Sprintf("%s://%s", protocol, listener.Addr()) 1035 1036 apiRegisterer, err := metrics.MakeAndRegister( 1037 n.MetricsGatherer, 1038 apiNamespace, 1039 ) 1040 if err != nil { 1041 return err 1042 } 1043 1044 n.APIServer, err = server.New( 1045 n.Log, 1046 n.LogFactory, 1047 listener, 1048 n.Config.HTTPAllowedOrigins, 1049 n.Config.ShutdownTimeout, 1050 n.ID, 1051 n.Config.TraceConfig.Enabled, 1052 n.tracer, 1053 apiRegisterer, 1054 n.Config.HTTPConfig.HTTPConfig, 1055 n.Config.HTTPAllowedHosts, 1056 ) 1057 return err 1058 } 1059 1060 // Add the default VM aliases 1061 func (n *Node) addDefaultVMAliases() error { 1062 n.Log.Info("adding the default VM aliases") 1063 1064 for vmID, aliases := range genesis.VMAliases { 1065 for _, alias := range aliases { 1066 if err := n.VMAliaser.Alias(vmID, alias); err != nil { 1067 return err 1068 } 1069 } 1070 } 1071 return nil 1072 } 1073 1074 // Create the chainManager and register the following VMs: 1075 // AVM, Simple Payments DAG, Simple Payments Chain, and Platform VM 1076 // Assumes n.DBManager, n.vdrs all initialized (non-nil) 1077 func (n *Node) initChainManager(avaxAssetID ids.ID) error { 1078 createAVMTx, err := genesis.VMGenesis(n.Config.GenesisBytes, constants.AVMID) 1079 if err != nil { 1080 return err 1081 } 1082 xChainID := createAVMTx.ID() 1083 1084 createEVMTx, err := genesis.VMGenesis(n.Config.GenesisBytes, constants.EVMID) 1085 if err != nil { 1086 return err 1087 } 1088 cChainID := createEVMTx.ID() 1089 1090 // If any of these chains die, the node shuts down 1091 criticalChains := set.Of( 1092 constants.PlatformChainID, 1093 xChainID, 1094 cChainID, 1095 ) 1096 1097 requestsReg, err := metrics.MakeAndRegister( 1098 n.MetricsGatherer, 1099 requestsNamespace, 1100 ) 1101 if err != nil { 1102 return err 1103 } 1104 1105 responseReg, err := metrics.MakeAndRegister( 1106 n.MetricsGatherer, 1107 responsesNamespace, 1108 ) 1109 if err != nil { 1110 return err 1111 } 1112 1113 n.timeoutManager, err = timeout.NewManager( 1114 &n.Config.AdaptiveTimeoutConfig, 1115 n.benchlistManager, 1116 requestsReg, 1117 responseReg, 1118 ) 1119 if err != nil { 1120 return err 1121 } 1122 go n.Log.RecoverAndPanic(n.timeoutManager.Dispatch) 1123 1124 // Routes incoming messages from peers to the appropriate chain 1125 err = n.chainRouter.Initialize( 1126 n.ID, 1127 n.Log, 1128 n.timeoutManager, 1129 n.Config.ConsensusShutdownTimeout, 1130 criticalChains, 1131 n.Config.SybilProtectionEnabled, 1132 n.Config.TrackedSubnets, 1133 n.Shutdown, 1134 n.Config.RouterHealthConfig, 1135 requestsReg, 1136 ) 1137 if err != nil { 1138 return fmt.Errorf("couldn't initialize chain router: %w", err) 1139 } 1140 1141 subnets, err := chains.NewSubnets(n.ID, n.Config.SubnetConfigs) 1142 if err != nil { 1143 return fmt.Errorf("failed to initialize subnets: %w", err) 1144 } 1145 1146 n.chainManager, err 
		&chains.ManagerConfig{
			SybilProtectionEnabled:                  n.Config.SybilProtectionEnabled,
			StakingTLSSigner:                        n.StakingTLSSigner,
			StakingTLSCert:                          n.StakingTLSCert,
			StakingBLSKey:                           n.Config.StakingSigningKey,
			Log:                                     n.Log,
			LogFactory:                              n.LogFactory,
			VMManager:                               n.VMManager,
			BlockAcceptorGroup:                      n.BlockAcceptorGroup,
			TxAcceptorGroup:                         n.TxAcceptorGroup,
			VertexAcceptorGroup:                     n.VertexAcceptorGroup,
			DB:                                      n.DB,
			MsgCreator:                              n.msgCreator,
			Router:                                  n.chainRouter,
			Net:                                     n.Net,
			Validators:                              n.vdrs,
			PartialSyncPrimaryNetwork:               n.Config.PartialSyncPrimaryNetwork,
			NodeID:                                  n.ID,
			NetworkID:                               n.Config.NetworkID,
			Server:                                  n.APIServer,
			Keystore:                                n.keystore,
			AtomicMemory:                            n.sharedMemory,
			AVAXAssetID:                             avaxAssetID,
			XChainID:                                xChainID,
			CChainID:                                cChainID,
			CriticalChains:                          criticalChains,
			TimeoutManager:                          n.timeoutManager,
			Health:                                  n.health,
			ShutdownNodeFunc:                        n.Shutdown,
			MeterVMEnabled:                          n.Config.MeterVMEnabled,
			Metrics:                                 n.MetricsGatherer,
			MeterDBMetrics:                          n.MeterDBMetricsGatherer,
			SubnetConfigs:                           n.Config.SubnetConfigs,
			ChainConfigs:                            n.Config.ChainConfigs,
			FrontierPollFrequency:                   n.Config.FrontierPollFrequency,
			ConsensusAppConcurrency:                 n.Config.ConsensusAppConcurrency,
			BootstrapMaxTimeGetAncestors:            n.Config.BootstrapMaxTimeGetAncestors,
			BootstrapAncestorsMaxContainersSent:     n.Config.BootstrapAncestorsMaxContainersSent,
			BootstrapAncestorsMaxContainersReceived: n.Config.BootstrapAncestorsMaxContainersReceived,
			ApricotPhase4Time:                       version.GetApricotPhase4Time(n.Config.NetworkID),
			ApricotPhase4MinPChainHeight:            version.ApricotPhase4MinPChainHeight[n.Config.NetworkID],
			ResourceTracker:                         n.resourceTracker,
			StateSyncBeacons:                        n.Config.StateSyncIDs,
			TracingEnabled:                          n.Config.TraceConfig.Enabled,
			Tracer:                                  n.tracer,
			ChainDataDir:                            n.Config.ChainDataDir,
			Subnets:                                 subnets,
		},
	)
	if err != nil {
		return err
	}

	// Notify the API server when new chains are created
	n.chainManager.AddRegistrant(n.APIServer)
	return nil
}

// initVMs initializes the VMs Avalanche supports + any additional vms installed as plugins.
func (n *Node) initVMs() error {
	n.Log.Info("initializing VMs")

	vdrs := n.vdrs

	// If sybil protection is disabled, we provide the P-chain its own local
	// validator manager that will not be used by the rest of the node. This
	// allows the node's validator sets to be determined by network connections.
	if !n.Config.SybilProtectionEnabled {
		vdrs = validators.NewManager()
	}

	// Register the VMs that Avalanche supports
	eUpgradeTime := version.GetEUpgradeTime(n.Config.NetworkID)
	err := errors.Join(
		n.VMManager.RegisterFactory(context.TODO(), constants.PlatformVMID, &platformvm.Factory{
			Config: platformconfig.Config{
				Chains:                    n.chainManager,
				Validators:                vdrs,
				UptimeLockedCalculator:    n.uptimeCalculator,
				SybilProtectionEnabled:    n.Config.SybilProtectionEnabled,
				PartialSyncPrimaryNetwork: n.Config.PartialSyncPrimaryNetwork,
				TrackedSubnets:            n.Config.TrackedSubnets,
				StaticFeeConfig:           n.Config.StaticConfig,
				UptimePercentage:          n.Config.UptimeRequirement,
				MinValidatorStake:         n.Config.MinValidatorStake,
				MaxValidatorStake:         n.Config.MaxValidatorStake,
				MinDelegatorStake:         n.Config.MinDelegatorStake,
				MinDelegationFee:          n.Config.MinDelegationFee,
				MinStakeDuration:          n.Config.MinStakeDuration,
				MaxStakeDuration:          n.Config.MaxStakeDuration,
				RewardConfig:              n.Config.RewardConfig,
				UpgradeConfig: upgrade.Config{
					ApricotPhase3Time: version.GetApricotPhase3Time(n.Config.NetworkID),
					ApricotPhase5Time: version.GetApricotPhase5Time(n.Config.NetworkID),
					BanffTime:         version.GetBanffTime(n.Config.NetworkID),
					CortinaTime:       version.GetCortinaTime(n.Config.NetworkID),
					DurangoTime:       version.GetDurangoTime(n.Config.NetworkID),
					EUpgradeTime:      eUpgradeTime,
				},
				UseCurrentHeight: n.Config.UseCurrentHeight,
			},
		}),
		n.VMManager.RegisterFactory(context.TODO(), constants.AVMID, &avm.Factory{
			Config: avmconfig.Config{
				TxFee:            n.Config.TxFee,
				CreateAssetTxFee: n.Config.CreateAssetTxFee,
				EUpgradeTime:     eUpgradeTime,
			},
		}),
		n.VMManager.RegisterFactory(context.TODO(), constants.EVMID, &coreth.Factory{}),
	)
	if err != nil {
		return err
	}

	// initialize vm runtime manager
	n.runtimeManager = runtime.NewManager()

	// initialize the vm registry
	n.VMRegistry = registry.NewVMRegistry(registry.VMRegistryConfig{
		VMGetter: registry.NewVMGetter(registry.VMGetterConfig{
			FileReader:      filesystem.NewReader(),
			Manager:         n.VMManager,
			PluginDirectory: n.Config.PluginDir,
			CPUTracker:      n.resourceManager,
			RuntimeTracker:  n.runtimeManager,
		}),
		VMManager: n.VMManager,
	})

	// register any vms that need to be installed as plugins from disk
	_, failedVMs, err := n.VMRegistry.Reload(context.TODO())
	for failedVM, err := range failedVMs {
		n.Log.Error("failed to register VM",
			zap.Stringer("vmID", failedVM),
			zap.Error(err),
		)
	}
	return err
}

// initSharedMemory initializes the shared memory for cross chain interaction
func (n *Node) initSharedMemory() {
	n.Log.Info("initializing SharedMemory")
	sharedMemoryDB := prefixdb.New([]byte("shared memory"), n.DB)
	n.sharedMemory = atomic.NewMemory(sharedMemoryDB)
}
// initKeystoreAPI initializes the keystore service, which is an on-node wallet.
// Assumes n.APIServer is already set
func (n *Node) initKeystoreAPI() error {
	n.Log.Info("initializing keystore")
	n.keystore = keystore.New(n.Log, prefixdb.New(keystoreDBPrefix, n.DB))
	handler, err := n.keystore.CreateHandler()
	if err != nil {
		return err
	}
	if !n.Config.KeystoreAPIEnabled {
		n.Log.Info("skipping keystore API initialization because it has been disabled")
		return nil
	}
	n.Log.Warn("initializing deprecated keystore API")
	return n.APIServer.AddRoute(handler, "keystore", "")
}

// initMetricsAPI initializes the Metrics API
// Assumes n.APIServer is already set
func (n *Node) initMetricsAPI() error {
	if !n.Config.MetricsAPIEnabled {
		n.Log.Info("skipping metrics API initialization because it has been disabled")
		return nil
	}

	processReg, err := metrics.MakeAndRegister(
		n.MetricsGatherer,
		processNamespace,
	)
	if err != nil {
		return err
	}

	// Current state of process metrics.
	processCollector := collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})
	if err := processReg.Register(processCollector); err != nil {
		return err
	}

	// Go process metrics using debug.GCStats.
	goCollector := collectors.NewGoCollector()
	if err := processReg.Register(goCollector); err != nil {
		return err
	}

	n.Log.Info("initializing metrics API")

	return n.APIServer.AddRoute(
		promhttp.HandlerFor(
			n.MetricsGatherer,
			promhttp.HandlerOpts{},
		),
		"metrics",
		"",
	)
}

// initAdminAPI initializes the Admin API service
// Assumes n.log, n.chainManager, and n.ValidatorAPI already initialized
func (n *Node) initAdminAPI() error {
	if !n.Config.AdminAPIEnabled {
		n.Log.Info("skipping admin API initialization because it has been disabled")
		return nil
	}
	n.Log.Info("initializing admin API")
	service, err := admin.NewService(
		admin.Config{
			Log:          n.Log,
			DB:           n.DB,
			ChainManager: n.chainManager,
			HTTPServer:   n.APIServer,
			ProfileDir:   n.Config.ProfilerConfig.Dir,
			LogFactory:   n.LogFactory,
			NodeConfig:   n.Config,
			VMManager:    n.VMManager,
			VMRegistry:   n.VMRegistry,
		},
	)
	if err != nil {
		return err
	}
	return n.APIServer.AddRoute(
		service,
		"admin",
		"",
	)
}

// initProfiler initializes the continuous profiling
func (n *Node) initProfiler() {
	if !n.Config.ProfilerConfig.Enabled {
		n.Log.Info("skipping profiler initialization because it has been disabled")
		return
	}

	n.Log.Info("initializing continuous profiler")
	n.profiler = profiler.NewContinuous(
		filepath.Join(n.Config.ProfilerConfig.Dir, "continuous"),
		n.Config.ProfilerConfig.Freq,
		n.Config.ProfilerConfig.MaxNumFiles,
	)
	go n.Log.RecoverAndPanic(func() {
		err := n.profiler.Dispatch()
		if err != nil {
			n.Log.Fatal("continuous profiler failed",
				zap.Error(err),
			)
		}
		n.Shutdown(1)
	})
}

func (n *Node) initInfoAPI() error {
	if !n.Config.InfoAPIEnabled {
		n.Log.Info("skipping info API initialization because it has been disabled")
		return nil
	}

	n.Log.Info("initializing info API")

	service, err := info.NewService(
		info.Parameters{
			Version:                       version.CurrentApp,
			NodeID:                        n.ID,
			NodePOP:                       signer.NewProofOfPossession(n.Config.StakingSigningKey),
			NetworkID:                     n.Config.NetworkID,
			TxFee:                         n.Config.TxFee,
			CreateAssetTxFee:              n.Config.CreateAssetTxFee,
			CreateSubnetTxFee:             n.Config.CreateSubnetTxFee,
			TransformSubnetTxFee:          n.Config.TransformSubnetTxFee,
			CreateBlockchainTxFee:         n.Config.CreateBlockchainTxFee,
			AddPrimaryNetworkValidatorFee: n.Config.AddPrimaryNetworkValidatorFee,
			AddPrimaryNetworkDelegatorFee: n.Config.AddPrimaryNetworkDelegatorFee,
			AddSubnetValidatorFee:         n.Config.AddSubnetValidatorFee,
			AddSubnetDelegatorFee:         n.Config.AddSubnetDelegatorFee,
			VMManager:                     n.VMManager,
		},
		n.Log,
		n.vdrs,
		n.chainManager,
		n.VMManager,
		n.Config.NetworkConfig.MyIPPort,
		n.Net,
		n.benchlistManager,
	)
	if err != nil {
		return err
	}
	return n.APIServer.AddRoute(
		service,
		"info",
		"",
	)
}

// initHealthAPI initializes the Health API service
// Assumes n.Log, n.Net, n.APIServer, n.HTTPLog already initialized
func (n *Node) initHealthAPI() error {
	healthReg, err := metrics.MakeAndRegister(
		n.MetricsGatherer,
		healthNamespace,
	)
	if err != nil {
		return err
	}

	n.health, err = health.New(n.Log, healthReg)
	if err != nil {
		return err
	}

	if !n.Config.HealthAPIEnabled {
		n.Log.Info("skipping health API initialization because it has been disabled")
		return nil
	}

	n.Log.Info("initializing Health API")
	err = n.health.RegisterHealthCheck("network", n.Net, health.ApplicationTag)
	if err != nil {
		return fmt.Errorf("couldn't register network health check: %w", err)
	}

	err = n.health.RegisterHealthCheck("router", n.chainRouter, health.ApplicationTag)
	if err != nil {
		return fmt.Errorf("couldn't register router health check: %w", err)
	}

	// TODO: add database health to liveness check
	err = n.health.RegisterHealthCheck("database", n.DB, health.ApplicationTag)
	if err != nil {
		return fmt.Errorf("couldn't register database health check: %w", err)
	}

	diskSpaceCheck := health.CheckerFunc(func(context.Context) (interface{}, error) {
		// confirm that the node has enough disk space to continue operating
		// if there is too little disk space remaining, first report unhealthy
		// and then shut down the node

		availableDiskBytes := n.resourceTracker.DiskTracker().AvailableDiskBytes()

		var err error
		if availableDiskBytes < n.Config.RequiredAvailableDiskSpace {
			n.Log.Fatal("low on disk space. Shutting down...",
				zap.Uint64("remainingDiskBytes", availableDiskBytes),
			)
			go n.Shutdown(1)
			err = fmt.Errorf("remaining available disk space (%d) is below minimum required available space (%d)", availableDiskBytes, n.Config.RequiredAvailableDiskSpace)
		} else if availableDiskBytes < n.Config.WarningThresholdAvailableDiskSpace {
			err = fmt.Errorf("remaining available disk space (%d) is below the warning threshold of disk space (%d)", availableDiskBytes, n.Config.WarningThresholdAvailableDiskSpace)
		}

		return map[string]interface{}{
			"availableDiskBytes": availableDiskBytes,
		}, err
	})

	err = n.health.RegisterHealthCheck("diskspace", diskSpaceCheck, health.ApplicationTag)
	if err != nil {
		return fmt.Errorf("couldn't register resource health check: %w", err)
	}

	handler, err := health.NewGetAndPostHandler(n.Log, n.health)
	if err != nil {
		return err
	}

	err = n.APIServer.AddRoute(
		handler,
		"health",
		"",
	)
	if err != nil {
		return err
	}

	err = n.APIServer.AddRoute(
		health.NewGetHandler(n.health.Readiness),
		"health",
		"/readiness",
	)
	if err != nil {
		return err
	}

	err = n.APIServer.AddRoute(
		health.NewGetHandler(n.health.Health),
		"health",
		"/health",
	)
	if err != nil {
		return err
	}

	return n.APIServer.AddRoute(
		health.NewGetHandler(n.health.Liveness),
		"health",
		"/liveness",
	)
}

// Give chains aliases as specified by the genesis information
func (n *Node) initChainAliases(genesisBytes []byte) error {
	n.Log.Info("initializing chain aliases")
	_, chainAliases, err := genesis.Aliases(genesisBytes)
	if err != nil {
		return err
	}

	for chainID, aliases := range chainAliases {
		for _, alias := range aliases {
			if err := n.chainManager.Alias(chainID, alias); err != nil {
				return err
			}
		}
	}

	for chainID, aliases := range n.Config.ChainAliases {
		for _, alias := range aliases {
			if err := n.chainManager.Alias(chainID, alias); err != nil {
				return err
			}
		}
	}

	return nil
}

// Add API aliases as specified by the genesis information
func (n *Node) initAPIAliases(genesisBytes []byte) error {
	n.Log.Info("initializing API aliases")
	apiAliases, _, err := genesis.Aliases(genesisBytes)
	if err != nil {
		return err
	}

	for url, aliases := range apiAliases {
		if err := n.APIServer.AddAliases(url, aliases...); err != nil {
			return err
		}
	}
	return nil
}
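
// Added note: trackers vs. targeters — the resource tracker created below
// measures the CPU/disk usage each peer actually causes, while the CPU/disk
// targeters (initCPUTargeter/initDiskTargeter) compute how much usage each
// peer is allowed to cause; the networking layer compares the two when
// deciding whether to throttle a peer.
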
// Initialize [n.resourceManager].
func (n *Node) initResourceManager() error {
	systemResourcesRegisterer, err := metrics.MakeAndRegister(
		n.MetricsGatherer,
		systemResourcesNamespace,
	)
	if err != nil {
		return err
	}
	resourceManager, err := resource.NewManager(
		n.Log,
		n.Config.DatabaseConfig.Path,
		n.Config.SystemTrackerFrequency,
		n.Config.SystemTrackerCPUHalflife,
		n.Config.SystemTrackerDiskHalflife,
		systemResourcesRegisterer,
	)
	if err != nil {
		return err
	}
	n.resourceManager = resourceManager
	n.resourceManager.TrackProcess(os.Getpid())

	resourceTrackerRegisterer, err := metrics.MakeAndRegister(
		n.MetricsGatherer,
		resourceTrackerNamespace,
	)
	if err != nil {
		return err
	}
	n.resourceTracker, err = tracker.NewResourceTracker(
		resourceTrackerRegisterer,
		n.resourceManager,
		&meter.ContinuousFactory{},
		n.Config.SystemTrackerProcessingHalflife,
	)
	return err
}

// Initialize [n.cpuTargeter].
// Assumes [n.resourceTracker] is already initialized.
func (n *Node) initCPUTargeter(
	config *tracker.TargeterConfig,
) {
	n.cpuTargeter = tracker.NewTargeter(
		n.Log,
		config,
		n.vdrs,
		n.resourceTracker.CPUTracker(),
	)
}

// Initialize [n.diskTargeter].
// Assumes [n.resourceTracker] is already initialized.
func (n *Node) initDiskTargeter(
	config *tracker.TargeterConfig,
) {
	n.diskTargeter = tracker.NewTargeter(
		n.Log,
		config,
		n.vdrs,
		n.resourceTracker.DiskTracker(),
	)
}

// Shutdown this node
// May be called multiple times
func (n *Node) Shutdown(exitCode int) {
	if !n.shuttingDown.Get() { // only set the exit code once
		n.shuttingDownExitCode.Set(exitCode)
	}
	n.shuttingDown.Set(true)
	n.shutdownOnce.Do(n.shutdown)
}

func (n *Node) shutdown() {
	n.Log.Info("shutting down node",
		zap.Int("exitCode", n.ExitCode()),
	)

	if n.health != nil {
		// Passes if the node is not shutting down
		shuttingDownCheck := health.CheckerFunc(func(context.Context) (interface{}, error) {
			return map[string]interface{}{
				"isShuttingDown": true,
			}, errShuttingDown
		})

		err := n.health.RegisterHealthCheck("shuttingDown", shuttingDownCheck, health.ApplicationTag)
		if err != nil {
			n.Log.Debug("couldn't register shuttingDown health check",
				zap.Error(err),
			)
		}

		time.Sleep(n.Config.ShutdownWait)
	}

	if n.resourceManager != nil {
		n.resourceManager.Shutdown()
	}
	n.timeoutManager.Stop()
	if n.chainManager != nil {
		n.chainManager.Shutdown()
	}
	if n.profiler != nil {
		n.profiler.Shutdown()
	}
	if n.Net != nil {
		n.Net.StartClose()
	}
	if err := n.APIServer.Shutdown(); err != nil {
		n.Log.Debug("error during API shutdown",
			zap.Error(err),
		)
	}
	n.portMapper.UnmapAllPorts()
	n.ipUpdater.Stop()
	if err := n.indexer.Close(); err != nil {
		n.Log.Debug("error closing tx indexer",
			zap.Error(err),
		)
	}

	// Ensure all runtimes are shutdown
	n.Log.Info("cleaning up plugin runtimes")
	n.runtimeManager.Stop(context.TODO())

	if n.DB != nil {
		if err := n.DB.Delete(ungracefulShutdown); err != nil {
			n.Log.Error(
				"failed to delete ungraceful shutdown key",
				zap.Error(err),
			)
		}

		if err := n.DB.Close(); err != nil {
			n.Log.Warn("error during DB shutdown",
				zap.Error(err),
			)
		}
	}

	if n.Config.TraceConfig.Enabled {
		n.Log.Info("shutting down tracing")
	}

	if err := n.tracer.Close(); err != nil {
		n.Log.Warn("error during tracer shutdown",
			zap.Error(err),
		)
	}

	n.DoneShuttingDown.Done()
	n.Log.Info("finished node shutdown")
}

func (n *Node) ExitCode() int {
	return n.shuttingDownExitCode.Get()
}
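
// Illustrative usage (added note, not part of the original file): a minimal
// sketch of how an embedding binary might drive this package, assuming a
// *Config and loggers have been constructed elsewhere; the run helper is
// hypothetical:
//
//	func run(cfg *node.Config, logFactory logging.Factory, log logging.Logger) error {
//		n, err := node.New(cfg, logFactory, log)
//		if err != nil {
//			return err
//		}
//		// Dispatch blocks until Shutdown is called or a server fails.
//		return n.Dispatch()
//	}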