github.com/MetalBlockchain/metalgo@v1.11.9/tests/fixture/tmpnet/network.go (about)

     1  // Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
     2  // See the file LICENSE for licensing terms.
     3  
     4  package tmpnet
     5  
     6  import (
     7  	"context"
     8  	"encoding/hex"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"path/filepath"
    14  	"slices"
    15  	"strconv"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/google/uuid"
    20  
    21  	"github.com/MetalBlockchain/metalgo/config"
    22  	"github.com/MetalBlockchain/metalgo/genesis"
    23  	"github.com/MetalBlockchain/metalgo/ids"
    24  	"github.com/MetalBlockchain/metalgo/utils/crypto/secp256k1"
    25  	"github.com/MetalBlockchain/metalgo/utils/perms"
    26  	"github.com/MetalBlockchain/metalgo/utils/set"
    27  	"github.com/MetalBlockchain/metalgo/vms/platformvm"
    28  )
    29  
    30  // The Network type is defined in this file (orchestration) and
    31  // network_config.go (reading/writing configuration).
    32  
const (
	// Names of the shell variables whose values can configure network
	// orchestration.
	NetworkDirEnvName = "TMPNET_NETWORK_DIR"
	RootDirEnvName    = "TMPNET_ROOT_DIR"

	// Interval between successive health checks while waiting for nodes
	// to report healthy. This interval was chosen to avoid spamming node
	// APIs during startup, as smaller intervals (e.g. 50ms) seemed to
	// noticeably increase the time for a network's nodes to be seen as
	// healthy.
	networkHealthCheckInterval = 200 * time.Millisecond

	// All temporary networks will use this arbitrary network ID by default.
	defaultNetworkID = 88888

	// Hex-encoded private key that init decodes into HardhatKey.
	// eth address: 0x8db97C7cEcE249c2b98bDC0226Cc4C2A57BF52FC
	HardHatKeyStr = "56289e99c94b6912bfc12adc093c9b51124f0dc54ac7a766b2bc5ccf558d8027"
)
    50  
var (
	// Key expected to be funded for subnet-evm hardhat testing. Populated
	// by init from HardHatKeyStr.
	// TODO(marun) Remove when subnet-evm configures the genesis with this key.
	HardhatKey *secp256k1.PrivateKey

	// Returned when an operation is asked to act on an empty set of nodes.
	errInsufficientNodes = errors.New("at least one node is required")
)
    58  
    59  func init() {
    60  	hardhatKeyBytes, err := hex.DecodeString(HardHatKeyStr)
    61  	if err != nil {
    62  		panic(err)
    63  	}
    64  	HardhatKey, err = secp256k1.ToPrivateKey(hardhatKeyBytes)
    65  	if err != nil {
    66  		panic(err)
    67  	}
    68  }
    69  
// Network collects the configuration for running a temporary avalanchego
// network.
type Network struct {
	// Uniquely identifies the temporary network for metrics
	// collection. Distinct from avalanchego's concept of network ID
	// since the utility of special network ID values (e.g. to trigger
	// specific fork behavior in a given network) precludes requiring
	// unique network ID values across all temporary networks.
	UUID string

	// A string identifying the entity that started or maintains this
	// network. Useful for differentiating between networks when a
	// given CI job uses multiple networks.
	Owner string

	// Path where network configuration and data is stored
	Dir string

	// ID of the network. If zero, must be set in Genesis.
	NetworkID uint32

	// Configuration common across nodes

	// Genesis for the network. If nil, NetworkID must be non-zero
	Genesis *genesis.UnparsedConfig

	// Configuration for primary network chains (P, X, C)
	// TODO(marun) Rename to PrimaryChainConfigs
	ChainConfigs map[string]FlagsMap

	// Default configuration to use when creating new nodes
	DefaultFlags         FlagsMap
	DefaultRuntimeConfig NodeRuntimeConfig

	// Keys pre-funded in the genesis on both the X-Chain and the C-Chain
	PreFundedKeys []*secp256k1.PrivateKey

	// Nodes that constitute the network
	Nodes []*Node

	// Subnets that have been enabled on the network
	Subnets []*Subnet
}
   112  
   113  func NewDefaultNetwork(owner string) *Network {
   114  	return &Network{
   115  		Owner: owner,
   116  		Nodes: NewNodesOrPanic(DefaultNodeCount),
   117  	}
   118  }
   119  
   120  // Ensure a real and absolute network dir so that node
   121  // configuration that embeds the network path will continue to
   122  // work regardless of symlink and working directory changes.
   123  func toCanonicalDir(dir string) (string, error) {
   124  	absDir, err := filepath.Abs(dir)
   125  	if err != nil {
   126  		return "", err
   127  	}
   128  	return filepath.EvalSymlinks(absDir)
   129  }
   130  
   131  func BootstrapNewNetwork(
   132  	ctx context.Context,
   133  	w io.Writer,
   134  	network *Network,
   135  	rootNetworkDir string,
   136  	avalancheGoExecPath string,
   137  	pluginDir string,
   138  ) error {
   139  	if len(network.Nodes) == 0 {
   140  		return errInsufficientNodes
   141  	}
   142  	if err := network.EnsureDefaultConfig(w, avalancheGoExecPath, pluginDir); err != nil {
   143  		return err
   144  	}
   145  	if err := network.Create(rootNetworkDir); err != nil {
   146  		return err
   147  	}
   148  	return network.Bootstrap(ctx, w)
   149  }
   150  
   151  // Stops the nodes of the network configured in the provided directory.
   152  func StopNetwork(ctx context.Context, dir string) error {
   153  	network, err := ReadNetwork(dir)
   154  	if err != nil {
   155  		return err
   156  	}
   157  	return network.Stop(ctx)
   158  }
   159  
   160  // Restarts the nodes of the network configured in the provided directory.
   161  func RestartNetwork(ctx context.Context, w io.Writer, dir string) error {
   162  	network, err := ReadNetwork(dir)
   163  	if err != nil {
   164  		return err
   165  	}
   166  	return network.Restart(ctx, w)
   167  }
   168  
   169  // Reads a network from the provided directory.
   170  func ReadNetwork(dir string) (*Network, error) {
   171  	canonicalDir, err := toCanonicalDir(dir)
   172  	if err != nil {
   173  		return nil, err
   174  	}
   175  	network := &Network{
   176  		Dir: canonicalDir,
   177  	}
   178  	if err := network.Read(); err != nil {
   179  		return nil, fmt.Errorf("failed to read network: %w", err)
   180  	}
   181  	return network, nil
   182  }
   183  
   184  // Initializes a new network with default configuration.
   185  func (n *Network) EnsureDefaultConfig(w io.Writer, avalancheGoPath string, pluginDir string) error {
   186  	if _, err := fmt.Fprintf(w, "Preparing configuration for new network with %s\n", avalancheGoPath); err != nil {
   187  		return err
   188  	}
   189  
   190  	// A UUID supports centralized metrics collection
   191  	if len(n.UUID) == 0 {
   192  		n.UUID = uuid.NewString()
   193  	}
   194  
   195  	// Ensure default flags
   196  	if n.DefaultFlags == nil {
   197  		n.DefaultFlags = FlagsMap{}
   198  	}
   199  	n.DefaultFlags.SetDefaults(DefaultTmpnetFlags())
   200  
   201  	if len(n.Nodes) == 1 {
   202  		// Sybil protection needs to be disabled for a single node network to start
   203  		n.DefaultFlags[config.SybilProtectionEnabledKey] = false
   204  	}
   205  
   206  	// Only configure the plugin dir with a non-empty value to ensure
   207  	// the use of the default value (`[datadir]/plugins`) when
   208  	// no plugin dir is configured.
   209  	if len(pluginDir) > 0 {
   210  		if _, ok := n.DefaultFlags[config.PluginDirKey]; !ok {
   211  			n.DefaultFlags[config.PluginDirKey] = pluginDir
   212  		}
   213  	}
   214  
   215  	// Ensure pre-funded keys if the genesis is not predefined
   216  	if n.Genesis == nil && len(n.PreFundedKeys) == 0 {
   217  		keys, err := NewPrivateKeys(DefaultPreFundedKeyCount)
   218  		if err != nil {
   219  			return err
   220  		}
   221  		n.PreFundedKeys = keys
   222  	}
   223  
   224  	// Ensure primary chains are configured
   225  	if n.ChainConfigs == nil {
   226  		n.ChainConfigs = map[string]FlagsMap{}
   227  	}
   228  	defaultChainConfigs := DefaultChainConfigs()
   229  	for alias, chainConfig := range defaultChainConfigs {
   230  		if _, ok := n.ChainConfigs[alias]; !ok {
   231  			n.ChainConfigs[alias] = FlagsMap{}
   232  		}
   233  		n.ChainConfigs[alias].SetDefaults(chainConfig)
   234  	}
   235  
   236  	// Ensure runtime is configured
   237  	if len(n.DefaultRuntimeConfig.AvalancheGoPath) == 0 {
   238  		n.DefaultRuntimeConfig.AvalancheGoPath = avalancheGoPath
   239  	}
   240  
   241  	// Ensure nodes are configured
   242  	for i := range n.Nodes {
   243  		if err := n.EnsureNodeConfig(n.Nodes[i]); err != nil {
   244  			return err
   245  		}
   246  	}
   247  
   248  	return nil
   249  }
   250  
   251  // Creates the network on disk, generating its genesis and configuring its nodes in the process.
   252  func (n *Network) Create(rootDir string) error {
   253  	// Ensure creation of the root dir
   254  	if len(rootDir) == 0 {
   255  		// Use the default root dir
   256  		var err error
   257  		rootDir, err = getDefaultRootNetworkDir()
   258  		if err != nil {
   259  			return err
   260  		}
   261  	}
   262  	if err := os.MkdirAll(rootDir, perms.ReadWriteExecute); err != nil {
   263  		return fmt.Errorf("failed to create root network dir: %w", err)
   264  	}
   265  
   266  	// A time-based name ensures consistent directory ordering
   267  	dirName := time.Now().Format("20060102-150405.999999")
   268  	if len(n.Owner) > 0 {
   269  		// Include the owner to differentiate networks created at similar times
   270  		dirName = fmt.Sprintf("%s-%s", dirName, n.Owner)
   271  	}
   272  
   273  	// Ensure creation of the network dir
   274  	networkDir := filepath.Join(rootDir, dirName)
   275  	if err := os.MkdirAll(networkDir, perms.ReadWriteExecute); err != nil {
   276  		return fmt.Errorf("failed to create network dir: %w", err)
   277  	}
   278  	canonicalDir, err := toCanonicalDir(networkDir)
   279  	if err != nil {
   280  		return err
   281  	}
   282  	n.Dir = canonicalDir
   283  
   284  	// Ensure the existence of the plugin directory or nodes won't be able to start.
   285  	pluginDir, err := n.DefaultFlags.GetStringVal(config.PluginDirKey)
   286  	if err != nil {
   287  		return err
   288  	}
   289  	if len(pluginDir) > 0 {
   290  		if err := os.MkdirAll(pluginDir, perms.ReadWriteExecute); err != nil {
   291  			return fmt.Errorf("failed to create plugin dir: %w", err)
   292  		}
   293  	}
   294  
   295  	if n.NetworkID == 0 && n.Genesis == nil {
   296  		// Pre-fund known legacy keys to support ad-hoc testing. Usage of a legacy key will
   297  		// require knowing the key beforehand rather than retrieving it from the set of pre-funded
   298  		// keys exposed by a network. Since allocation will not be exclusive, a test using a
   299  		// legacy key is unlikely to be a good candidate for parallel execution.
   300  		keysToFund := []*secp256k1.PrivateKey{
   301  			genesis.VMRQKey,
   302  			genesis.EWOQKey,
   303  			HardhatKey,
   304  		}
   305  		keysToFund = append(keysToFund, n.PreFundedKeys...)
   306  
   307  		genesis, err := NewTestGenesis(defaultNetworkID, n.Nodes, keysToFund)
   308  		if err != nil {
   309  			return err
   310  		}
   311  		n.Genesis = genesis
   312  	}
   313  
   314  	for _, node := range n.Nodes {
   315  		// Ensure the node is configured for use with the network and
   316  		// knows where to write its configuration.
   317  		if err := n.EnsureNodeConfig(node); err != nil {
   318  			return nil
   319  		}
   320  	}
   321  
   322  	// Ensure configuration on disk is current
   323  	return n.Write()
   324  }
   325  
   326  // Starts the specified nodes
   327  func (n *Network) StartNodes(ctx context.Context, w io.Writer, nodesToStart ...*Node) error {
   328  	if len(nodesToStart) == 0 {
   329  		return errInsufficientNodes
   330  	}
   331  	nodesToWaitFor := nodesToStart
   332  	if !slices.Contains(nodesToStart, n.Nodes[0]) {
   333  		// If starting all nodes except the bootstrap node (because the bootstrap node is already
   334  		// running), ensure that the health of the bootstrap node will be logged by including it in
   335  		// the set of nodes to wait for.
   336  		nodesToWaitFor = n.Nodes
   337  	} else {
   338  		// Simplify output by only logging network start when starting all nodes or when starting
   339  		// the first node by itself to bootstrap subnet creation.
   340  		if _, err := fmt.Fprintf(w, "Starting network %s (UUID: %s)\n", n.Dir, n.UUID); err != nil {
   341  			return err
   342  		}
   343  	}
   344  
   345  	// Record the time before nodes are started to ensure visibility of subsequently collected metrics via the emitted link
   346  	startTime := time.Now()
   347  
   348  	// Configure the networking for each node and start
   349  	for _, node := range nodesToStart {
   350  		if err := n.StartNode(ctx, w, node); err != nil {
   351  			return err
   352  		}
   353  	}
   354  
   355  	if _, err := fmt.Fprint(w, "Waiting for nodes to report healthy...\n\n"); err != nil {
   356  		return err
   357  	}
   358  	if err := waitForHealthy(ctx, w, nodesToWaitFor); err != nil {
   359  		return err
   360  	}
   361  	if _, err := fmt.Fprintf(w, "\nStarted network %s (UUID: %s)\n", n.Dir, n.UUID); err != nil {
   362  		return err
   363  	}
   364  	// Provide a link to the main dashboard filtered by the uuid and showing results from now till whenever the link is viewed
   365  	if _, err := fmt.Fprintf(w, "\nMetrics: https://grafana-experimental.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?&var-filter=network_uuid%%7C%%3D%%7C%s&var-filter=is_ephemeral_node%%7C%%3D%%7Cfalse&from=%d&to=now\n", n.UUID, startTime.UnixMilli()); err != nil {
   366  		return err
   367  	}
   368  
   369  	return nil
   370  }
   371  
   372  // Start the network for the first time
   373  func (n *Network) Bootstrap(ctx context.Context, w io.Writer) error {
   374  	if len(n.Subnets) == 0 {
   375  		// Without the need to coordinate subnet configuration,
   376  		// starting all nodes at once is the simplest option.
   377  		return n.StartNodes(ctx, w, n.Nodes...)
   378  	}
   379  
   380  	// The node that will be used to create subnets and bootstrap the network
   381  	bootstrapNode := n.Nodes[0]
   382  
   383  	// Whether sybil protection will need to be re-enabled after subnet creation
   384  	reEnableSybilProtection := false
   385  
   386  	if len(n.Nodes) > 1 {
   387  		// Reduce the cost of subnet creation for a network of multiple nodes by
   388  		// creating subnets with a single node with sybil protection
   389  		// disabled. This allows the creation of initial subnet state without
   390  		// requiring coordination between multiple nodes.
   391  
   392  		if _, err := fmt.Fprintln(w, "Starting a single-node network with sybil protection disabled for quicker subnet creation"); err != nil {
   393  			return err
   394  		}
   395  
   396  		// If sybil protection is enabled, it should be re-enabled before the node is used to bootstrap the other nodes
   397  		var err error
   398  		reEnableSybilProtection, err = bootstrapNode.Flags.GetBoolVal(config.SybilProtectionEnabledKey, true)
   399  		if err != nil {
   400  			return fmt.Errorf("failed to read sybil protection flag: %w", err)
   401  		}
   402  
   403  		// Ensure sybil protection is disabled for the bootstrap node.
   404  		bootstrapNode.Flags[config.SybilProtectionEnabledKey] = false
   405  	}
   406  
   407  	if err := n.StartNodes(ctx, w, bootstrapNode); err != nil {
   408  		return err
   409  	}
   410  
   411  	// Don't restart the node during subnet creation since it will always be restarted afterwards.
   412  	if err := n.CreateSubnets(ctx, w, bootstrapNode.URI, false /* restartRequired */); err != nil {
   413  		return err
   414  	}
   415  
   416  	if reEnableSybilProtection {
   417  		if _, err := fmt.Fprintf(w, "Re-enabling sybil protection for %s\n", bootstrapNode.NodeID); err != nil {
   418  			return err
   419  		}
   420  		delete(bootstrapNode.Flags, config.SybilProtectionEnabledKey)
   421  	}
   422  
   423  	if _, err := fmt.Fprintf(w, "Restarting bootstrap node %s\n", bootstrapNode.NodeID); err != nil {
   424  		return err
   425  	}
   426  
   427  	if len(n.Nodes) == 1 {
   428  		// Ensure the node is restarted to pick up subnet and chain configuration
   429  		return n.RestartNode(ctx, w, bootstrapNode)
   430  	}
   431  
   432  	// TODO(marun) This last restart of the bootstrap node might be unnecessary if:
   433  	// - sybil protection didn't change
   434  	// - the node is not a subnet validator
   435  
   436  	// Ensure the bootstrap node is restarted to pick up configuration changes. Avoid using
   437  	// RestartNode since the node won't be able to report healthy until other nodes are started.
   438  	if err := bootstrapNode.Stop(ctx); err != nil {
   439  		return fmt.Errorf("failed to stop node %s: %w", bootstrapNode.NodeID, err)
   440  	}
   441  	if err := n.StartNode(ctx, w, bootstrapNode); err != nil {
   442  		return fmt.Errorf("failed to start node %s: %w", bootstrapNode.NodeID, err)
   443  	}
   444  
   445  	if _, err := fmt.Fprintln(w, "Starting remaining nodes..."); err != nil {
   446  		return err
   447  	}
   448  	return n.StartNodes(ctx, w, n.Nodes[1:]...)
   449  }
   450  
   451  // Starts the provided node after configuring it for the network.
   452  func (n *Network) StartNode(ctx context.Context, w io.Writer, node *Node) error {
   453  	if err := n.EnsureNodeConfig(node); err != nil {
   454  		return err
   455  	}
   456  
   457  	bootstrapIPs, bootstrapIDs, err := n.getBootstrapIPsAndIDs(node)
   458  	if err != nil {
   459  		return err
   460  	}
   461  	node.SetNetworkingConfig(bootstrapIDs, bootstrapIPs)
   462  
   463  	if err := node.Write(); err != nil {
   464  		return err
   465  	}
   466  
   467  	if err := node.Start(w); err != nil {
   468  		// Attempt to stop an unhealthy node to provide some assurance to the caller
   469  		// that an error condition will not result in a lingering process.
   470  		err = errors.Join(err, node.Stop(ctx))
   471  		return err
   472  	}
   473  
   474  	return nil
   475  }
   476  
   477  // Restart a single node.
   478  func (n *Network) RestartNode(ctx context.Context, w io.Writer, node *Node) error {
   479  	// Ensure the node reuses the same API port across restarts to ensure
   480  	// consistent labeling of metrics. Otherwise prometheus's automatic
   481  	// addition of the `instance` label (host:port) results in
   482  	// segmentation of results for a given node every time the port
   483  	// changes on restart. This segmentation causes graphs on the grafana
   484  	// dashboards to display multiple series per graph for a given node,
   485  	// one for each port that the node used.
   486  	//
   487  	// There is a non-zero chance of the port being allocatted to a
   488  	// different process and the node subsequently being unable to start,
   489  	// but the alternative is having to update the grafana dashboards
   490  	// query-by-query to ensure that node metrics ignore the instance
   491  	// label.
   492  	if err := node.SaveAPIPort(); err != nil {
   493  		return err
   494  	}
   495  
   496  	if err := node.Stop(ctx); err != nil {
   497  		return fmt.Errorf("failed to stop node %s: %w", node.NodeID, err)
   498  	}
   499  	if err := n.StartNode(ctx, w, node); err != nil {
   500  		return fmt.Errorf("failed to start node %s: %w", node.NodeID, err)
   501  	}
   502  	if _, err := fmt.Fprintf(w, " waiting for node %s to report healthy\n", node.NodeID); err != nil {
   503  		return err
   504  	}
   505  	return WaitForHealthy(ctx, node)
   506  }
   507  
   508  // Stops all nodes in the network.
   509  func (n *Network) Stop(ctx context.Context) error {
   510  	// Target all nodes, including the ephemeral ones
   511  	nodes, err := ReadNodes(n.Dir, true /* includeEphemeral */)
   512  	if err != nil {
   513  		return err
   514  	}
   515  
   516  	var errs []error
   517  
   518  	// Initiate stop on all nodes
   519  	for _, node := range nodes {
   520  		if err := node.InitiateStop(ctx); err != nil {
   521  			errs = append(errs, fmt.Errorf("failed to stop node %s: %w", node.NodeID, err))
   522  		}
   523  	}
   524  
   525  	// Wait for stop to complete on all nodes
   526  	for _, node := range nodes {
   527  		if err := node.WaitForStopped(ctx); err != nil {
   528  			errs = append(errs, fmt.Errorf("failed to wait for node %s to stop: %w", node.NodeID, err))
   529  		}
   530  	}
   531  
   532  	if len(errs) > 0 {
   533  		return fmt.Errorf("failed to stop network:\n%w", errors.Join(errs...))
   534  	}
   535  	return nil
   536  }
   537  
   538  // Restarts all non-ephemeral nodes in the network.
   539  func (n *Network) Restart(ctx context.Context, w io.Writer) error {
   540  	if _, err := fmt.Fprintln(w, " restarting network"); err != nil {
   541  		return err
   542  	}
   543  	for _, node := range n.Nodes {
   544  		if err := n.RestartNode(ctx, w, node); err != nil {
   545  			return err
   546  		}
   547  	}
   548  	return nil
   549  }
   550  
   551  // Ensures the provided node has the configuration it needs to start. If the data dir is not
   552  // set, it will be defaulted to [nodeParentDir]/[node ID]. For a not-yet-created network,
   553  // no action will be taken.
   554  // TODO(marun) Reword or refactor to account for the differing behavior pre- vs post-start
   555  func (n *Network) EnsureNodeConfig(node *Node) error {
   556  	flags := node.Flags
   557  
   558  	// Ensure nodes can label their metrics with the network uuid
   559  	node.NetworkUUID = n.UUID
   560  
   561  	// Ensure nodes can label metrics with an indication of the shared/private nature of the network
   562  	node.NetworkOwner = n.Owner
   563  
   564  	// Set the network name if available
   565  	networkID := n.NetworkID
   566  	if networkID == 0 && n.Genesis != nil && n.Genesis.NetworkID > 0 {
   567  		networkID = n.Genesis.NetworkID
   568  	}
   569  	if networkID > 0 {
   570  		// Convert the network id to a string to ensure consistency in JSON round-tripping.
   571  		flags[config.NetworkNameKey] = strconv.FormatUint(uint64(networkID), 10)
   572  	}
   573  
   574  	if err := node.EnsureKeys(); err != nil {
   575  		return err
   576  	}
   577  
   578  	flags.SetDefaults(n.DefaultFlags)
   579  
   580  	// Set fields including the network path
   581  	if len(n.Dir) > 0 {
   582  		defaultFlags := FlagsMap{
   583  			config.ChainConfigDirKey: n.getChainConfigDir(),
   584  		}
   585  
   586  		if n.Genesis != nil {
   587  			defaultFlags[config.GenesisFileKey] = n.getGenesisPath()
   588  		}
   589  
   590  		// Only set the subnet dir if it exists or the node won't start.
   591  		subnetDir := n.getSubnetDir()
   592  		if _, err := os.Stat(subnetDir); err == nil {
   593  			defaultFlags[config.SubnetConfigDirKey] = subnetDir
   594  		} else if !errors.Is(err, os.ErrNotExist) {
   595  			return err
   596  		}
   597  
   598  		node.Flags.SetDefaults(defaultFlags)
   599  
   600  		// Ensure the node's data dir is configured
   601  		dataDir := node.GetDataDir()
   602  		if len(dataDir) == 0 {
   603  			// NodeID will have been set by EnsureKeys
   604  			dataDir = filepath.Join(n.Dir, node.NodeID.String())
   605  			flags[config.DataDirKey] = dataDir
   606  		}
   607  	}
   608  
   609  	// Ensure the node runtime is configured
   610  	if node.RuntimeConfig == nil {
   611  		node.RuntimeConfig = &NodeRuntimeConfig{
   612  			AvalancheGoPath: n.DefaultRuntimeConfig.AvalancheGoPath,
   613  		}
   614  	}
   615  
   616  	return nil
   617  }
   618  
   619  // TrackedSubnetsForNode returns the subnet IDs for the given node
   620  func (n *Network) TrackedSubnetsForNode(nodeID ids.NodeID) string {
   621  	subnetIDs := make([]string, 0, len(n.Subnets))
   622  	for _, subnet := range n.Subnets {
   623  		if subnet.SubnetID == ids.Empty {
   624  			// Subnet has not yet been created
   625  			continue
   626  		}
   627  		// Only track subnets that this node validates
   628  		for _, validatorID := range subnet.ValidatorIDs {
   629  			if validatorID == nodeID {
   630  				subnetIDs = append(subnetIDs, subnet.SubnetID.String())
   631  				break
   632  			}
   633  		}
   634  	}
   635  	return strings.Join(subnetIDs, ",")
   636  }
   637  
   638  func (n *Network) GetSubnet(name string) *Subnet {
   639  	for _, subnet := range n.Subnets {
   640  		if subnet.Name == name {
   641  			return subnet
   642  		}
   643  	}
   644  	return nil
   645  }
   646  
   647  // Ensure that each subnet on the network is created. If restartRequired is false, node restart
   648  // to pick up configuration changes becomes the responsibility of the caller.
   649  func (n *Network) CreateSubnets(ctx context.Context, w io.Writer, apiURI string, restartRequired bool) error {
   650  	createdSubnets := make([]*Subnet, 0, len(n.Subnets))
   651  	for _, subnet := range n.Subnets {
   652  		if len(subnet.ValidatorIDs) == 0 {
   653  			return fmt.Errorf("subnet %s needs at least one validator", subnet.SubnetID)
   654  		}
   655  		if subnet.SubnetID != ids.Empty {
   656  			// The subnet already exists
   657  			continue
   658  		}
   659  
   660  		if _, err := fmt.Fprintf(w, "Creating subnet %q\n", subnet.Name); err != nil {
   661  			return err
   662  		}
   663  
   664  		if subnet.OwningKey == nil {
   665  			// Allocate a pre-funded key and remove it from the network so it won't be used for
   666  			// other purposes
   667  			if len(n.PreFundedKeys) == 0 {
   668  				return fmt.Errorf("no pre-funded keys available to create subnet %q", subnet.Name)
   669  			}
   670  			subnet.OwningKey = n.PreFundedKeys[len(n.PreFundedKeys)-1]
   671  			n.PreFundedKeys = n.PreFundedKeys[:len(n.PreFundedKeys)-1]
   672  		}
   673  
   674  		// Create the subnet on the network
   675  		if err := subnet.Create(ctx, n.Nodes[0].URI); err != nil {
   676  			return err
   677  		}
   678  
   679  		if _, err := fmt.Fprintf(w, " created subnet %q as %q\n", subnet.Name, subnet.SubnetID); err != nil {
   680  			return err
   681  		}
   682  
   683  		// Persist the subnet configuration
   684  		if err := subnet.Write(n.getSubnetDir(), n.getChainConfigDir()); err != nil {
   685  			return err
   686  		}
   687  
   688  		if _, err := fmt.Fprintf(w, " wrote configuration for subnet %q\n", subnet.Name); err != nil {
   689  			return err
   690  		}
   691  
   692  		createdSubnets = append(createdSubnets, subnet)
   693  	}
   694  
   695  	if len(createdSubnets) == 0 {
   696  		return nil
   697  	}
   698  
   699  	// Ensure the pre-funded key changes are persisted to disk
   700  	if err := n.Write(); err != nil {
   701  		return err
   702  	}
   703  
   704  	reconfiguredNodes := []*Node{}
   705  	for _, node := range n.Nodes {
   706  		existingTrackedSubnets, err := node.Flags.GetStringVal(config.TrackSubnetsKey)
   707  		if err != nil {
   708  			return err
   709  		}
   710  		trackedSubnets := n.TrackedSubnetsForNode(node.NodeID)
   711  		if existingTrackedSubnets == trackedSubnets {
   712  			continue
   713  		}
   714  		node.Flags[config.TrackSubnetsKey] = trackedSubnets
   715  		reconfiguredNodes = append(reconfiguredNodes, node)
   716  	}
   717  
   718  	if restartRequired {
   719  		if _, err := fmt.Fprintln(w, "Restarting node(s) to enable them to track the new subnet(s)"); err != nil {
   720  			return err
   721  		}
   722  
   723  		for _, node := range reconfiguredNodes {
   724  			if len(node.URI) == 0 {
   725  				// Only running nodes should be restarted
   726  				continue
   727  			}
   728  			if err := n.RestartNode(ctx, w, node); err != nil {
   729  				return err
   730  			}
   731  		}
   732  	}
   733  
   734  	// Add validators for the subnet
   735  	for _, subnet := range createdSubnets {
   736  		if _, err := fmt.Fprintf(w, "Adding validators for subnet %q\n", subnet.Name); err != nil {
   737  			return err
   738  		}
   739  
   740  		// Collect the nodes intended to validate the subnet
   741  		validatorIDs := set.NewSet[ids.NodeID](len(subnet.ValidatorIDs))
   742  		validatorIDs.Add(subnet.ValidatorIDs...)
   743  		validatorNodes := []*Node{}
   744  		for _, node := range n.Nodes {
   745  			if !validatorIDs.Contains(node.NodeID) {
   746  				continue
   747  			}
   748  			validatorNodes = append(validatorNodes, node)
   749  		}
   750  
   751  		if err := subnet.AddValidators(ctx, w, apiURI, validatorNodes...); err != nil {
   752  			return err
   753  		}
   754  	}
   755  
   756  	// Wait for nodes to become subnet validators
   757  	pChainClient := platformvm.NewClient(n.Nodes[0].URI)
   758  	validatorsToRestart := set.Set[ids.NodeID]{}
   759  	for _, subnet := range createdSubnets {
   760  		if err := waitForActiveValidators(ctx, w, pChainClient, subnet); err != nil {
   761  			return err
   762  		}
   763  
   764  		// It should now be safe to create chains for the subnet
   765  		if err := subnet.CreateChains(ctx, w, n.Nodes[0].URI); err != nil {
   766  			return err
   767  		}
   768  
   769  		// Persist the chain configuration
   770  		if err := subnet.Write(n.getSubnetDir(), n.getChainConfigDir()); err != nil {
   771  			return err
   772  		}
   773  		if _, err := fmt.Fprintf(w, " wrote chain configuration for subnet %q\n", subnet.Name); err != nil {
   774  			return err
   775  		}
   776  
   777  		// If one or more of the subnets chains have explicit configuration, the
   778  		// subnet's validator nodes will need to be restarted for those nodes to read
   779  		// the newly written chain configuration and apply it to the chain(s).
   780  		if subnet.HasChainConfig() {
   781  			validatorsToRestart.Add(subnet.ValidatorIDs...)
   782  		}
   783  	}
   784  
   785  	if !restartRequired || len(validatorsToRestart) == 0 {
   786  		return nil
   787  	}
   788  
   789  	if _, err := fmt.Fprintln(w, "Restarting node(s) to pick up chain configuration"); err != nil {
   790  		return err
   791  	}
   792  
   793  	// Restart nodes to allow configuration for the new chains to take effect
   794  	for _, node := range n.Nodes {
   795  		if !validatorsToRestart.Contains(node.NodeID) {
   796  			continue
   797  		}
   798  		if err := n.RestartNode(ctx, w, node); err != nil {
   799  			return err
   800  		}
   801  	}
   802  
   803  	return nil
   804  }
   805  
   806  func (n *Network) GetURIForNodeID(nodeID ids.NodeID) (string, error) {
   807  	for _, node := range n.Nodes {
   808  		if node.NodeID == nodeID {
   809  			return node.URI, nil
   810  		}
   811  	}
   812  	return "", fmt.Errorf("%s is not known to the network", nodeID)
   813  }
   814  
   815  func (n *Network) GetNodeURIs() []NodeURI {
   816  	return GetNodeURIs(n.Nodes)
   817  }
   818  
   819  // Retrieves bootstrap IPs and IDs for all nodes except the skipped one (this supports
   820  // collecting the bootstrap details for restarting a node).
   821  func (n *Network) getBootstrapIPsAndIDs(skippedNode *Node) ([]string, []string, error) {
   822  	// Collect staking addresses of non-ephemeral nodes for use in bootstrapping a node
   823  	nodes, err := ReadNodes(n.Dir, false /* includeEphemeral */)
   824  	if err != nil {
   825  		return nil, nil, fmt.Errorf("failed to read network's nodes: %w", err)
   826  	}
   827  	var (
   828  		bootstrapIPs = make([]string, 0, len(nodes))
   829  		bootstrapIDs = make([]string, 0, len(nodes))
   830  	)
   831  	for _, node := range nodes {
   832  		if skippedNode != nil && node.NodeID == skippedNode.NodeID {
   833  			continue
   834  		}
   835  
   836  		if len(node.StakingAddress) == 0 {
   837  			// Node is not running
   838  			continue
   839  		}
   840  
   841  		bootstrapIPs = append(bootstrapIPs, node.StakingAddress)
   842  		bootstrapIDs = append(bootstrapIDs, node.NodeID.String())
   843  	}
   844  
   845  	return bootstrapIPs, bootstrapIDs, nil
   846  }
   847  
   848  // Waits until the provided nodes are healthy.
   849  func waitForHealthy(ctx context.Context, w io.Writer, nodes []*Node) error {
   850  	ticker := time.NewTicker(networkHealthCheckInterval)
   851  	defer ticker.Stop()
   852  
   853  	unhealthyNodes := set.Of(nodes...)
   854  	for {
   855  		for node := range unhealthyNodes {
   856  			healthy, err := node.IsHealthy(ctx)
   857  			if err != nil && !errors.Is(err, ErrNotRunning) {
   858  				return err
   859  			}
   860  			if !healthy {
   861  				continue
   862  			}
   863  
   864  			unhealthyNodes.Remove(node)
   865  			if _, err := fmt.Fprintf(w, "%s is healthy @ %s\n", node.NodeID, node.URI); err != nil {
   866  				return err
   867  			}
   868  		}
   869  
   870  		if unhealthyNodes.Len() == 0 {
   871  			return nil
   872  		}
   873  
   874  		select {
   875  		case <-ctx.Done():
   876  			return fmt.Errorf("failed to see all nodes healthy before timeout: %w", ctx.Err())
   877  		case <-ticker.C:
   878  		}
   879  	}
   880  }
   881  
   882  // Retrieves the root dir for tmpnet data.
   883  func getTmpnetPath() (string, error) {
   884  	homeDir, err := os.UserHomeDir()
   885  	if err != nil {
   886  		return "", err
   887  	}
   888  	return filepath.Join(homeDir, ".tmpnet"), nil
   889  }
   890  
   891  // Retrieves the default root dir for storing networks and their
   892  // configuration.
   893  func getDefaultRootNetworkDir() (string, error) {
   894  	tmpnetPath, err := getTmpnetPath()
   895  	if err != nil {
   896  		return "", err
   897  	}
   898  	return filepath.Join(tmpnetPath, "networks"), nil
   899  }
   900  
   901  // Retrieves the path to a reusable network path for the given owner.
   902  func GetReusableNetworkPathForOwner(owner string) (string, error) {
   903  	networkPath, err := getDefaultRootNetworkDir()
   904  	if err != nil {
   905  		return "", err
   906  	}
   907  	return filepath.Join(networkPath, "latest_"+owner), nil
   908  }