github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/e2eutil/node.go (about)

     1  package e2eutil
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  	"time"
     8  
     9  	"github.com/hashicorp/nomad/api"
    10  	"github.com/hashicorp/nomad/testutil"
    11  )
    12  
    13  // AgentDisconnect is a test helper function that runs a raw_exec job
    14  // that will disconnect a client at the network level and reconnect it
    15  // after the specified period of time.
    16  //
    17  // Returns once the job is registered with the job ID of the restart
    18  // job and any registration errors, not after the duration, so that
    19  // callers can take actions while the client is down.
    20  func AgentDisconnect(nodeID string, after time.Duration) (string, error) {
    21  	jobID := "disconnect-" + nodeID
    22  	vars := []string{"-var", "nodeID=" + nodeID}
    23  	if after > 0 {
    24  		vars = append(vars, "-var", fmt.Sprintf("time=%d", int(after.Seconds())))
    25  	}
    26  
    27  	jobFilePath := "../e2eutil/input/disconnect-node.nomad"
    28  
    29  	// TODO: temporary hack around having older tests running on the
    30  	// framework vs new tests not, as the framework has a different
    31  	// working directory
    32  	dir, err := os.Getwd()
    33  	if err != nil {
    34  		return "", err
    35  	}
    36  	if filepath.Base(dir) == "e2e" {
    37  		jobFilePath = "e2eutil/input/disconnect-node.nomad"
    38  	}
    39  
    40  	err = RegisterWithArgs(jobID, jobFilePath, vars...)
    41  	return jobID, err
    42  }
    43  
    44  // AgentRestartAfter is a test helper function that runs a raw_exec
    45  // job that will stop a client and restart it after the specified
    46  // period of time. The node must be running under systemd.
    47  //
    48  // Returns once the job is registered with the job ID of the restart
    49  // job and any registration errors, not after the duration, so that
    50  // callers can take actions while the client is down.
    51  func AgentRestartAfter(nodeID string, after time.Duration) (string, error) {
    52  	jobID := "restart-" + nodeID
    53  	vars := []string{"-var", "nodeID=" + nodeID}
    54  	if after > 0 {
    55  		vars = append(vars, "-var", fmt.Sprintf("time=%d", int(after.Seconds())))
    56  	}
    57  
    58  	jobFilePath := "../e2eutil/input/restart-node.nomad"
    59  
    60  	// TODO: temporary hack around having older tests running on the
    61  	// framework vs new tests not, as the framework has a different
    62  	// working directory
    63  	dir, err := os.Getwd()
    64  	if err != nil {
    65  		return "", err
    66  	}
    67  	if filepath.Base(dir) == "e2e" {
    68  		jobFilePath = "e2eutil/input/restart-node.nomad"
    69  	}
    70  
    71  	err = RegisterWithArgs(jobID, jobFilePath, vars...)
    72  	return jobID, err
    73  }
    74  
    75  // AgentRestart is a test helper function that restarts a client node
    76  // running under systemd using a raw_exec job. Returns the job ID of
    77  // the restart job so that callers can clean it up.
    78  func AgentRestart(client *api.Client, nodeID string) (string, error) {
    79  
    80  	jobID, err := AgentRestartAfter(nodeID, 0)
    81  	if err != nil {
    82  		return jobID, err
    83  	}
    84  
    85  	reasonErr := fmt.Errorf("timed out")
    86  	retries := 30
    87  	for retries > 0 {
    88  		time.Sleep(1 * time.Second)
    89  		retries--
    90  
    91  		allocStubs, _, err := client.Jobs().Allocations(jobID, true, nil)
    92  		if err != nil {
    93  			reasonErr = err
    94  			continue
    95  		}
    96  
    97  		if len(allocStubs) > 0 {
    98  		INNER:
    99  			for _, state := range allocStubs[0].TaskStates {
   100  				if state.State == "dead" {
   101  					node, _, err := client.Nodes().Info(nodeID, nil)
   102  					if err != nil {
   103  						reasonErr = err
   104  						break INNER
   105  					}
   106  					if node != nil && node.Status == "ready" {
   107  						return jobID, nil
   108  					}
   109  					reasonErr = fmt.Errorf("node status not ready")
   110  				}
   111  			}
   112  		}
   113  	}
   114  	return jobID, fmt.Errorf("node did not become ready: %v", reasonErr)
   115  }
   116  
   117  // ListWindowsClientNodes returns a list of Windows client IDs, so that tests
   118  // can skip operating-specific tests if there are no Windows clients available.
   119  // Returns an error only on client errors.
   120  func ListWindowsClientNodes(client *api.Client) ([]string, error) {
   121  	return listClientNodesByOS(client, "windows")
   122  }
   123  
   124  // ListLinuxClientNodes returns a list of Linux client IDs, so that tests
   125  // can skip operating-specific tests if there are no Linux clients available
   126  // Returns an error only on client errors.
   127  func ListLinuxClientNodes(client *api.Client) ([]string, error) {
   128  	return listClientNodesByOS(client, "linux")
   129  }
   130  
   131  func listClientNodesByOS(client *api.Client, osName string) ([]string, error) {
   132  	nodeIDs := []string{}
   133  	nodes, _, err := client.Nodes().List(&api.QueryOptions{})
   134  	if err != nil {
   135  		return nodeIDs, fmt.Errorf("could not query nodes: %v", err)
   136  	}
   137  	for _, stubNode := range nodes {
   138  		node, _, err := client.Nodes().Info(stubNode.ID, nil)
   139  		if err != nil {
   140  			return nodeIDs, fmt.Errorf("could not query nodes: %v", err)
   141  		}
   142  		if name, ok := node.Attributes["kernel.name"]; ok && name == osName {
   143  			nodeIDs = append(nodeIDs, stubNode.ID)
   144  		}
   145  	}
   146  	return nodeIDs, nil
   147  }
   148  
   149  func NodeStatusList() ([]map[string]string, error) {
   150  
   151  	out, err := Command("nomad", "node", "status", "-verbose")
   152  	if err != nil {
   153  		return nil, fmt.Errorf("'nomad node status' failed: %w", err)
   154  	}
   155  
   156  	nodes, err := ParseColumns(out)
   157  	if err != nil {
   158  		return nil, fmt.Errorf("could not parse node status output: %w", err)
   159  	}
   160  	return nodes, nil
   161  }
   162  
   163  func NodeStatusListFiltered(filterFn func(string) bool) ([]map[string]string, error) {
   164  
   165  	out, err := Command("nomad", "node", "status", "-verbose")
   166  	if err != nil {
   167  		return nil, fmt.Errorf("'nomad node status' failed: %w", err)
   168  	}
   169  
   170  	allNodes, err := ParseColumns(out)
   171  	if err != nil {
   172  		return nil, fmt.Errorf("could not parse node status output: %w", err)
   173  	}
   174  	nodes := []map[string]string{}
   175  
   176  	for _, node := range allNodes {
   177  		out, err := Command("nomad", "node", "status", "-verbose", node["ID"])
   178  		if err != nil {
   179  			return nil, fmt.Errorf("could not node status output: %w", err)
   180  		}
   181  		if filterFn(out) {
   182  			nodes = append(nodes, node)
   183  		}
   184  	}
   185  
   186  	return nodes, nil
   187  }
   188  
   189  func WaitForNodeStatus(nodeID, status string, wc *WaitConfig) error {
   190  	var got string
   191  	var err error
   192  	interval, retries := wc.OrDefault()
   193  	testutil.WaitForResultRetries(retries, func() (bool, error) {
   194  		time.Sleep(interval)
   195  
   196  		nodeStatuses, err := NodeStatusList()
   197  		if err != nil {
   198  			return false, err
   199  		}
   200  		for _, nodeStatus := range nodeStatuses {
   201  			if nodeStatus["ID"] == nodeID {
   202  				got = nodeStatus["Status"]
   203  				if got == status {
   204  					return true, nil
   205  				}
   206  			}
   207  		}
   208  		return false, nil
   209  	}, func(e error) {
   210  		err = fmt.Errorf("node status check failed: got %#v", got)
   211  	})
   212  	return err
   213  }