github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/e2eutil/node.go (about) 1 package e2eutil 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "time" 8 9 "github.com/hashicorp/nomad/api" 10 "github.com/hashicorp/nomad/testutil" 11 ) 12 13 // AgentDisconnect is a test helper function that runs a raw_exec job 14 // that will disconnect a client at the network level and reconnect it 15 // after the specified period of time. 16 // 17 // Returns once the job is registered with the job ID of the restart 18 // job and any registration errors, not after the duration, so that 19 // callers can take actions while the client is down. 20 func AgentDisconnect(nodeID string, after time.Duration) (string, error) { 21 jobID := "disconnect-" + nodeID 22 vars := []string{"-var", "nodeID=" + nodeID} 23 if after > 0 { 24 vars = append(vars, "-var", fmt.Sprintf("time=%d", int(after.Seconds()))) 25 } 26 27 jobFilePath := "../e2eutil/input/disconnect-node.nomad" 28 29 // TODO: temporary hack around having older tests running on the 30 // framework vs new tests not, as the framework has a different 31 // working directory 32 dir, err := os.Getwd() 33 if err != nil { 34 return "", err 35 } 36 if filepath.Base(dir) == "e2e" { 37 jobFilePath = "e2eutil/input/disconnect-node.nomad" 38 } 39 40 err = RegisterWithArgs(jobID, jobFilePath, vars...) 41 return jobID, err 42 } 43 44 // AgentRestartAfter is a test helper function that runs a raw_exec 45 // job that will stop a client and restart it after the specified 46 // period of time. The node must be running under systemd. 47 // 48 // Returns once the job is registered with the job ID of the restart 49 // job and any registration errors, not after the duration, so that 50 // callers can take actions while the client is down. 51 func AgentRestartAfter(nodeID string, after time.Duration) (string, error) { 52 jobID := "restart-" + nodeID 53 vars := []string{"-var", "nodeID=" + nodeID} 54 if after > 0 { 55 vars = append(vars, "-var", fmt.Sprintf("time=%d", int(after.Seconds()))) 56 } 57 58 jobFilePath := "../e2eutil/input/restart-node.nomad" 59 60 // TODO: temporary hack around having older tests running on the 61 // framework vs new tests not, as the framework has a different 62 // working directory 63 dir, err := os.Getwd() 64 if err != nil { 65 return "", err 66 } 67 if filepath.Base(dir) == "e2e" { 68 jobFilePath = "e2eutil/input/restart-node.nomad" 69 } 70 71 err = RegisterWithArgs(jobID, jobFilePath, vars...) 72 return jobID, err 73 } 74 75 // AgentRestart is a test helper function that restarts a client node 76 // running under systemd using a raw_exec job. Returns the job ID of 77 // the restart job so that callers can clean it up. 78 func AgentRestart(client *api.Client, nodeID string) (string, error) { 79 80 jobID, err := AgentRestartAfter(nodeID, 0) 81 if err != nil { 82 return jobID, err 83 } 84 85 reasonErr := fmt.Errorf("timed out") 86 retries := 30 87 for retries > 0 { 88 time.Sleep(1 * time.Second) 89 retries-- 90 91 allocStubs, _, err := client.Jobs().Allocations(jobID, true, nil) 92 if err != nil { 93 reasonErr = err 94 continue 95 } 96 97 if len(allocStubs) > 0 { 98 INNER: 99 for _, state := range allocStubs[0].TaskStates { 100 if state.State == "dead" { 101 node, _, err := client.Nodes().Info(nodeID, nil) 102 if err != nil { 103 reasonErr = err 104 break INNER 105 } 106 if node != nil && node.Status == "ready" { 107 return jobID, nil 108 } 109 reasonErr = fmt.Errorf("node status not ready") 110 } 111 } 112 } 113 } 114 return jobID, fmt.Errorf("node did not become ready: %v", reasonErr) 115 } 116 117 // ListWindowsClientNodes returns a list of Windows client IDs, so that tests 118 // can skip operating-specific tests if there are no Windows clients available. 119 // Returns an error only on client errors. 120 func ListWindowsClientNodes(client *api.Client) ([]string, error) { 121 return listClientNodesByOS(client, "windows") 122 } 123 124 // ListLinuxClientNodes returns a list of Linux client IDs, so that tests 125 // can skip operating-specific tests if there are no Linux clients available 126 // Returns an error only on client errors. 127 func ListLinuxClientNodes(client *api.Client) ([]string, error) { 128 return listClientNodesByOS(client, "linux") 129 } 130 131 func listClientNodesByOS(client *api.Client, osName string) ([]string, error) { 132 nodeIDs := []string{} 133 nodes, _, err := client.Nodes().List(&api.QueryOptions{}) 134 if err != nil { 135 return nodeIDs, fmt.Errorf("could not query nodes: %v", err) 136 } 137 for _, stubNode := range nodes { 138 node, _, err := client.Nodes().Info(stubNode.ID, nil) 139 if err != nil { 140 return nodeIDs, fmt.Errorf("could not query nodes: %v", err) 141 } 142 if name, ok := node.Attributes["kernel.name"]; ok && name == osName { 143 nodeIDs = append(nodeIDs, stubNode.ID) 144 } 145 } 146 return nodeIDs, nil 147 } 148 149 func NodeStatusList() ([]map[string]string, error) { 150 151 out, err := Command("nomad", "node", "status", "-verbose") 152 if err != nil { 153 return nil, fmt.Errorf("'nomad node status' failed: %w", err) 154 } 155 156 nodes, err := ParseColumns(out) 157 if err != nil { 158 return nil, fmt.Errorf("could not parse node status output: %w", err) 159 } 160 return nodes, nil 161 } 162 163 func NodeStatusListFiltered(filterFn func(string) bool) ([]map[string]string, error) { 164 165 out, err := Command("nomad", "node", "status", "-verbose") 166 if err != nil { 167 return nil, fmt.Errorf("'nomad node status' failed: %w", err) 168 } 169 170 allNodes, err := ParseColumns(out) 171 if err != nil { 172 return nil, fmt.Errorf("could not parse node status output: %w", err) 173 } 174 nodes := []map[string]string{} 175 176 for _, node := range allNodes { 177 out, err := Command("nomad", "node", "status", "-verbose", node["ID"]) 178 if err != nil { 179 return nil, fmt.Errorf("could not node status output: %w", err) 180 } 181 if filterFn(out) { 182 nodes = append(nodes, node) 183 } 184 } 185 186 return nodes, nil 187 } 188 189 func WaitForNodeStatus(nodeID, status string, wc *WaitConfig) error { 190 var got string 191 var err error 192 interval, retries := wc.OrDefault() 193 testutil.WaitForResultRetries(retries, func() (bool, error) { 194 time.Sleep(interval) 195 196 nodeStatuses, err := NodeStatusList() 197 if err != nil { 198 return false, err 199 } 200 for _, nodeStatus := range nodeStatuses { 201 if nodeStatus["ID"] == nodeID { 202 got = nodeStatus["Status"] 203 if got == status { 204 return true, nil 205 } 206 } 207 } 208 return false, nil 209 }, func(e error) { 210 err = fmt.Errorf("node status check failed: got %#v", got) 211 }) 212 return err 213 }