github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/e2e/e2eutil/utils.go

package e2eutil

import (
	"fmt"
	"os"
	"strings"
	"testing"
	"time"

	consulapi "github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/jobspec2"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/kr/pretty"
	"github.com/stretchr/testify/require"
)

// retries controls how many times the WaitFor* helpers below poll the cluster
// before giving up and failing the test.
const retries = 500

// WaitForLeader blocks until the cluster has elected a leader or fails the test.
func WaitForLeader(t *testing.T, nomadClient *api.Client) {
	statusAPI := nomadClient.Status()

	testutil.WaitForResultRetries(retries, func() (bool, error) {
		leader, err := statusAPI.Leader()
		return leader != "", err
	}, func(err error) {
		require.NoError(t, err, "failed to find leader")
	})
}

// WaitForNodesReady waits until at least `nodes` number of nodes are ready or
// fails the test.
func WaitForNodesReady(t *testing.T, nomadClient *api.Client, nodes int) {
	nodesAPI := nomadClient.Nodes()

	testutil.WaitForResultRetries(retries, func() (bool, error) {
		defer time.Sleep(time.Millisecond * 100)
		nodesList, _, err := nodesAPI.List(nil)
		if err != nil {
			return false, fmt.Errorf("error listing nodes: %v", err)
		}

		eligibleNodes := 0
		for _, node := range nodesList {
			if node.Status == "ready" {
				eligibleNodes++
			}
		}

		return eligibleNodes >= nodes, fmt.Errorf("only %d nodes ready (wanted at least %d)", eligibleNodes, nodes)
	}, func(err error) {
		require.NoError(t, err, "failed to get enough ready nodes")
	})
}

// stringToPtrOrNil returns a pointer to s, or nil when s is empty.
func stringToPtrOrNil(s string) *string {
	if s == "" {
		return nil
	}
	return helper.StringToPtr(s)
}

// Parse2 parses the given job file with the jobspec2 (HCL2) parser.
func Parse2(t *testing.T, jobFile string) (*api.Job, error) {
	f, err := os.Open(jobFile)
	require.NoError(t, err)
	defer f.Close()
	return jobspec2.Parse(jobFile, f)
}

// RegisterAllocs parses the job in jobFile, registers it under the given jobID,
// and returns the job's allocation stubs once registration has produced an
// evaluation.
func RegisterAllocs(t *testing.T, nomadClient *api.Client, jobFile, jobID, cToken string) []*api.AllocationListStub {

	// Parse job
	job, err := Parse2(t, jobFile)
	require.NoError(t, err)

	// Set custom job ID (distinguish among tests)
	job.ID = helper.StringToPtr(jobID)

	// Set a Consul "operator" token for the job, if provided.
	job.ConsulToken = stringToPtrOrNil(cToken)

	// Register job
	var idx uint64
	jobs := nomadClient.Jobs()
	testutil.WaitForResult(func() (bool, error) {
		resp, meta, err := jobs.Register(job, nil)
		if err != nil {
			return false, err
		}
		idx = meta.LastIndex
		return resp.EvalID != "", fmt.Errorf("expected EvalID:%s", pretty.Sprint(resp))
	}, func(err error) {
		require.NoError(t, err)
	})

	allocs, _, err := jobs.Allocations(jobID, false, &api.QueryOptions{WaitIndex: idx})
	require.NoError(t, err)
	return allocs
}
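// exampleRegisterFlow is an illustrative sketch, not part of the upstream
// package: it shows how a test might combine the helpers above to bring a
// cluster to a known state and register a job. The job file path, job ID, and
// node count below are hypothetical placeholder values.
func exampleRegisterFlow(t *testing.T, nomadClient *api.Client) {
	// Wait for a leader and at least one ready client before registering.
	WaitForLeader(t, nomadClient)
	WaitForNodesReady(t, nomadClient, 1)

	// Register the job and log the allocations the scheduler created.
	allocs := RegisterAllocs(t, nomadClient, "input/example.nomad", "example-job", "")
	for _, alloc := range allocs {
		t.Logf("alloc %s is %s", alloc.ID, alloc.ClientStatus)
	}
}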
// RegisterAndWaitForAllocs wraps RegisterAllocs but blocks until Evals
// successfully create Allocs.
func RegisterAndWaitForAllocs(t *testing.T, nomadClient *api.Client, jobFile, jobID, cToken string) []*api.AllocationListStub {
	jobs := nomadClient.Jobs()

	// Start allocations
	RegisterAllocs(t, nomadClient, jobFile, jobID, cToken)

	var err error
	allocs := []*api.AllocationListStub{}
	evals := []*api.Evaluation{}

	// Wrap in retry to wait until placement
	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(time.Second)

		allocs, _, err = jobs.Allocations(jobID, false, nil)
		if len(allocs) == 0 {
			evals, _, err = nomadClient.Jobs().Evaluations(jobID, nil)
			return false, fmt.Errorf("no allocations for job %v", jobID)
		}

		return true, nil
	}, func(e error) {
		msg := fmt.Sprintf("allocations not placed for %s", jobID)
		for _, eval := range evals {
			msg += fmt.Sprintf("\n %s - %s", eval.Status, eval.StatusDescription)
		}

		require.Fail(t, msg, "full evals: %v", pretty.Sprint(evals))
	})

	require.NoError(t, err) // we only care about the last error

	return allocs
}

// WaitForAllocRunning blocks until the given allocation's client status is
// "running" or fails the test.
func WaitForAllocRunning(t *testing.T, nomadClient *api.Client, allocID string) {
	t.Helper()

	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(time.Millisecond * 100)
		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
		if err != nil {
			return false, err
		}

		return alloc.ClientStatus == structs.AllocClientStatusRunning, fmt.Errorf("expected status running, but was: %s\n%v", alloc.ClientStatus, pretty.Sprint(alloc))
	}, func(err error) {
		require.NoError(t, err, "failed to wait on alloc")
	})
}

// WaitForAllocTaskRunning blocks until the named task in the given allocation
// reports the "running" state or fails the test.
func WaitForAllocTaskRunning(t *testing.T, nomadClient *api.Client, allocID, task string) {
	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(time.Millisecond * 100)
		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
		if err != nil {
			return false, err
		}

		state := "n/a"
		if task := alloc.TaskStates[task]; task != nil {
			state = task.State
		}
		return state == structs.AllocClientStatusRunning, fmt.Errorf("expected status running, but was: %s", state)
	}, func(err error) {
		t.Fatalf("failed to wait on alloc: %v", err)
	})
}

// WaitForAllocsRunning waits for every allocation in allocIDs to be running.
func WaitForAllocsRunning(t *testing.T, nomadClient *api.Client, allocIDs []string) {
	for _, allocID := range allocIDs {
		WaitForAllocRunning(t, nomadClient, allocID)
	}
}

// WaitForAllocsNotPending waits for every allocation in allocIDs to leave the
// "pending" client status.
func WaitForAllocsNotPending(t *testing.T, nomadClient *api.Client, allocIDs []string) {
	for _, allocID := range allocIDs {
		WaitForAllocNotPending(t, nomadClient, allocID)
	}
}

// WaitForAllocNotPending blocks until the given allocation's client status is
// no longer "pending" or fails the test.
func WaitForAllocNotPending(t *testing.T, nomadClient *api.Client, allocID string) {
	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(time.Millisecond * 100)
		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
		if err != nil {
			return false, err
		}

		return alloc.ClientStatus != structs.AllocClientStatusPending, fmt.Errorf("expected status not pending, but was: %s", alloc.ClientStatus)
	}, func(err error) {
		require.NoError(t, err, "failed to wait on alloc")
	})
}
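// exampleWaitForRunning is an illustrative sketch, not part of the upstream
// package: it shows the usual pairing of the registration and wait helpers,
// where a test registers a job and then blocks until every allocation has left
// "pending" and reports "running". The job file path and job ID are
// hypothetical placeholders.
func exampleWaitForRunning(t *testing.T, nomadClient *api.Client) {
	allocs := RegisterAndWaitForAllocs(t, nomadClient, "input/example.nomad", "example-job", "")
	ids := AllocIDsFromAllocationListStubs(allocs)

	WaitForAllocsNotPending(t, nomadClient, ids)
	WaitForAllocsRunning(t, nomadClient, ids)
}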
// WaitForJobStopped stops a job and waits for all of its allocs to terminate.
func WaitForJobStopped(t *testing.T, nomadClient *api.Client, job string) {
	allocs, _, err := nomadClient.Jobs().Allocations(job, true, nil)
	require.NoError(t, err, "error getting allocations for job %q", job)
	ids := AllocIDsFromAllocationListStubs(allocs)
	_, _, err = nomadClient.Jobs().Deregister(job, true, nil)
	require.NoError(t, err, "error deregistering job %q", job)
	for _, id := range ids {
		WaitForAllocStopped(t, nomadClient, id)
	}
}

// WaitForAllocStopped blocks until the given allocation reaches a terminal
// client status (complete, failed, or lost) or fails the test.
func WaitForAllocStopped(t *testing.T, nomadClient *api.Client, allocID string) {
	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(time.Millisecond * 100)
		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
		if err != nil {
			return false, err
		}
		switch alloc.ClientStatus {
		case structs.AllocClientStatusComplete:
			return true, nil
		case structs.AllocClientStatusFailed:
			return true, nil
		case structs.AllocClientStatusLost:
			return true, nil
		default:
			return false, fmt.Errorf("expected stopped alloc, but was: %s",
				alloc.ClientStatus)
		}
	}, func(err error) {
		require.NoError(t, err, "failed to wait on alloc")
	})
}

// AllocIDsFromAllocationListStubs returns the IDs of the given allocation stubs.
func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string {
	allocIDs := make([]string, 0, len(allocs))
	for _, alloc := range allocs {
		allocIDs = append(allocIDs, alloc.ID)
	}
	return allocIDs
}

// DeploymentsForJob returns all deployments that belong to the given job ID.
func DeploymentsForJob(t *testing.T, nomadClient *api.Client, jobID string) []*api.Deployment {
	ds, _, err := nomadClient.Deployments().List(nil)
	require.NoError(t, err)

	out := []*api.Deployment{}
	for _, d := range ds {
		if d.JobID == jobID {
			out = append(out, d)
		}
	}

	return out
}

// WaitForDeployment blocks until the given deployment reaches the expected
// status and status description, or fails the test.
func WaitForDeployment(t *testing.T, nomadClient *api.Client, deployID string, status string, statusDesc string) {
	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(time.Millisecond * 100)
		deploy, _, err := nomadClient.Deployments().Info(deployID, nil)
		if err != nil {
			return false, err
		}

		if deploy.Status == status && deploy.StatusDescription == statusDesc {
			return true, nil
		}
		return false, fmt.Errorf("expected status %s \"%s\", but got: %s \"%s\"",
			status,
			statusDesc,
			deploy.Status,
			deploy.StatusDescription,
		)

	}, func(err error) {
		require.NoError(t, err, "failed to wait on deployment")
	})
}
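// exampleWaitForDeployment is an illustrative sketch, not part of the upstream
// package: it shows how the deployment helpers above can be driven to confirm
// that a job's deployment finished. The job ID and the expected status and
// description strings are assumptions; the exact values a test should expect
// depend on the job's update stanza and the Nomad version.
func exampleWaitForDeployment(t *testing.T, nomadClient *api.Client) {
	deploys := DeploymentsForJob(t, nomadClient, "example-job")
	require.NotEmpty(t, deploys, "expected at least one deployment for job")

	WaitForDeployment(t, nomadClient, deploys[0].ID,
		"successful", "Deployment completed successfully")
}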
// CheckServicesPassing scans for passing agent checks via the given agent API
// client.
//
// Deprecated: not useful in e2e, where more than one node exists and Nomad jobs
// are placed non-deterministically. The Consul agentAPI only knows about what
// is registered on its node, and cannot be used to query for cluster wide state.
func CheckServicesPassing(t *testing.T, agentAPI *consulapi.Agent, allocIDs []string) {
	failing := map[string]*consulapi.AgentCheck{}
	for i := 0; i < 60; i++ {
		checks, err := agentAPI.Checks()
		require.NoError(t, err)

		// Filter out checks for other services
		for cid, check := range checks {
			found := false
			for _, allocID := range allocIDs {
				if strings.Contains(check.ServiceID, allocID) {
					found = true
					break
				}
			}

			if !found {
				delete(checks, cid)
			}
		}

		// Ensure checks are all passing
		failing = map[string]*consulapi.AgentCheck{}
		for _, check := range checks {
			if check.Status != "passing" {
				failing[check.CheckID] = check
				break
			}
		}

		if len(failing) == 0 {
			break
		}

		t.Logf("still %d checks not passing", len(failing))

		time.Sleep(time.Second)
	}
	require.Len(t, failing, 0, pretty.Sprint(failing))
}
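// exampleStopAndCleanup is an illustrative sketch, not part of the upstream
// package: it shows a typical teardown using the helpers above, deregistering
// a job and confirming none of its allocations are still running. The job ID
// is a hypothetical placeholder.
func exampleStopAndCleanup(t *testing.T, nomadClient *api.Client) {
	// Deregister the job and wait for all of its allocations to terminate.
	WaitForJobStopped(t, nomadClient, "example-job")

	// Double-check that nothing is left in the "running" client status.
	allocs, _, err := nomadClient.Jobs().Allocations("example-job", true, nil)
	require.NoError(t, err)
	for _, alloc := range allocs {
		require.NotEqual(t, structs.AllocClientStatusRunning, alloc.ClientStatus)
	}
}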