github.com/hernad/nomad@v1.6.112/testutil/wait.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package testutil 5 6 import ( 7 "fmt" 8 "os" 9 "runtime" 10 "testing" 11 "time" 12 13 "github.com/google/go-cmp/cmp" 14 "github.com/kr/pretty" 15 "github.com/shoenig/test/must" 16 "github.com/shoenig/test/wait" 17 18 "github.com/hernad/nomad/nomad/structs" 19 ) 20 21 type testFn func() (bool, error) 22 type errorFn func(error) 23 24 func Wait(t *testing.T, test testFn) { 25 t.Helper() 26 retries := 500 * TestMultiplier() 27 warn := int64(float64(retries) * 0.75) 28 for tries := retries; tries > 0; { 29 time.Sleep(10 * time.Millisecond) 30 tries-- 31 32 success, err := test() 33 if success { 34 return 35 } 36 37 switch tries { 38 case 0: 39 if err == nil { 40 t.Fatalf("timeout waiting for test function to succeed (you should probably return a helpful error instead of nil!)") 41 } else { 42 t.Fatalf("timeout: %v", err) 43 } 44 case warn: 45 pc, _, _, _ := runtime.Caller(1) 46 f := runtime.FuncForPC(pc) 47 t.Logf("%d/%d retries reached for %s (err=%v)", warn, retries, f.Name(), err) 48 } 49 50 } 51 } 52 53 func WaitForResult(test testFn, error errorFn) { 54 WaitForResultRetries(500*TestMultiplier(), test, error) 55 } 56 57 func WaitForResultRetries(retries int64, test testFn, error errorFn) { 58 for retries > 0 { 59 time.Sleep(10 * time.Millisecond) 60 retries-- 61 62 success, err := test() 63 if success { 64 return 65 } 66 67 if retries == 0 { 68 error(err) 69 } 70 } 71 } 72 73 // WaitForResultUntil waits the duration for the test to pass. 74 // Otherwise error is called after the deadline expires. 
75 func WaitForResultUntil(until time.Duration, test testFn, errorFunc errorFn) { 76 var success bool 77 var err error 78 deadline := time.Now().Add(until) 79 for time.Now().Before(deadline) { 80 success, err = test() 81 if success { 82 return 83 } 84 // Sleep some arbitrary fraction of the deadline 85 time.Sleep(until / 30) 86 } 87 errorFunc(err) 88 } 89 90 // AssertUntil asserts the test function passes throughout the given duration. 91 // Otherwise error is called on failure. 92 func AssertUntil(until time.Duration, test testFn, error errorFn) { 93 deadline := time.Now().Add(until) 94 for time.Now().Before(deadline) { 95 success, err := test() 96 if !success { 97 error(err) 98 return 99 } 100 // Sleep some arbitrary fraction of the deadline 101 time.Sleep(until / 30) 102 } 103 } 104 105 // TestMultiplier returns a multiplier for retries and waits given environment 106 // the tests are being run under. 107 func TestMultiplier() int64 { 108 if IsCI() { 109 return 4 110 } 111 112 return 1 113 } 114 115 // Timeout takes the desired timeout and increases it if running in Travis 116 func Timeout(original time.Duration) time.Duration { 117 return original * time.Duration(TestMultiplier()) 118 } 119 120 func IsCI() bool { 121 _, ok := os.LookupEnv("CI") 122 return ok 123 } 124 125 func IsTravis() bool { 126 _, ok := os.LookupEnv("TRAVIS") 127 return ok 128 } 129 130 func IsAppVeyor() bool { 131 _, ok := os.LookupEnv("APPVEYOR") 132 return ok 133 } 134 135 type rpcFn func(string, interface{}, interface{}) error 136 137 // WaitForLeader blocks until a leader is elected. 138 func WaitForLeader(t testing.TB, rpc rpcFn) { 139 t.Helper() 140 WaitForResult(func() (bool, error) { 141 args := &structs.GenericRequest{} 142 var leader string 143 err := rpc("Status.Leader", args, &leader) 144 return leader != "", err 145 }, func(err error) { 146 t.Fatalf("failed to find leader: %v", err) 147 }) 148 } 149 150 // WaitForLeaders blocks until each rpcs knows the leader. 
151 func WaitForLeaders(t testing.TB, rpcs ...rpcFn) string { 152 t.Helper() 153 154 var leader string 155 for i := 0; i < len(rpcs); i++ { 156 ok := func() (bool, error) { 157 leader = "" 158 args := &structs.GenericRequest{} 159 err := rpcs[i]("Status.Leader", args, &leader) 160 return leader != "", err 161 } 162 must.Wait(t, wait.InitialSuccess( 163 wait.TestFunc(ok), 164 wait.Timeout(10*time.Second), 165 wait.Gap(1*time.Second), 166 )) 167 } 168 169 return leader 170 } 171 172 // WaitForClient blocks until the client can be found 173 func WaitForClient(t testing.TB, rpc rpcFn, nodeID string, region string) { 174 t.Helper() 175 WaitForClientStatus(t, rpc, nodeID, region, structs.NodeStatusReady) 176 } 177 178 // WaitForClientStatus blocks until the client is in the expected status. 179 func WaitForClientStatus(t testing.TB, rpc rpcFn, nodeID string, region string, status string) { 180 t.Helper() 181 182 if region == "" { 183 region = "global" 184 } 185 WaitForResult(func() (bool, error) { 186 req := structs.NodeSpecificRequest{ 187 NodeID: nodeID, 188 QueryOptions: structs.QueryOptions{Region: region}, 189 } 190 var out structs.SingleNodeResponse 191 192 err := rpc("Node.GetNode", &req, &out) 193 if err != nil { 194 return false, err 195 } 196 if out.Node == nil { 197 return false, fmt.Errorf("node not found") 198 } 199 if out.Node.Status != status { 200 return false, fmt.Errorf("node is %s, not %s", out.Node.Status, status) 201 } 202 return true, nil 203 }, func(err error) { 204 t.Fatalf("failed to wait for node staus: %v", err) 205 }) 206 207 t.Logf("[TEST] Client for test %s %s, id: %s, region: %s", t.Name(), status, nodeID, region) 208 } 209 210 // WaitForVotingMembers blocks until autopilot promotes all server peers 211 // to be voting members. 212 // 213 // Useful for tests that change cluster topology (e.g. kill a node) 214 // that should wait until cluster is stable. 
215 func WaitForVotingMembers(t testing.TB, rpc rpcFn, nPeers int) { 216 WaitForResult(func() (bool, error) { 217 args := &structs.GenericRequest{} 218 args.AllowStale = true 219 args.Region = "global" 220 args.Namespace = structs.DefaultNamespace 221 resp := structs.RaftConfigurationResponse{} 222 err := rpc("Operator.RaftGetConfiguration", args, &resp) 223 if err != nil { 224 return false, fmt.Errorf("failed to query raft: %v", err) 225 } 226 227 if len(resp.Servers) != nPeers { 228 return false, fmt.Errorf("expected %d peers found %d", nPeers, len(resp.Servers)) 229 } 230 231 for _, s := range resp.Servers { 232 if !s.Voter { 233 return false, fmt.Errorf("found nonvoting server: %v", s) 234 } 235 } 236 237 return true, nil 238 }, func(err error) { 239 t.Fatalf("failed to wait until voting members: %v", err) 240 }) 241 } 242 243 // RegisterJobWithToken registers a job and uses the job's Region and Namespace. 244 func RegisterJobWithToken(t testing.TB, rpc rpcFn, job *structs.Job, token string) { 245 t.Helper() 246 WaitForResult(func() (bool, error) { 247 args := &structs.JobRegisterRequest{} 248 args.Job = job 249 args.WriteRequest.Region = job.Region 250 args.AuthToken = token 251 args.Namespace = job.Namespace 252 var jobResp structs.JobRegisterResponse 253 err := rpc("Job.Register", args, &jobResp) 254 return err == nil, fmt.Errorf("Job.Register error: %v", err) 255 }, func(err error) { 256 t.Fatalf("error registering job: %v", err) 257 }) 258 259 t.Logf("Job %q registered", job.ID) 260 } 261 262 func RegisterJob(t testing.TB, rpc rpcFn, job *structs.Job) { 263 RegisterJobWithToken(t, rpc, job, "") 264 } 265 266 func WaitForRunningWithToken(t testing.TB, rpc rpcFn, job *structs.Job, token string) []*structs.AllocListStub { 267 RegisterJobWithToken(t, rpc, job, token) 268 269 var resp structs.JobAllocationsResponse 270 271 // This can be quite slow if the job has expensive setup such as 272 // downloading large artifacts or creating a chroot. 
273 WaitForResultRetries(2000*TestMultiplier(), func() (bool, error) { 274 args := &structs.JobSpecificRequest{} 275 args.JobID = job.ID 276 args.QueryOptions.Region = job.Region 277 args.AuthToken = token 278 args.Namespace = job.Namespace 279 err := rpc("Job.Allocations", args, &resp) 280 if err != nil { 281 return false, fmt.Errorf("Job.Allocations error: %v", err) 282 } 283 284 if len(resp.Allocations) == 0 { 285 evals := structs.JobEvaluationsResponse{} 286 must.NoError(t, rpc("Job.Evaluations", args, &evals), must.Sprintf("error looking up evals")) 287 return false, fmt.Errorf("0 allocations; evals: %s", pretty.Sprint(evals.Evaluations)) 288 } 289 290 for _, alloc := range resp.Allocations { 291 if alloc.ClientStatus == structs.AllocClientStatusPending { 292 return false, fmt.Errorf("alloc not running: id=%v tg=%v status=%v", 293 alloc.ID, alloc.TaskGroup, alloc.ClientStatus) 294 } 295 } 296 297 return true, nil 298 }, func(err error) { 299 must.NoError(t, err) 300 }) 301 302 return resp.Allocations 303 } 304 305 // WaitForRunning runs a job and blocks until all allocs are out of pending. 306 func WaitForRunning(t testing.TB, rpc rpcFn, job *structs.Job) []*structs.AllocListStub { 307 return WaitForRunningWithToken(t, rpc, job, "") 308 } 309 310 // WaitforJobAllocStatus blocks until the ClientStatus of allocations for a job 311 // match the expected map of <ClientStatus>: <count>. 312 func WaitForJobAllocStatus(t testing.TB, rpc rpcFn, job *structs.Job, allocStatus map[string]int) { 313 t.Helper() 314 WaitForJobAllocStatusWithToken(t, rpc, job, allocStatus, "") 315 } 316 317 // WaitForJobAllocStatusWithToken behaves the same way as WaitForJobAllocStatus 318 // but is used for clusters with ACL enabled. 
319 func WaitForJobAllocStatusWithToken(t testing.TB, rpc rpcFn, job *structs.Job, allocStatus map[string]int, token string) []*structs.AllocListStub { 320 t.Helper() 321 322 var allocs []*structs.AllocListStub 323 WaitForResultRetries(2000*TestMultiplier(), func() (bool, error) { 324 args := &structs.JobSpecificRequest{ 325 JobID: job.ID, 326 QueryOptions: structs.QueryOptions{ 327 AuthToken: token, 328 Namespace: job.Namespace, 329 Region: job.Region, 330 }, 331 } 332 333 var resp structs.JobAllocationsResponse 334 err := rpc("Job.Allocations", args, &resp) 335 if err != nil { 336 return false, fmt.Errorf("Job.Allocations error: %v", err) 337 } 338 339 if len(resp.Allocations) == 0 { 340 evals := structs.JobEvaluationsResponse{} 341 must.NoError(t, rpc("Job.Evaluations", args, &evals), must.Sprintf("error looking up evals")) 342 return false, fmt.Errorf("0 allocations; evals: %s", pretty.Sprint(evals.Evaluations)) 343 } 344 345 allocs = resp.Allocations 346 347 got := map[string]int{} 348 for _, alloc := range resp.Allocations { 349 got[alloc.ClientStatus]++ 350 } 351 if diff := cmp.Diff(allocStatus, got); diff != "" { 352 return false, fmt.Errorf("alloc status mismatch (-want +got):\n%s", diff) 353 } 354 return true, nil 355 }, func(err error) { 356 must.NoError(t, err) 357 }) 358 359 return allocs 360 } 361 362 // WaitForFiles blocks until all the files in the slice are present 363 func WaitForFiles(t testing.TB, files []string) { 364 WaitForResult(func() (bool, error) { 365 return FilesExist(files) 366 }, func(err error) { 367 t.Fatalf("missing expected files: %v", err) 368 }) 369 } 370 371 // WaitForFilesUntil blocks until duration or all the files in the slice are present 372 func WaitForFilesUntil(t testing.TB, files []string, until time.Duration) { 373 WaitForResultUntil(until, func() (bool, error) { 374 return FilesExist(files) 375 }, func(err error) { 376 t.Fatalf("missing expected files: %v", err) 377 }) 378 } 379 380 // FilesExist verifies all files in 
// the slice are present
//
// It returns (true, nil) only when os.Stat succeeds for every path, and stops
// at the first path that cannot be confirmed. A missing path yields an
// "expected file not found" error. Any other stat failure (for example a
// parent component that is not a directory) is also reported as an error
// rather than being counted as an existing file.
func FilesExist(files []string) (bool, error) {
	for _, f := range files {
		_, err := os.Stat(f)
		switch {
		case err == nil:
			continue
		case os.IsNotExist(err):
			return false, fmt.Errorf("expected file not found: %v", f)
		default:
			// Existence could not be established; do not claim the file
			// is there.
			return false, fmt.Errorf("unable to stat expected file %v: %v", f, err)
		}
	}
	return true, nil
}