github.com/hernad/nomad@v1.6.112/testutil/wait.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package testutil
     5  
     6  import (
     7  	"fmt"
     8  	"os"
     9  	"runtime"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/google/go-cmp/cmp"
    14  	"github.com/kr/pretty"
    15  	"github.com/shoenig/test/must"
    16  	"github.com/shoenig/test/wait"
    17  
    18  	"github.com/hernad/nomad/nomad/structs"
    19  )
    20  
    21  type testFn func() (bool, error)
    22  type errorFn func(error)
    23  
    24  func Wait(t *testing.T, test testFn) {
    25  	t.Helper()
    26  	retries := 500 * TestMultiplier()
    27  	warn := int64(float64(retries) * 0.75)
    28  	for tries := retries; tries > 0; {
    29  		time.Sleep(10 * time.Millisecond)
    30  		tries--
    31  
    32  		success, err := test()
    33  		if success {
    34  			return
    35  		}
    36  
    37  		switch tries {
    38  		case 0:
    39  			if err == nil {
    40  				t.Fatalf("timeout waiting for test function to succeed (you should probably return a helpful error instead of nil!)")
    41  			} else {
    42  				t.Fatalf("timeout: %v", err)
    43  			}
    44  		case warn:
    45  			pc, _, _, _ := runtime.Caller(1)
    46  			f := runtime.FuncForPC(pc)
    47  			t.Logf("%d/%d retries reached for %s (err=%v)", warn, retries, f.Name(), err)
    48  		}
    49  
    50  	}
    51  }
    52  
    53  func WaitForResult(test testFn, error errorFn) {
    54  	WaitForResultRetries(500*TestMultiplier(), test, error)
    55  }
    56  
    57  func WaitForResultRetries(retries int64, test testFn, error errorFn) {
    58  	for retries > 0 {
    59  		time.Sleep(10 * time.Millisecond)
    60  		retries--
    61  
    62  		success, err := test()
    63  		if success {
    64  			return
    65  		}
    66  
    67  		if retries == 0 {
    68  			error(err)
    69  		}
    70  	}
    71  }
    72  
    73  // WaitForResultUntil waits the duration for the test to pass.
    74  // Otherwise error is called after the deadline expires.
    75  func WaitForResultUntil(until time.Duration, test testFn, errorFunc errorFn) {
    76  	var success bool
    77  	var err error
    78  	deadline := time.Now().Add(until)
    79  	for time.Now().Before(deadline) {
    80  		success, err = test()
    81  		if success {
    82  			return
    83  		}
    84  		// Sleep some arbitrary fraction of the deadline
    85  		time.Sleep(until / 30)
    86  	}
    87  	errorFunc(err)
    88  }
    89  
    90  // AssertUntil asserts the test function passes throughout the given duration.
    91  // Otherwise error is called on failure.
    92  func AssertUntil(until time.Duration, test testFn, error errorFn) {
    93  	deadline := time.Now().Add(until)
    94  	for time.Now().Before(deadline) {
    95  		success, err := test()
    96  		if !success {
    97  			error(err)
    98  			return
    99  		}
   100  		// Sleep some arbitrary fraction of the deadline
   101  		time.Sleep(until / 30)
   102  	}
   103  }
   104  
   105  // TestMultiplier returns a multiplier for retries and waits given environment
   106  // the tests are being run under.
   107  func TestMultiplier() int64 {
   108  	if IsCI() {
   109  		return 4
   110  	}
   111  
   112  	return 1
   113  }
   114  
   115  // Timeout takes the desired timeout and increases it if running in Travis
   116  func Timeout(original time.Duration) time.Duration {
   117  	return original * time.Duration(TestMultiplier())
   118  }
   119  
   120  func IsCI() bool {
   121  	_, ok := os.LookupEnv("CI")
   122  	return ok
   123  }
   124  
   125  func IsTravis() bool {
   126  	_, ok := os.LookupEnv("TRAVIS")
   127  	return ok
   128  }
   129  
   130  func IsAppVeyor() bool {
   131  	_, ok := os.LookupEnv("APPVEYOR")
   132  	return ok
   133  }
   134  
   135  type rpcFn func(string, interface{}, interface{}) error
   136  
   137  // WaitForLeader blocks until a leader is elected.
   138  func WaitForLeader(t testing.TB, rpc rpcFn) {
   139  	t.Helper()
   140  	WaitForResult(func() (bool, error) {
   141  		args := &structs.GenericRequest{}
   142  		var leader string
   143  		err := rpc("Status.Leader", args, &leader)
   144  		return leader != "", err
   145  	}, func(err error) {
   146  		t.Fatalf("failed to find leader: %v", err)
   147  	})
   148  }
   149  
   150  // WaitForLeaders blocks until each rpcs knows the leader.
   151  func WaitForLeaders(t testing.TB, rpcs ...rpcFn) string {
   152  	t.Helper()
   153  
   154  	var leader string
   155  	for i := 0; i < len(rpcs); i++ {
   156  		ok := func() (bool, error) {
   157  			leader = ""
   158  			args := &structs.GenericRequest{}
   159  			err := rpcs[i]("Status.Leader", args, &leader)
   160  			return leader != "", err
   161  		}
   162  		must.Wait(t, wait.InitialSuccess(
   163  			wait.TestFunc(ok),
   164  			wait.Timeout(10*time.Second),
   165  			wait.Gap(1*time.Second),
   166  		))
   167  	}
   168  
   169  	return leader
   170  }
   171  
   172  // WaitForClient blocks until the client can be found
   173  func WaitForClient(t testing.TB, rpc rpcFn, nodeID string, region string) {
   174  	t.Helper()
   175  	WaitForClientStatus(t, rpc, nodeID, region, structs.NodeStatusReady)
   176  }
   177  
   178  // WaitForClientStatus blocks until the client is in the expected status.
   179  func WaitForClientStatus(t testing.TB, rpc rpcFn, nodeID string, region string, status string) {
   180  	t.Helper()
   181  
   182  	if region == "" {
   183  		region = "global"
   184  	}
   185  	WaitForResult(func() (bool, error) {
   186  		req := structs.NodeSpecificRequest{
   187  			NodeID:       nodeID,
   188  			QueryOptions: structs.QueryOptions{Region: region},
   189  		}
   190  		var out structs.SingleNodeResponse
   191  
   192  		err := rpc("Node.GetNode", &req, &out)
   193  		if err != nil {
   194  			return false, err
   195  		}
   196  		if out.Node == nil {
   197  			return false, fmt.Errorf("node not found")
   198  		}
   199  		if out.Node.Status != status {
   200  			return false, fmt.Errorf("node is %s, not %s", out.Node.Status, status)
   201  		}
   202  		return true, nil
   203  	}, func(err error) {
   204  		t.Fatalf("failed to wait for node staus: %v", err)
   205  	})
   206  
   207  	t.Logf("[TEST] Client for test %s %s, id: %s, region: %s", t.Name(), status, nodeID, region)
   208  }
   209  
   210  // WaitForVotingMembers blocks until autopilot promotes all server peers
   211  // to be voting members.
   212  //
   213  // Useful for tests that change cluster topology (e.g. kill a node)
   214  // that should wait until cluster is stable.
   215  func WaitForVotingMembers(t testing.TB, rpc rpcFn, nPeers int) {
   216  	WaitForResult(func() (bool, error) {
   217  		args := &structs.GenericRequest{}
   218  		args.AllowStale = true
   219  		args.Region = "global"
   220  		args.Namespace = structs.DefaultNamespace
   221  		resp := structs.RaftConfigurationResponse{}
   222  		err := rpc("Operator.RaftGetConfiguration", args, &resp)
   223  		if err != nil {
   224  			return false, fmt.Errorf("failed to query raft: %v", err)
   225  		}
   226  
   227  		if len(resp.Servers) != nPeers {
   228  			return false, fmt.Errorf("expected %d peers found %d", nPeers, len(resp.Servers))
   229  		}
   230  
   231  		for _, s := range resp.Servers {
   232  			if !s.Voter {
   233  				return false, fmt.Errorf("found nonvoting server: %v", s)
   234  			}
   235  		}
   236  
   237  		return true, nil
   238  	}, func(err error) {
   239  		t.Fatalf("failed to wait until voting members: %v", err)
   240  	})
   241  }
   242  
   243  // RegisterJobWithToken registers a job and uses the job's Region and Namespace.
   244  func RegisterJobWithToken(t testing.TB, rpc rpcFn, job *structs.Job, token string) {
   245  	t.Helper()
   246  	WaitForResult(func() (bool, error) {
   247  		args := &structs.JobRegisterRequest{}
   248  		args.Job = job
   249  		args.WriteRequest.Region = job.Region
   250  		args.AuthToken = token
   251  		args.Namespace = job.Namespace
   252  		var jobResp structs.JobRegisterResponse
   253  		err := rpc("Job.Register", args, &jobResp)
   254  		return err == nil, fmt.Errorf("Job.Register error: %v", err)
   255  	}, func(err error) {
   256  		t.Fatalf("error registering job: %v", err)
   257  	})
   258  
   259  	t.Logf("Job %q registered", job.ID)
   260  }
   261  
   262  func RegisterJob(t testing.TB, rpc rpcFn, job *structs.Job) {
   263  	RegisterJobWithToken(t, rpc, job, "")
   264  }
   265  
   266  func WaitForRunningWithToken(t testing.TB, rpc rpcFn, job *structs.Job, token string) []*structs.AllocListStub {
   267  	RegisterJobWithToken(t, rpc, job, token)
   268  
   269  	var resp structs.JobAllocationsResponse
   270  
   271  	// This can be quite slow if the job has expensive setup such as
   272  	// downloading large artifacts or creating a chroot.
   273  	WaitForResultRetries(2000*TestMultiplier(), func() (bool, error) {
   274  		args := &structs.JobSpecificRequest{}
   275  		args.JobID = job.ID
   276  		args.QueryOptions.Region = job.Region
   277  		args.AuthToken = token
   278  		args.Namespace = job.Namespace
   279  		err := rpc("Job.Allocations", args, &resp)
   280  		if err != nil {
   281  			return false, fmt.Errorf("Job.Allocations error: %v", err)
   282  		}
   283  
   284  		if len(resp.Allocations) == 0 {
   285  			evals := structs.JobEvaluationsResponse{}
   286  			must.NoError(t, rpc("Job.Evaluations", args, &evals), must.Sprintf("error looking up evals"))
   287  			return false, fmt.Errorf("0 allocations; evals: %s", pretty.Sprint(evals.Evaluations))
   288  		}
   289  
   290  		for _, alloc := range resp.Allocations {
   291  			if alloc.ClientStatus == structs.AllocClientStatusPending {
   292  				return false, fmt.Errorf("alloc not running: id=%v tg=%v status=%v",
   293  					alloc.ID, alloc.TaskGroup, alloc.ClientStatus)
   294  			}
   295  		}
   296  
   297  		return true, nil
   298  	}, func(err error) {
   299  		must.NoError(t, err)
   300  	})
   301  
   302  	return resp.Allocations
   303  }
   304  
   305  // WaitForRunning runs a job and blocks until all allocs are out of pending.
   306  func WaitForRunning(t testing.TB, rpc rpcFn, job *structs.Job) []*structs.AllocListStub {
   307  	return WaitForRunningWithToken(t, rpc, job, "")
   308  }
   309  
   310  // WaitforJobAllocStatus blocks until the ClientStatus of allocations for a job
   311  // match the expected map of <ClientStatus>: <count>.
   312  func WaitForJobAllocStatus(t testing.TB, rpc rpcFn, job *structs.Job, allocStatus map[string]int) {
   313  	t.Helper()
   314  	WaitForJobAllocStatusWithToken(t, rpc, job, allocStatus, "")
   315  }
   316  
   317  // WaitForJobAllocStatusWithToken behaves the same way as WaitForJobAllocStatus
   318  // but is used for clusters with ACL enabled.
   319  func WaitForJobAllocStatusWithToken(t testing.TB, rpc rpcFn, job *structs.Job, allocStatus map[string]int, token string) []*structs.AllocListStub {
   320  	t.Helper()
   321  
   322  	var allocs []*structs.AllocListStub
   323  	WaitForResultRetries(2000*TestMultiplier(), func() (bool, error) {
   324  		args := &structs.JobSpecificRequest{
   325  			JobID: job.ID,
   326  			QueryOptions: structs.QueryOptions{
   327  				AuthToken: token,
   328  				Namespace: job.Namespace,
   329  				Region:    job.Region,
   330  			},
   331  		}
   332  
   333  		var resp structs.JobAllocationsResponse
   334  		err := rpc("Job.Allocations", args, &resp)
   335  		if err != nil {
   336  			return false, fmt.Errorf("Job.Allocations error: %v", err)
   337  		}
   338  
   339  		if len(resp.Allocations) == 0 {
   340  			evals := structs.JobEvaluationsResponse{}
   341  			must.NoError(t, rpc("Job.Evaluations", args, &evals), must.Sprintf("error looking up evals"))
   342  			return false, fmt.Errorf("0 allocations; evals: %s", pretty.Sprint(evals.Evaluations))
   343  		}
   344  
   345  		allocs = resp.Allocations
   346  
   347  		got := map[string]int{}
   348  		for _, alloc := range resp.Allocations {
   349  			got[alloc.ClientStatus]++
   350  		}
   351  		if diff := cmp.Diff(allocStatus, got); diff != "" {
   352  			return false, fmt.Errorf("alloc status mismatch (-want +got):\n%s", diff)
   353  		}
   354  		return true, nil
   355  	}, func(err error) {
   356  		must.NoError(t, err)
   357  	})
   358  
   359  	return allocs
   360  }
   361  
   362  // WaitForFiles blocks until all the files in the slice are present
   363  func WaitForFiles(t testing.TB, files []string) {
   364  	WaitForResult(func() (bool, error) {
   365  		return FilesExist(files)
   366  	}, func(err error) {
   367  		t.Fatalf("missing expected files: %v", err)
   368  	})
   369  }
   370  
   371  // WaitForFilesUntil blocks until duration or all the files in the slice are present
   372  func WaitForFilesUntil(t testing.TB, files []string, until time.Duration) {
   373  	WaitForResultUntil(until, func() (bool, error) {
   374  		return FilesExist(files)
   375  	}, func(err error) {
   376  		t.Fatalf("missing expected files: %v", err)
   377  	})
   378  }
   379  
   380  // FilesExist verifies all files in the slice are present
   381  func FilesExist(files []string) (bool, error) {
   382  	for _, f := range files {
   383  		if _, err := os.Stat(f); os.IsNotExist(err) {
   384  			return false, fmt.Errorf("expected file not found: %v", f)
   385  		}
   386  	}
   387  	return true, nil
   388  }