github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/e2eutil/utils.go

github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/e2eutil/utils.go (about)

     1  package e2eutil
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"os"
     7  	"testing"
     8  	"text/template"
     9  	"time"
    10  
    11  	api "github.com/hashicorp/nomad/api"
    12  	"github.com/hashicorp/nomad/helper/pointer"
    13  	"github.com/hashicorp/nomad/jobspec2"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  	"github.com/hashicorp/nomad/testutil"
    16  	"github.com/kr/pretty"
    17  	"github.com/stretchr/testify/require"
    18  )
    19  
    20  // retries is used to control how many times to retry checking if the cluster has a leader yet
    21  const retries = 500
    22  
    23  func WaitForLeader(t *testing.T, nomadClient *api.Client) {
    24  	statusAPI := nomadClient.Status()
    25  
    26  	testutil.WaitForResultRetries(retries, func() (bool, error) {
    27  		leader, err := statusAPI.Leader()
    28  		return leader != "", err
    29  	}, func(err error) {
    30  		require.NoError(t, err, "failed to find leader")
    31  	})
    32  }
    33  
    34  // WaitForNodesReady waits until at least `nodes` number of nodes are ready or
    35  // fails the test.
    36  func WaitForNodesReady(t *testing.T, nomadClient *api.Client, nodes int) {
    37  	nodesAPI := nomadClient.Nodes()
    38  
    39  	testutil.WaitForResultRetries(retries, func() (bool, error) {
    40  		defer time.Sleep(time.Millisecond * 100)
    41  		nodesList, _, err := nodesAPI.List(nil)
    42  		if err != nil {
    43  			return false, fmt.Errorf("error listing nodes: %v", err)
    44  		}
    45  
    46  		eligibleNodes := 0
    47  		for _, node := range nodesList {
    48  			if node.Status == "ready" {
    49  				eligibleNodes++
    50  			}
    51  		}
    52  
    53  		return eligibleNodes >= nodes, fmt.Errorf("only %d nodes ready (wanted at least %d)", eligibleNodes, nodes)
    54  	}, func(err error) {
    55  		require.NoError(t, err, "failed to get enough ready nodes")
    56  	})
    57  }
    58  
    59  func stringToPtrOrNil(s string) *string {
    60  	if s == "" {
    61  		return nil
    62  	}
    63  	return pointer.Of(s)
    64  }
    65  
    66  func Parse2(t *testing.T, jobFile string) (*api.Job, error) {
    67  	f, err := os.Open(jobFile)
    68  	require.NoError(t, err)
    69  	return jobspec2.Parse(jobFile, f)
    70  }
    71  
    72  func RegisterAllocs(t *testing.T, nomadClient *api.Client, jobFile, jobID, cToken string) []*api.AllocationListStub {
    73  
    74  	// Parse job
    75  	job, err := Parse2(t, jobFile)
    76  	require.NoError(t, err)
    77  
    78  	// Set custom job ID (distinguish among tests)
    79  	job.ID = pointer.Of(jobID)
    80  
    81  	// Set a Consul "operator" token for the job, if provided.
    82  	job.ConsulToken = stringToPtrOrNil(cToken)
    83  
    84  	// Register job
    85  	var idx uint64
    86  	jobs := nomadClient.Jobs()
    87  	testutil.WaitForResult(func() (bool, error) {
    88  		resp, meta, err := jobs.Register(job, nil)
    89  		if err != nil {
    90  			return false, err
    91  		}
    92  		idx = meta.LastIndex
    93  		return resp.EvalID != "", fmt.Errorf("expected EvalID:%s", pretty.Sprint(resp))
    94  	}, func(err error) {
    95  		require.NoError(t, err)
    96  	})
    97  
    98  	allocs, _, err := jobs.Allocations(jobID, false, &api.QueryOptions{WaitIndex: idx})
    99  	require.NoError(t, err)
   100  	return allocs
   101  }
   102  
   103  // RegisterAndWaitForAllocs wraps RegisterAllocs but blocks until Evals
   104  // successfully create Allocs.
   105  func RegisterAndWaitForAllocs(t *testing.T, nomadClient *api.Client, jobFile, jobID, cToken string) []*api.AllocationListStub {
   106  	jobs := nomadClient.Jobs()
   107  
   108  	// Start allocations
   109  	RegisterAllocs(t, nomadClient, jobFile, jobID, cToken)
   110  
   111  	var err error
   112  	allocs := []*api.AllocationListStub{}
   113  	evals := []*api.Evaluation{}
   114  
   115  	// Wrap in retry to wait until placement
   116  	testutil.WaitForResultRetries(retries, func() (bool, error) {
   117  		time.Sleep(time.Second)
   118  
   119  		allocs, _, err = jobs.Allocations(jobID, false, nil)
   120  		if len(allocs) == 0 {
   121  			evals, _, err = nomadClient.Jobs().Evaluations(jobID, nil)
   122  			return false, fmt.Errorf("no allocations for job %v", jobID)
   123  		}
   124  
   125  		return true, nil
   126  	}, func(e error) {
   127  		msg := fmt.Sprintf("allocations not placed for %s", jobID)
   128  		for _, eval := range evals {
   129  			msg += fmt.Sprintf("\n  %s - %s", eval.Status, eval.StatusDescription)
   130  		}
   131  
   132  		require.Fail(t, msg, "full evals: %v", pretty.Sprint(evals))
   133  	})
   134  
   135  	require.NoError(t, err) // we only care about the last error
   136  
   137  	return allocs
   138  }
   139  
   140  func WaitForAllocRunning(t *testing.T, nomadClient *api.Client, allocID string) {
   141  	t.Helper()
   142  
   143  	testutil.WaitForResultRetries(retries, func() (bool, error) {
   144  		time.Sleep(time.Millisecond * 100)
   145  		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
   146  		if err != nil {
   147  			return false, err
   148  		}
   149  
   150  		return alloc.ClientStatus == structs.AllocClientStatusRunning, fmt.Errorf("expected status running, but was: %s\n%v", alloc.ClientStatus, pretty.Sprint(alloc))
   151  	}, func(err error) {
   152  		require.NoError(t, err, "failed to wait on alloc")
   153  	})
   154  }
   155  
   156  func WaitForAllocTaskRunning(t *testing.T, nomadClient *api.Client, allocID, task string) {
   157  	WaitForAllocTaskState(t, nomadClient, allocID, task, structs.TaskStateRunning)
   158  }
   159  
   160  func WaitForAllocTaskComplete(t *testing.T, nomadClient *api.Client, allocID, task string) {
   161  	WaitForAllocTaskState(t, nomadClient, allocID, task, structs.TaskStateDead)
   162  }
   163  
   164  func WaitForAllocTaskState(t *testing.T, nomadClient *api.Client, allocID, task, state string) {
   165  	testutil.WaitForResultRetries(retries, func() (bool, error) {
   166  		time.Sleep(time.Millisecond * 500)
   167  		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
   168  		if err != nil {
   169  			return false, err
   170  		}
   171  		currentState := "n/a"
   172  		if taskState := alloc.TaskStates[task]; taskState != nil {
   173  			currentState = taskState.State
   174  		}
   175  		return currentState == state, fmt.Errorf("expected status %s, but was: %s", state, currentState)
   176  	}, func(err error) {
   177  		t.Fatalf("failed to wait on alloc task: %v", err)
   178  	})
   179  }
   180  
   181  func WaitForAllocsRunning(t *testing.T, nomadClient *api.Client, allocIDs []string) {
   182  	for _, allocID := range allocIDs {
   183  		WaitForAllocRunning(t, nomadClient, allocID)
   184  	}
   185  }
   186  
   187  func WaitForAllocsNotPending(t *testing.T, nomadClient *api.Client, allocIDs []string) {
   188  	for _, allocID := range allocIDs {
   189  		WaitForAllocNotPending(t, nomadClient, allocID)
   190  	}
   191  }
   192  
   193  func WaitForAllocNotPending(t *testing.T, nomadClient *api.Client, allocID string) {
   194  	testutil.WaitForResultRetries(retries, func() (bool, error) {
   195  		time.Sleep(time.Millisecond * 100)
   196  		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
   197  		if err != nil {
   198  			return false, err
   199  		}
   200  
   201  		return alloc.ClientStatus != structs.AllocClientStatusPending, fmt.Errorf("expected status not pending, but was: %s", alloc.ClientStatus)
   202  	}, func(err error) {
   203  		require.NoError(t, err, "failed to wait on alloc")
   204  	})
   205  }
   206  
   207  // WaitForJobStopped stops a job and waits for all of its allocs to terminate.
   208  func WaitForJobStopped(t *testing.T, nomadClient *api.Client, job string) {
   209  	allocs, _, err := nomadClient.Jobs().Allocations(job, true, nil)
   210  	require.NoError(t, err, "error getting allocations for job %q", job)
   211  	ids := AllocIDsFromAllocationListStubs(allocs)
   212  	_, _, err = nomadClient.Jobs().Deregister(job, true, nil)
   213  	require.NoError(t, err, "error deregistering job %q", job)
   214  	for _, id := range ids {
   215  		WaitForAllocStopped(t, nomadClient, id)
   216  	}
   217  }
   218  
   219  func WaitForAllocsStopped(t *testing.T, nomadClient *api.Client, allocIDs []string) {
   220  	for _, allocID := range allocIDs {
   221  		WaitForAllocStopped(t, nomadClient, allocID)
   222  	}
   223  }
   224  
   225  func WaitForAllocStopped(t *testing.T, nomadClient *api.Client, allocID string) {
   226  	testutil.WaitForResultRetries(retries, func() (bool, error) {
   227  		time.Sleep(time.Millisecond * 100)
   228  		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
   229  		if err != nil {
   230  			return false, err
   231  		}
   232  		switch alloc.ClientStatus {
   233  		case structs.AllocClientStatusComplete:
   234  			return true, nil
   235  		case structs.AllocClientStatusFailed:
   236  			return true, nil
   237  		case structs.AllocClientStatusLost:
   238  			return true, nil
   239  		default:
   240  			return false, fmt.Errorf("expected stopped alloc, but was: %s",
   241  				alloc.ClientStatus)
   242  		}
   243  	}, func(err error) {
   244  		require.NoError(t, err, "failed to wait on alloc")
   245  	})
   246  }
   247  
   248  func WaitForAllocStatus(t *testing.T, nomadClient *api.Client, allocID string, status string) {
   249  	testutil.WaitForResultRetries(retries, func() (bool, error) {
   250  		time.Sleep(time.Millisecond * 100)
   251  		alloc, _, err := nomadClient.Allocations().Info(allocID, nil)
   252  		if err != nil {
   253  			return false, err
   254  		}
   255  		switch alloc.ClientStatus {
   256  		case status:
   257  			return true, nil
   258  		default:
   259  			return false, fmt.Errorf("expected %s alloc, but was: %s", status, alloc.ClientStatus)
   260  		}
   261  	}, func(err error) {
   262  		t.Fatalf("failed to wait on alloc: %v", err)
   263  	})
   264  }
   265  
   266  func WaitForAllocsStatus(t *testing.T, nomadClient *api.Client, allocIDs []string, status string) {
   267  	for _, allocID := range allocIDs {
   268  		WaitForAllocStatus(t, nomadClient, allocID, status)
   269  	}
   270  }
   271  
   272  func AllocIDsFromAllocationListStubs(allocs []*api.AllocationListStub) []string {
   273  	allocIDs := make([]string, 0, len(allocs))
   274  	for _, alloc := range allocs {
   275  		allocIDs = append(allocIDs, alloc.ID)
   276  	}
   277  	return allocIDs
   278  }
   279  
   280  func DeploymentsForJob(t *testing.T, nomadClient *api.Client, jobID string) []*api.Deployment {
   281  	ds, _, err := nomadClient.Deployments().List(nil)
   282  	require.NoError(t, err)
   283  
   284  	out := []*api.Deployment{}
   285  	for _, d := range ds {
   286  		if d.JobID == jobID {
   287  			out = append(out, d)
   288  		}
   289  	}
   290  
   291  	return out
   292  }
   293  
   294  func WaitForDeployment(t *testing.T, nomadClient *api.Client, deployID string, status string, statusDesc string) {
   295  	testutil.WaitForResultRetries(retries, func() (bool, error) {
   296  		time.Sleep(time.Millisecond * 100)
   297  		deploy, _, err := nomadClient.Deployments().Info(deployID, nil)
   298  		if err != nil {
   299  			return false, err
   300  		}
   301  
   302  		if deploy.Status == status && deploy.StatusDescription == statusDesc {
   303  			return true, nil
   304  		}
   305  		return false, fmt.Errorf("expected status %s \"%s\", but got: %s \"%s\"",
   306  			status,
   307  			statusDesc,
   308  			deploy.Status,
   309  			deploy.StatusDescription,
   310  		)
   311  
   312  	}, func(err error) {
   313  		require.NoError(t, err, "failed to wait on deployment")
   314  	})
   315  }
   316  
   317  // DumpEvals for a job. This is intended to be used during test development or
   318  // prior to exiting a test after an assertion failed.
   319  func DumpEvals(c *api.Client, jobID string) string {
   320  	evals, _, err := c.Jobs().Evaluations(jobID, nil)
   321  	if err != nil {
   322  		return fmt.Sprintf("error retrieving evals for job %q: %s", jobID, err)
   323  	}
   324  	if len(evals) == 0 {
   325  		return fmt.Sprintf("no evals found for job %q", jobID)
   326  	}
   327  	buf := bytes.NewBuffer(nil)
   328  	for i, e := range evals {
   329  		err := EvalTemplate.Execute(buf, map[string]interface{}{
   330  			"Index": i + 1,
   331  			"Total": len(evals),
   332  			"Eval":  e,
   333  		})
   334  		if err != nil {
   335  			fmt.Fprintf(buf, "error rendering eval: %s\n", err)
   336  		}
   337  	}
   338  	return buf.String()
   339  }
   340  
// EvalTemplate is the text/template that DumpEvals uses to render a single
// evaluation. It reads three keys from its data: Index, Total, and Eval.
// Optional sections (deployment, wait, linked evals, failed placements,
// queued allocations) are only emitted when the corresponding field is set.
// The template is parsed at package initialization via template.Must, so a
// malformed template panics at startup.
var EvalTemplate = template.Must(template.New("dump_eval").Parse(
	`{{.Index}}/{{.Total}} Job {{.Eval.JobID}} Eval {{.Eval.ID}}
  Type:         {{.Eval.Type}}
  TriggeredBy:  {{.Eval.TriggeredBy}}
  {{- if .Eval.DeploymentID}}
  Deployment:   {{.Eval.DeploymentID}}
  {{- end}}
  Status:       {{.Eval.Status}} {{if .Eval.StatusDescription}}({{.Eval.StatusDescription}}){{end}}
  {{- if .Eval.Wait}}
  Wait:         {{.Eval.Wait}} <- DEPRECATED
  {{- end}}
  {{- if not .Eval.WaitUntil.IsZero}}
  WaitUntil:    {{.Eval.WaitUntil}}
  {{- end}}
  {{- if .Eval.NextEval}}
  NextEval:     {{.Eval.NextEval}}
  {{- end}}
  {{- if .Eval.PreviousEval}}
  PrevEval:     {{.Eval.PreviousEval}}
  {{- end}}
  {{- if .Eval.BlockedEval}}
  BlockedEval:  {{.Eval.BlockedEval}}
  {{- end}}
  {{- if .Eval.FailedTGAllocs }}
  Failed Allocs:
  {{- end}}
  {{- range $k, $v := .Eval.FailedTGAllocs}}
    Failed Group: {{$k}}
      NodesEvaluated: {{$v.NodesEvaluated}}
      NodesFiltered:  {{$v.NodesFiltered}}
      NodesAvailable: {{range $dc, $n := $v.NodesAvailable}}{{$dc}}:{{$n}} {{end}}
      NodesExhausted: {{$v.NodesExhausted}}
      ClassFiltered:  {{len $v.ClassFiltered}}
      ConstraintFilt: {{len $v.ConstraintFiltered}}
      DimensionExhst: {{range $d, $n := $v.DimensionExhausted}}{{$d}}:{{$n}} {{end}}
      ResourcesExhst: {{range $r, $n := $v.ResourcesExhausted}}{{$r}}:{{$n}} {{end}}
      QuotaExhausted: {{range $i, $q := $v.QuotaExhausted}}{{$q}} {{end}}
      CoalescedFail:  {{$v.CoalescedFailures}}
      ScoreMetaData:  {{len $v.ScoreMetaData}}
      AllocationTime: {{$v.AllocationTime}}
  {{- end}}
  {{- if .Eval.QueuedAllocations}}
  QueuedAllocs: {{range $k, $n := .Eval.QueuedAllocations}}{{$k}}:{{$n}} {{end}}
  {{- end}}
  SnapshotIdx:  {{.Eval.SnapshotIndex}}
  CreateIndex:  {{.Eval.CreateIndex}}
  ModifyIndex:  {{.Eval.ModifyIndex}}
`))