github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/taskevents/taskevents.go (about)

     1  package taskevents
     2  
     3  import (
     4  	"fmt"
     5  	"strings"
     6  	"time"
     7  
     8  	"github.com/hashicorp/nomad/api"
     9  	"github.com/hashicorp/nomad/e2e/framework"
    10  	"github.com/hashicorp/nomad/testutil"
    11  	"github.com/stretchr/testify/require"
    12  
    13  	"github.com/hashicorp/nomad/e2e/e2eutil"
    14  	"github.com/hashicorp/nomad/helper/uuid"
    15  )
    16  
// TaskEventsTest is the e2e test case for task events. It embeds framework.TC
// and keeps track of the jobs it registers so they can be cleaned up.
type TaskEventsTest struct {
	framework.TC
	// jobIds holds the ID of every job registered by waitUntilEvents;
	// AfterEach walks this list to deregister them.
	jobIds []string
}
    21  
    22  func init() {
    23  	framework.AddSuites(&framework.TestSuite{
    24  		Component:   "TaskEvents",
    25  		CanRunLocal: true,
    26  		Cases: []framework.TestCase{
    27  			new(TaskEventsTest),
    28  		},
    29  	})
    30  }
    31  
    32  func (tc *TaskEventsTest) BeforeAll(f *framework.F) {
    33  	e2eutil.WaitForLeader(f.T(), tc.Nomad())
    34  	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
    35  }
    36  
    37  func (tc *TaskEventsTest) AfterEach(f *framework.F) {
    38  	nomadClient := tc.Nomad()
    39  	jobs := nomadClient.Jobs()
    40  	// Stop all jobs in test
    41  	for _, id := range tc.jobIds {
    42  		jobs.Deregister(id, true, nil)
    43  	}
    44  	// Garbage collect
    45  	nomadClient.System().GarbageCollect()
    46  }
    47  
    48  func formatEvents(events []*api.TaskEvent) string {
    49  	estrs := make([]string, len(events))
    50  	for i, e := range events {
    51  		estrs[i] = fmt.Sprintf("%2d %-20s fail=%t msg=> %s", i, e.Type, e.FailsTask, e.DisplayMessage)
    52  	}
    53  	return strings.Join(estrs, "\n")
    54  }
    55  
    56  // waitUntilEvents submits a job and then waits until the expected number of
    57  // events exist.
    58  //
    59  // The job name is used to load the job file from "input/${job}.nomad", and
    60  // events are only inspected for tasks named the same as the job. That task's
    61  // state is returned as well as the last allocation received.
    62  func (tc *TaskEventsTest) waitUntilEvents(f *framework.F, jobName string, numEvents int) (*api.Allocation, *api.TaskState) {
    63  	t := f.T()
    64  	nomadClient := tc.Nomad()
    65  	uuid := uuid.Generate()
    66  	uniqJobId := jobName + uuid[0:8]
    67  	tc.jobIds = append(tc.jobIds, uniqJobId)
    68  
    69  	jobFile := fmt.Sprintf("taskevents/input/%s.nomad", jobName)
    70  	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), nomadClient, jobFile, uniqJobId, "")
    71  
    72  	require.Len(t, allocs, 1)
    73  	allocID := allocs[0].ID
    74  	qo := &api.QueryOptions{
    75  		WaitTime: time.Second,
    76  	}
    77  
    78  	// Capture state outside of wait to ease assertions once expected
    79  	// number of events have been received.
    80  	var alloc *api.Allocation
    81  	var taskState *api.TaskState
    82  
    83  	testutil.WaitForResultRetries(10, func() (bool, error) {
    84  		a, meta, err := nomadClient.Allocations().Info(allocID, qo)
    85  		if err != nil {
    86  			return false, err
    87  		}
    88  
    89  		qo.WaitIndex = meta.LastIndex
    90  
    91  		// Capture alloc and task state
    92  		alloc = a
    93  		taskState = a.TaskStates[jobName]
    94  		if taskState == nil {
    95  			return false, fmt.Errorf("task state not found for %s", jobName)
    96  		}
    97  
    98  		// Assert expected number of task events; we can't check for the exact
    99  		// count because of a race where Allocation Unhealthy events can be
   100  		// emitted when a peer task dies, but the caller can assert the
   101  		// specific events and their order up to that point
   102  		if len(taskState.Events) < numEvents {
   103  			return false, fmt.Errorf("expected %d task events but found %d\n%s",
   104  				numEvents, len(taskState.Events), formatEvents(taskState.Events),
   105  			)
   106  		}
   107  
   108  		return true, nil
   109  	}, func(err error) {
   110  		require.NoError(t, err, "task events error")
   111  	})
   112  
   113  	return alloc, taskState
   114  }
   115  
   116  func (tc *TaskEventsTest) TestTaskEvents_SimpleBatch(f *framework.F) {
   117  	t := f.T()
   118  	_, taskState := tc.waitUntilEvents(f, "simple_batch", 4)
   119  	events := taskState.Events
   120  
   121  	// Assert task did not fail
   122  	require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s",
   123  		len(events), formatEvents(events),
   124  	)
   125  
   126  	// Assert the expected type of events were emitted in a specific order
   127  	// (based on v0.8.6)
   128  	require.Equal(t, api.TaskReceived, events[0].Type)
   129  	require.Equal(t, api.TaskSetup, events[1].Type)
   130  	require.Equal(t, api.TaskStarted, events[2].Type)
   131  	require.Equal(t, api.TaskTerminated, events[3].Type)
   132  }
   133  
   134  func (tc *TaskEventsTest) TestTaskEvents_FailedBatch(f *framework.F) {
   135  	t := f.T()
   136  	_, taskState := tc.waitUntilEvents(f, "failed_batch", 4)
   137  	events := taskState.Events
   138  
   139  	// Assert task did fail
   140  	require.Truef(t, taskState.Failed, "task unexpectedly succeeded after %d events\n%s",
   141  		len(events), formatEvents(events),
   142  	)
   143  
   144  	// Assert the expected type of events were emitted in a specific order
   145  	// (based on v0.8.6)
   146  	require.Equal(t, api.TaskReceived, events[0].Type)
   147  	require.Equal(t, api.TaskSetup, events[1].Type)
   148  	require.Equal(t, api.TaskDriverFailure, events[2].Type)
   149  	require.Equal(t, api.TaskNotRestarting, events[3].Type)
   150  	require.True(t, events[3].FailsTask)
   151  }
   152  
   153  // TestTaskEvents_CompletedLeader asserts the proper events are emitted for a
   154  // non-leader task when its leader task completes.
   155  func (tc *TaskEventsTest) TestTaskEvents_CompletedLeader(f *framework.F) {
   156  	t := f.T()
   157  	_, taskState := tc.waitUntilEvents(f, "completed_leader", 7)
   158  	events := taskState.Events
   159  
   160  	// Assert task did not fail
   161  	require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s",
   162  		len(events), formatEvents(events),
   163  	)
   164  
   165  	// Assert the expected type of events were emitted in a specific order
   166  	require.Equal(t, api.TaskReceived, events[0].Type)
   167  	require.Equal(t, api.TaskSetup, events[1].Type)
   168  	require.Equal(t, api.TaskStarted, events[2].Type)
   169  	require.Equal(t, api.TaskLeaderDead, events[3].Type)
   170  	require.Equal(t, api.TaskKilling, events[4].Type)
   171  	require.Equal(t, api.TaskTerminated, events[5].Type)
   172  	require.Equal(t, api.TaskKilled, events[6].Type)
   173  }
   174  
   175  // TestTaskEvents_FailedSibling asserts the proper events are emitted for a
   176  // task when another task in its task group fails.
   177  func (tc *TaskEventsTest) TestTaskEvents_FailedSibling(f *framework.F) {
   178  	t := f.T()
   179  	alloc, taskState := tc.waitUntilEvents(f, "failed_sibling", 7)
   180  	events := taskState.Events
   181  
   182  	// Just because a sibling failed doesn't mean this task fails. It
   183  	// should exit cleanly. (same as in v0.8.6)
   184  	require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s",
   185  		len(events), formatEvents(events),
   186  	)
   187  
   188  	// The alloc should be faied
   189  	require.Equal(t, "failed", alloc.ClientStatus)
   190  
   191  	// Assert the expected type of events were emitted in a specific order
   192  	require.Equal(t, api.TaskReceived, events[0].Type)
   193  	require.Equal(t, api.TaskSetup, events[1].Type)
   194  	require.Equal(t, api.TaskStarted, events[2].Type)
   195  	require.Equal(t, api.TaskSiblingFailed, events[3].Type)
   196  	require.Equal(t, api.TaskKilling, events[4].Type)
   197  	require.Equal(t, api.TaskTerminated, events[5].Type)
   198  	require.Equal(t, api.TaskKilled, events[6].Type)
   199  }