github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/taskevents/taskevents.go (about) 1 package taskevents 2 3 import ( 4 "fmt" 5 "strings" 6 "time" 7 8 "github.com/hashicorp/nomad/api" 9 "github.com/hashicorp/nomad/e2e/framework" 10 "github.com/hashicorp/nomad/testutil" 11 "github.com/stretchr/testify/require" 12 13 "github.com/hashicorp/nomad/e2e/e2eutil" 14 "github.com/hashicorp/nomad/helper/uuid" 15 ) 16 17 type TaskEventsTest struct { 18 framework.TC 19 jobIds []string 20 } 21 22 func init() { 23 framework.AddSuites(&framework.TestSuite{ 24 Component: "TaskEvents", 25 CanRunLocal: true, 26 Cases: []framework.TestCase{ 27 new(TaskEventsTest), 28 }, 29 }) 30 } 31 32 func (tc *TaskEventsTest) BeforeAll(f *framework.F) { 33 e2eutil.WaitForLeader(f.T(), tc.Nomad()) 34 e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1) 35 } 36 37 func (tc *TaskEventsTest) AfterEach(f *framework.F) { 38 nomadClient := tc.Nomad() 39 jobs := nomadClient.Jobs() 40 // Stop all jobs in test 41 for _, id := range tc.jobIds { 42 jobs.Deregister(id, true, nil) 43 } 44 // Garbage collect 45 nomadClient.System().GarbageCollect() 46 } 47 48 func formatEvents(events []*api.TaskEvent) string { 49 estrs := make([]string, len(events)) 50 for i, e := range events { 51 estrs[i] = fmt.Sprintf("%2d %-20s fail=%t msg=> %s", i, e.Type, e.FailsTask, e.DisplayMessage) 52 } 53 return strings.Join(estrs, "\n") 54 } 55 56 // waitUntilEvents submits a job and then waits until the expected number of 57 // events exist. 58 // 59 // The job name is used to load the job file from "input/${job}.nomad", and 60 // events are only inspected for tasks named the same as the job. That task's 61 // state is returned as well as the last allocation received. 62 func (tc *TaskEventsTest) waitUntilEvents(f *framework.F, jobName string, numEvents int) (*api.Allocation, *api.TaskState) { 63 t := f.T() 64 nomadClient := tc.Nomad() 65 uuid := uuid.Generate() 66 uniqJobId := jobName + uuid[0:8] 67 tc.jobIds = append(tc.jobIds, uniqJobId) 68 69 jobFile := fmt.Sprintf("taskevents/input/%s.nomad", jobName) 70 allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), nomadClient, jobFile, uniqJobId, "") 71 72 require.Len(t, allocs, 1) 73 allocID := allocs[0].ID 74 qo := &api.QueryOptions{ 75 WaitTime: time.Second, 76 } 77 78 // Capture state outside of wait to ease assertions once expected 79 // number of events have been received. 80 var alloc *api.Allocation 81 var taskState *api.TaskState 82 83 testutil.WaitForResultRetries(10, func() (bool, error) { 84 a, meta, err := nomadClient.Allocations().Info(allocID, qo) 85 if err != nil { 86 return false, err 87 } 88 89 qo.WaitIndex = meta.LastIndex 90 91 // Capture alloc and task state 92 alloc = a 93 taskState = a.TaskStates[jobName] 94 if taskState == nil { 95 return false, fmt.Errorf("task state not found for %s", jobName) 96 } 97 98 // Assert expected number of task events; we can't check for the exact 99 // count because of a race where Allocation Unhealthy events can be 100 // emitted when a peer task dies, but the caller can assert the 101 // specific events and their order up to that point 102 if len(taskState.Events) < numEvents { 103 return false, fmt.Errorf("expected %d task events but found %d\n%s", 104 numEvents, len(taskState.Events), formatEvents(taskState.Events), 105 ) 106 } 107 108 return true, nil 109 }, func(err error) { 110 require.NoError(t, err, "task events error") 111 }) 112 113 return alloc, taskState 114 } 115 116 func (tc *TaskEventsTest) TestTaskEvents_SimpleBatch(f *framework.F) { 117 t := f.T() 118 _, taskState := tc.waitUntilEvents(f, "simple_batch", 4) 119 events := taskState.Events 120 121 // Assert task did not fail 122 require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s", 123 len(events), formatEvents(events), 124 ) 125 126 // Assert the expected type of events were emitted in a specific order 127 // (based on v0.8.6) 128 require.Equal(t, api.TaskReceived, events[0].Type) 129 require.Equal(t, api.TaskSetup, events[1].Type) 130 require.Equal(t, api.TaskStarted, events[2].Type) 131 require.Equal(t, api.TaskTerminated, events[3].Type) 132 } 133 134 func (tc *TaskEventsTest) TestTaskEvents_FailedBatch(f *framework.F) { 135 t := f.T() 136 _, taskState := tc.waitUntilEvents(f, "failed_batch", 4) 137 events := taskState.Events 138 139 // Assert task did fail 140 require.Truef(t, taskState.Failed, "task unexpectedly succeeded after %d events\n%s", 141 len(events), formatEvents(events), 142 ) 143 144 // Assert the expected type of events were emitted in a specific order 145 // (based on v0.8.6) 146 require.Equal(t, api.TaskReceived, events[0].Type) 147 require.Equal(t, api.TaskSetup, events[1].Type) 148 require.Equal(t, api.TaskDriverFailure, events[2].Type) 149 require.Equal(t, api.TaskNotRestarting, events[3].Type) 150 require.True(t, events[3].FailsTask) 151 } 152 153 // TestTaskEvents_CompletedLeader asserts the proper events are emitted for a 154 // non-leader task when its leader task completes. 155 func (tc *TaskEventsTest) TestTaskEvents_CompletedLeader(f *framework.F) { 156 t := f.T() 157 _, taskState := tc.waitUntilEvents(f, "completed_leader", 7) 158 events := taskState.Events 159 160 // Assert task did not fail 161 require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s", 162 len(events), formatEvents(events), 163 ) 164 165 // Assert the expected type of events were emitted in a specific order 166 require.Equal(t, api.TaskReceived, events[0].Type) 167 require.Equal(t, api.TaskSetup, events[1].Type) 168 require.Equal(t, api.TaskStarted, events[2].Type) 169 require.Equal(t, api.TaskLeaderDead, events[3].Type) 170 require.Equal(t, api.TaskKilling, events[4].Type) 171 require.Equal(t, api.TaskTerminated, events[5].Type) 172 require.Equal(t, api.TaskKilled, events[6].Type) 173 } 174 175 // TestTaskEvents_FailedSibling asserts the proper events are emitted for a 176 // task when another task in its task group fails. 177 func (tc *TaskEventsTest) TestTaskEvents_FailedSibling(f *framework.F) { 178 t := f.T() 179 alloc, taskState := tc.waitUntilEvents(f, "failed_sibling", 7) 180 events := taskState.Events 181 182 // Just because a sibling failed doesn't mean this task fails. It 183 // should exit cleanly. (same as in v0.8.6) 184 require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s", 185 len(events), formatEvents(events), 186 ) 187 188 // The alloc should be faied 189 require.Equal(t, "failed", alloc.ClientStatus) 190 191 // Assert the expected type of events were emitted in a specific order 192 require.Equal(t, api.TaskReceived, events[0].Type) 193 require.Equal(t, api.TaskSetup, events[1].Type) 194 require.Equal(t, api.TaskStarted, events[2].Type) 195 require.Equal(t, api.TaskSiblingFailed, events[3].Type) 196 require.Equal(t, api.TaskKilling, events[4].Type) 197 require.Equal(t, api.TaskTerminated, events[5].Type) 198 require.Equal(t, api.TaskKilled, events[6].Type) 199 }