github.com/hernad/nomad@v1.6.112/e2e/taskevents/taskevents.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package taskevents 5 6 import ( 7 "fmt" 8 "strings" 9 "time" 10 11 "github.com/hernad/nomad/api" 12 "github.com/hernad/nomad/e2e/framework" 13 "github.com/hernad/nomad/testutil" 14 "github.com/stretchr/testify/require" 15 16 "github.com/hernad/nomad/e2e/e2eutil" 17 "github.com/hernad/nomad/helper/uuid" 18 ) 19 20 type TaskEventsTest struct { 21 framework.TC 22 jobIds []string 23 } 24 25 func init() { 26 framework.AddSuites(&framework.TestSuite{ 27 Component: "TaskEvents", 28 CanRunLocal: true, 29 Cases: []framework.TestCase{ 30 new(TaskEventsTest), 31 }, 32 }) 33 } 34 35 func (tc *TaskEventsTest) BeforeAll(f *framework.F) { 36 e2eutil.WaitForLeader(f.T(), tc.Nomad()) 37 e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1) 38 } 39 40 func (tc *TaskEventsTest) AfterEach(f *framework.F) { 41 nomadClient := tc.Nomad() 42 jobs := nomadClient.Jobs() 43 // Stop all jobs in test 44 for _, id := range tc.jobIds { 45 jobs.Deregister(id, true, nil) 46 } 47 // Garbage collect 48 nomadClient.System().GarbageCollect() 49 } 50 51 func formatEvents(events []*api.TaskEvent) string { 52 estrs := make([]string, len(events)) 53 for i, e := range events { 54 estrs[i] = fmt.Sprintf("%2d %-20s fail=%t msg=> %s", i, e.Type, e.FailsTask, e.DisplayMessage) 55 } 56 return strings.Join(estrs, "\n") 57 } 58 59 // waitUntilEvents submits a job and then waits until the expected number of 60 // events exist. 61 // 62 // The job name is used to load the job file from "input/${job}.nomad", and 63 // events are only inspected for tasks named the same as the job. That task's 64 // state is returned as well as the last allocation received. 65 func (tc *TaskEventsTest) waitUntilEvents(f *framework.F, jobName string, numEvents int) (*api.Allocation, *api.TaskState) { 66 t := f.T() 67 nomadClient := tc.Nomad() 68 uuid := uuid.Generate() 69 uniqJobId := jobName + uuid[0:8] 70 tc.jobIds = append(tc.jobIds, uniqJobId) 71 72 jobFile := fmt.Sprintf("taskevents/input/%s.nomad", jobName) 73 allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), nomadClient, jobFile, uniqJobId, "") 74 75 require.Len(t, allocs, 1) 76 allocID := allocs[0].ID 77 qo := &api.QueryOptions{ 78 WaitTime: time.Second, 79 } 80 81 // Capture state outside of wait to ease assertions once expected 82 // number of events have been received. 83 var alloc *api.Allocation 84 var taskState *api.TaskState 85 86 testutil.WaitForResultRetries(10, func() (bool, error) { 87 a, meta, err := nomadClient.Allocations().Info(allocID, qo) 88 if err != nil { 89 return false, err 90 } 91 92 qo.WaitIndex = meta.LastIndex 93 94 // Capture alloc and task state 95 alloc = a 96 taskState = a.TaskStates[jobName] 97 if taskState == nil { 98 return false, fmt.Errorf("task state not found for %s", jobName) 99 } 100 101 // Assert expected number of task events; we can't check for the exact 102 // count because of a race where Allocation Unhealthy events can be 103 // emitted when a peer task dies, but the caller can assert the 104 // specific events and their order up to that point 105 if len(taskState.Events) < numEvents { 106 return false, fmt.Errorf("expected %d task events but found %d\n%s", 107 numEvents, len(taskState.Events), formatEvents(taskState.Events), 108 ) 109 } 110 111 return true, nil 112 }, func(err error) { 113 require.NoError(t, err, "task events error") 114 }) 115 116 return alloc, taskState 117 } 118 119 func (tc *TaskEventsTest) TestTaskEvents_SimpleBatch(f *framework.F) { 120 t := f.T() 121 _, taskState := tc.waitUntilEvents(f, "simple_batch", 4) 122 events := taskState.Events 123 124 // Assert task did not fail 125 require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s", 126 len(events), formatEvents(events), 127 ) 128 129 // Assert the expected type of events were emitted in a specific order 130 // (based on v0.8.6) 131 require.Equal(t, api.TaskReceived, events[0].Type) 132 require.Equal(t, api.TaskSetup, events[1].Type) 133 require.Equal(t, api.TaskStarted, events[2].Type) 134 require.Equal(t, api.TaskTerminated, events[3].Type) 135 } 136 137 func (tc *TaskEventsTest) TestTaskEvents_FailedBatch(f *framework.F) { 138 t := f.T() 139 _, taskState := tc.waitUntilEvents(f, "failed_batch", 4) 140 events := taskState.Events 141 142 // Assert task did fail 143 require.Truef(t, taskState.Failed, "task unexpectedly succeeded after %d events\n%s", 144 len(events), formatEvents(events), 145 ) 146 147 // Assert the expected type of events were emitted in a specific order 148 // (based on v0.8.6) 149 require.Equal(t, api.TaskReceived, events[0].Type) 150 require.Equal(t, api.TaskSetup, events[1].Type) 151 require.Equal(t, api.TaskDriverFailure, events[2].Type) 152 require.Equal(t, api.TaskNotRestarting, events[3].Type) 153 require.True(t, events[3].FailsTask) 154 } 155 156 // TestTaskEvents_CompletedLeader asserts the proper events are emitted for a 157 // non-leader task when its leader task completes. 158 func (tc *TaskEventsTest) TestTaskEvents_CompletedLeader(f *framework.F) { 159 t := f.T() 160 _, taskState := tc.waitUntilEvents(f, "completed_leader", 7) 161 events := taskState.Events 162 163 // Assert task did not fail 164 require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s", 165 len(events), formatEvents(events), 166 ) 167 168 // Assert the expected type of events were emitted in a specific order 169 require.Equal(t, api.TaskReceived, events[0].Type) 170 require.Equal(t, api.TaskSetup, events[1].Type) 171 require.Equal(t, api.TaskStarted, events[2].Type) 172 require.Equal(t, api.TaskLeaderDead, events[3].Type) 173 require.Equal(t, api.TaskKilling, events[4].Type) 174 require.Equal(t, api.TaskTerminated, events[5].Type) 175 require.Equal(t, api.TaskKilled, events[6].Type) 176 } 177 178 // TestTaskEvents_FailedSibling asserts the proper events are emitted for a 179 // task when another task in its task group fails. 180 func (tc *TaskEventsTest) TestTaskEvents_FailedSibling(f *framework.F) { 181 t := f.T() 182 alloc, taskState := tc.waitUntilEvents(f, "failed_sibling", 7) 183 events := taskState.Events 184 185 // Just because a sibling failed doesn't mean this task fails. It 186 // should exit cleanly. (same as in v0.8.6) 187 require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s", 188 len(events), formatEvents(events), 189 ) 190 191 // The alloc should be faied 192 require.Equal(t, "failed", alloc.ClientStatus) 193 194 // Assert the expected type of events were emitted in a specific order 195 require.Equal(t, api.TaskReceived, events[0].Type) 196 require.Equal(t, api.TaskSetup, events[1].Type) 197 require.Equal(t, api.TaskStarted, events[2].Type) 198 require.Equal(t, api.TaskSiblingFailed, events[3].Type) 199 require.Equal(t, api.TaskKilling, events[4].Type) 200 require.Equal(t, api.TaskTerminated, events[5].Type) 201 require.Equal(t, api.TaskKilled, events[6].Type) 202 }