github.com/hernad/nomad@v1.6.112/e2e/taskevents/taskevents.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package taskevents
     5  
     6  import (
     7  	"fmt"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/hernad/nomad/api"
    12  	"github.com/hernad/nomad/e2e/framework"
    13  	"github.com/hernad/nomad/testutil"
    14  	"github.com/stretchr/testify/require"
    15  
    16  	"github.com/hernad/nomad/e2e/e2eutil"
    17  	"github.com/hernad/nomad/helper/uuid"
    18  )
    19  
    20  type TaskEventsTest struct {
    21  	framework.TC
    22  	jobIds []string
    23  }
    24  
    25  func init() {
    26  	framework.AddSuites(&framework.TestSuite{
    27  		Component:   "TaskEvents",
    28  		CanRunLocal: true,
    29  		Cases: []framework.TestCase{
    30  			new(TaskEventsTest),
    31  		},
    32  	})
    33  }
    34  
    35  func (tc *TaskEventsTest) BeforeAll(f *framework.F) {
    36  	e2eutil.WaitForLeader(f.T(), tc.Nomad())
    37  	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
    38  }
    39  
    40  func (tc *TaskEventsTest) AfterEach(f *framework.F) {
    41  	nomadClient := tc.Nomad()
    42  	jobs := nomadClient.Jobs()
    43  	// Stop all jobs in test
    44  	for _, id := range tc.jobIds {
    45  		jobs.Deregister(id, true, nil)
    46  	}
    47  	// Garbage collect
    48  	nomadClient.System().GarbageCollect()
    49  }
    50  
    51  func formatEvents(events []*api.TaskEvent) string {
    52  	estrs := make([]string, len(events))
    53  	for i, e := range events {
    54  		estrs[i] = fmt.Sprintf("%2d %-20s fail=%t msg=> %s", i, e.Type, e.FailsTask, e.DisplayMessage)
    55  	}
    56  	return strings.Join(estrs, "\n")
    57  }
    58  
    59  // waitUntilEvents submits a job and then waits until the expected number of
    60  // events exist.
    61  //
    62  // The job name is used to load the job file from "input/${job}.nomad", and
    63  // events are only inspected for tasks named the same as the job. That task's
    64  // state is returned as well as the last allocation received.
    65  func (tc *TaskEventsTest) waitUntilEvents(f *framework.F, jobName string, numEvents int) (*api.Allocation, *api.TaskState) {
    66  	t := f.T()
    67  	nomadClient := tc.Nomad()
    68  	uuid := uuid.Generate()
    69  	uniqJobId := jobName + uuid[0:8]
    70  	tc.jobIds = append(tc.jobIds, uniqJobId)
    71  
    72  	jobFile := fmt.Sprintf("taskevents/input/%s.nomad", jobName)
    73  	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), nomadClient, jobFile, uniqJobId, "")
    74  
    75  	require.Len(t, allocs, 1)
    76  	allocID := allocs[0].ID
    77  	qo := &api.QueryOptions{
    78  		WaitTime: time.Second,
    79  	}
    80  
    81  	// Capture state outside of wait to ease assertions once expected
    82  	// number of events have been received.
    83  	var alloc *api.Allocation
    84  	var taskState *api.TaskState
    85  
    86  	testutil.WaitForResultRetries(10, func() (bool, error) {
    87  		a, meta, err := nomadClient.Allocations().Info(allocID, qo)
    88  		if err != nil {
    89  			return false, err
    90  		}
    91  
    92  		qo.WaitIndex = meta.LastIndex
    93  
    94  		// Capture alloc and task state
    95  		alloc = a
    96  		taskState = a.TaskStates[jobName]
    97  		if taskState == nil {
    98  			return false, fmt.Errorf("task state not found for %s", jobName)
    99  		}
   100  
   101  		// Assert expected number of task events; we can't check for the exact
   102  		// count because of a race where Allocation Unhealthy events can be
   103  		// emitted when a peer task dies, but the caller can assert the
   104  		// specific events and their order up to that point
   105  		if len(taskState.Events) < numEvents {
   106  			return false, fmt.Errorf("expected %d task events but found %d\n%s",
   107  				numEvents, len(taskState.Events), formatEvents(taskState.Events),
   108  			)
   109  		}
   110  
   111  		return true, nil
   112  	}, func(err error) {
   113  		require.NoError(t, err, "task events error")
   114  	})
   115  
   116  	return alloc, taskState
   117  }
   118  
   119  func (tc *TaskEventsTest) TestTaskEvents_SimpleBatch(f *framework.F) {
   120  	t := f.T()
   121  	_, taskState := tc.waitUntilEvents(f, "simple_batch", 4)
   122  	events := taskState.Events
   123  
   124  	// Assert task did not fail
   125  	require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s",
   126  		len(events), formatEvents(events),
   127  	)
   128  
   129  	// Assert the expected type of events were emitted in a specific order
   130  	// (based on v0.8.6)
   131  	require.Equal(t, api.TaskReceived, events[0].Type)
   132  	require.Equal(t, api.TaskSetup, events[1].Type)
   133  	require.Equal(t, api.TaskStarted, events[2].Type)
   134  	require.Equal(t, api.TaskTerminated, events[3].Type)
   135  }
   136  
   137  func (tc *TaskEventsTest) TestTaskEvents_FailedBatch(f *framework.F) {
   138  	t := f.T()
   139  	_, taskState := tc.waitUntilEvents(f, "failed_batch", 4)
   140  	events := taskState.Events
   141  
   142  	// Assert task did fail
   143  	require.Truef(t, taskState.Failed, "task unexpectedly succeeded after %d events\n%s",
   144  		len(events), formatEvents(events),
   145  	)
   146  
   147  	// Assert the expected type of events were emitted in a specific order
   148  	// (based on v0.8.6)
   149  	require.Equal(t, api.TaskReceived, events[0].Type)
   150  	require.Equal(t, api.TaskSetup, events[1].Type)
   151  	require.Equal(t, api.TaskDriverFailure, events[2].Type)
   152  	require.Equal(t, api.TaskNotRestarting, events[3].Type)
   153  	require.True(t, events[3].FailsTask)
   154  }
   155  
   156  // TestTaskEvents_CompletedLeader asserts the proper events are emitted for a
   157  // non-leader task when its leader task completes.
   158  func (tc *TaskEventsTest) TestTaskEvents_CompletedLeader(f *framework.F) {
   159  	t := f.T()
   160  	_, taskState := tc.waitUntilEvents(f, "completed_leader", 7)
   161  	events := taskState.Events
   162  
   163  	// Assert task did not fail
   164  	require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s",
   165  		len(events), formatEvents(events),
   166  	)
   167  
   168  	// Assert the expected type of events were emitted in a specific order
   169  	require.Equal(t, api.TaskReceived, events[0].Type)
   170  	require.Equal(t, api.TaskSetup, events[1].Type)
   171  	require.Equal(t, api.TaskStarted, events[2].Type)
   172  	require.Equal(t, api.TaskLeaderDead, events[3].Type)
   173  	require.Equal(t, api.TaskKilling, events[4].Type)
   174  	require.Equal(t, api.TaskTerminated, events[5].Type)
   175  	require.Equal(t, api.TaskKilled, events[6].Type)
   176  }
   177  
   178  // TestTaskEvents_FailedSibling asserts the proper events are emitted for a
   179  // task when another task in its task group fails.
   180  func (tc *TaskEventsTest) TestTaskEvents_FailedSibling(f *framework.F) {
   181  	t := f.T()
   182  	alloc, taskState := tc.waitUntilEvents(f, "failed_sibling", 7)
   183  	events := taskState.Events
   184  
   185  	// Just because a sibling failed doesn't mean this task fails. It
   186  	// should exit cleanly. (same as in v0.8.6)
   187  	require.Falsef(t, taskState.Failed, "task unexpectedly failed after %d events\n%s",
   188  		len(events), formatEvents(events),
   189  	)
   190  
   191  	// The alloc should be faied
   192  	require.Equal(t, "failed", alloc.ClientStatus)
   193  
   194  	// Assert the expected type of events were emitted in a specific order
   195  	require.Equal(t, api.TaskReceived, events[0].Type)
   196  	require.Equal(t, api.TaskSetup, events[1].Type)
   197  	require.Equal(t, api.TaskStarted, events[2].Type)
   198  	require.Equal(t, api.TaskSiblingFailed, events[3].Type)
   199  	require.Equal(t, api.TaskKilling, events[4].Type)
   200  	require.Equal(t, api.TaskTerminated, events[5].Type)
   201  	require.Equal(t, api.TaskKilled, events[6].Type)
   202  }