github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/script_check_hook_test.go (about)

     1  package taskrunner
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sync/atomic"
     7  	"testing"
     8  	"time"
     9  
    10  	"github.com/hashicorp/consul/api"
    11  	hclog "github.com/hashicorp/go-hclog"
    12  	"github.com/hashicorp/nomad/ci"
    13  	"github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
    14  	"github.com/hashicorp/nomad/client/serviceregistration"
    15  	regMock "github.com/hashicorp/nomad/client/serviceregistration/mock"
    16  	"github.com/hashicorp/nomad/client/serviceregistration/wrapper"
    17  	"github.com/hashicorp/nomad/client/taskenv"
    18  	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
    19  	"github.com/hashicorp/nomad/helper/testlog"
    20  	"github.com/hashicorp/nomad/nomad/mock"
    21  	"github.com/hashicorp/nomad/nomad/structs"
    22  	"github.com/stretchr/testify/require"
    23  )
    24  
    25  func newScriptMock(hb TTLUpdater, exec interfaces.ScriptExecutor, logger hclog.Logger, interval, timeout time.Duration) *scriptCheck {
    26  	script := newScriptCheck(&scriptCheckConfig{
    27  		allocID:   "allocid",
    28  		taskName:  "testtask",
    29  		serviceID: "serviceid",
    30  		check: &structs.ServiceCheck{
    31  			Interval: interval,
    32  			Timeout:  timeout,
    33  		},
    34  		ttlUpdater: hb,
    35  		driverExec: exec,
    36  		taskEnv:    &taskenv.TaskEnv{},
    37  		logger:     logger,
    38  		shutdownCh: nil,
    39  	})
    40  	script.callback = newScriptCheckCallback(script)
    41  	script.lastCheckOk = true
    42  	return script
    43  }
    44  
    45  // fakeHeartbeater implements the TTLUpdater interface to allow mocking out
    46  // Consul in script executor tests.
    47  type fakeHeartbeater struct {
    48  	heartbeats chan heartbeat
    49  }
    50  
    51  func (f *fakeHeartbeater) UpdateTTL(checkID, namespace, output, status string) error {
    52  	f.heartbeats <- heartbeat{checkID: checkID, output: output, status: status}
    53  	return nil
    54  }
    55  
    56  func newFakeHeartbeater() *fakeHeartbeater {
    57  	return &fakeHeartbeater{heartbeats: make(chan heartbeat)}
    58  }
    59  
    60  type heartbeat struct {
    61  	checkID string
    62  	output  string
    63  	status  string
    64  }
    65  
    66  // TestScript_Exec_Cancel asserts cancelling a script check shortcircuits
    67  // any running scripts.
    68  func TestScript_Exec_Cancel(t *testing.T) {
    69  	ci.Parallel(t)
    70  
    71  	exec, cancel := newBlockingScriptExec()
    72  	defer cancel()
    73  
    74  	logger := testlog.HCLogger(t)
    75  	script := newScriptMock(nil, // TTLUpdater should never be called
    76  		exec, logger, time.Hour, time.Hour)
    77  
    78  	handle := script.run()
    79  	<-exec.running  // wait until Exec is called
    80  	handle.cancel() // cancel now that we're blocked in exec
    81  
    82  	select {
    83  	case <-handle.wait():
    84  	case <-time.After(3 * time.Second):
    85  		t.Fatalf("timed out waiting for script check to exit")
    86  	}
    87  
    88  	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
    89  	// canceled. Only a wrapper around it obeys the context cancelation.
    90  	require.NotEqual(t, atomic.LoadInt32(&exec.exited), 1,
    91  		"expected script executor to still be running after timeout")
    92  }
    93  
    94  // TestScript_Exec_TimeoutBasic asserts a script will be killed when the
    95  // timeout is reached.
    96  func TestScript_Exec_TimeoutBasic(t *testing.T) {
    97  	ci.Parallel(t)
    98  	exec, cancel := newBlockingScriptExec()
    99  	defer cancel()
   100  
   101  	logger := testlog.HCLogger(t)
   102  	hb := newFakeHeartbeater()
   103  	script := newScriptMock(hb, exec, logger, time.Hour, time.Second)
   104  
   105  	handle := script.run()
   106  	defer handle.cancel() // cleanup
   107  	<-exec.running        // wait until Exec is called
   108  
   109  	// Check for UpdateTTL call
   110  	select {
   111  	case update := <-hb.heartbeats:
   112  		require.Equal(t, update.output, context.DeadlineExceeded.Error())
   113  		require.Equal(t, update.status, api.HealthCritical)
   114  	case <-time.After(3 * time.Second):
   115  		t.Fatalf("timed out waiting for script check to exit")
   116  	}
   117  
   118  	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
   119  	// canceled. Only a wrapper around it obeys the context cancelation.
   120  	require.NotEqual(t, atomic.LoadInt32(&exec.exited), 1,
   121  		"expected script executor to still be running after timeout")
   122  
   123  	// Cancel and watch for exit
   124  	handle.cancel()
   125  	select {
   126  	case <-handle.wait(): // ok!
   127  	case update := <-hb.heartbeats:
   128  		t.Errorf("unexpected UpdateTTL call on exit with status=%q", update)
   129  	case <-time.After(3 * time.Second):
   130  		t.Fatalf("timed out waiting for script check to exit")
   131  	}
   132  }
   133  
   134  // TestScript_Exec_TimeoutCritical asserts a script will be killed when
   135  // the timeout is reached and always set a critical status regardless of what
   136  // Exec returns.
   137  func TestScript_Exec_TimeoutCritical(t *testing.T) {
   138  	ci.Parallel(t)
   139  	logger := testlog.HCLogger(t)
   140  	hb := newFakeHeartbeater()
   141  	script := newScriptMock(hb, sleeperExec{}, logger, time.Hour, time.Nanosecond)
   142  
   143  	handle := script.run()
   144  	defer handle.cancel() // cleanup
   145  
   146  	// Check for UpdateTTL call
   147  	select {
   148  	case update := <-hb.heartbeats:
   149  		require.Equal(t, update.output, context.DeadlineExceeded.Error())
   150  		require.Equal(t, update.status, api.HealthCritical)
   151  	case <-time.After(3 * time.Second):
   152  		t.Fatalf("timed out waiting for script check to timeout")
   153  	}
   154  }
   155  
   156  // TestScript_Exec_Shutdown asserts a script will be executed once more
   157  // when told to shutdown.
   158  func TestScript_Exec_Shutdown(t *testing.T) {
   159  	ci.Parallel(t)
   160  
   161  	shutdown := make(chan struct{})
   162  	exec := newSimpleExec(0, nil)
   163  	logger := testlog.HCLogger(t)
   164  	hb := newFakeHeartbeater()
   165  	script := newScriptMock(hb, exec, logger, time.Hour, 3*time.Second)
   166  	script.shutdownCh = shutdown
   167  
   168  	handle := script.run()
   169  	defer handle.cancel() // cleanup
   170  	close(shutdown)       // tell scriptCheck to exit
   171  
   172  	select {
   173  	case update := <-hb.heartbeats:
   174  		require.Equal(t, update.output, "code=0 err=<nil>")
   175  		require.Equal(t, update.status, api.HealthPassing)
   176  	case <-time.After(3 * time.Second):
   177  		t.Fatalf("timed out waiting for script check to exit")
   178  	}
   179  
   180  	select {
   181  	case <-handle.wait(): // ok!
   182  	case <-time.After(3 * time.Second):
   183  		t.Fatalf("timed out waiting for script check to exit")
   184  	}
   185  }
   186  
   187  // TestScript_Exec_Codes asserts script exit codes are translated to their
   188  // corresponding Consul health check status.
   189  func TestScript_Exec_Codes(t *testing.T) {
   190  	ci.Parallel(t)
   191  
   192  	exec := newScriptedExec([]execResult{
   193  		{[]byte("output"), 1, nil},
   194  		{[]byte("output"), 0, nil},
   195  		{[]byte("output"), 0, context.DeadlineExceeded},
   196  		{[]byte("output"), 0, nil},
   197  		{[]byte("<ignored output>"), 2, fmt.Errorf("some error")},
   198  		{[]byte("output"), 0, nil},
   199  		{[]byte("error9000"), 9000, nil},
   200  	})
   201  	logger := testlog.HCLogger(t)
   202  	hb := newFakeHeartbeater()
   203  	script := newScriptMock(
   204  		hb, exec, logger, time.Nanosecond, 3*time.Second)
   205  
   206  	handle := script.run()
   207  	defer handle.cancel() // cleanup
   208  	deadline := time.After(3 * time.Second)
   209  
   210  	expected := []heartbeat{
   211  		{script.id, "output", api.HealthWarning},
   212  		{script.id, "output", api.HealthPassing},
   213  		{script.id, context.DeadlineExceeded.Error(), api.HealthCritical},
   214  		{script.id, "output", api.HealthPassing},
   215  		{script.id, "some error", api.HealthCritical},
   216  		{script.id, "output", api.HealthPassing},
   217  		{script.id, "error9000", api.HealthCritical},
   218  	}
   219  
   220  	for i := 0; i <= 6; i++ {
   221  		select {
   222  		case update := <-hb.heartbeats:
   223  			require.Equal(t, update, expected[i],
   224  				"expected update %d to be '%s' but received '%s'",
   225  				i, expected[i], update)
   226  		case <-deadline:
   227  			t.Fatalf("timed out waiting for all script checks to finish")
   228  		}
   229  	}
   230  }
   231  
   232  // TestScript_TaskEnvInterpolation asserts that script check hooks are
   233  // interpolated in the same way that services are
   234  func TestScript_TaskEnvInterpolation(t *testing.T) {
   235  	ci.Parallel(t)
   236  
   237  	logger := testlog.HCLogger(t)
   238  	consulClient := regMock.NewServiceRegistrationHandler(logger)
   239  	regWrap := wrapper.NewHandlerWrapper(logger, consulClient, nil)
   240  	exec, cancel := newBlockingScriptExec()
   241  	defer cancel()
   242  
   243  	alloc := mock.ConnectAlloc()
   244  	task := alloc.Job.TaskGroups[0].Tasks[0]
   245  
   246  	task.Services[0].Name = "${NOMAD_JOB_NAME}-${TASK}-${SVC_NAME}"
   247  	task.Services[0].Checks[0].Name = "${NOMAD_JOB_NAME}-${SVC_NAME}-check"
   248  	alloc.Job.Canonicalize() // need to re-canonicalize b/c the mock already did it
   249  
   250  	env := taskenv.NewBuilder(mock.Node(), alloc, task, "global").SetHookEnv(
   251  		"script_check",
   252  		map[string]string{"SVC_NAME": "frontend"}).Build()
   253  
   254  	svcHook := newServiceHook(serviceHookConfig{
   255  		alloc:             alloc,
   256  		task:              task,
   257  		serviceRegWrapper: regWrap,
   258  		logger:            logger,
   259  	})
   260  	// emulate prestart having been fired
   261  	svcHook.taskEnv = env
   262  
   263  	scHook := newScriptCheckHook(scriptCheckHookConfig{
   264  		alloc:        alloc,
   265  		task:         task,
   266  		consul:       consulClient,
   267  		logger:       logger,
   268  		shutdownWait: time.Hour, // TTLUpdater will never be called
   269  	})
   270  	// emulate prestart having been fired
   271  	scHook.taskEnv = env
   272  	scHook.driverExec = exec
   273  
   274  	expectedSvc := svcHook.getWorkloadServices().Services[0]
   275  	expected := agentconsul.MakeCheckID(serviceregistration.MakeAllocServiceID(
   276  		alloc.ID, task.Name, expectedSvc), expectedSvc.Checks[0])
   277  
   278  	actual := scHook.newScriptChecks()
   279  	check, ok := actual[expected]
   280  	require.True(t, ok)
   281  	require.Equal(t, "my-job-frontend-check", check.check.Name)
   282  
   283  	// emulate an update
   284  	env = taskenv.NewBuilder(mock.Node(), alloc, task, "global").SetHookEnv(
   285  		"script_check",
   286  		map[string]string{"SVC_NAME": "backend"}).Build()
   287  	scHook.taskEnv = env
   288  	svcHook.taskEnv = env
   289  
   290  	expectedSvc = svcHook.getWorkloadServices().Services[0]
   291  	expected = agentconsul.MakeCheckID(serviceregistration.MakeAllocServiceID(
   292  		alloc.ID, task.Name, expectedSvc), expectedSvc.Checks[0])
   293  
   294  	actual = scHook.newScriptChecks()
   295  	check, ok = actual[expected]
   296  	require.True(t, ok)
   297  	require.Equal(t, "my-job-backend-check", check.check.Name)
   298  }
   299  
   300  func TestScript_associated(t *testing.T) {
   301  	ci.Parallel(t)
   302  
   303  	t.Run("neither set", func(t *testing.T) {
   304  		require.False(t, new(scriptCheckHook).associated("task1", "", ""))
   305  	})
   306  
   307  	t.Run("service set", func(t *testing.T) {
   308  		require.True(t, new(scriptCheckHook).associated("task1", "task1", ""))
   309  		require.False(t, new(scriptCheckHook).associated("task1", "task2", ""))
   310  	})
   311  
   312  	t.Run("check set", func(t *testing.T) {
   313  		require.True(t, new(scriptCheckHook).associated("task1", "", "task1"))
   314  		require.False(t, new(scriptCheckHook).associated("task1", "", "task2"))
   315  	})
   316  
   317  	t.Run("both set", func(t *testing.T) {
   318  		// ensure check.task takes precedence over service.task
   319  		require.True(t, new(scriptCheckHook).associated("task1", "task1", "task1"))
   320  		require.False(t, new(scriptCheckHook).associated("task1", "task1", "task2"))
   321  		require.True(t, new(scriptCheckHook).associated("task1", "task2", "task1"))
   322  		require.False(t, new(scriptCheckHook).associated("task1", "task2", "task2"))
   323  	})
   324  }