github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/tasklet_test.go (about)

     1  package taskrunner
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"os/exec"
     8  	"sync/atomic"
     9  	"testing"
    10  	"time"
    11  
    12  	hclog "github.com/hashicorp/go-hclog"
    13  	"github.com/hashicorp/nomad/ci"
    14  	"github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces"
    15  	"github.com/hashicorp/nomad/helper/testlog"
    16  	"github.com/hashicorp/nomad/helper/testtask"
    17  	"github.com/stretchr/testify/assert"
    18  )
    19  
    20  func TestMain(m *testing.M) {
    21  	if !testtask.Run() {
    22  		os.Exit(m.Run())
    23  	}
    24  }
    25  
    26  func TestTasklet_Exec_HappyPath(t *testing.T) {
    27  	ci.Parallel(t)
    28  
    29  	results := []execResult{
    30  		{[]byte("output"), 0, nil},
    31  		{[]byte("output"), 1, nil},
    32  		{[]byte("output"), 0, context.DeadlineExceeded},
    33  		{[]byte("<ignored output>"), 2, fmt.Errorf("some error")},
    34  		{[]byte("error9000"), 9000, nil},
    35  	}
    36  	exec := newScriptedExec(results)
    37  	tm := newTaskletMock(exec, testlog.HCLogger(t), time.Nanosecond, 3*time.Second)
    38  
    39  	handle := tm.run()
    40  	defer handle.cancel() // just-in-case cleanup
    41  
    42  	deadline := time.After(3 * time.Second)
    43  	for i := 0; i <= 4; i++ {
    44  		select {
    45  		case result := <-tm.calls:
    46  			// for the happy path without cancelations or shutdowns, we expect
    47  			// to get the results passed to the callback in order and without
    48  			// modification
    49  			assert.Equal(t, result, results[i])
    50  		case <-deadline:
    51  			t.Fatalf("timed out waiting for all script checks to finish")
    52  		}
    53  	}
    54  }
    55  
    56  // TestTasklet_Exec_Cancel asserts cancelling a tasklet short-circuits
    57  // any running executions the tasklet
    58  func TestTasklet_Exec_Cancel(t *testing.T) {
    59  	ci.Parallel(t)
    60  
    61  	exec, cancel := newBlockingScriptExec()
    62  	defer cancel()
    63  	tm := newTaskletMock(exec, testlog.HCLogger(t), time.Hour, time.Hour)
    64  
    65  	handle := tm.run()
    66  	<-exec.running  // wait until Exec is called
    67  	handle.cancel() // cancel now that we're blocked in exec
    68  
    69  	select {
    70  	case <-handle.wait():
    71  	case <-time.After(3 * time.Second):
    72  		t.Fatalf("timed out waiting for tasklet check to exit")
    73  	}
    74  
    75  	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
    76  	// canceled. Only a wrapper around it obeys the context cancelation.
    77  	if atomic.LoadInt32(&exec.exited) == 1 {
    78  		t.Errorf("expected script executor to still be running after timeout")
    79  	}
    80  	// No tasklets finished, so no callbacks should have gotten a
    81  	// chance to fire
    82  	select {
    83  	case call := <-tm.calls:
    84  		t.Errorf("expected 0 calls of tasklet, got %v", call)
    85  	default:
    86  		break
    87  	}
    88  }
    89  
    90  // TestTasklet_Exec_Timeout asserts a tasklet script will be killed
    91  // when the timeout is reached.
    92  func TestTasklet_Exec_Timeout(t *testing.T) {
    93  	ci.Parallel(t)
    94  	exec, cancel := newBlockingScriptExec()
    95  	defer cancel()
    96  
    97  	tm := newTaskletMock(exec, testlog.HCLogger(t), time.Hour, time.Second)
    98  
    99  	handle := tm.run()
   100  	defer handle.cancel() // just-in-case cleanup
   101  	<-exec.running        // wait until Exec is called
   102  
   103  	// We should get a timeout
   104  	select {
   105  	case update := <-tm.calls:
   106  		if update.err != context.DeadlineExceeded {
   107  			t.Errorf("expected context.DeadlineExceeed but received %+v", update)
   108  		}
   109  	case <-time.After(3 * time.Second):
   110  		t.Fatalf("timed out waiting for script check to exit")
   111  	}
   112  
   113  	// The underlying ScriptExecutor (newBlockScriptExec) *cannot* be
   114  	// canceled. Only a wrapper around it obeys the context cancelation.
   115  	if atomic.LoadInt32(&exec.exited) == 1 {
   116  		t.Errorf("expected executor to still be running after timeout")
   117  	}
   118  
   119  	// Cancel and watch for exit
   120  	handle.cancel()
   121  	select {
   122  	case <-handle.wait(): // ok!
   123  	case update := <-tm.calls:
   124  		t.Errorf("unexpected extra callback on exit with status=%v", update)
   125  	case <-time.After(3 * time.Second):
   126  		t.Fatalf("timed out waiting for tasklet to exit")
   127  	}
   128  }
   129  
   130  // TestTasklet_Exec_Shutdown asserts a script will be executed once more
   131  // when told to shutdown.
   132  func TestTasklet_Exec_Shutdown(t *testing.T) {
   133  	ci.Parallel(t)
   134  
   135  	exec := newSimpleExec(0, nil)
   136  	shutdown := make(chan struct{})
   137  	tm := newTaskletMock(exec, testlog.HCLogger(t), time.Hour, 3*time.Second)
   138  	tm.shutdownCh = shutdown
   139  	handle := tm.run()
   140  
   141  	defer handle.cancel() // just-in-case cleanup
   142  	close(shutdown)       // tell script to exit
   143  
   144  	select {
   145  	case update := <-tm.calls:
   146  		if update.err != nil {
   147  			t.Errorf("expected clean shutdown but received %q", update.err)
   148  		}
   149  	case <-time.After(3 * time.Second):
   150  		t.Fatalf("timed out waiting for script check to exit")
   151  	}
   152  
   153  	select {
   154  	case <-handle.wait(): // ok
   155  	case <-time.After(3 * time.Second):
   156  		t.Fatalf("timed out waiting for script check to exit")
   157  	}
   158  }
   159  
   160  // test helpers
   161  
   162  type taskletMock struct {
   163  	tasklet
   164  	calls chan execResult
   165  }
   166  
   167  func newTaskletMock(exec interfaces.ScriptExecutor, logger hclog.Logger, interval, timeout time.Duration) *taskletMock {
   168  	tm := &taskletMock{calls: make(chan execResult)}
   169  	tm.exec = exec
   170  	tm.logger = logger
   171  	tm.Interval = interval
   172  	tm.Timeout = timeout
   173  	tm.callback = func(ctx context.Context, params execResult) {
   174  		tm.calls <- params
   175  	}
   176  	return tm
   177  }
   178  
   179  // blockingScriptExec implements ScriptExec by running a subcommand that never
   180  // exits.
   181  type blockingScriptExec struct {
   182  	// pctx is canceled *only* for test cleanup. Just like real
   183  	// ScriptExecutors its Exec method cannot be canceled directly -- only
   184  	// with a timeout.
   185  	pctx context.Context
   186  
   187  	// running is ticked before blocking to allow synchronizing operations
   188  	running chan struct{}
   189  
   190  	// set to 1 with atomics if Exec is called and has exited
   191  	exited int32
   192  }
   193  
   194  // newBlockingScriptExec returns a ScriptExecutor that blocks Exec() until the
   195  // caller recvs on the b.running chan. It also returns a CancelFunc for test
   196  // cleanup only. The runtime cannot cancel ScriptExecutors before their timeout
   197  // expires.
   198  func newBlockingScriptExec() (*blockingScriptExec, context.CancelFunc) {
   199  	ctx, cancel := context.WithCancel(context.Background())
   200  	exec := &blockingScriptExec{
   201  		pctx:    ctx,
   202  		running: make(chan struct{}),
   203  	}
   204  	return exec, cancel
   205  }
   206  
   207  func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) {
   208  	b.running <- struct{}{}
   209  	ctx, cancel := context.WithTimeout(b.pctx, dur)
   210  	defer cancel()
   211  	cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h")
   212  	testtask.SetCmdEnv(cmd)
   213  	err := cmd.Run()
   214  	code := 0
   215  	if exitErr, ok := err.(*exec.ExitError); ok {
   216  		if !exitErr.Success() {
   217  			code = 1
   218  		}
   219  	}
   220  	atomic.StoreInt32(&b.exited, 1)
   221  	return []byte{}, code, err
   222  }
   223  
   224  // sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions
   225  type sleeperExec struct{}
   226  
   227  func (sleeperExec) Exec(time.Duration, string, []string) ([]byte, int, error) {
   228  	time.Sleep(100 * time.Millisecond)
   229  	return []byte{}, 0, nil
   230  }
   231  
   232  // simpleExec is a fake ScriptExecutor that returns whatever is specified.
   233  type simpleExec struct {
   234  	code int
   235  	err  error
   236  }
   237  
   238  func (s simpleExec) Exec(time.Duration, string, []string) ([]byte, int, error) {
   239  	return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err
   240  }
   241  
   242  // newSimpleExec creates a new ScriptExecutor that returns the given code and err.
   243  func newSimpleExec(code int, err error) simpleExec {
   244  	return simpleExec{code: code, err: err}
   245  }
   246  
   247  // scriptedExec is a fake ScriptExecutor with a predetermined sequence
   248  // of results.
   249  type scriptedExec struct {
   250  	fn func() ([]byte, int, error)
   251  }
   252  
   253  // For each call to Exec, scriptedExec returns the next result in its
   254  // sequence of results
   255  func (s scriptedExec) Exec(time.Duration, string, []string) ([]byte, int, error) {
   256  	return s.fn()
   257  }
   258  
   259  func newScriptedExec(results []execResult) scriptedExec {
   260  	index := 0
   261  	s := scriptedExec{}
   262  	// we have to close over the index because the interface we're
   263  	// mocking expects a value and not a pointer, which prevents
   264  	// us from updating the index
   265  	fn := func() ([]byte, int, error) {
   266  		result := results[index]
   267  		// prevents us from iterating off the end of the results
   268  		if index+1 < len(results) {
   269  			index = index + 1
   270  		}
   271  		return result.output, result.code, result.err
   272  	}
   273  	s.fn = fn
   274  	return s
   275  }