github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/taskrunner/tasklet_test.go (about) 1 package taskrunner 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "os/exec" 8 "sync/atomic" 9 "testing" 10 "time" 11 12 hclog "github.com/hashicorp/go-hclog" 13 "github.com/hashicorp/nomad/ci" 14 "github.com/hashicorp/nomad/client/allocrunner/taskrunner/interfaces" 15 "github.com/hashicorp/nomad/helper/testlog" 16 "github.com/hashicorp/nomad/helper/testtask" 17 "github.com/stretchr/testify/assert" 18 ) 19 20 func TestMain(m *testing.M) { 21 if !testtask.Run() { 22 os.Exit(m.Run()) 23 } 24 } 25 26 func TestTasklet_Exec_HappyPath(t *testing.T) { 27 ci.Parallel(t) 28 29 results := []execResult{ 30 {[]byte("output"), 0, nil}, 31 {[]byte("output"), 1, nil}, 32 {[]byte("output"), 0, context.DeadlineExceeded}, 33 {[]byte("<ignored output>"), 2, fmt.Errorf("some error")}, 34 {[]byte("error9000"), 9000, nil}, 35 } 36 exec := newScriptedExec(results) 37 tm := newTaskletMock(exec, testlog.HCLogger(t), time.Nanosecond, 3*time.Second) 38 39 handle := tm.run() 40 defer handle.cancel() // just-in-case cleanup 41 42 deadline := time.After(3 * time.Second) 43 for i := 0; i <= 4; i++ { 44 select { 45 case result := <-tm.calls: 46 // for the happy path without cancelations or shutdowns, we expect 47 // to get the results passed to the callback in order and without 48 // modification 49 assert.Equal(t, result, results[i]) 50 case <-deadline: 51 t.Fatalf("timed out waiting for all script checks to finish") 52 } 53 } 54 } 55 56 // TestTasklet_Exec_Cancel asserts cancelling a tasklet short-circuits 57 // any running executions the tasklet 58 func TestTasklet_Exec_Cancel(t *testing.T) { 59 ci.Parallel(t) 60 61 exec, cancel := newBlockingScriptExec() 62 defer cancel() 63 tm := newTaskletMock(exec, testlog.HCLogger(t), time.Hour, time.Hour) 64 65 handle := tm.run() 66 <-exec.running // wait until Exec is called 67 handle.cancel() // cancel now that we're blocked in exec 68 69 select { 70 case <-handle.wait(): 71 case <-time.After(3 * time.Second): 72 t.Fatalf("timed out waiting for tasklet check to exit") 73 } 74 75 // The underlying ScriptExecutor (newBlockScriptExec) *cannot* be 76 // canceled. Only a wrapper around it obeys the context cancelation. 77 if atomic.LoadInt32(&exec.exited) == 1 { 78 t.Errorf("expected script executor to still be running after timeout") 79 } 80 // No tasklets finished, so no callbacks should have gotten a 81 // chance to fire 82 select { 83 case call := <-tm.calls: 84 t.Errorf("expected 0 calls of tasklet, got %v", call) 85 default: 86 break 87 } 88 } 89 90 // TestTasklet_Exec_Timeout asserts a tasklet script will be killed 91 // when the timeout is reached. 92 func TestTasklet_Exec_Timeout(t *testing.T) { 93 ci.Parallel(t) 94 exec, cancel := newBlockingScriptExec() 95 defer cancel() 96 97 tm := newTaskletMock(exec, testlog.HCLogger(t), time.Hour, time.Second) 98 99 handle := tm.run() 100 defer handle.cancel() // just-in-case cleanup 101 <-exec.running // wait until Exec is called 102 103 // We should get a timeout 104 select { 105 case update := <-tm.calls: 106 if update.err != context.DeadlineExceeded { 107 t.Errorf("expected context.DeadlineExceeed but received %+v", update) 108 } 109 case <-time.After(3 * time.Second): 110 t.Fatalf("timed out waiting for script check to exit") 111 } 112 113 // The underlying ScriptExecutor (newBlockScriptExec) *cannot* be 114 // canceled. Only a wrapper around it obeys the context cancelation. 115 if atomic.LoadInt32(&exec.exited) == 1 { 116 t.Errorf("expected executor to still be running after timeout") 117 } 118 119 // Cancel and watch for exit 120 handle.cancel() 121 select { 122 case <-handle.wait(): // ok! 123 case update := <-tm.calls: 124 t.Errorf("unexpected extra callback on exit with status=%v", update) 125 case <-time.After(3 * time.Second): 126 t.Fatalf("timed out waiting for tasklet to exit") 127 } 128 } 129 130 // TestTasklet_Exec_Shutdown asserts a script will be executed once more 131 // when told to shutdown. 132 func TestTasklet_Exec_Shutdown(t *testing.T) { 133 ci.Parallel(t) 134 135 exec := newSimpleExec(0, nil) 136 shutdown := make(chan struct{}) 137 tm := newTaskletMock(exec, testlog.HCLogger(t), time.Hour, 3*time.Second) 138 tm.shutdownCh = shutdown 139 handle := tm.run() 140 141 defer handle.cancel() // just-in-case cleanup 142 close(shutdown) // tell script to exit 143 144 select { 145 case update := <-tm.calls: 146 if update.err != nil { 147 t.Errorf("expected clean shutdown but received %q", update.err) 148 } 149 case <-time.After(3 * time.Second): 150 t.Fatalf("timed out waiting for script check to exit") 151 } 152 153 select { 154 case <-handle.wait(): // ok 155 case <-time.After(3 * time.Second): 156 t.Fatalf("timed out waiting for script check to exit") 157 } 158 } 159 160 // test helpers 161 162 type taskletMock struct { 163 tasklet 164 calls chan execResult 165 } 166 167 func newTaskletMock(exec interfaces.ScriptExecutor, logger hclog.Logger, interval, timeout time.Duration) *taskletMock { 168 tm := &taskletMock{calls: make(chan execResult)} 169 tm.exec = exec 170 tm.logger = logger 171 tm.Interval = interval 172 tm.Timeout = timeout 173 tm.callback = func(ctx context.Context, params execResult) { 174 tm.calls <- params 175 } 176 return tm 177 } 178 179 // blockingScriptExec implements ScriptExec by running a subcommand that never 180 // exits. 181 type blockingScriptExec struct { 182 // pctx is canceled *only* for test cleanup. Just like real 183 // ScriptExecutors its Exec method cannot be canceled directly -- only 184 // with a timeout. 185 pctx context.Context 186 187 // running is ticked before blocking to allow synchronizing operations 188 running chan struct{} 189 190 // set to 1 with atomics if Exec is called and has exited 191 exited int32 192 } 193 194 // newBlockingScriptExec returns a ScriptExecutor that blocks Exec() until the 195 // caller recvs on the b.running chan. It also returns a CancelFunc for test 196 // cleanup only. The runtime cannot cancel ScriptExecutors before their timeout 197 // expires. 198 func newBlockingScriptExec() (*blockingScriptExec, context.CancelFunc) { 199 ctx, cancel := context.WithCancel(context.Background()) 200 exec := &blockingScriptExec{ 201 pctx: ctx, 202 running: make(chan struct{}), 203 } 204 return exec, cancel 205 } 206 207 func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) { 208 b.running <- struct{}{} 209 ctx, cancel := context.WithTimeout(b.pctx, dur) 210 defer cancel() 211 cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h") 212 testtask.SetCmdEnv(cmd) 213 err := cmd.Run() 214 code := 0 215 if exitErr, ok := err.(*exec.ExitError); ok { 216 if !exitErr.Success() { 217 code = 1 218 } 219 } 220 atomic.StoreInt32(&b.exited, 1) 221 return []byte{}, code, err 222 } 223 224 // sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions 225 type sleeperExec struct{} 226 227 func (sleeperExec) Exec(time.Duration, string, []string) ([]byte, int, error) { 228 time.Sleep(100 * time.Millisecond) 229 return []byte{}, 0, nil 230 } 231 232 // simpleExec is a fake ScriptExecutor that returns whatever is specified. 233 type simpleExec struct { 234 code int 235 err error 236 } 237 238 func (s simpleExec) Exec(time.Duration, string, []string) ([]byte, int, error) { 239 return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err 240 } 241 242 // newSimpleExec creates a new ScriptExecutor that returns the given code and err. 243 func newSimpleExec(code int, err error) simpleExec { 244 return simpleExec{code: code, err: err} 245 } 246 247 // scriptedExec is a fake ScriptExecutor with a predetermined sequence 248 // of results. 249 type scriptedExec struct { 250 fn func() ([]byte, int, error) 251 } 252 253 // For each call to Exec, scriptedExec returns the next result in its 254 // sequence of results 255 func (s scriptedExec) Exec(time.Duration, string, []string) ([]byte, int, error) { 256 return s.fn() 257 } 258 259 func newScriptedExec(results []execResult) scriptedExec { 260 index := 0 261 s := scriptedExec{} 262 // we have to close over the index because the interface we're 263 // mocking expects a value and not a pointer, which prevents 264 // us from updating the index 265 fn := func() ([]byte, int, error) { 266 result := results[index] 267 // prevents us from iterating off the end of the results 268 if index+1 < len(results) { 269 index = index + 1 270 } 271 return result.output, result.code, result.err 272 } 273 s.fn = fn 274 return s 275 }