github.com/smithx10/nomad@v0.9.1-rc1/command/agent/consul/script_test.go (about) 1 package consul 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "os/exec" 8 "sync/atomic" 9 "testing" 10 "time" 11 12 "github.com/hashicorp/consul/api" 13 "github.com/hashicorp/nomad/helper/testlog" 14 "github.com/hashicorp/nomad/helper/testtask" 15 "github.com/hashicorp/nomad/nomad/structs" 16 ) 17 18 func TestMain(m *testing.M) { 19 if !testtask.Run() { 20 os.Exit(m.Run()) 21 } 22 } 23 24 // blockingScriptExec implements ScriptExec by running a subcommand that never 25 // exits. 26 type blockingScriptExec struct { 27 // pctx is canceled *only* for test cleanup. Just like real 28 // ScriptExecutors its Exec method cannot be canceled directly -- only 29 // with a timeout. 30 pctx context.Context 31 32 // running is ticked before blocking to allow synchronizing operations 33 running chan struct{} 34 35 // set to 1 with atomics if Exec is called and has exited 36 exited int32 37 } 38 39 // newBlockingScriptExec returns a ScriptExecutor that blocks Exec() until the 40 // caller recvs on the b.running chan. It also returns a CancelFunc for test 41 // cleanup only. The runtime cannot cancel ScriptExecutors before their timeout 42 // expires. 43 func newBlockingScriptExec() (*blockingScriptExec, context.CancelFunc) { 44 ctx, cancel := context.WithCancel(context.Background()) 45 exec := &blockingScriptExec{ 46 pctx: ctx, 47 running: make(chan struct{}), 48 } 49 return exec, cancel 50 } 51 52 func (b *blockingScriptExec) Exec(dur time.Duration, _ string, _ []string) ([]byte, int, error) { 53 b.running <- struct{}{} 54 ctx, cancel := context.WithTimeout(b.pctx, dur) 55 defer cancel() 56 cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h") 57 testtask.SetCmdEnv(cmd) 58 err := cmd.Run() 59 code := 0 60 if exitErr, ok := err.(*exec.ExitError); ok { 61 if !exitErr.Success() { 62 code = 1 63 } 64 } 65 atomic.StoreInt32(&b.exited, 1) 66 return []byte{}, code, err 67 } 68 69 // TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits 70 // any running scripts. 71 func TestConsulScript_Exec_Cancel(t *testing.T) { 72 serviceCheck := structs.ServiceCheck{ 73 Name: "sleeper", 74 Interval: time.Hour, 75 Timeout: time.Hour, 76 } 77 exec, cancel := newBlockingScriptExec() 78 defer cancel() 79 80 // pass nil for heartbeater as it shouldn't be called 81 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.HCLogger(t), nil) 82 handle := check.run() 83 84 // wait until Exec is called 85 <-exec.running 86 87 // cancel now that we're blocked in exec 88 handle.cancel() 89 90 select { 91 case <-handle.wait(): 92 case <-time.After(3 * time.Second): 93 t.Fatalf("timed out waiting for script check to exit") 94 } 95 96 // The underlying ScriptExecutor (newBlockScriptExec) *cannot* be 97 // canceled. Only a wrapper around it obeys the context cancelation. 98 if atomic.LoadInt32(&exec.exited) == 1 { 99 t.Errorf("expected script executor to still be running after timeout") 100 } 101 } 102 103 type execStatus struct { 104 checkID string 105 output string 106 status string 107 } 108 109 // fakeHeartbeater implements the heartbeater interface to allow mocking out 110 // Consul in script executor tests. 111 type fakeHeartbeater struct { 112 updates chan execStatus 113 } 114 115 func (f *fakeHeartbeater) UpdateTTL(checkID, output, status string) error { 116 f.updates <- execStatus{checkID: checkID, output: output, status: status} 117 return nil 118 } 119 120 func newFakeHeartbeater() *fakeHeartbeater { 121 return &fakeHeartbeater{updates: make(chan execStatus)} 122 } 123 124 // TestConsulScript_Exec_TimeoutBasic asserts a script will be killed when the 125 // timeout is reached. 126 func TestConsulScript_Exec_TimeoutBasic(t *testing.T) { 127 t.Parallel() 128 129 serviceCheck := structs.ServiceCheck{ 130 Name: "sleeper", 131 Interval: time.Hour, 132 Timeout: time.Second, 133 } 134 135 exec, cancel := newBlockingScriptExec() 136 defer cancel() 137 138 hb := newFakeHeartbeater() 139 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), nil) 140 handle := check.run() 141 defer handle.cancel() // just-in-case cleanup 142 <-exec.running 143 144 // Check for UpdateTTL call 145 select { 146 case update := <-hb.updates: 147 if update.status != api.HealthCritical { 148 t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) 149 } 150 case <-time.After(3 * time.Second): 151 t.Fatalf("timed out waiting for script check to exit") 152 } 153 154 // The underlying ScriptExecutor (newBlockScriptExec) *cannot* be 155 // canceled. Only a wrapper around it obeys the context cancelation. 156 if atomic.LoadInt32(&exec.exited) == 1 { 157 t.Errorf("expected script executor to still be running after timeout") 158 } 159 160 // Cancel and watch for exit 161 handle.cancel() 162 select { 163 case <-handle.wait(): 164 // ok! 165 case update := <-hb.updates: 166 t.Errorf("unexpected UpdateTTL call on exit with status=%q", update) 167 case <-time.After(3 * time.Second): 168 t.Fatalf("timed out waiting for script check to exit") 169 } 170 } 171 172 // sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions 173 type sleeperExec struct{} 174 175 func (sleeperExec) Exec(time.Duration, string, []string) ([]byte, int, error) { 176 time.Sleep(100 * time.Millisecond) 177 return []byte{}, 0, nil 178 } 179 180 // TestConsulScript_Exec_TimeoutCritical asserts a script will be killed when 181 // the timeout is reached and always set a critical status regardless of what 182 // Exec returns. 183 func TestConsulScript_Exec_TimeoutCritical(t *testing.T) { 184 t.Parallel() 185 186 serviceCheck := structs.ServiceCheck{ 187 Name: "sleeper", 188 Interval: time.Hour, 189 Timeout: time.Nanosecond, 190 } 191 hb := newFakeHeartbeater() 192 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, sleeperExec{}, hb, testlog.HCLogger(t), nil) 193 handle := check.run() 194 defer handle.cancel() // just-in-case cleanup 195 196 // Check for UpdateTTL call 197 select { 198 case update := <-hb.updates: 199 if update.status != api.HealthCritical { 200 t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) 201 } 202 if update.output != context.DeadlineExceeded.Error() { 203 t.Errorf("expected output=%q but found: %q", context.DeadlineExceeded.Error(), update.output) 204 } 205 case <-time.After(3 * time.Second): 206 t.Fatalf("timed out waiting for script check to timeout") 207 } 208 } 209 210 // simpleExec is a fake ScriptExecutor that returns whatever is specified. 211 type simpleExec struct { 212 code int 213 err error 214 } 215 216 func (s simpleExec) Exec(time.Duration, string, []string) ([]byte, int, error) { 217 return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err 218 } 219 220 // newSimpleExec creates a new ScriptExecutor that returns the given code and err. 221 func newSimpleExec(code int, err error) simpleExec { 222 return simpleExec{code: code, err: err} 223 } 224 225 // TestConsulScript_Exec_Shutdown asserts a script will be executed once more 226 // when told to shutdown. 227 func TestConsulScript_Exec_Shutdown(t *testing.T) { 228 serviceCheck := structs.ServiceCheck{ 229 Name: "sleeper", 230 Interval: time.Hour, 231 Timeout: 3 * time.Second, 232 } 233 234 hb := newFakeHeartbeater() 235 shutdown := make(chan struct{}) 236 exec := newSimpleExec(0, nil) 237 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), shutdown) 238 handle := check.run() 239 defer handle.cancel() // just-in-case cleanup 240 241 // Tell scriptCheck to exit 242 close(shutdown) 243 244 select { 245 case update := <-hb.updates: 246 if update.status != api.HealthPassing { 247 t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) 248 } 249 case <-time.After(3 * time.Second): 250 t.Fatalf("timed out waiting for script check to exit") 251 } 252 253 select { 254 case <-handle.wait(): 255 // ok! 256 case <-time.After(3 * time.Second): 257 t.Fatalf("timed out waiting for script check to exit") 258 } 259 } 260 261 func TestConsulScript_Exec_Codes(t *testing.T) { 262 run := func(code int, err error, expected string) func(t *testing.T) { 263 return func(t *testing.T) { 264 t.Parallel() 265 serviceCheck := structs.ServiceCheck{ 266 Name: "test", 267 Interval: time.Hour, 268 Timeout: 3 * time.Second, 269 } 270 271 hb := newFakeHeartbeater() 272 shutdown := make(chan struct{}) 273 exec := newSimpleExec(code, err) 274 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.HCLogger(t), shutdown) 275 handle := check.run() 276 defer handle.cancel() 277 278 select { 279 case update := <-hb.updates: 280 if update.status != expected { 281 t.Errorf("expected %q but received %q", expected, update) 282 } 283 // assert output is being reported 284 expectedOutput := fmt.Sprintf("code=%d err=%v", code, err) 285 if err != nil { 286 expectedOutput = err.Error() 287 } 288 if update.output != expectedOutput { 289 t.Errorf("expected output=%q but found: %q", expectedOutput, update.output) 290 } 291 case <-time.After(3 * time.Second): 292 t.Fatalf("timed out waiting for script check to exec") 293 } 294 } 295 } 296 297 // Test exit codes with errors 298 t.Run("Passing", run(0, nil, api.HealthPassing)) 299 t.Run("Warning", run(1, nil, api.HealthWarning)) 300 t.Run("Critical-2", run(2, nil, api.HealthCritical)) 301 t.Run("Critical-9000", run(9000, nil, api.HealthCritical)) 302 303 // Errors should always cause Critical status 304 err := fmt.Errorf("test error") 305 t.Run("Error-0", run(0, err, api.HealthCritical)) 306 t.Run("Error-1", run(1, err, api.HealthCritical)) 307 t.Run("Error-2", run(2, err, api.HealthCritical)) 308 t.Run("Error-9000", run(9000, err, api.HealthCritical)) 309 }