github.com/smintz/nomad@v0.8.3/command/agent/consul/script_test.go (about) 1 package consul 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "os/exec" 8 "testing" 9 "time" 10 11 "github.com/hashicorp/consul/api" 12 "github.com/hashicorp/nomad/helper/testlog" 13 "github.com/hashicorp/nomad/helper/testtask" 14 "github.com/hashicorp/nomad/nomad/structs" 15 ) 16 17 func TestMain(m *testing.M) { 18 if !testtask.Run() { 19 os.Exit(m.Run()) 20 } 21 } 22 23 // blockingScriptExec implements ScriptExec by running a subcommand that never 24 // exits. 25 type blockingScriptExec struct { 26 // running is ticked before blocking to allow synchronizing operations 27 running chan struct{} 28 29 // set to true if Exec is called and has exited 30 exited bool 31 } 32 33 func newBlockingScriptExec() *blockingScriptExec { 34 return &blockingScriptExec{running: make(chan struct{})} 35 } 36 37 func (b *blockingScriptExec) Exec(ctx context.Context, _ string, _ []string) ([]byte, int, error) { 38 b.running <- struct{}{} 39 cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h") 40 testtask.SetCmdEnv(cmd) 41 err := cmd.Run() 42 code := 0 43 if exitErr, ok := err.(*exec.ExitError); ok { 44 if !exitErr.Success() { 45 code = 1 46 } 47 } 48 b.exited = true 49 return []byte{}, code, err 50 } 51 52 // TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits 53 // any running scripts. 54 func TestConsulScript_Exec_Cancel(t *testing.T) { 55 serviceCheck := structs.ServiceCheck{ 56 Name: "sleeper", 57 Interval: time.Hour, 58 Timeout: time.Hour, 59 } 60 exec := newBlockingScriptExec() 61 62 // pass nil for heartbeater as it shouldn't be called 63 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testlog.Logger(t), nil) 64 handle := check.run() 65 66 // wait until Exec is called 67 <-exec.running 68 69 // cancel now that we're blocked in exec 70 handle.cancel() 71 72 select { 73 case <-handle.wait(): 74 case <-time.After(3 * time.Second): 75 t.Fatalf("timed out waiting for script check to exit") 76 } 77 if !exec.exited { 78 t.Errorf("expected script executor to run and exit but it has not") 79 } 80 } 81 82 type execStatus struct { 83 checkID string 84 output string 85 status string 86 } 87 88 // fakeHeartbeater implements the heartbeater interface to allow mocking out 89 // Consul in script executor tests. 90 type fakeHeartbeater struct { 91 updates chan execStatus 92 } 93 94 func (f *fakeHeartbeater) UpdateTTL(checkID, output, status string) error { 95 f.updates <- execStatus{checkID: checkID, output: output, status: status} 96 return nil 97 } 98 99 func newFakeHeartbeater() *fakeHeartbeater { 100 return &fakeHeartbeater{updates: make(chan execStatus)} 101 } 102 103 // TestConsulScript_Exec_Timeout asserts a script will be killed when the 104 // timeout is reached. 105 func TestConsulScript_Exec_Timeout(t *testing.T) { 106 t.Parallel() // run the slow tests in parallel 107 serviceCheck := structs.ServiceCheck{ 108 Name: "sleeper", 109 Interval: time.Hour, 110 Timeout: time.Second, 111 } 112 exec := newBlockingScriptExec() 113 114 hb := newFakeHeartbeater() 115 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), nil) 116 handle := check.run() 117 defer handle.cancel() // just-in-case cleanup 118 <-exec.running 119 120 // Check for UpdateTTL call 121 select { 122 case update := <-hb.updates: 123 if update.status != api.HealthCritical { 124 t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) 125 } 126 case <-time.After(3 * time.Second): 127 t.Fatalf("timed out waiting for script check to exit") 128 } 129 if !exec.exited { 130 t.Errorf("expected script executor to run and exit but it has not") 131 } 132 133 // Cancel and watch for exit 134 handle.cancel() 135 select { 136 case <-handle.wait(): 137 // ok! 138 case update := <-hb.updates: 139 t.Errorf("unexpected UpdateTTL call on exit with status=%q", update) 140 case <-time.After(3 * time.Second): 141 t.Fatalf("timed out waiting for script check to exit") 142 } 143 } 144 145 // sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions 146 type sleeperExec struct{} 147 148 func (sleeperExec) Exec(context.Context, string, []string) ([]byte, int, error) { 149 time.Sleep(100 * time.Millisecond) 150 return []byte{}, 0, nil 151 } 152 153 // TestConsulScript_Exec_TimeoutCritical asserts a script will be killed when 154 // the timeout is reached and always set a critical status regardless of what 155 // Exec returns. 156 func TestConsulScript_Exec_TimeoutCritical(t *testing.T) { 157 t.Parallel() // run the slow tests in parallel 158 serviceCheck := structs.ServiceCheck{ 159 Name: "sleeper", 160 Interval: time.Hour, 161 Timeout: time.Nanosecond, 162 } 163 hb := newFakeHeartbeater() 164 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, sleeperExec{}, hb, testlog.Logger(t), nil) 165 handle := check.run() 166 defer handle.cancel() // just-in-case cleanup 167 168 // Check for UpdateTTL call 169 select { 170 case update := <-hb.updates: 171 if update.status != api.HealthCritical { 172 t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) 173 } 174 if update.output != context.DeadlineExceeded.Error() { 175 t.Errorf("expected output=%q but found: %q", context.DeadlineExceeded.Error(), update.output) 176 } 177 case <-time.After(3 * time.Second): 178 t.Fatalf("timed out waiting for script check to timeout") 179 } 180 } 181 182 // simpleExec is a fake ScriptExecutor that returns whatever is specified. 183 type simpleExec struct { 184 code int 185 err error 186 } 187 188 func (s simpleExec) Exec(context.Context, string, []string) ([]byte, int, error) { 189 return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err 190 } 191 192 // newSimpleExec creates a new ScriptExecutor that returns the given code and err. 193 func newSimpleExec(code int, err error) simpleExec { 194 return simpleExec{code: code, err: err} 195 } 196 197 // TestConsulScript_Exec_Shutdown asserts a script will be executed once more 198 // when told to shutdown. 199 func TestConsulScript_Exec_Shutdown(t *testing.T) { 200 serviceCheck := structs.ServiceCheck{ 201 Name: "sleeper", 202 Interval: time.Hour, 203 Timeout: 3 * time.Second, 204 } 205 206 hb := newFakeHeartbeater() 207 shutdown := make(chan struct{}) 208 exec := newSimpleExec(0, nil) 209 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), shutdown) 210 handle := check.run() 211 defer handle.cancel() // just-in-case cleanup 212 213 // Tell scriptCheck to exit 214 close(shutdown) 215 216 select { 217 case update := <-hb.updates: 218 if update.status != api.HealthPassing { 219 t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) 220 } 221 case <-time.After(3 * time.Second): 222 t.Fatalf("timed out waiting for script check to exit") 223 } 224 225 select { 226 case <-handle.wait(): 227 // ok! 228 case <-time.After(3 * time.Second): 229 t.Fatalf("timed out waiting for script check to exit") 230 } 231 } 232 233 func TestConsulScript_Exec_Codes(t *testing.T) { 234 run := func(code int, err error, expected string) func(t *testing.T) { 235 return func(t *testing.T) { 236 t.Parallel() 237 serviceCheck := structs.ServiceCheck{ 238 Name: "test", 239 Interval: time.Hour, 240 Timeout: 3 * time.Second, 241 } 242 243 hb := newFakeHeartbeater() 244 shutdown := make(chan struct{}) 245 exec := newSimpleExec(code, err) 246 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testlog.Logger(t), shutdown) 247 handle := check.run() 248 defer handle.cancel() 249 250 select { 251 case update := <-hb.updates: 252 if update.status != expected { 253 t.Errorf("expected %q but received %q", expected, update) 254 } 255 // assert output is being reported 256 expectedOutput := fmt.Sprintf("code=%d err=%v", code, err) 257 if err != nil { 258 expectedOutput = err.Error() 259 } 260 if update.output != expectedOutput { 261 t.Errorf("expected output=%q but found: %q", expectedOutput, update.output) 262 } 263 case <-time.After(3 * time.Second): 264 t.Fatalf("timed out waiting for script check to exec") 265 } 266 } 267 } 268 269 // Test exit codes with errors 270 t.Run("Passing", run(0, nil, api.HealthPassing)) 271 t.Run("Warning", run(1, nil, api.HealthWarning)) 272 t.Run("Critical-2", run(2, nil, api.HealthCritical)) 273 t.Run("Critical-9000", run(9000, nil, api.HealthCritical)) 274 275 // Errors should always cause Critical status 276 err := fmt.Errorf("test error") 277 t.Run("Error-0", run(0, err, api.HealthCritical)) 278 t.Run("Error-1", run(1, err, api.HealthCritical)) 279 t.Run("Error-2", run(2, err, api.HealthCritical)) 280 t.Run("Error-9000", run(9000, err, api.HealthCritical)) 281 }