github.com/djenriquez/nomad-1@v0.8.1/command/agent/consul/script_test.go (about) 1 package consul 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "os/exec" 8 "testing" 9 "time" 10 11 "github.com/hashicorp/consul/api" 12 "github.com/hashicorp/nomad/helper/testtask" 13 "github.com/hashicorp/nomad/nomad/structs" 14 ) 15 16 func TestMain(m *testing.M) { 17 if !testtask.Run() { 18 os.Exit(m.Run()) 19 } 20 } 21 22 // blockingScriptExec implements ScriptExec by running a subcommand that never 23 // exits. 24 type blockingScriptExec struct { 25 // running is ticked before blocking to allow synchronizing operations 26 running chan struct{} 27 28 // set to true if Exec is called and has exited 29 exited bool 30 } 31 32 func newBlockingScriptExec() *blockingScriptExec { 33 return &blockingScriptExec{running: make(chan struct{})} 34 } 35 36 func (b *blockingScriptExec) Exec(ctx context.Context, _ string, _ []string) ([]byte, int, error) { 37 b.running <- struct{}{} 38 cmd := exec.CommandContext(ctx, testtask.Path(), "sleep", "9000h") 39 testtask.SetCmdEnv(cmd) 40 err := cmd.Run() 41 code := 0 42 if exitErr, ok := err.(*exec.ExitError); ok { 43 if !exitErr.Success() { 44 code = 1 45 } 46 } 47 b.exited = true 48 return []byte{}, code, err 49 } 50 51 // TestConsulScript_Exec_Cancel asserts cancelling a script check shortcircuits 52 // any running scripts. 53 func TestConsulScript_Exec_Cancel(t *testing.T) { 54 serviceCheck := structs.ServiceCheck{ 55 Name: "sleeper", 56 Interval: time.Hour, 57 Timeout: time.Hour, 58 } 59 exec := newBlockingScriptExec() 60 61 // pass nil for heartbeater as it shouldn't be called 62 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, nil, testLogger(), nil) 63 handle := check.run() 64 65 // wait until Exec is called 66 <-exec.running 67 68 // cancel now that we're blocked in exec 69 handle.cancel() 70 71 select { 72 case <-handle.wait(): 73 case <-time.After(3 * time.Second): 74 t.Fatalf("timed out waiting for script check to exit") 75 } 76 if !exec.exited { 77 t.Errorf("expected script executor to run and exit but it has not") 78 } 79 } 80 81 type execStatus struct { 82 checkID string 83 output string 84 status string 85 } 86 87 // fakeHeartbeater implements the heartbeater interface to allow mocking out 88 // Consul in script executor tests. 89 type fakeHeartbeater struct { 90 updates chan execStatus 91 } 92 93 func (f *fakeHeartbeater) UpdateTTL(checkID, output, status string) error { 94 f.updates <- execStatus{checkID: checkID, output: output, status: status} 95 return nil 96 } 97 98 func newFakeHeartbeater() *fakeHeartbeater { 99 return &fakeHeartbeater{updates: make(chan execStatus)} 100 } 101 102 // TestConsulScript_Exec_Timeout asserts a script will be killed when the 103 // timeout is reached. 104 func TestConsulScript_Exec_Timeout(t *testing.T) { 105 t.Parallel() // run the slow tests in parallel 106 serviceCheck := structs.ServiceCheck{ 107 Name: "sleeper", 108 Interval: time.Hour, 109 Timeout: time.Second, 110 } 111 exec := newBlockingScriptExec() 112 113 hb := newFakeHeartbeater() 114 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testLogger(), nil) 115 handle := check.run() 116 defer handle.cancel() // just-in-case cleanup 117 <-exec.running 118 119 // Check for UpdateTTL call 120 select { 121 case update := <-hb.updates: 122 if update.status != api.HealthCritical { 123 t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) 124 } 125 case <-time.After(3 * time.Second): 126 t.Fatalf("timed out waiting for script check to exit") 127 } 128 if !exec.exited { 129 t.Errorf("expected script executor to run and exit but it has not") 130 } 131 132 // Cancel and watch for exit 133 handle.cancel() 134 select { 135 case <-handle.wait(): 136 // ok! 137 case update := <-hb.updates: 138 t.Errorf("unexpected UpdateTTL call on exit with status=%q", update) 139 case <-time.After(3 * time.Second): 140 t.Fatalf("timed out waiting for script check to exit") 141 } 142 } 143 144 // sleeperExec sleeps for 100ms but returns successfully to allow testing timeout conditions 145 type sleeperExec struct{} 146 147 func (sleeperExec) Exec(context.Context, string, []string) ([]byte, int, error) { 148 time.Sleep(100 * time.Millisecond) 149 return []byte{}, 0, nil 150 } 151 152 // TestConsulScript_Exec_TimeoutCritical asserts a script will be killed when 153 // the timeout is reached and always set a critical status regardless of what 154 // Exec returns. 155 func TestConsulScript_Exec_TimeoutCritical(t *testing.T) { 156 t.Parallel() // run the slow tests in parallel 157 serviceCheck := structs.ServiceCheck{ 158 Name: "sleeper", 159 Interval: time.Hour, 160 Timeout: time.Nanosecond, 161 } 162 hb := newFakeHeartbeater() 163 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, sleeperExec{}, hb, testLogger(), nil) 164 handle := check.run() 165 defer handle.cancel() // just-in-case cleanup 166 167 // Check for UpdateTTL call 168 select { 169 case update := <-hb.updates: 170 if update.status != api.HealthCritical { 171 t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) 172 } 173 if update.output != context.DeadlineExceeded.Error() { 174 t.Errorf("expected output=%q but found: %q", context.DeadlineExceeded.Error(), update.output) 175 } 176 case <-time.After(3 * time.Second): 177 t.Fatalf("timed out waiting for script check to timeout") 178 } 179 } 180 181 // simpleExec is a fake ScriptExecutor that returns whatever is specified. 182 type simpleExec struct { 183 code int 184 err error 185 } 186 187 func (s simpleExec) Exec(context.Context, string, []string) ([]byte, int, error) { 188 return []byte(fmt.Sprintf("code=%d err=%v", s.code, s.err)), s.code, s.err 189 } 190 191 // newSimpleExec creates a new ScriptExecutor that returns the given code and err. 192 func newSimpleExec(code int, err error) simpleExec { 193 return simpleExec{code: code, err: err} 194 } 195 196 // TestConsulScript_Exec_Shutdown asserts a script will be executed once more 197 // when told to shutdown. 198 func TestConsulScript_Exec_Shutdown(t *testing.T) { 199 serviceCheck := structs.ServiceCheck{ 200 Name: "sleeper", 201 Interval: time.Hour, 202 Timeout: 3 * time.Second, 203 } 204 205 hb := newFakeHeartbeater() 206 shutdown := make(chan struct{}) 207 exec := newSimpleExec(0, nil) 208 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testLogger(), shutdown) 209 handle := check.run() 210 defer handle.cancel() // just-in-case cleanup 211 212 // Tell scriptCheck to exit 213 close(shutdown) 214 215 select { 216 case update := <-hb.updates: 217 if update.status != api.HealthPassing { 218 t.Errorf("expected %q due to timeout but received %q", api.HealthCritical, update) 219 } 220 case <-time.After(3 * time.Second): 221 t.Fatalf("timed out waiting for script check to exit") 222 } 223 224 select { 225 case <-handle.wait(): 226 // ok! 227 case <-time.After(3 * time.Second): 228 t.Fatalf("timed out waiting for script check to exit") 229 } 230 } 231 232 func TestConsulScript_Exec_Codes(t *testing.T) { 233 run := func(code int, err error, expected string) func(t *testing.T) { 234 return func(t *testing.T) { 235 t.Parallel() 236 serviceCheck := structs.ServiceCheck{ 237 Name: "test", 238 Interval: time.Hour, 239 Timeout: 3 * time.Second, 240 } 241 242 hb := newFakeHeartbeater() 243 shutdown := make(chan struct{}) 244 exec := newSimpleExec(code, err) 245 check := newScriptCheck("allocid", "testtask", "checkid", &serviceCheck, exec, hb, testLogger(), shutdown) 246 handle := check.run() 247 defer handle.cancel() 248 249 select { 250 case update := <-hb.updates: 251 if update.status != expected { 252 t.Errorf("expected %q but received %q", expected, update) 253 } 254 // assert output is being reported 255 expectedOutput := fmt.Sprintf("code=%d err=%v", code, err) 256 if err != nil { 257 expectedOutput = err.Error() 258 } 259 if update.output != expectedOutput { 260 t.Errorf("expected output=%q but found: %q", expectedOutput, update.output) 261 } 262 case <-time.After(3 * time.Second): 263 t.Fatalf("timed out waiting for script check to exec") 264 } 265 } 266 } 267 268 // Test exit codes with errors 269 t.Run("Passing", run(0, nil, api.HealthPassing)) 270 t.Run("Warning", run(1, nil, api.HealthWarning)) 271 t.Run("Critical-2", run(2, nil, api.HealthCritical)) 272 t.Run("Critical-9000", run(9000, nil, api.HealthCritical)) 273 274 // Errors should always cause Critical status 275 err := fmt.Errorf("test error") 276 t.Run("Error-0", run(0, err, api.HealthCritical)) 277 t.Run("Error-1", run(1, err, api.HealthCritical)) 278 t.Run("Error-2", run(2, err, api.HealthCritical)) 279 t.Run("Error-9000", run(9000, err, api.HealthCritical)) 280 }