// github.com/djenriquez/nomad-1@v0.8.1/command/agent/consul/check_watcher_test.go

package consul

import (
	"context"
	"fmt"
	"testing"
	"time"

	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/nomad/structs"
)

// checkRestartRecord is used by a fakeCheckRestarter to record when restarts
// occur due to a watched check.
type checkRestartRecord struct {
	// timestamp is the wall-clock time at which Restart was invoked.
	timestamp time.Time
	// source, reason and failure are the arguments Restart was called with.
	source  string
	reason  string
	failure bool
}

// fakeCheckRestarter is a test implementation of TaskRestarter.
type fakeCheckRestarter struct {
	// restarts is a slice of all of the restarts triggered by the checkWatcher
	restarts []checkRestartRecord

	// need the checkWatcher to re-Watch restarted tasks like TaskRunner
	watcher *checkWatcher

	// check to re-Watch on restarts
	check *structs.ServiceCheck
	// allocID, taskName and checkName are passed back to watcher.Watch
	// together with check whenever Restart is called.
	allocID   string
	taskName  string
	checkName string
}

// newFakeCheckRestarter creates a new TaskRestarter. It needs all of the
// parameters checkWatcher.Watch expects, because the fake hands them back to
// the watcher when it is restarted.
func newFakeCheckRestarter(w *checkWatcher, allocID, taskName, checkName string, c *structs.ServiceCheck) *fakeCheckRestarter {
	return &fakeCheckRestarter{
		watcher:   w,
		check:     c,
		allocID:   allocID,
		taskName:  taskName,
		checkName: checkName,
	}
}

// Restart implements part of the TaskRestarter interface needed for check
// watching and is normally fulfilled by a TaskRunner.
//
// Restarts are recorded in the []restarts field and re-Watch the check.
53 func (c *fakeCheckRestarter) Restart(source, reason string, failure bool) { 54 c.restarts = append(c.restarts, checkRestartRecord{time.Now(), source, reason, failure}) 55 56 // Re-Watch the check just like TaskRunner 57 c.watcher.Watch(c.allocID, c.taskName, c.checkName, c.check, c) 58 } 59 60 // String for debugging 61 func (c *fakeCheckRestarter) String() string { 62 s := fmt.Sprintf("%s %s %s restarts:\n", c.allocID, c.taskName, c.checkName) 63 for _, r := range c.restarts { 64 s += fmt.Sprintf("%s - %s: %s (failure: %t)\n", r.timestamp, r.source, r.reason, r.failure) 65 } 66 return s 67 } 68 69 // checkResponse is a response returned by the fakeChecksAPI after the given 70 // time. 71 type checkResponse struct { 72 at time.Time 73 id string 74 status string 75 } 76 77 // fakeChecksAPI implements the Checks() method for testing Consul. 78 type fakeChecksAPI struct { 79 // responses is a map of check ids to their status at a particular 80 // time. checkResponses must be in chronological order. 81 responses map[string][]checkResponse 82 } 83 84 func newFakeChecksAPI() *fakeChecksAPI { 85 return &fakeChecksAPI{responses: make(map[string][]checkResponse)} 86 } 87 88 // add a new check status to Consul at the given time. 89 func (c *fakeChecksAPI) add(id, status string, at time.Time) { 90 c.responses[id] = append(c.responses[id], checkResponse{at, id, status}) 91 } 92 93 func (c *fakeChecksAPI) Checks() (map[string]*api.AgentCheck, error) { 94 now := time.Now() 95 result := make(map[string]*api.AgentCheck, len(c.responses)) 96 97 // Use the latest response for each check 98 for k, vs := range c.responses { 99 for _, v := range vs { 100 if v.at.After(now) { 101 break 102 } 103 result[k] = &api.AgentCheck{ 104 CheckID: k, 105 Name: k, 106 Status: v.status, 107 } 108 } 109 } 110 111 return result, nil 112 } 113 114 // testWatcherSetup sets up a fakeChecksAPI and a real checkWatcher with a test 115 // logger and faster poll frequency. 
116 func testWatcherSetup() (*fakeChecksAPI, *checkWatcher) { 117 fakeAPI := newFakeChecksAPI() 118 cw := newCheckWatcher(testLogger(), fakeAPI) 119 cw.pollFreq = 10 * time.Millisecond 120 return fakeAPI, cw 121 } 122 123 func testCheck() *structs.ServiceCheck { 124 return &structs.ServiceCheck{ 125 Name: "testcheck", 126 Interval: 100 * time.Millisecond, 127 Timeout: 100 * time.Millisecond, 128 CheckRestart: &structs.CheckRestart{ 129 Limit: 3, 130 Grace: 100 * time.Millisecond, 131 IgnoreWarnings: false, 132 }, 133 } 134 } 135 136 // TestCheckWatcher_Skip asserts unwatched checks are ignored. 137 func TestCheckWatcher_Skip(t *testing.T) { 138 t.Parallel() 139 140 // Create a check with restarting disabled 141 check := testCheck() 142 check.CheckRestart = nil 143 144 cw := newCheckWatcher(testLogger(), newFakeChecksAPI()) 145 restarter1 := newFakeCheckRestarter(cw, "testalloc1", "testtask1", "testcheck1", check) 146 cw.Watch("testalloc1", "testtask1", "testcheck1", check, restarter1) 147 148 // Check should have been dropped as it's not watched 149 if n := len(cw.checkUpdateCh); n != 0 { 150 t.Fatalf("expected 0 checks to be enqueued for watching but found %d", n) 151 } 152 } 153 154 // TestCheckWatcher_Healthy asserts healthy tasks are not restarted. 
155 func TestCheckWatcher_Healthy(t *testing.T) { 156 t.Parallel() 157 158 fakeAPI, cw := testWatcherSetup() 159 160 check1 := testCheck() 161 restarter1 := newFakeCheckRestarter(cw, "testalloc1", "testtask1", "testcheck1", check1) 162 cw.Watch("testalloc1", "testtask1", "testcheck1", check1, restarter1) 163 164 check2 := testCheck() 165 check2.CheckRestart.Limit = 1 166 check2.CheckRestart.Grace = 0 167 restarter2 := newFakeCheckRestarter(cw, "testalloc2", "testtask2", "testcheck2", check2) 168 cw.Watch("testalloc2", "testtask2", "testcheck2", check2, restarter2) 169 170 // Make both checks healthy from the beginning 171 fakeAPI.add("testcheck1", "passing", time.Time{}) 172 fakeAPI.add("testcheck2", "passing", time.Time{}) 173 174 // Run 175 ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) 176 defer cancel() 177 cw.Run(ctx) 178 179 // Ensure restart was never called 180 if n := len(restarter1.restarts); n > 0 { 181 t.Errorf("expected check 1 to not be restarted but found %d:\n%s", n, restarter1) 182 } 183 if n := len(restarter2.restarts); n > 0 { 184 t.Errorf("expected check 2 to not be restarted but found %d:\n%s", n, restarter2) 185 } 186 } 187 188 // TestCheckWatcher_HealthyWarning asserts checks in warning with 189 // ignore_warnings=true do not restart tasks. 
190 func TestCheckWatcher_HealthyWarning(t *testing.T) { 191 t.Parallel() 192 193 fakeAPI, cw := testWatcherSetup() 194 195 check1 := testCheck() 196 check1.CheckRestart.Limit = 1 197 check1.CheckRestart.Grace = 0 198 check1.CheckRestart.IgnoreWarnings = true 199 restarter1 := newFakeCheckRestarter(cw, "testalloc1", "testtask1", "testcheck1", check1) 200 cw.Watch("testalloc1", "testtask1", "testcheck1", check1, restarter1) 201 202 // Check is always in warning but that's ok 203 fakeAPI.add("testcheck1", "warning", time.Time{}) 204 205 // Run 206 ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) 207 defer cancel() 208 cw.Run(ctx) 209 210 // Ensure restart was never called on check 1 211 if n := len(restarter1.restarts); n > 0 { 212 t.Errorf("expected check 1 to not be restarted but found %d", n) 213 } 214 } 215 216 // TestCheckWatcher_Flapping asserts checks that flap from healthy to unhealthy 217 // before the unhealthy limit is reached do not restart tasks. 
218 func TestCheckWatcher_Flapping(t *testing.T) { 219 t.Parallel() 220 221 fakeAPI, cw := testWatcherSetup() 222 223 check1 := testCheck() 224 check1.CheckRestart.Grace = 0 225 restarter1 := newFakeCheckRestarter(cw, "testalloc1", "testtask1", "testcheck1", check1) 226 cw.Watch("testalloc1", "testtask1", "testcheck1", check1, restarter1) 227 228 // Check flaps and is never failing for the full 200ms needed to restart 229 now := time.Now() 230 fakeAPI.add("testcheck1", "passing", now) 231 fakeAPI.add("testcheck1", "critical", now.Add(100*time.Millisecond)) 232 fakeAPI.add("testcheck1", "passing", now.Add(250*time.Millisecond)) 233 fakeAPI.add("testcheck1", "critical", now.Add(300*time.Millisecond)) 234 fakeAPI.add("testcheck1", "passing", now.Add(450*time.Millisecond)) 235 236 ctx, cancel := context.WithTimeout(context.Background(), 600*time.Millisecond) 237 defer cancel() 238 cw.Run(ctx) 239 240 // Ensure restart was never called on check 1 241 if n := len(restarter1.restarts); n > 0 { 242 t.Errorf("expected check 1 to not be restarted but found %d\n%s", n, restarter1) 243 } 244 } 245 246 // TestCheckWatcher_Unwatch asserts unwatching checks prevents restarts. 
247 func TestCheckWatcher_Unwatch(t *testing.T) { 248 t.Parallel() 249 250 fakeAPI, cw := testWatcherSetup() 251 252 // Unwatch immediately 253 check1 := testCheck() 254 check1.CheckRestart.Limit = 1 255 check1.CheckRestart.Grace = 100 * time.Millisecond 256 restarter1 := newFakeCheckRestarter(cw, "testalloc1", "testtask1", "testcheck1", check1) 257 cw.Watch("testalloc1", "testtask1", "testcheck1", check1, restarter1) 258 cw.Unwatch("testcheck1") 259 260 // Always failing 261 fakeAPI.add("testcheck1", "critical", time.Time{}) 262 263 ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond) 264 defer cancel() 265 cw.Run(ctx) 266 267 // Ensure restart was never called on check 1 268 if n := len(restarter1.restarts); n > 0 { 269 t.Errorf("expected check 1 to not be restarted but found %d\n%s", n, restarter1) 270 } 271 } 272 273 // TestCheckWatcher_MultipleChecks asserts that when there are multiple checks 274 // for a single task, all checks should be removed when any of them restart the 275 // task to avoid multiple restarts. 
276 func TestCheckWatcher_MultipleChecks(t *testing.T) { 277 t.Parallel() 278 279 fakeAPI, cw := testWatcherSetup() 280 281 check1 := testCheck() 282 check1.CheckRestart.Limit = 1 283 restarter1 := newFakeCheckRestarter(cw, "testalloc1", "testtask1", "testcheck1", check1) 284 cw.Watch("testalloc1", "testtask1", "testcheck1", check1, restarter1) 285 286 check2 := testCheck() 287 check2.CheckRestart.Limit = 1 288 restarter2 := newFakeCheckRestarter(cw, "testalloc1", "testtask1", "testcheck2", check2) 289 cw.Watch("testalloc1", "testtask1", "testcheck2", check2, restarter2) 290 291 check3 := testCheck() 292 check3.CheckRestart.Limit = 1 293 restarter3 := newFakeCheckRestarter(cw, "testalloc1", "testtask1", "testcheck3", check3) 294 cw.Watch("testalloc1", "testtask1", "testcheck3", check3, restarter3) 295 296 // check 2 & 3 fail long enough to cause 1 restart, but only 1 should restart 297 now := time.Now() 298 fakeAPI.add("testcheck1", "critical", now) 299 fakeAPI.add("testcheck1", "passing", now.Add(150*time.Millisecond)) 300 fakeAPI.add("testcheck2", "critical", now) 301 fakeAPI.add("testcheck2", "passing", now.Add(150*time.Millisecond)) 302 fakeAPI.add("testcheck3", "passing", time.Time{}) 303 304 // Run 305 ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) 306 defer cancel() 307 cw.Run(ctx) 308 309 // Ensure that restart was only called once on check 1 or 2. Since 310 // checks are in a map it's random which check triggers the restart 311 // first. 312 if n := len(restarter1.restarts) + len(restarter2.restarts); n != 1 { 313 t.Errorf("expected check 1 & 2 to be restarted 1 time but found %d\ncheck 1:\n%s\ncheck 2:%s", 314 n, restarter1, restarter2) 315 } 316 317 if n := len(restarter3.restarts); n != 0 { 318 t.Errorf("expected check 3 to not be restarted but found %d:\n%s", n, restarter3) 319 } 320 }