github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/health_hook_test.go (about) 1 package allocrunner 2 3 import ( 4 "sync" 5 "testing" 6 "time" 7 8 consulapi "github.com/hashicorp/consul/api" 9 "github.com/hashicorp/nomad/ci" 10 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 11 "github.com/hashicorp/nomad/client/serviceregistration" 12 regMock "github.com/hashicorp/nomad/client/serviceregistration/mock" 13 cstructs "github.com/hashicorp/nomad/client/structs" 14 "github.com/hashicorp/nomad/helper/testlog" 15 "github.com/hashicorp/nomad/helper/uuid" 16 "github.com/hashicorp/nomad/nomad/mock" 17 "github.com/hashicorp/nomad/nomad/structs" 18 "github.com/stretchr/testify/assert" 19 "github.com/stretchr/testify/require" 20 ) 21 22 // statically assert health hook implements the expected interfaces 23 var _ interfaces.RunnerPrerunHook = (*allocHealthWatcherHook)(nil) 24 var _ interfaces.RunnerUpdateHook = (*allocHealthWatcherHook)(nil) 25 var _ interfaces.RunnerPostrunHook = (*allocHealthWatcherHook)(nil) 26 var _ interfaces.ShutdownHook = (*allocHealthWatcherHook)(nil) 27 28 // allocHealth is emitted to a chan whenever SetHealth is called 29 type allocHealth struct { 30 healthy bool 31 taskEvents map[string]*structs.TaskEvent 32 } 33 34 // mockHealthSetter implements healthSetter that stores health internally 35 type mockHealthSetter struct { 36 setCalls int 37 clearCalls int 38 healthy *bool 39 isDeploy *bool 40 taskEvents map[string]*structs.TaskEvent 41 mu sync.Mutex 42 43 healthCh chan allocHealth 44 } 45 46 // newMockHealthSetter returns a mock HealthSetter that emits all SetHealth 47 // calls on a buffered chan. Callers who do need need notifications of health 48 // changes may just create the struct directly. 49 func newMockHealthSetter() *mockHealthSetter { 50 return &mockHealthSetter{ 51 healthCh: make(chan allocHealth, 1), 52 } 53 } 54 55 func (m *mockHealthSetter) SetHealth(healthy, isDeploy bool, taskEvents map[string]*structs.TaskEvent) { 56 m.mu.Lock() 57 defer m.mu.Unlock() 58 59 m.setCalls++ 60 m.healthy = &healthy 61 m.isDeploy = &isDeploy 62 m.taskEvents = taskEvents 63 64 if m.healthCh != nil { 65 m.healthCh <- allocHealth{healthy, taskEvents} 66 } 67 } 68 69 func (m *mockHealthSetter) ClearHealth() { 70 m.mu.Lock() 71 defer m.mu.Unlock() 72 73 m.clearCalls++ 74 m.healthy = nil 75 m.isDeploy = nil 76 m.taskEvents = nil 77 } 78 79 func (m *mockHealthSetter) HasHealth() bool { 80 m.mu.Lock() 81 defer m.mu.Unlock() 82 return m.healthy != nil 83 } 84 85 // TestHealthHook_PrerunPostrun asserts a health hook does not error if it is 86 // run and postrunned. 87 func TestHealthHook_PrerunPostrun(t *testing.T) { 88 ci.Parallel(t) 89 require := require.New(t) 90 91 logger := testlog.HCLogger(t) 92 93 b := cstructs.NewAllocBroadcaster(logger) 94 defer b.Close() 95 96 consul := regMock.NewServiceRegistrationHandler(logger) 97 hs := &mockHealthSetter{} 98 99 checks := new(mock.CheckShim) 100 h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul, checks) 101 102 // Assert we implemented the right interfaces 103 prerunh, ok := h.(interfaces.RunnerPrerunHook) 104 require.True(ok) 105 _, ok = h.(interfaces.RunnerUpdateHook) 106 require.True(ok) 107 postrunh, ok := h.(interfaces.RunnerPostrunHook) 108 require.True(ok) 109 110 // Prerun 111 require.NoError(prerunh.Prerun()) 112 113 // Assert isDeploy is false (other tests peek at isDeploy to determine 114 // if an Update applied) 115 ahw := h.(*allocHealthWatcherHook) 116 ahw.hookLock.Lock() 117 assert.False(t, ahw.isDeploy) 118 ahw.hookLock.Unlock() 119 120 // Postrun 121 require.NoError(postrunh.Postrun()) 122 } 123 124 // TestHealthHook_PrerunUpdatePostrun asserts Updates may be applied concurrently. 125 func TestHealthHook_PrerunUpdatePostrun(t *testing.T) { 126 ci.Parallel(t) 127 require := require.New(t) 128 129 alloc := mock.Alloc() 130 131 logger := testlog.HCLogger(t) 132 b := cstructs.NewAllocBroadcaster(logger) 133 defer b.Close() 134 135 consul := regMock.NewServiceRegistrationHandler(logger) 136 hs := &mockHealthSetter{} 137 138 checks := new(mock.CheckShim) 139 h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook) 140 141 // Prerun 142 require.NoError(h.Prerun()) 143 144 // Update multiple times in a goroutine to mimic Client behavior 145 // (Updates are concurrent with alloc runner but are applied serially). 146 errs := make(chan error, 2) 147 go func() { 148 defer close(errs) 149 for i := 0; i < cap(errs); i++ { 150 alloc.AllocModifyIndex++ 151 errs <- h.Update(&interfaces.RunnerUpdateRequest{Alloc: alloc.Copy()}) 152 } 153 }() 154 155 for err := range errs { 156 assert.NoError(t, err) 157 } 158 159 // Postrun 160 require.NoError(h.Postrun()) 161 } 162 163 // TestHealthHook_UpdatePrerunPostrun asserts that a hook may have Update 164 // called before Prerun. 165 func TestHealthHook_UpdatePrerunPostrun(t *testing.T) { 166 ci.Parallel(t) 167 require := require.New(t) 168 169 alloc := mock.Alloc() 170 171 logger := testlog.HCLogger(t) 172 b := cstructs.NewAllocBroadcaster(logger) 173 defer b.Close() 174 175 consul := regMock.NewServiceRegistrationHandler(logger) 176 hs := &mockHealthSetter{} 177 178 checks := new(mock.CheckShim) 179 h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook) 180 181 // Set a DeploymentID to cause ClearHealth to be called 182 alloc.DeploymentID = uuid.Generate() 183 184 // Update in a goroutine to mimic Client behavior (Updates are 185 // concurrent with alloc runner). 186 errs := make(chan error, 1) 187 go func(alloc *structs.Allocation) { 188 errs <- h.Update(&interfaces.RunnerUpdateRequest{Alloc: alloc}) 189 close(errs) 190 }(alloc.Copy()) 191 192 for err := range errs { 193 assert.NoError(t, err) 194 } 195 196 // Prerun should be a noop 197 require.NoError(h.Prerun()) 198 199 // Assert that the Update took affect by isDeploy being true 200 h.hookLock.Lock() 201 assert.True(t, h.isDeploy) 202 h.hookLock.Unlock() 203 204 // Postrun 205 require.NoError(h.Postrun()) 206 } 207 208 // TestHealthHook_Postrun asserts that a hook may have only Postrun called. 209 func TestHealthHook_Postrun(t *testing.T) { 210 ci.Parallel(t) 211 require := require.New(t) 212 213 logger := testlog.HCLogger(t) 214 b := cstructs.NewAllocBroadcaster(logger) 215 defer b.Close() 216 217 consul := regMock.NewServiceRegistrationHandler(logger) 218 hs := &mockHealthSetter{} 219 220 checks := new(mock.CheckShim) 221 h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook) 222 223 // Postrun 224 require.NoError(h.Postrun()) 225 } 226 227 // TestHealthHook_SetHealth_healthy asserts SetHealth is called when health status is 228 // set. Uses task state and health checks. 229 func TestHealthHook_SetHealth_healthy(t *testing.T) { 230 ci.Parallel(t) 231 require := require.New(t) 232 233 alloc := mock.Alloc() 234 alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up 235 task := alloc.Job.TaskGroups[0].Tasks[0] 236 237 // Synthesize running alloc and tasks 238 alloc.ClientStatus = structs.AllocClientStatusRunning 239 alloc.TaskStates = map[string]*structs.TaskState{ 240 task.Name: { 241 State: structs.TaskStateRunning, 242 StartedAt: time.Now(), 243 }, 244 } 245 246 // Make Consul response 247 check := &consulapi.AgentCheck{ 248 Name: task.Services[0].Checks[0].Name, 249 Status: consulapi.HealthPassing, 250 } 251 taskRegs := map[string]*serviceregistration.ServiceRegistrations{ 252 task.Name: { 253 Services: map[string]*serviceregistration.ServiceRegistration{ 254 task.Services[0].Name: { 255 Service: &consulapi.AgentService{ 256 ID: "foo", 257 Service: task.Services[0].Name, 258 }, 259 Checks: []*consulapi.AgentCheck{check}, 260 }, 261 }, 262 }, 263 } 264 265 logger := testlog.HCLogger(t) 266 b := cstructs.NewAllocBroadcaster(logger) 267 defer b.Close() 268 269 // Don't reply on the first call 270 called := false 271 consul := regMock.NewServiceRegistrationHandler(logger) 272 consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) { 273 if !called { 274 called = true 275 return nil, nil 276 } 277 278 reg := &serviceregistration.AllocRegistration{ 279 Tasks: taskRegs, 280 } 281 282 return reg, nil 283 } 284 285 hs := newMockHealthSetter() 286 287 checks := new(mock.CheckShim) 288 h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook) 289 290 // Prerun 291 require.NoError(h.Prerun()) 292 293 // Wait for health to be set (healthy) 294 select { 295 case <-time.After(5 * time.Second): 296 t.Fatalf("timeout waiting for health to be set") 297 case health := <-hs.healthCh: 298 require.True(health.healthy) 299 300 // Healthy allocs shouldn't emit task events 301 ev := health.taskEvents[task.Name] 302 require.Nilf(ev, "%#v", health.taskEvents) 303 } 304 305 // Postrun 306 require.NoError(h.Postrun()) 307 } 308 309 // TestHealthHook_SetHealth_unhealthy asserts SetHealth notices unhealthy allocs 310 func TestHealthHook_SetHealth_unhealthy(t *testing.T) { 311 ci.Parallel(t) 312 require := require.New(t) 313 314 alloc := mock.Alloc() 315 alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up 316 task := alloc.Job.TaskGroups[0].Tasks[0] 317 318 newCheck := task.Services[0].Checks[0].Copy() 319 newCheck.Name = "failing-check" 320 task.Services[0].Checks = append(task.Services[0].Checks, newCheck) 321 322 // Synthesize running alloc and tasks 323 alloc.ClientStatus = structs.AllocClientStatusRunning 324 alloc.TaskStates = map[string]*structs.TaskState{ 325 task.Name: { 326 State: structs.TaskStateRunning, 327 StartedAt: time.Now(), 328 }, 329 } 330 331 // Make Consul response 332 checkHealthy := &consulapi.AgentCheck{ 333 Name: task.Services[0].Checks[0].Name, 334 Status: consulapi.HealthPassing, 335 } 336 checksUnhealthy := &consulapi.AgentCheck{ 337 Name: task.Services[0].Checks[1].Name, 338 Status: consulapi.HealthCritical, 339 } 340 taskRegs := map[string]*serviceregistration.ServiceRegistrations{ 341 task.Name: { 342 Services: map[string]*serviceregistration.ServiceRegistration{ 343 task.Services[0].Name: { 344 Service: &consulapi.AgentService{ 345 ID: "foo", 346 Service: task.Services[0].Name, 347 }, 348 Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy}, 349 }, 350 }, 351 }, 352 } 353 354 logger := testlog.HCLogger(t) 355 b := cstructs.NewAllocBroadcaster(logger) 356 defer b.Close() 357 358 // Don't reply on the first call 359 called := false 360 consul := regMock.NewServiceRegistrationHandler(logger) 361 consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) { 362 if !called { 363 called = true 364 return nil, nil 365 } 366 367 reg := &serviceregistration.AllocRegistration{ 368 Tasks: taskRegs, 369 } 370 371 return reg, nil 372 } 373 374 hs := newMockHealthSetter() 375 376 checks := new(mock.CheckShim) 377 h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul, checks).(*allocHealthWatcherHook) 378 379 // Prerun 380 require.NoError(h.Prerun()) 381 382 // Wait to ensure we don't get a healthy status 383 select { 384 case <-time.After(2 * time.Second): 385 // great no healthy status 386 case health := <-hs.healthCh: 387 require.Fail("expected no health event", "got %v", health) 388 } 389 390 // Postrun 391 require.NoError(h.Postrun()) 392 } 393 394 // TestHealthHook_SystemNoop asserts that system jobs return the noop tracker. 395 func TestHealthHook_SystemNoop(t *testing.T) { 396 ci.Parallel(t) 397 398 h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.SystemAlloc(), nil, nil, nil, nil) 399 400 // Assert that it's the noop impl 401 _, ok := h.(noopAllocHealthWatcherHook) 402 require.True(t, ok) 403 404 // Assert the noop impl does not implement any hooks 405 _, ok = h.(interfaces.RunnerPrerunHook) 406 require.False(t, ok) 407 _, ok = h.(interfaces.RunnerUpdateHook) 408 require.False(t, ok) 409 _, ok = h.(interfaces.RunnerPostrunHook) 410 require.False(t, ok) 411 _, ok = h.(interfaces.ShutdownHook) 412 require.False(t, ok) 413 } 414 415 // TestHealthHook_BatchNoop asserts that batch jobs return the noop tracker. 416 func TestHealthHook_BatchNoop(t *testing.T) { 417 ci.Parallel(t) 418 419 h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.BatchAlloc(), nil, nil, nil, nil) 420 421 // Assert that it's the noop impl 422 _, ok := h.(noopAllocHealthWatcherHook) 423 require.True(t, ok) 424 }