github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/health_hook_test.go (about) 1 package allocrunner 2 3 import ( 4 "sync" 5 "testing" 6 "time" 7 8 consulapi "github.com/hashicorp/consul/api" 9 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 10 "github.com/hashicorp/nomad/client/consul" 11 cstructs "github.com/hashicorp/nomad/client/structs" 12 agentconsul "github.com/hashicorp/nomad/command/agent/consul" 13 "github.com/hashicorp/nomad/helper/testlog" 14 "github.com/hashicorp/nomad/helper/uuid" 15 "github.com/hashicorp/nomad/nomad/mock" 16 "github.com/hashicorp/nomad/nomad/structs" 17 "github.com/stretchr/testify/assert" 18 "github.com/stretchr/testify/require" 19 ) 20 21 // statically assert health hook implements the expected interfaces 22 var _ interfaces.RunnerPrerunHook = (*allocHealthWatcherHook)(nil) 23 var _ interfaces.RunnerUpdateHook = (*allocHealthWatcherHook)(nil) 24 var _ interfaces.RunnerPostrunHook = (*allocHealthWatcherHook)(nil) 25 var _ interfaces.ShutdownHook = (*allocHealthWatcherHook)(nil) 26 27 // allocHealth is emitted to a chan whenever SetHealth is called 28 type allocHealth struct { 29 healthy bool 30 taskEvents map[string]*structs.TaskEvent 31 } 32 33 // mockHealthSetter implements healthSetter that stores health internally 34 type mockHealthSetter struct { 35 setCalls int 36 clearCalls int 37 healthy *bool 38 isDeploy *bool 39 taskEvents map[string]*structs.TaskEvent 40 mu sync.Mutex 41 42 healthCh chan allocHealth 43 } 44 45 // newMockHealthSetter returns a mock HealthSetter that emits all SetHealth 46 // calls on a buffered chan. Callers who do need need notifications of health 47 // changes may just create the struct directly. 48 func newMockHealthSetter() *mockHealthSetter { 49 return &mockHealthSetter{ 50 healthCh: make(chan allocHealth, 1), 51 } 52 } 53 54 func (m *mockHealthSetter) SetHealth(healthy, isDeploy bool, taskEvents map[string]*structs.TaskEvent) { 55 m.mu.Lock() 56 defer m.mu.Unlock() 57 58 m.setCalls++ 59 m.healthy = &healthy 60 m.isDeploy = &isDeploy 61 m.taskEvents = taskEvents 62 63 if m.healthCh != nil { 64 m.healthCh <- allocHealth{healthy, taskEvents} 65 } 66 } 67 68 func (m *mockHealthSetter) ClearHealth() { 69 m.mu.Lock() 70 defer m.mu.Unlock() 71 72 m.clearCalls++ 73 m.healthy = nil 74 m.isDeploy = nil 75 m.taskEvents = nil 76 } 77 78 func (m *mockHealthSetter) HasHealth() bool { 79 m.mu.Lock() 80 defer m.mu.Unlock() 81 return m.healthy != nil 82 } 83 84 // TestHealthHook_PrerunPostrun asserts a health hook does not error if it is 85 // run and postrunned. 86 func TestHealthHook_PrerunPostrun(t *testing.T) { 87 t.Parallel() 88 require := require.New(t) 89 90 logger := testlog.HCLogger(t) 91 92 b := cstructs.NewAllocBroadcaster(logger) 93 defer b.Close() 94 95 consul := consul.NewMockConsulServiceClient(t, logger) 96 hs := &mockHealthSetter{} 97 98 h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul) 99 100 // Assert we implemented the right interfaces 101 prerunh, ok := h.(interfaces.RunnerPrerunHook) 102 require.True(ok) 103 _, ok = h.(interfaces.RunnerUpdateHook) 104 require.True(ok) 105 postrunh, ok := h.(interfaces.RunnerPostrunHook) 106 require.True(ok) 107 108 // Prerun 109 require.NoError(prerunh.Prerun()) 110 111 // Assert isDeploy is false (other tests peek at isDeploy to determine 112 // if an Update applied) 113 ahw := h.(*allocHealthWatcherHook) 114 ahw.hookLock.Lock() 115 assert.False(t, ahw.isDeploy) 116 ahw.hookLock.Unlock() 117 118 // Postrun 119 require.NoError(postrunh.Postrun()) 120 } 121 122 // TestHealthHook_PrerunUpdatePostrun asserts Updates may be applied concurrently. 123 func TestHealthHook_PrerunUpdatePostrun(t *testing.T) { 124 t.Parallel() 125 require := require.New(t) 126 127 alloc := mock.Alloc() 128 129 logger := testlog.HCLogger(t) 130 b := cstructs.NewAllocBroadcaster(logger) 131 defer b.Close() 132 133 consul := consul.NewMockConsulServiceClient(t, logger) 134 hs := &mockHealthSetter{} 135 136 h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook) 137 138 // Prerun 139 require.NoError(h.Prerun()) 140 141 // Update multiple times in a goroutine to mimic Client behavior 142 // (Updates are concurrent with alloc runner but are applied serially). 143 errs := make(chan error, 2) 144 go func() { 145 defer close(errs) 146 for i := 0; i < cap(errs); i++ { 147 alloc.AllocModifyIndex++ 148 errs <- h.Update(&interfaces.RunnerUpdateRequest{Alloc: alloc.Copy()}) 149 } 150 }() 151 152 for err := range errs { 153 assert.NoError(t, err) 154 } 155 156 // Postrun 157 require.NoError(h.Postrun()) 158 } 159 160 // TestHealthHook_UpdatePrerunPostrun asserts that a hook may have Update 161 // called before Prerun. 162 func TestHealthHook_UpdatePrerunPostrun(t *testing.T) { 163 t.Parallel() 164 require := require.New(t) 165 166 alloc := mock.Alloc() 167 168 logger := testlog.HCLogger(t) 169 b := cstructs.NewAllocBroadcaster(logger) 170 defer b.Close() 171 172 consul := consul.NewMockConsulServiceClient(t, logger) 173 hs := &mockHealthSetter{} 174 175 h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook) 176 177 // Set a DeploymentID to cause ClearHealth to be called 178 alloc.DeploymentID = uuid.Generate() 179 180 // Update in a goroutine to mimic Client behavior (Updates are 181 // concurrent with alloc runner). 182 errs := make(chan error, 1) 183 go func(alloc *structs.Allocation) { 184 errs <- h.Update(&interfaces.RunnerUpdateRequest{Alloc: alloc}) 185 close(errs) 186 }(alloc.Copy()) 187 188 for err := range errs { 189 assert.NoError(t, err) 190 } 191 192 // Prerun should be a noop 193 require.NoError(h.Prerun()) 194 195 // Assert that the Update took affect by isDeploy being true 196 h.hookLock.Lock() 197 assert.True(t, h.isDeploy) 198 h.hookLock.Unlock() 199 200 // Postrun 201 require.NoError(h.Postrun()) 202 } 203 204 // TestHealthHook_Postrun asserts that a hook may have only Postrun called. 205 func TestHealthHook_Postrun(t *testing.T) { 206 t.Parallel() 207 require := require.New(t) 208 209 logger := testlog.HCLogger(t) 210 b := cstructs.NewAllocBroadcaster(logger) 211 defer b.Close() 212 213 consul := consul.NewMockConsulServiceClient(t, logger) 214 hs := &mockHealthSetter{} 215 216 h := newAllocHealthWatcherHook(logger, mock.Alloc(), hs, b.Listen(), consul).(*allocHealthWatcherHook) 217 218 // Postrun 219 require.NoError(h.Postrun()) 220 } 221 222 // TestHealthHook_SetHealth_healthy asserts SetHealth is called when health status is 223 // set. Uses task state and health checks. 224 func TestHealthHook_SetHealth_healthy(t *testing.T) { 225 t.Parallel() 226 require := require.New(t) 227 228 alloc := mock.Alloc() 229 alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up 230 task := alloc.Job.TaskGroups[0].Tasks[0] 231 232 // Synthesize running alloc and tasks 233 alloc.ClientStatus = structs.AllocClientStatusRunning 234 alloc.TaskStates = map[string]*structs.TaskState{ 235 task.Name: { 236 State: structs.TaskStateRunning, 237 StartedAt: time.Now(), 238 }, 239 } 240 241 // Make Consul response 242 check := &consulapi.AgentCheck{ 243 Name: task.Services[0].Checks[0].Name, 244 Status: consulapi.HealthPassing, 245 } 246 taskRegs := map[string]*agentconsul.ServiceRegistrations{ 247 task.Name: { 248 Services: map[string]*agentconsul.ServiceRegistration{ 249 task.Services[0].Name: { 250 Service: &consulapi.AgentService{ 251 ID: "foo", 252 Service: task.Services[0].Name, 253 }, 254 Checks: []*consulapi.AgentCheck{check}, 255 }, 256 }, 257 }, 258 } 259 260 logger := testlog.HCLogger(t) 261 b := cstructs.NewAllocBroadcaster(logger) 262 defer b.Close() 263 264 // Don't reply on the first call 265 called := false 266 consul := consul.NewMockConsulServiceClient(t, logger) 267 consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) { 268 if !called { 269 called = true 270 return nil, nil 271 } 272 273 reg := &agentconsul.AllocRegistration{ 274 Tasks: taskRegs, 275 } 276 277 return reg, nil 278 } 279 280 hs := newMockHealthSetter() 281 282 h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook) 283 284 // Prerun 285 require.NoError(h.Prerun()) 286 287 // Wait for health to be set (healthy) 288 select { 289 case <-time.After(5 * time.Second): 290 t.Fatalf("timeout waiting for health to be set") 291 case health := <-hs.healthCh: 292 require.True(health.healthy) 293 294 // Healthy allocs shouldn't emit task events 295 ev := health.taskEvents[task.Name] 296 require.Nilf(ev, "%#v", health.taskEvents) 297 } 298 299 // Postrun 300 require.NoError(h.Postrun()) 301 } 302 303 // TestHealthHook_SetHealth_unhealthy asserts SetHealth notices unhealthy allocs 304 func TestHealthHook_SetHealth_unhealthy(t *testing.T) { 305 t.Parallel() 306 require := require.New(t) 307 308 alloc := mock.Alloc() 309 alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up 310 task := alloc.Job.TaskGroups[0].Tasks[0] 311 312 newCheck := task.Services[0].Checks[0].Copy() 313 newCheck.Name = "failing-check" 314 task.Services[0].Checks = append(task.Services[0].Checks, newCheck) 315 316 // Synthesize running alloc and tasks 317 alloc.ClientStatus = structs.AllocClientStatusRunning 318 alloc.TaskStates = map[string]*structs.TaskState{ 319 task.Name: { 320 State: structs.TaskStateRunning, 321 StartedAt: time.Now(), 322 }, 323 } 324 325 // Make Consul response 326 checkHealthy := &consulapi.AgentCheck{ 327 Name: task.Services[0].Checks[0].Name, 328 Status: consulapi.HealthPassing, 329 } 330 checksUnhealthy := &consulapi.AgentCheck{ 331 Name: task.Services[0].Checks[1].Name, 332 Status: consulapi.HealthCritical, 333 } 334 taskRegs := map[string]*agentconsul.ServiceRegistrations{ 335 task.Name: { 336 Services: map[string]*agentconsul.ServiceRegistration{ 337 task.Services[0].Name: { 338 Service: &consulapi.AgentService{ 339 ID: "foo", 340 Service: task.Services[0].Name, 341 }, 342 Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy}, 343 }, 344 }, 345 }, 346 } 347 348 logger := testlog.HCLogger(t) 349 b := cstructs.NewAllocBroadcaster(logger) 350 defer b.Close() 351 352 // Don't reply on the first call 353 called := false 354 consul := consul.NewMockConsulServiceClient(t, logger) 355 consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) { 356 if !called { 357 called = true 358 return nil, nil 359 } 360 361 reg := &agentconsul.AllocRegistration{ 362 Tasks: taskRegs, 363 } 364 365 return reg, nil 366 } 367 368 hs := newMockHealthSetter() 369 370 h := newAllocHealthWatcherHook(logger, alloc.Copy(), hs, b.Listen(), consul).(*allocHealthWatcherHook) 371 372 // Prerun 373 require.NoError(h.Prerun()) 374 375 // Wait to ensure we don't get a healthy status 376 select { 377 case <-time.After(2 * time.Second): 378 // great no healthy status 379 case health := <-hs.healthCh: 380 require.Fail("expected no health event", "got %v", health) 381 } 382 383 // Postrun 384 require.NoError(h.Postrun()) 385 } 386 387 // TestHealthHook_SystemNoop asserts that system jobs return the noop tracker. 388 func TestHealthHook_SystemNoop(t *testing.T) { 389 t.Parallel() 390 391 h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.SystemAlloc(), nil, nil, nil) 392 393 // Assert that it's the noop impl 394 _, ok := h.(noopAllocHealthWatcherHook) 395 require.True(t, ok) 396 397 // Assert the noop impl does not implement any hooks 398 _, ok = h.(interfaces.RunnerPrerunHook) 399 require.False(t, ok) 400 _, ok = h.(interfaces.RunnerUpdateHook) 401 require.False(t, ok) 402 _, ok = h.(interfaces.RunnerPostrunHook) 403 require.False(t, ok) 404 _, ok = h.(interfaces.ShutdownHook) 405 require.False(t, ok) 406 } 407 408 // TestHealthHook_BatchNoop asserts that batch jobs return the noop tracker. 409 func TestHealthHook_BatchNoop(t *testing.T) { 410 t.Parallel() 411 412 h := newAllocHealthWatcherHook(testlog.HCLogger(t), mock.BatchAlloc(), nil, nil, nil) 413 414 // Assert that it's the noop impl 415 _, ok := h.(noopAllocHealthWatcherHook) 416 require.True(t, ok) 417 }