github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allochealth/tracker_test.go

package allochealth

import (
	"context"
	"fmt"
	"sync/atomic"
	"testing"
	"time"

	consulapi "github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/client/consul"
	cstructs "github.com/hashicorp/nomad/client/structs"
	agentconsul "github.com/hashicorp/nomad/command/agent/consul"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/stretchr/testify/require"
)

func TestTracker_Checks_Healthy(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
	task := alloc.Job.TaskGroups[0].Tasks[0]

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		task.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
	}

	// Make Consul response
	check := &consulapi.AgentCheck{
		Name:   task.Services[0].Checks[0].Name,
		Status: consulapi.HealthPassing,
	}
	taskRegs := map[string]*agentconsul.ServiceRegistrations{
		task.Name: {
			Services: map[string]*agentconsul.ServiceRegistration{
				task.Services[0].Name: {
					Service: &consulapi.AgentService{
						ID:      "foo",
						Service: task.Services[0].Name,
					},
					Checks: []*consulapi.AgentCheck{check},
				},
			},
		},
	}

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	// Don't reply on the first call
	var called uint64
	consul := consul.NewMockConsulServiceClient(t, logger)
	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
		if atomic.AddUint64(&called, 1) == 1 {
			return nil, nil
		}

		reg := &agentconsul.AllocRegistration{
			Tasks: taskRegs,
		}

		return reg, nil
	}

	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
		time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	select {
	case <-time.After(4 * checkInterval):
		require.Fail(t, "timed out while waiting for health")
	case h := <-tracker.HealthyCh():
		require.True(t, h)
	}
}

func TestTracker_Checks_PendingPostStop_Healthy(t *testing.T) {
	t.Parallel()

	alloc := mock.LifecycleAllocWithPoststopDeploy()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		"web": {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
		"post": {
			State: structs.TaskStatePending,
		},
	}

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	consul := consul.NewMockConsulServiceClient(t, logger)
	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
		time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	select {
	case <-time.After(4 * checkInterval):
		require.Fail(t, "timed out while waiting for health")
	case h := <-tracker.HealthyCh():
		require.True(t, h)
	}
}

func TestTracker_Checks_Unhealthy(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
	task := alloc.Job.TaskGroups[0].Tasks[0]

	newCheck := task.Services[0].Checks[0].Copy()
	newCheck.Name = "failing-check"
	task.Services[0].Checks = append(task.Services[0].Checks, newCheck)

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		task.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
	}

	// Make Consul response
	checkHealthy := &consulapi.AgentCheck{
		Name:   task.Services[0].Checks[0].Name,
		Status: consulapi.HealthPassing,
	}
	checksUnhealthy := &consulapi.AgentCheck{
		Name:   task.Services[0].Checks[1].Name,
		Status: consulapi.HealthCritical,
	}
	taskRegs := map[string]*agentconsul.ServiceRegistrations{
		task.Name: {
			Services: map[string]*agentconsul.ServiceRegistration{
				task.Services[0].Name: {
					Service: &consulapi.AgentService{
						ID:      "foo",
						Service: task.Services[0].Name,
					},
					Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy},
				},
			},
		},
	}

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	// Don't reply on the first call
	var called uint64
	consul := consul.NewMockConsulServiceClient(t, logger)
	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
		if atomic.AddUint64(&called, 1) == 1 {
			return nil, nil
		}

		reg := &agentconsul.AllocRegistration{
			Tasks: taskRegs,
		}

		return reg, nil
	}

	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
		time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	testutil.WaitForResult(func() (bool, error) {
		lookup := atomic.LoadUint64(&called)
		return lookup < 4, fmt.Errorf("wait to get more task registration lookups: %v", lookup)
	}, func(err error) {
		require.NoError(t, err)
	})

	tracker.l.Lock()
	require.False(t, tracker.checksHealthy)
	tracker.l.Unlock()

	select {
	case v := <-tracker.HealthyCh():
		require.Failf(t, "expected no health value", " got %v", v)
	default:
		// good
	}
}

func TestTracker_Healthy_IfBothTasksAndConsulChecksAreHealthy(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	logger := testlog.HCLogger(t)

	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	tracker := NewTracker(ctx, logger, alloc, nil, nil,
		time.Millisecond, true)

	assertNoHealth := func() {
		require.NoError(t, tracker.ctx.Err())
		select {
		case v := <-tracker.HealthyCh():
			require.Failf(t, "unexpected healthy event", "got %v", v)
		default:
		}
	}

	// first set task health without checks
	tracker.setTaskHealth(true, false)
	assertNoHealth()

	// now fail task health again before checks are successful
	tracker.setTaskHealth(false, false)
	assertNoHealth()

	// now pass health checks - do not propagate health yet
	tracker.setCheckHealth(true)
	assertNoHealth()

	// set tasks to healthy - don't propagate health yet, wait for the next check
	tracker.setTaskHealth(true, false)
	assertNoHealth()

	// set checks to true, now propagate health status
	tracker.setCheckHealth(true)

	require.Error(t, tracker.ctx.Err())
	select {
	case v := <-tracker.HealthyCh():
		require.True(t, v)
	default:
		require.Fail(t, "expected a health status")
	}
}

// TestTracker_Checks_Healthy_Before_TaskHealth asserts that we mark an alloc
// healthy if the checks pass before task health passes
func TestTracker_Checks_Healthy_Before_TaskHealth(t *testing.T) {
	t.Parallel()

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
	task := alloc.Job.TaskGroups[0].Tasks[0]

	// new task starting unhealthy, without services
	task2 := task.Copy()
	task2.Name = task2.Name + "2"
	task2.Services = nil
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		task.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
		task2.Name: {
			State: structs.TaskStatePending,
		},
	}

	// Make Consul response
	check := &consulapi.AgentCheck{
		Name:   task.Services[0].Checks[0].Name,
		Status: consulapi.HealthPassing,
	}
	taskRegs := map[string]*agentconsul.ServiceRegistrations{
		task.Name: {
			Services: map[string]*agentconsul.ServiceRegistration{
				task.Services[0].Name: {
					Service: &consulapi.AgentService{
						ID:      "foo",
						Service: task.Services[0].Name,
					},
					Checks: []*consulapi.AgentCheck{check},
				},
			},
		},
	}

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	// Don't reply on the first call
	var called uint64
	consul := consul.NewMockConsulServiceClient(t, logger)
	consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
		if atomic.AddUint64(&called, 1) == 1 {
			return nil, nil
		}

		reg := &agentconsul.AllocRegistration{
			Tasks: taskRegs,
		}

		return reg, nil
	}

	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
		time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	// assert that we don't get marked healthy
	select {
	case <-time.After(4 * checkInterval):
		// still unhealthy, good
	case h := <-tracker.HealthyCh():
		require.Fail(t, "unexpected health event", h)
	}
	require.False(t, tracker.tasksHealthy)
	require.False(t, tracker.checksHealthy)

	// now set task to healthy
	runningAlloc := alloc.Copy()
	runningAlloc.TaskStates = map[string]*structs.TaskState{
		task.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
		task2.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
	}
	err := b.Send(runningAlloc)
	require.NoError(t, err)

	// eventually, it is marked as healthy
	select {
	case <-time.After(4 * checkInterval):
		require.Fail(t, "timed out while waiting for health")
	case h := <-tracker.HealthyCh():
		require.True(t, h)
	}

}

func TestTracker_Checks_OnUpdate(t *testing.T) {
	t.Parallel()

	cases := []struct {
		desc          string
		checkOnUpdate string
		consulResp    string
		expectedPass  bool
	}{
		{
			desc:          "check require_healthy consul healthy",
			checkOnUpdate: structs.OnUpdateRequireHealthy,
			consulResp:    consulapi.HealthPassing,
			expectedPass:  true,
		},
		{
			desc:          "check on_update ignore_warning, consul warn",
			checkOnUpdate: structs.OnUpdateIgnoreWarn,
			consulResp:    consulapi.HealthWarning,
			expectedPass:  true,
		},
		{
			desc:          "check on_update ignore_warning, consul critical",
			checkOnUpdate: structs.OnUpdateIgnoreWarn,
			consulResp:    consulapi.HealthCritical,
			expectedPass:  false,
		},
		{
			desc:          "check on_update ignore_warning, consul healthy",
			checkOnUpdate: structs.OnUpdateIgnoreWarn,
			consulResp:    consulapi.HealthPassing,
			expectedPass:  true,
		},
		{
			desc:          "check on_update ignore, consul critical",
			checkOnUpdate: structs.OnUpdateIgnore,
			consulResp:    consulapi.HealthCritical,
			expectedPass:  true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {

			alloc := mock.Alloc()
			alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
			task := alloc.Job.TaskGroups[0].Tasks[0]

			// Synthesize running alloc and tasks
			alloc.ClientStatus = structs.AllocClientStatusRunning
			alloc.TaskStates = map[string]*structs.TaskState{
				task.Name: {
					State:     structs.TaskStateRunning,
					StartedAt: time.Now(),
				},
			}

			// Make Consul response
			check := &consulapi.AgentCheck{
				Name:   task.Services[0].Checks[0].Name,
				Status: tc.consulResp,
			}
			taskRegs := map[string]*agentconsul.ServiceRegistrations{
				task.Name: {
					Services: map[string]*agentconsul.ServiceRegistration{
						task.Services[0].Name: {
							Service: &consulapi.AgentService{
								ID:      "foo",
								Service: task.Services[0].Name,
							},
							Checks: []*consulapi.AgentCheck{check},
							CheckOnUpdate: map[string]string{
								check.CheckID: tc.checkOnUpdate,
							},
						},
					},
				},
			}

			logger := testlog.HCLogger(t)
			b := cstructs.NewAllocBroadcaster(logger)
			defer b.Close()

			// Don't reply on the first call
			var called uint64
			consul := consul.NewMockConsulServiceClient(t, logger)
			consul.AllocRegistrationsFn = func(string) (*agentconsul.AllocRegistration, error) {
				if atomic.AddUint64(&called, 1) == 1 {
					return nil, nil
				}

				reg := &agentconsul.AllocRegistration{
					Tasks: taskRegs,
				}

				return reg, nil
			}

			ctx, cancelFn := context.WithCancel(context.Background())
			defer cancelFn()

			checkInterval := 10 * time.Millisecond
			tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul,
				time.Millisecond, true)
			tracker.checkLookupInterval = checkInterval
			tracker.Start()

			select {
			case <-time.After(4 * checkInterval):
				if !tc.expectedPass {
					// tracker should still be running
					require.Nil(t, tracker.ctx.Err())
					return
				}
				require.Fail(t, "timed out while waiting for health")
			case h := <-tracker.HealthyCh():
				require.True(t, h)
			}

			// For healthy checks, the tracker should stop watching
			select {
			case <-tracker.ctx.Done():
				// Ok, tracker should exit after reporting healthy
			default:
				require.Fail(t, "expected tracker to exit after reporting healthy")
			}
		})
	}
}