github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allochealth/tracker_test.go

package allochealth

import (
	"context"
	"fmt"
	"sync/atomic"
	"testing"
	"time"

	consulapi "github.com/hashicorp/consul/api"
	"github.com/hashicorp/nomad/ci"
	"github.com/hashicorp/nomad/client/serviceregistration"
	"github.com/hashicorp/nomad/client/serviceregistration/checks/checkstore"
	regmock "github.com/hashicorp/nomad/client/serviceregistration/mock"
	"github.com/hashicorp/nomad/client/state"
	cstructs "github.com/hashicorp/nomad/client/structs"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/testlog"
	"github.com/hashicorp/nomad/nomad/mock"
	"github.com/hashicorp/nomad/nomad/structs"
	"github.com/hashicorp/nomad/testutil"
	"github.com/shoenig/test/must"
	"github.com/stretchr/testify/require"
)

func TestTracker_ConsulChecks_Healthy(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
	task := alloc.Job.TaskGroups[0].Tasks[0]

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		task.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
	}

	// Make Consul response
	check := &consulapi.AgentCheck{
		Name:   task.Services[0].Checks[0].Name,
		Status: consulapi.HealthPassing,
	}
	taskRegs := map[string]*serviceregistration.ServiceRegistrations{
		task.Name: {
			Services: map[string]*serviceregistration.ServiceRegistration{
				task.Services[0].Name: {
					Service: &consulapi.AgentService{
						ID:      "foo",
						Service: task.Services[0].Name,
					},
					Checks: []*consulapi.AgentCheck{check},
				},
			},
		},
	}

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	// Don't reply on the first call
	var called uint64
	consul := regmock.NewServiceRegistrationHandler(logger)
	consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
		if atomic.AddUint64(&called, 1) == 1 {
			return nil, nil
		}

		reg := &serviceregistration.AllocRegistration{
			Tasks: taskRegs,
		}

		return reg, nil
	}

	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	checks := checkstore.NewStore(logger, state.NewMemDB(logger))
	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	select {
	case <-time.After(4 * checkInterval):
		require.Fail(t, "timed out while waiting for health")
	case h := <-tracker.HealthyCh():
		require.True(t, h)
	}
}

func TestTracker_NomadChecks_Healthy(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
	alloc.Job.TaskGroups[0].Tasks[0].Services[0].Provider = "nomad"

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		alloc.Job.TaskGroups[0].Tasks[0].Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
	}

	checks := checkstore.NewStore(logger, state.NewMemDB(logger))
	err := checks.Set(alloc.ID, &structs.CheckQueryResult{
		ID:        "abc123",
		Mode:      "healthiness",
		Status:    "pending",
		Output:    "nomad: waiting to run",
		Timestamp: time.Now().Unix(),
		Group:     alloc.TaskGroup,
		Task:      alloc.Job.TaskGroups[0].Tasks[0].Name,
		Service:   alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
		Check:     alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
	})
	must.NoError(t, err)

	consul := regmock.NewServiceRegistrationHandler(logger)
	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	go func() {
		// wait a bit then update the check to passing
		time.Sleep(15 * time.Millisecond)
		must.NoError(t, checks.Set(alloc.ID, &structs.CheckQueryResult{
			ID:        "abc123",
			Mode:      "healthiness",
			Status:    "success",
			Output:    "nomad: http ok",
			Timestamp: time.Now().Unix(),
			Group:     alloc.TaskGroup,
			Task:      alloc.Job.TaskGroups[0].Tasks[0].Name,
			Service:   alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
			Check:     alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
		}))
	}()

	select {
	case <-time.After(4 * checkInterval):
		t.Fatalf("timed out while waiting for success")
	case healthy := <-tracker.HealthyCh():
		must.True(t, healthy)
	}
}

func TestTracker_NomadChecks_Unhealthy(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
	alloc.Job.TaskGroups[0].Tasks[0].Services[0].Provider = "nomad"

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		alloc.Job.TaskGroups[0].Tasks[0].Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
	}

	checks := checkstore.NewStore(logger, state.NewMemDB(logger))
	err := checks.Set(alloc.ID, &structs.CheckQueryResult{
		ID:        "abc123",
		Mode:      "healthiness",
		Status:    "pending", // start out pending
		Output:    "nomad: waiting to run",
		Timestamp: time.Now().Unix(),
		Group:     alloc.TaskGroup,
		Task:      alloc.Job.TaskGroups[0].Tasks[0].Name,
		Service:   alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
		Check:     alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
	})
	must.NoError(t, err)

	consul := regmock.NewServiceRegistrationHandler(logger)
	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	go func() {
		// wait a bit then update the check to failing
		time.Sleep(15 * time.Millisecond)
		must.NoError(t, checks.Set(alloc.ID, &structs.CheckQueryResult{
			ID:        "abc123",
			Mode:      "healthiness",
			Status:    "failing",
			Output:    "connection refused",
			Timestamp: time.Now().Unix(),
			Group:     alloc.TaskGroup,
			Task:      alloc.Job.TaskGroups[0].Tasks[0].Name,
			Service:   alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
			Check:     alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
		}))
	}()

	// make sure we are always unhealthy across 4 check intervals
	for i := 0; i < 4; i++ {
		<-time.After(checkInterval)
		select {
		case <-tracker.HealthyCh():
			t.Fatalf("should not receive on healthy chan with failing check")
		default:
		}
	}
}

func TestTracker_Checks_PendingPostStop_Healthy(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.LifecycleAllocWithPoststopDeploy()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		"web": {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
		"post": {
			State: structs.TaskStatePending,
		},
	}

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	consul := regmock.NewServiceRegistrationHandler(logger)
	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	checks := checkstore.NewStore(logger, state.NewMemDB(logger))
	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	select {
	case <-time.After(4 * checkInterval):
		require.Fail(t, "timed out while waiting for health")
	case h := <-tracker.HealthyCh():
		require.True(t, h)
	}
}

func TestTracker_Succeeded_PostStart_Healthy(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.LifecycleAllocWithPoststartDeploy()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = time.Millisecond * 1
	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		"web": {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
		"post": {
			State:      structs.TaskStateDead,
			StartedAt:  time.Now(),
			FinishedAt: time.Now().Add(alloc.Job.TaskGroups[0].Migrate.MinHealthyTime / 2),
		},
	}

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	consul := regmock.NewServiceRegistrationHandler(logger)
	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	checks := checkstore.NewStore(logger, state.NewMemDB(logger))
	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, alloc.Job.TaskGroups[0].Migrate.MinHealthyTime, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	select {
	case <-time.After(alloc.Job.TaskGroups[0].Migrate.MinHealthyTime * 2):
		require.Fail(t, "timed out while waiting for health")
	case h := <-tracker.HealthyCh():
		require.True(t, h)
	}
}

func TestTracker_ConsulChecks_Unhealthy(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
	task := alloc.Job.TaskGroups[0].Tasks[0]

	newCheck := task.Services[0].Checks[0].Copy()
	newCheck.Name = "failing-check"
	task.Services[0].Checks = append(task.Services[0].Checks, newCheck)

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		task.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
	}

	// Make Consul response
	checkHealthy := &consulapi.AgentCheck{
		Name:   task.Services[0].Checks[0].Name,
		Status: consulapi.HealthPassing,
	}
	checksUnhealthy := &consulapi.AgentCheck{
		Name:   task.Services[0].Checks[1].Name,
		Status: consulapi.HealthCritical,
	}
	taskRegs := map[string]*serviceregistration.ServiceRegistrations{
		task.Name: {
			Services: map[string]*serviceregistration.ServiceRegistration{
				task.Services[0].Name: {
					Service: &consulapi.AgentService{
						ID:      "foo",
						Service: task.Services[0].Name,
					},
					Checks: []*consulapi.AgentCheck{checkHealthy, checksUnhealthy},
				},
			},
		},
	}

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	// Don't reply on the first call
	var called uint64
	consul := regmock.NewServiceRegistrationHandler(logger)
	consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
		if atomic.AddUint64(&called, 1) == 1 {
			return nil, nil
		}

		reg := &serviceregistration.AllocRegistration{
			Tasks: taskRegs,
		}

		return reg, nil
	}

	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	checks := checkstore.NewStore(logger, state.NewMemDB(logger))
	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	testutil.WaitForResult(func() (bool, error) {
		lookup := atomic.LoadUint64(&called)
		return lookup < 4, fmt.Errorf("wait to get more task registration lookups: %v", lookup)
	}, func(err error) {
		require.NoError(t, err)
	})

	tracker.lock.Lock()
	require.False(t, tracker.checksHealthy)
	tracker.lock.Unlock()

	select {
	case v := <-tracker.HealthyCh():
		require.Failf(t, "expected no health value", " got %v", v)
	default:
		// good
	}
}

func TestTracker_Healthy_IfBothTasksAndConsulChecksAreHealthy(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	logger := testlog.HCLogger(t)

	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	tracker := NewTracker(ctx, logger, alloc, nil, nil, nil, time.Millisecond, true)

	assertNoHealth := func() {
		require.NoError(t, tracker.ctx.Err())
		select {
		case v := <-tracker.HealthyCh():
			require.Failf(t, "unexpected healthy event", "got %v", v)
		default:
		}
	}

	// first set task health without checks
	tracker.setTaskHealth(true, false)
	assertNoHealth()

	// now fail task health again before checks are successful
	tracker.setTaskHealth(false, false)
	assertNoHealth()

	// now pass health checks - do not propagate health yet
	tracker.setCheckHealth(true)
	assertNoHealth()

	// set tasks to healthy - don't propagate health yet, wait for the next check
	tracker.setTaskHealth(true, false)
	assertNoHealth()

	// set checks to true, now propagate health status
	tracker.setCheckHealth(true)

	require.Error(t, tracker.ctx.Err())
	select {
	case v := <-tracker.HealthyCh():
		require.True(t, v)
	default:
		require.Fail(t, "expected a health status")
	}
}

// TestTracker_Checks_Healthy_Before_TaskHealth asserts that an alloc is marked
// healthy if its checks pass before its tasks become healthy.
func TestTracker_Checks_Healthy_Before_TaskHealth(t *testing.T) {
	ci.Parallel(t)

	alloc := mock.Alloc()
	alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
	task := alloc.Job.TaskGroups[0].Tasks[0]

	// new task starting unhealthy, without services
	task2 := task.Copy()
	task2.Name = task2.Name + "2"
	task2.Services = nil
	alloc.Job.TaskGroups[0].Tasks = append(alloc.Job.TaskGroups[0].Tasks, task2)

	// Synthesize running alloc and tasks
	alloc.ClientStatus = structs.AllocClientStatusRunning
	alloc.TaskStates = map[string]*structs.TaskState{
		task.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
		task2.Name: {
			State: structs.TaskStatePending,
		},
	}

	// Make Consul response
	check := &consulapi.AgentCheck{
		Name:   task.Services[0].Checks[0].Name,
		Status: consulapi.HealthPassing,
	}
	taskRegs := map[string]*serviceregistration.ServiceRegistrations{
		task.Name: {
			Services: map[string]*serviceregistration.ServiceRegistration{
				task.Services[0].Name: {
					Service: &consulapi.AgentService{
						ID:      "foo",
						Service: task.Services[0].Name,
					},
					Checks: []*consulapi.AgentCheck{check},
				},
			},
		},
	}

	logger := testlog.HCLogger(t)
	b := cstructs.NewAllocBroadcaster(logger)
	defer b.Close()

	// Don't reply on the first call
	var called uint64
	consul := regmock.NewServiceRegistrationHandler(logger)
	consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
		if atomic.AddUint64(&called, 1) == 1 {
			return nil, nil
		}

		reg := &serviceregistration.AllocRegistration{
			Tasks: taskRegs,
		}

		return reg, nil
	}

	ctx, cancelFn := context.WithCancel(context.Background())
	defer cancelFn()

	checks := checkstore.NewStore(logger, state.NewMemDB(logger))
	checkInterval := 10 * time.Millisecond
	tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
	tracker.checkLookupInterval = checkInterval
	tracker.Start()

	// assert that we don't get marked healthy
	select {
	case <-time.After(4 * checkInterval):
		// still unhealthy, good
	case h := <-tracker.HealthyCh():
		require.Fail(t, "unexpected health event", h)
	}

	helper.WithLock(&tracker.lock, func() {
		require.False(t, tracker.tasksHealthy)
		require.False(t, tracker.checksHealthy)
	})

	// now set tasks to healthy
	runningAlloc := alloc.Copy()
	runningAlloc.TaskStates = map[string]*structs.TaskState{
		task.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
		task2.Name: {
			State:     structs.TaskStateRunning,
			StartedAt: time.Now(),
		},
	}
	err := b.Send(runningAlloc)
	require.NoError(t, err)

	// eventually, it is marked as healthy
	select {
	case <-time.After(4 * checkInterval):
		require.Fail(t, "timed out while waiting for health")
	case h := <-tracker.HealthyCh():
		require.True(t, h)
	}
}

func TestTracker_ConsulChecks_OnUpdate(t *testing.T) {
	ci.Parallel(t)

	cases := []struct {
		desc          string
		checkOnUpdate string
		consulResp    string
		expectedPass  bool
	}{
		{
			desc:          "check on_update require_healthy, consul healthy",
			checkOnUpdate: structs.OnUpdateRequireHealthy,
			consulResp:    consulapi.HealthPassing,
			expectedPass:  true,
		},
		{
			desc:          "check on_update ignore_warning, consul warn",
			checkOnUpdate: structs.OnUpdateIgnoreWarn,
			consulResp:    consulapi.HealthWarning,
			expectedPass:  true,
		},
		{
			desc:          "check on_update ignore_warning, consul critical",
			checkOnUpdate: structs.OnUpdateIgnoreWarn,
			consulResp:    consulapi.HealthCritical,
			expectedPass:  false,
		},
		{
			desc:          "check on_update ignore_warning, consul healthy",
			checkOnUpdate: structs.OnUpdateIgnoreWarn,
			consulResp:    consulapi.HealthPassing,
			expectedPass:  true,
		},
		{
			desc:          "check on_update ignore, consul critical",
			checkOnUpdate: structs.OnUpdateIgnore,
			consulResp:    consulapi.HealthCritical,
			expectedPass:  true,
		},
	}

	for _, tc := range cases {
		t.Run(tc.desc, func(t *testing.T) {

			alloc := mock.Alloc()
			alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
			task := alloc.Job.TaskGroups[0].Tasks[0]

			// Synthesize running alloc and tasks
			alloc.ClientStatus = structs.AllocClientStatusRunning
			alloc.TaskStates = map[string]*structs.TaskState{
				task.Name: {
					State:     structs.TaskStateRunning,
					StartedAt: time.Now(),
				},
			}

			// Make Consul response
			check := &consulapi.AgentCheck{
				Name:   task.Services[0].Checks[0].Name,
				Status: tc.consulResp,
			}
			taskRegs := map[string]*serviceregistration.ServiceRegistrations{
				task.Name: {
					Services: map[string]*serviceregistration.ServiceRegistration{
						task.Services[0].Name: {
							Service: &consulapi.AgentService{
								ID:      "foo",
								Service: task.Services[0].Name,
							},
							Checks: []*consulapi.AgentCheck{check},
							CheckOnUpdate: map[string]string{
								check.CheckID: tc.checkOnUpdate,
							},
						},
					},
				},
			}

			logger := testlog.HCLogger(t)
			b := cstructs.NewAllocBroadcaster(logger)
			defer b.Close()

			// Don't reply on the first call
			var called uint64
			consul := regmock.NewServiceRegistrationHandler(logger)
			consul.AllocRegistrationsFn = func(string) (*serviceregistration.AllocRegistration, error) {
				if atomic.AddUint64(&called, 1) == 1 {
					return nil, nil
				}

				reg := &serviceregistration.AllocRegistration{
					Tasks: taskRegs,
				}

				return reg, nil
			}

			ctx, cancelFn := context.WithCancel(context.Background())
			defer cancelFn()

			checks := checkstore.NewStore(logger, state.NewMemDB(logger))
			checkInterval := 10 * time.Millisecond
			tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, time.Millisecond, true)
			tracker.checkLookupInterval = checkInterval
			tracker.Start()

			select {
			case <-time.After(4 * checkInterval):
				if !tc.expectedPass {
					// tracker should still be running
					require.Nil(t, tracker.ctx.Err())
					return
				}
				require.Fail(t, "timed out while waiting for health")
			case h := <-tracker.HealthyCh():
				require.True(t, h)
			}

			// For healthy checks, the tracker should stop watching
			select {
			case <-tracker.ctx.Done():
				// Ok, tracker should exit after reporting healthy
			default:
				require.Fail(t, "expected tracker to exit after reporting healthy")
			}
		})
	}
}

func TestTracker_NomadChecks_OnUpdate(t *testing.T) {
	ci.Parallel(t)

	cases := []struct {
		name         string
		checkMode    structs.CheckMode
		checkResult  structs.CheckStatus
		expectedPass bool
	}{
		{
			name:         "mode is healthiness and check is healthy",
			checkMode:    structs.Healthiness,
			checkResult:  structs.CheckSuccess,
			expectedPass: true,
		},
		{
			name:         "mode is healthiness and check is unhealthy",
			checkMode:    structs.Healthiness,
			checkResult:  structs.CheckFailure,
			expectedPass: false,
		},
		{
			name:         "mode is readiness and check is healthy",
			checkMode:    structs.Readiness,
			checkResult:  structs.CheckSuccess,
			expectedPass: true,
		},
		{
			name:         "mode is readiness and check is unhealthy",
			checkMode:    structs.Readiness,
			checkResult:  structs.CheckFailure,
			expectedPass: true,
		},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.name, func(t *testing.T) {
			alloc := mock.Alloc()
			alloc.Job.TaskGroups[0].Migrate.MinHealthyTime = 1 // let's speed things up
			alloc.Job.TaskGroups[0].Tasks[0].Services[0].Provider = "nomad"

			logger := testlog.HCLogger(t)
			b := cstructs.NewAllocBroadcaster(logger)
			defer b.Close()

			// Synthesize running alloc and tasks
			alloc.ClientStatus = structs.AllocClientStatusRunning
			alloc.TaskStates = map[string]*structs.TaskState{
				alloc.Job.TaskGroups[0].Tasks[0].Name: {
					State:     structs.TaskStateRunning,
					StartedAt: time.Now(),
				},
			}

			// Set a check that is pending
			checks := checkstore.NewStore(logger, state.NewMemDB(logger))
			err := checks.Set(alloc.ID, &structs.CheckQueryResult{
				ID:        "abc123",
				Mode:      tc.checkMode,
				Status:    structs.CheckPending,
				Output:    "nomad: waiting to run",
				Timestamp: time.Now().Unix(),
				Group:     alloc.TaskGroup,
				Task:      alloc.Job.TaskGroups[0].Tasks[0].Name,
				Service:   alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
				Check:     alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
			})
			must.NoError(t, err)

			go func() {
				// wait a bit then update the check to the result under test
				time.Sleep(15 * time.Millisecond)
				must.NoError(t, checks.Set(alloc.ID, &structs.CheckQueryResult{
					ID:        "abc123",
					Mode:      tc.checkMode,
					Status:    tc.checkResult,
					Output:    "some output",
					Timestamp: time.Now().Unix(),
					Group:     alloc.TaskGroup,
					Task:      alloc.Job.TaskGroups[0].Tasks[0].Name,
					Service:   alloc.Job.TaskGroups[0].Tasks[0].Services[0].Name,
					Check:     alloc.Job.TaskGroups[0].Tasks[0].Services[0].Checks[0].Name,
				}))
			}()

			ctx, cancel := context.WithCancel(context.Background())
			defer cancel()

			consul := regmock.NewServiceRegistrationHandler(logger)
			minHealthyTime := 1 * time.Millisecond
			tracker := NewTracker(ctx, logger, alloc, b.Listen(), consul, checks, minHealthyTime, true)
			tracker.checkLookupInterval = 10 * time.Millisecond
			tracker.Start()

			select {
			case <-time.After(8 * tracker.checkLookupInterval):
				if !tc.expectedPass {
					// tracker should still be running
					must.NoError(t, tracker.ctx.Err())
					return
				}
				t.Fatal("timed out while waiting for health")
			case h := <-tracker.HealthyCh():
				require.True(t, h)
			}

			// For healthy checks, the tracker should stop watching
			select {
			case <-tracker.ctx.Done():
				// Ok, tracker should exit after reporting healthy
			default:
				t.Fatal("expected tracker to exit after reporting healthy")
			}
		})
	}
}