github.com/hernad/nomad@v1.6.112/nomad/worker_test.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package nomad 5 6 import ( 7 "context" 8 "errors" 9 "fmt" 10 "reflect" 11 "sync" 12 "testing" 13 "time" 14 15 log "github.com/hashicorp/go-hclog" 16 "github.com/hashicorp/go-memdb" 17 "github.com/hernad/nomad/ci" 18 "github.com/hernad/nomad/helper" 19 "github.com/shoenig/test/must" 20 "github.com/stretchr/testify/require" 21 22 "github.com/hernad/nomad/helper/testlog" 23 "github.com/hernad/nomad/helper/uuid" 24 "github.com/hernad/nomad/nomad/mock" 25 "github.com/hernad/nomad/nomad/structs" 26 "github.com/hernad/nomad/scheduler" 27 "github.com/hernad/nomad/testutil" 28 "github.com/stretchr/testify/assert" 29 ) 30 31 type NoopScheduler struct { 32 state scheduler.State 33 planner scheduler.Planner 34 eval *structs.Evaluation 35 eventsCh chan<- interface{} 36 err error 37 } 38 39 func (n *NoopScheduler) Process(eval *structs.Evaluation) error { 40 if n.state == nil { 41 panic("missing state") 42 } 43 if n.planner == nil { 44 panic("missing planner") 45 } 46 n.eval = eval 47 return n.err 48 } 49 50 func init() { 51 scheduler.BuiltinSchedulers["noop"] = func(logger log.Logger, eventsCh chan<- interface{}, s scheduler.State, p scheduler.Planner) scheduler.Scheduler { 52 n := &NoopScheduler{ 53 state: s, 54 planner: p, 55 } 56 return n 57 } 58 } 59 60 // NewTestWorker returns the worker without calling it's run method. 61 func NewTestWorker(shutdownCtx context.Context, srv *Server) *Worker { 62 w := &Worker{ 63 srv: srv, 64 start: time.Now(), 65 id: uuid.Generate(), 66 enabledSchedulers: srv.config.EnabledSchedulers, 67 } 68 w.logger = srv.logger.ResetNamed("worker").With("worker_id", w.id) 69 w.pauseCond = sync.NewCond(&w.pauseLock) 70 w.ctx, w.cancelFn = context.WithCancel(shutdownCtx) 71 return w 72 } 73 74 func TestWorker_dequeueEvaluation(t *testing.T) { 75 ci.Parallel(t) 76 77 s1, cleanupS1 := TestServer(t, func(c *Config) { 78 c.NumSchedulers = 0 79 c.EnabledSchedulers = []string{structs.JobTypeService} 80 }) 81 defer cleanupS1() 82 testutil.WaitForLeader(t, s1.RPC) 83 84 // Create the evaluation 85 eval1 := mock.Eval() 86 s1.evalBroker.Enqueue(eval1) 87 88 // Create a worker 89 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 90 w, _ := NewWorker(s1.shutdownCtx, s1, poolArgs) 91 92 // Attempt dequeue 93 eval, token, waitIndex, shutdown := w.dequeueEvaluation(10 * time.Millisecond) 94 if shutdown { 95 t.Fatalf("should not shutdown") 96 } 97 if token == "" { 98 t.Fatalf("should get token") 99 } 100 if waitIndex != eval1.ModifyIndex { 101 t.Fatalf("bad wait index; got %d; want %d", waitIndex, eval1.ModifyIndex) 102 } 103 104 // Ensure we get a sane eval 105 if !reflect.DeepEqual(eval, eval1) { 106 t.Fatalf("bad: %#v %#v", eval, eval1) 107 } 108 } 109 110 // Test that the worker picks up the correct wait index when there are multiple 111 // evals for the same job. 112 func TestWorker_dequeueEvaluation_SerialJobs(t *testing.T) { 113 ci.Parallel(t) 114 115 s1, cleanupS1 := TestServer(t, func(c *Config) { 116 c.NumSchedulers = 0 117 c.EnabledSchedulers = []string{structs.JobTypeService} 118 }) 119 defer cleanupS1() 120 testutil.WaitForLeader(t, s1.RPC) 121 122 // Create the evaluation 123 eval1 := mock.Eval() 124 eval2 := mock.Eval() 125 eval2.JobID = eval1.JobID 126 127 // Insert the evals into the state store 128 must.NoError(t, s1.fsm.State().UpsertEvals( 129 structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1})) 130 must.NoError(t, s1.fsm.State().UpsertEvals( 131 structs.MsgTypeTestSetup, 2000, []*structs.Evaluation{eval2})) 132 133 s1.evalBroker.Enqueue(eval1) 134 s1.evalBroker.Enqueue(eval2) 135 136 // Create a worker 137 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 138 w := newWorker(s1.shutdownCtx, s1, poolArgs) 139 140 // Attempt dequeue 141 eval, token, waitIndex, shutdown := w.dequeueEvaluation(10 * time.Millisecond) 142 must.False(t, shutdown, must.Sprint("should not be shutdown")) 143 must.NotEq(t, token, "", must.Sprint("should get a token")) 144 must.NotEq(t, eval1.ModifyIndex, waitIndex, must.Sprintf("bad wait index")) 145 must.Eq(t, eval, eval1) 146 147 // Update the modify index of the first eval 148 must.NoError(t, s1.fsm.State().UpsertEvals( 149 structs.MsgTypeTestSetup, 1500, []*structs.Evaluation{eval1})) 150 151 // Send the Ack 152 w.sendAck(eval1, token) 153 154 // Attempt second dequeue; it should succeed because the 2nd eval has a 155 // lower modify index than the snapshot used to schedule the 1st 156 // eval. Normally this can only happen if the worker is on a follower that's 157 // trailing behind in raft logs 158 eval, token, waitIndex, shutdown = w.dequeueEvaluation(10 * time.Millisecond) 159 160 must.False(t, shutdown, must.Sprint("should not be shutdown")) 161 must.NotEq(t, token, "", must.Sprint("should get a token")) 162 must.Eq(t, waitIndex, 2000, must.Sprintf("bad wait index")) 163 must.Eq(t, eval, eval2) 164 165 } 166 167 func TestWorker_dequeueEvaluation_paused(t *testing.T) { 168 ci.Parallel(t) 169 170 s1, cleanupS1 := TestServer(t, func(c *Config) { 171 c.NumSchedulers = 0 172 c.EnabledSchedulers = []string{structs.JobTypeService} 173 }) 174 defer cleanupS1() 175 testutil.WaitForLeader(t, s1.RPC) 176 177 // Create the evaluation 178 eval1 := mock.Eval() 179 s1.evalBroker.Enqueue(eval1) 180 181 // Create a worker 182 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 183 w := newWorker(s1.shutdownCtx, s1, poolArgs) 184 w.pauseCond = sync.NewCond(&w.pauseLock) 185 186 // PAUSE the worker 187 w.Pause() 188 189 go func() { 190 time.Sleep(100 * time.Millisecond) 191 w.Resume() 192 }() 193 194 // Attempt dequeue 195 start := time.Now() 196 eval, token, waitIndex, shutdown := w.dequeueEvaluation(10 * time.Millisecond) 197 if diff := time.Since(start); diff < 100*time.Millisecond { 198 t.Fatalf("should have paused: %v", diff) 199 } 200 if shutdown { 201 t.Fatalf("should not shutdown") 202 } 203 if token == "" { 204 t.Fatalf("should get token") 205 } 206 if waitIndex != eval1.ModifyIndex { 207 t.Fatalf("bad wait index; got %d; want %d", waitIndex, eval1.ModifyIndex) 208 } 209 210 // Ensure we get a sane eval 211 if !reflect.DeepEqual(eval, eval1) { 212 t.Fatalf("bad: %#v %#v", eval, eval1) 213 } 214 } 215 216 func TestWorker_dequeueEvaluation_shutdown(t *testing.T) { 217 ci.Parallel(t) 218 219 s1, cleanupS1 := TestServer(t, func(c *Config) { 220 c.NumSchedulers = 0 221 c.EnabledSchedulers = []string{structs.JobTypeService} 222 }) 223 defer cleanupS1() 224 testutil.WaitForLeader(t, s1.RPC) 225 226 // Create a worker 227 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 228 w := newWorker(s1.shutdownCtx, s1, poolArgs) 229 230 go func() { 231 time.Sleep(10 * time.Millisecond) 232 s1.Shutdown() 233 }() 234 235 // Attempt dequeue 236 eval, _, _, shutdown := w.dequeueEvaluation(10 * time.Millisecond) 237 if !shutdown { 238 t.Fatalf("should not shutdown") 239 } 240 241 // Ensure we get a sane eval 242 if eval != nil { 243 t.Fatalf("bad: %#v", eval) 244 } 245 } 246 247 func TestWorker_Shutdown(t *testing.T) { 248 ci.Parallel(t) 249 250 s1, cleanupS1 := TestServer(t, func(c *Config) { 251 c.NumSchedulers = 0 252 c.EnabledSchedulers = []string{structs.JobTypeService} 253 }) 254 defer cleanupS1() 255 testutil.WaitForLeader(t, s1.RPC) 256 257 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 258 w := newWorker(s1.shutdownCtx, s1, poolArgs) 259 260 go func() { 261 time.Sleep(10 * time.Millisecond) 262 w.Stop() 263 }() 264 265 // Attempt dequeue 266 eval, _, _, shutdown := w.dequeueEvaluation(10 * time.Millisecond) 267 require.True(t, shutdown) 268 require.Nil(t, eval) 269 } 270 271 func TestWorker_Shutdown_paused(t *testing.T) { 272 ci.Parallel(t) 273 274 s1, cleanupS1 := TestServer(t, func(c *Config) { 275 c.NumSchedulers = 0 276 c.EnabledSchedulers = []string{structs.JobTypeService} 277 }) 278 defer cleanupS1() 279 testutil.WaitForLeader(t, s1.RPC) 280 281 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 282 w, _ := NewWorker(s1.shutdownCtx, s1, poolArgs) 283 284 w.Pause() 285 286 // pausing can take up to 500ms because of the blocking query timeout in dequeueEvaluation. 287 require.Eventually(t, w.IsPaused, 550*time.Millisecond, 10*time.Millisecond, "should pause") 288 289 go func() { 290 w.Stop() 291 }() 292 293 // transitioning to stopped from paused should be very quick, 294 // but might not be immediate. 295 require.Eventually(t, w.IsStopped, 100*time.Millisecond, 10*time.Millisecond, "should stop when paused") 296 } 297 298 func TestWorker_sendAck(t *testing.T) { 299 ci.Parallel(t) 300 301 s1, cleanupS1 := TestServer(t, func(c *Config) { 302 c.NumSchedulers = 0 303 c.EnabledSchedulers = []string{structs.JobTypeService} 304 }) 305 defer cleanupS1() 306 testutil.WaitForLeader(t, s1.RPC) 307 308 // Create the evaluation 309 eval1 := mock.Eval() 310 s1.evalBroker.Enqueue(eval1) 311 312 // Create a worker 313 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 314 w := newWorker(s1.shutdownCtx, s1, poolArgs) 315 316 // Attempt dequeue 317 eval, token, _, _ := w.dequeueEvaluation(10 * time.Millisecond) 318 319 // Check the depth is 0, 1 unacked 320 stats := s1.evalBroker.Stats() 321 if stats.TotalReady != 0 && stats.TotalUnacked != 1 { 322 t.Fatalf("bad: %#v", stats) 323 } 324 325 // Send the Nack 326 w.sendNack(eval, token) 327 328 // Check the depth is 1, nothing unacked 329 stats = s1.evalBroker.Stats() 330 if stats.TotalReady != 1 && stats.TotalUnacked != 0 { 331 t.Fatalf("bad: %#v", stats) 332 } 333 334 // Attempt dequeue 335 eval, token, _, _ = w.dequeueEvaluation(10 * time.Millisecond) 336 337 // Send the Ack 338 w.sendAck(eval, token) 339 340 // Check the depth is 0 341 stats = s1.evalBroker.Stats() 342 if stats.TotalReady != 0 && stats.TotalUnacked != 0 { 343 t.Fatalf("bad: %#v", stats) 344 } 345 } 346 347 func TestWorker_runBackoff(t *testing.T) { 348 ci.Parallel(t) 349 350 srv, cleanupSrv := TestServer(t, func(c *Config) { 351 c.NumSchedulers = 0 352 c.EnabledSchedulers = []string{structs.JobTypeService} 353 }) 354 defer cleanupSrv() 355 testutil.WaitForLeader(t, srv.RPC) 356 357 eval1 := mock.Eval() 358 eval1.ModifyIndex = 1000 359 srv.evalBroker.Enqueue(eval1) 360 must.Eq(t, 1, srv.evalBroker.Stats().TotalReady) 361 362 // make a new context here so we can still check the broker's state after 363 // we've shut down the worker 364 workerCtx, workerCancel := context.WithCancel(srv.shutdownCtx) 365 defer workerCancel() 366 367 w := NewTestWorker(workerCtx, srv) 368 doneCh := make(chan struct{}) 369 370 go func() { 371 w.run(time.Millisecond) 372 doneCh <- struct{}{} 373 }() 374 375 // We expect to be paused for 10ms + 1ms but otherwise can't be all that 376 // precise here because of concurrency. But checking coverage for this test 377 // shows we've covered the logic 378 t1, cancelT1 := helper.NewSafeTimer(100 * time.Millisecond) 379 defer cancelT1() 380 select { 381 case <-doneCh: 382 t.Fatal("returned early") 383 case <-t1.C: 384 } 385 386 workerCancel() 387 <-doneCh 388 389 must.Eq(t, 1, srv.evalBroker.Stats().TotalWaiting) 390 must.Eq(t, 0, srv.evalBroker.Stats().TotalReady) 391 must.Eq(t, 0, srv.evalBroker.Stats().TotalPending) 392 must.Eq(t, 0, srv.evalBroker.Stats().TotalUnacked) 393 } 394 395 func TestWorker_waitForIndex(t *testing.T) { 396 ci.Parallel(t) 397 398 s1, cleanupS1 := TestServer(t, func(c *Config) { 399 c.NumSchedulers = 0 400 c.EnabledSchedulers = []string{structs.JobTypeService} 401 }) 402 defer cleanupS1() 403 testutil.WaitForLeader(t, s1.RPC) 404 405 // Get the current index 406 index := s1.raft.AppliedIndex() 407 408 // Cause an increment 409 errCh := make(chan error, 1) 410 go func() { 411 time.Sleep(10 * time.Millisecond) 412 n := mock.Node() 413 errCh <- s1.fsm.state.UpsertNode(structs.MsgTypeTestSetup, index+1, n) 414 }() 415 416 // Wait for a future index 417 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 418 w := newWorker(s1.shutdownCtx, s1, poolArgs) 419 snap, err := w.snapshotMinIndex(index+1, time.Second) 420 require.NoError(t, err) 421 require.NotNil(t, snap) 422 423 // No error from upserting 424 require.NoError(t, <-errCh) 425 426 // Cause a timeout 427 waitIndex := index + 100 428 timeout := 10 * time.Millisecond 429 snap, err = w.snapshotMinIndex(index+100, timeout) 430 require.Nil(t, snap) 431 require.EqualError(t, err, 432 fmt.Sprintf("timed out after %s waiting for index=%d", timeout, waitIndex)) 433 require.True(t, errors.Is(err, context.DeadlineExceeded), "expect error to wrap DeadlineExceeded") 434 } 435 436 func TestWorker_invokeScheduler(t *testing.T) { 437 ci.Parallel(t) 438 439 s1, cleanupS1 := TestServer(t, func(c *Config) { 440 c.NumSchedulers = 0 441 c.EnabledSchedulers = []string{structs.JobTypeService} 442 }) 443 defer cleanupS1() 444 445 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 446 w := newWorker(s1.shutdownCtx, s1, poolArgs) 447 eval := mock.Eval() 448 eval.Type = "noop" 449 450 snap, err := s1.fsm.state.Snapshot() 451 require.NoError(t, err) 452 453 err = w.invokeScheduler(snap, eval, uuid.Generate()) 454 require.NoError(t, err) 455 } 456 457 func TestWorker_SubmitPlan(t *testing.T) { 458 ci.Parallel(t) 459 460 s1, cleanupS1 := TestServer(t, func(c *Config) { 461 c.NumSchedulers = 0 462 c.EnabledSchedulers = []string{structs.JobTypeService} 463 }) 464 defer cleanupS1() 465 testutil.WaitForLeader(t, s1.RPC) 466 467 // Register node 468 node := mock.Node() 469 testRegisterNode(t, s1, node) 470 471 job := mock.Job() 472 eval1 := mock.Eval() 473 eval1.JobID = job.ID 474 s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 1000, nil, job) 475 s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1}) 476 477 // Create the register request 478 s1.evalBroker.Enqueue(eval1) 479 480 evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second) 481 if err != nil { 482 t.Fatalf("err: %v", err) 483 } 484 if evalOut != eval1 { 485 t.Fatalf("Bad eval") 486 } 487 488 // Create an allocation plan 489 alloc := mock.Alloc() 490 plan := &structs.Plan{ 491 Job: job, 492 EvalID: eval1.ID, 493 NodeAllocation: map[string][]*structs.Allocation{ 494 node.ID: {alloc}, 495 }, 496 } 497 498 // Attempt to submit a plan 499 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 500 w := newWorker(s1.shutdownCtx, s1, poolArgs) 501 w.evalToken = token 502 503 result, state, err := w.SubmitPlan(plan) 504 if err != nil { 505 t.Fatalf("err: %v", err) 506 } 507 508 // Should have no update 509 if state != nil { 510 t.Fatalf("unexpected state update") 511 } 512 513 // Result should have allocated 514 if result == nil { 515 t.Fatalf("missing result") 516 } 517 518 if result.AllocIndex == 0 { 519 t.Fatalf("Bad: %#v", result) 520 } 521 if len(result.NodeAllocation) != 1 { 522 t.Fatalf("Bad: %#v", result) 523 } 524 } 525 526 func TestWorker_SubmitPlanNormalizedAllocations(t *testing.T) { 527 ci.Parallel(t) 528 529 s1, cleanupS1 := TestServer(t, func(c *Config) { 530 c.NumSchedulers = 0 531 c.EnabledSchedulers = []string{structs.JobTypeService} 532 c.Build = "1.4.0" 533 }) 534 defer cleanupS1() 535 testutil.WaitForLeader(t, s1.RPC) 536 537 // Register node 538 node := mock.Node() 539 testRegisterNode(t, s1, node) 540 541 job := mock.Job() 542 eval1 := mock.Eval() 543 eval1.JobID = job.ID 544 s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 0, nil, job) 545 s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 0, []*structs.Evaluation{eval1}) 546 547 stoppedAlloc := mock.Alloc() 548 preemptedAlloc := mock.Alloc() 549 s1.fsm.State().UpsertAllocs(structs.MsgTypeTestSetup, 5, []*structs.Allocation{stoppedAlloc, preemptedAlloc}) 550 551 // Create an allocation plan 552 plan := &structs.Plan{ 553 Job: job, 554 EvalID: eval1.ID, 555 NodeUpdate: make(map[string][]*structs.Allocation), 556 NodePreemptions: make(map[string][]*structs.Allocation), 557 } 558 desiredDescription := "desired desc" 559 plan.AppendStoppedAlloc(stoppedAlloc, desiredDescription, structs.AllocClientStatusLost, "") 560 preemptingAllocID := uuid.Generate() 561 plan.AppendPreemptedAlloc(preemptedAlloc, preemptingAllocID) 562 563 // Attempt to submit a plan 564 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 565 w := newWorker(s1.shutdownCtx, s1, poolArgs) 566 w.SubmitPlan(plan) 567 568 assert.Equal(t, &structs.Allocation{ 569 ID: preemptedAlloc.ID, 570 PreemptedByAllocation: preemptingAllocID, 571 }, plan.NodePreemptions[preemptedAlloc.NodeID][0]) 572 assert.Equal(t, &structs.Allocation{ 573 ID: stoppedAlloc.ID, 574 DesiredDescription: desiredDescription, 575 ClientStatus: structs.AllocClientStatusLost, 576 }, plan.NodeUpdate[stoppedAlloc.NodeID][0]) 577 } 578 579 func TestWorker_SubmitPlan_MissingNodeRefresh(t *testing.T) { 580 ci.Parallel(t) 581 582 s1, cleanupS1 := TestServer(t, func(c *Config) { 583 c.NumSchedulers = 0 584 c.EnabledSchedulers = []string{structs.JobTypeService} 585 }) 586 defer cleanupS1() 587 testutil.WaitForLeader(t, s1.RPC) 588 589 // Register node 590 node := mock.Node() 591 testRegisterNode(t, s1, node) 592 593 // Create the job 594 job := mock.Job() 595 s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 1000, nil, job) 596 597 // Create the register request 598 eval1 := mock.Eval() 599 eval1.JobID = job.ID 600 s1.evalBroker.Enqueue(eval1) 601 602 evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second) 603 if err != nil { 604 t.Fatalf("err: %v", err) 605 } 606 if evalOut != eval1 { 607 t.Fatalf("Bad eval") 608 } 609 610 // Create an allocation plan, with unregistered node 611 node2 := mock.Node() 612 alloc := mock.Alloc() 613 plan := &structs.Plan{ 614 Job: job, 615 EvalID: eval1.ID, 616 NodeAllocation: map[string][]*structs.Allocation{ 617 node2.ID: {alloc}, 618 }, 619 } 620 621 // Attempt to submit a plan 622 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 623 w := newWorker(s1.shutdownCtx, s1, poolArgs) 624 w.evalToken = token 625 626 result, state, err := w.SubmitPlan(plan) 627 if err != nil { 628 t.Fatalf("err: %v", err) 629 } 630 631 // Result should have allocated 632 if result == nil { 633 t.Fatalf("missing result") 634 } 635 636 // Expect no allocation and forced refresh 637 if result.AllocIndex != 0 { 638 t.Fatalf("Bad: %#v", result) 639 } 640 if result.RefreshIndex == 0 { 641 t.Fatalf("Bad: %#v", result) 642 } 643 if len(result.NodeAllocation) != 0 { 644 t.Fatalf("Bad: %#v", result) 645 } 646 647 // Should have an update 648 if state == nil { 649 t.Fatalf("expected state update") 650 } 651 } 652 653 func TestWorker_UpdateEval(t *testing.T) { 654 ci.Parallel(t) 655 656 s1, cleanupS1 := TestServer(t, func(c *Config) { 657 c.NumSchedulers = 0 658 c.EnabledSchedulers = []string{structs.JobTypeService} 659 }) 660 defer cleanupS1() 661 testutil.WaitForLeader(t, s1.RPC) 662 663 // Register node 664 node := mock.Node() 665 testRegisterNode(t, s1, node) 666 667 // Create the register request 668 eval1 := mock.Eval() 669 s1.evalBroker.Enqueue(eval1) 670 evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second) 671 if err != nil { 672 t.Fatalf("err: %v", err) 673 } 674 if evalOut != eval1 { 675 t.Fatalf("Bad eval") 676 } 677 678 eval2 := evalOut.Copy() 679 eval2.Status = structs.EvalStatusComplete 680 681 // Attempt to update eval 682 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 683 w := newWorker(s1.shutdownCtx, s1, poolArgs) 684 w.evalToken = token 685 686 err = w.UpdateEval(eval2) 687 if err != nil { 688 t.Fatalf("err: %v", err) 689 } 690 691 ws := memdb.NewWatchSet() 692 out, err := s1.fsm.State().EvalByID(ws, eval2.ID) 693 if err != nil { 694 t.Fatalf("err: %v", err) 695 } 696 if out.Status != structs.EvalStatusComplete { 697 t.Fatalf("bad: %v", out) 698 } 699 if out.SnapshotIndex != w.snapshotIndex { 700 t.Fatalf("bad: %v", out) 701 } 702 } 703 704 func TestWorker_CreateEval(t *testing.T) { 705 ci.Parallel(t) 706 707 s1, cleanupS1 := TestServer(t, func(c *Config) { 708 c.NumSchedulers = 0 709 c.EnabledSchedulers = []string{structs.JobTypeService} 710 }) 711 defer cleanupS1() 712 testutil.WaitForLeader(t, s1.RPC) 713 714 // Register node 715 node := mock.Node() 716 testRegisterNode(t, s1, node) 717 718 // Create the register request 719 eval1 := mock.Eval() 720 s1.evalBroker.Enqueue(eval1) 721 722 evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second) 723 if err != nil { 724 t.Fatalf("err: %v", err) 725 } 726 if evalOut != eval1 { 727 t.Fatalf("Bad eval") 728 } 729 730 eval2 := mock.Eval() 731 eval2.PreviousEval = eval1.ID 732 733 // Attempt to create eval 734 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 735 w := newWorker(s1.shutdownCtx, s1, poolArgs) 736 w.evalToken = token 737 738 err = w.CreateEval(eval2) 739 if err != nil { 740 t.Fatalf("err: %v", err) 741 } 742 743 ws := memdb.NewWatchSet() 744 out, err := s1.fsm.State().EvalByID(ws, eval2.ID) 745 if err != nil { 746 t.Fatalf("err: %v", err) 747 } 748 if out.PreviousEval != eval1.ID { 749 t.Fatalf("bad: %v", out) 750 } 751 if out.SnapshotIndex != w.snapshotIndex { 752 t.Fatalf("bad: %v", out) 753 } 754 } 755 756 func TestWorker_ReblockEval(t *testing.T) { 757 ci.Parallel(t) 758 759 s1, cleanupS1 := TestServer(t, func(c *Config) { 760 c.NumSchedulers = 0 761 c.EnabledSchedulers = []string{structs.JobTypeService} 762 }) 763 defer cleanupS1() 764 testutil.WaitForLeader(t, s1.RPC) 765 766 // Create the blocked eval 767 eval1 := mock.Eval() 768 eval1.Status = structs.EvalStatusBlocked 769 eval1.QueuedAllocations = map[string]int{"cache": 100} 770 771 // Insert it into the state store 772 if err := s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1}); err != nil { 773 t.Fatal(err) 774 } 775 776 // Create the job summary 777 js := mock.JobSummary(eval1.JobID) 778 tg := js.Summary["web"] 779 tg.Queued = 100 780 js.Summary["web"] = tg 781 if err := s1.fsm.State().UpsertJobSummary(1001, js); err != nil { 782 t.Fatal(err) 783 } 784 785 // Enqueue the eval and then dequeue 786 s1.evalBroker.Enqueue(eval1) 787 evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second) 788 if err != nil { 789 t.Fatalf("err: %v", err) 790 } 791 if evalOut != eval1 { 792 t.Fatalf("Bad eval") 793 } 794 795 eval2 := evalOut.Copy() 796 eval2.QueuedAllocations = map[string]int{"web": 50} 797 798 // Attempt to reblock eval 799 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 800 w := newWorker(s1.shutdownCtx, s1, poolArgs) 801 w.evalToken = token 802 803 err = w.ReblockEval(eval2) 804 if err != nil { 805 t.Fatalf("err: %v", err) 806 } 807 808 // Ack the eval 809 w.sendAck(evalOut, token) 810 811 // Check that it is blocked 812 bStats := s1.blockedEvals.Stats() 813 if bStats.TotalBlocked+bStats.TotalEscaped != 1 { 814 t.Fatalf("ReblockEval didn't insert eval into the blocked eval tracker: %#v", bStats) 815 } 816 817 // Check that the eval was updated 818 ws := memdb.NewWatchSet() 819 eval, err := s1.fsm.State().EvalByID(ws, eval2.ID) 820 if err != nil { 821 t.Fatal(err) 822 } 823 if !reflect.DeepEqual(eval.QueuedAllocations, eval2.QueuedAllocations) { 824 t.Fatalf("expected: %#v, actual: %#v", eval2.QueuedAllocations, eval.QueuedAllocations) 825 } 826 827 // Check that the snapshot index was set properly by unblocking the eval and 828 // then dequeuing. 829 s1.blockedEvals.Unblock("foobar", 1000) 830 831 reblockedEval, _, err := s1.evalBroker.Dequeue([]string{eval1.Type}, 1*time.Second) 832 if err != nil { 833 t.Fatalf("err: %v", err) 834 } 835 if reblockedEval == nil { 836 t.Fatalf("Nil eval") 837 } 838 if reblockedEval.ID != eval1.ID { 839 t.Fatalf("Bad eval") 840 } 841 842 // Check that the SnapshotIndex is set 843 if reblockedEval.SnapshotIndex != w.snapshotIndex { 844 t.Fatalf("incorrect snapshot index; got %d; want %d", 845 reblockedEval.SnapshotIndex, w.snapshotIndex) 846 } 847 } 848 849 func TestWorker_Info(t *testing.T) { 850 ci.Parallel(t) 851 852 s1, cleanupS1 := TestServer(t, func(c *Config) { 853 c.NumSchedulers = 0 854 c.EnabledSchedulers = []string{structs.JobTypeService} 855 }) 856 defer cleanupS1() 857 testutil.WaitForLeader(t, s1.RPC) 858 859 poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy() 860 861 // Create a worker 862 w := newWorker(s1.shutdownCtx, s1, poolArgs) 863 864 require.Equal(t, WorkerStarting, w.GetStatus()) 865 workerInfo := w.Info() 866 require.Equal(t, WorkerStarting.String(), workerInfo.Status) 867 } 868 869 const ( 870 longWait = 100 * time.Millisecond 871 tinyWait = 10 * time.Millisecond 872 ) 873 874 func TestWorker_SetPause(t *testing.T) { 875 ci.Parallel(t) 876 logger := testlog.HCLogger(t) 877 srv := &Server{ 878 logger: logger, 879 shutdownCtx: context.Background(), 880 } 881 args := SchedulerWorkerPoolArgs{ 882 EnabledSchedulers: []string{structs.JobTypeCore, structs.JobTypeBatch, structs.JobTypeSystem}, 883 } 884 w := newWorker(context.Background(), srv, args) 885 w._start(testWorkload) 886 require.Eventually(t, w.IsStarted, longWait, tinyWait, "should have started") 887 888 go func() { 889 time.Sleep(tinyWait) 890 w.Pause() 891 }() 892 require.Eventually(t, w.IsPaused, longWait, tinyWait, "should have paused") 893 894 go func() { 895 time.Sleep(tinyWait) 896 w.Pause() 897 }() 898 require.Eventually(t, w.IsPaused, longWait, tinyWait, "pausing a paused should be okay") 899 900 go func() { 901 time.Sleep(tinyWait) 902 w.Resume() 903 }() 904 require.Eventually(t, w.IsStarted, longWait, tinyWait, "should have restarted from pause") 905 906 go func() { 907 time.Sleep(tinyWait) 908 w.Stop() 909 }() 910 require.Eventually(t, w.IsStopped, longWait, tinyWait, "should have shutdown") 911 } 912 913 func TestWorker_SetPause_OutOfOrderEvents(t *testing.T) { 914 ci.Parallel(t) 915 logger := testlog.HCLogger(t) 916 srv := &Server{ 917 logger: logger, 918 shutdownCtx: context.Background(), 919 } 920 args := SchedulerWorkerPoolArgs{ 921 EnabledSchedulers: []string{structs.JobTypeCore, structs.JobTypeBatch, structs.JobTypeSystem}, 922 } 923 w := newWorker(context.Background(), srv, args) 924 w._start(testWorkload) 925 require.Eventually(t, w.IsStarted, longWait, tinyWait, "should have started") 926 927 go func() { 928 time.Sleep(tinyWait) 929 w.Pause() 930 }() 931 require.Eventually(t, w.IsPaused, longWait, tinyWait, "should have paused") 932 933 go func() { 934 time.Sleep(tinyWait) 935 w.Stop() 936 }() 937 require.Eventually(t, w.IsStopped, longWait, tinyWait, "stop from pause should have shutdown") 938 939 go func() { 940 time.Sleep(tinyWait) 941 w.Pause() 942 }() 943 require.Eventually(t, w.IsStopped, longWait, tinyWait, "pausing a stopped should stay stopped") 944 945 } 946 947 // _start is a test helper function used to start a worker with an alternate workload 948 func (w *Worker) _start(inFunc func(w *Worker)) { 949 w.setStatus(WorkerStarting) 950 go inFunc(w) 951 } 952 953 // testWorkload is a very simple function that performs the same status updating behaviors that the 954 // real workload does. 955 func testWorkload(w *Worker) { 956 defer w.markStopped() 957 w.setStatuses(WorkerStarted, WorkloadRunning) 958 w.logger.Debug("testWorkload running") 959 for { 960 // ensure state variables are happy after resuming. 961 w.maybeWait() 962 if w.workerShuttingDown() { 963 w.logger.Debug("testWorkload stopped") 964 return 965 } 966 // do some fake work 967 time.Sleep(10 * time.Millisecond) 968 } 969 }