github.com/hernad/nomad@v1.6.112/nomad/worker_test.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package nomad

import (
	"context"
	"errors"
	"fmt"
	"reflect"
	"sync"
	"testing"
	"time"

	log "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-memdb"
	"github.com/hernad/nomad/ci"
	"github.com/hernad/nomad/helper"
	"github.com/shoenig/test/must"
	"github.com/stretchr/testify/require"

	"github.com/hernad/nomad/helper/testlog"
	"github.com/hernad/nomad/helper/uuid"
	"github.com/hernad/nomad/nomad/mock"
	"github.com/hernad/nomad/nomad/structs"
	"github.com/hernad/nomad/scheduler"
	"github.com/hernad/nomad/testutil"
	"github.com/stretchr/testify/assert"
)

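// NoopScheduler implements the scheduler.Scheduler interface. Process only
// records the evaluation it was handed and returns the injected error, which
// lets tests assert that a worker invoked the scheduler without running any
// real scheduling logic.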
type NoopScheduler struct {
	state    scheduler.State
	planner  scheduler.Planner
	eval     *structs.Evaluation
	eventsCh chan<- interface{}
	err      error
}

func (n *NoopScheduler) Process(eval *structs.Evaluation) error {
	if n.state == nil {
		panic("missing state")
	}
	if n.planner == nil {
		panic("missing planner")
	}
	n.eval = eval
	return n.err
}

func init() {
	scheduler.BuiltinSchedulers["noop"] = func(logger log.Logger, eventsCh chan<- interface{}, s scheduler.State, p scheduler.Planner) scheduler.Scheduler {
		n := &NoopScheduler{
			state:   s,
			planner: p,
		}
		return n
	}
}
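
// Registering the factory under "noop" lets a test route an evaluation to the
// no-op scheduler just by setting its type; TestWorker_invokeScheduler below
// relies on this:
//
//	eval := mock.Eval()
//	eval.Type = "noop" // dispatched to NoopScheduler via scheduler.BuiltinSchedulers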

// NewTestWorker returns the worker without calling its run method.
func NewTestWorker(shutdownCtx context.Context, srv *Server) *Worker {
	w := &Worker{
		srv:               srv,
		start:             time.Now(),
		id:                uuid.Generate(),
		enabledSchedulers: srv.config.EnabledSchedulers,
	}
	w.logger = srv.logger.ResetNamed("worker").With("worker_id", w.id)
	w.pauseCond = sync.NewCond(&w.pauseLock)
	w.ctx, w.cancelFn = context.WithCancel(shutdownCtx)
	return w
}
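
// Because NewTestWorker never starts the run loop, the caller drives it
// directly. TestWorker_runBackoff below does exactly that:
//
//	w := NewTestWorker(workerCtx, srv)
//	go w.run(time.Millisecond) // the test supplies the raft-sync limit itself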

func TestWorker_dequeueEvaluation(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Create the evaluation
	eval1 := mock.Eval()
	s1.evalBroker.Enqueue(eval1)

	// Create a worker
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w, _ := NewWorker(s1.shutdownCtx, s1, poolArgs)

	// Attempt dequeue
	eval, token, waitIndex, shutdown := w.dequeueEvaluation(10 * time.Millisecond)
	if shutdown {
		t.Fatalf("should not shutdown")
	}
	if token == "" {
		t.Fatalf("should get token")
	}
	if waitIndex != eval1.ModifyIndex {
		t.Fatalf("bad wait index; got %d; want %d", waitIndex, eval1.ModifyIndex)
	}

	// Ensure we get a sane eval
	if !reflect.DeepEqual(eval, eval1) {
		t.Fatalf("bad: %#v %#v", eval, eval1)
	}
}

// Test that the worker picks up the correct wait index when there are multiple
// evals for the same job.
func TestWorker_dequeueEvaluation_SerialJobs(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Create two evaluations for the same job
	eval1 := mock.Eval()
	eval2 := mock.Eval()
	eval2.JobID = eval1.JobID

	// Insert the evals into the state store
	must.NoError(t, s1.fsm.State().UpsertEvals(
		structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1}))
	must.NoError(t, s1.fsm.State().UpsertEvals(
		structs.MsgTypeTestSetup, 2000, []*structs.Evaluation{eval2}))

	s1.evalBroker.Enqueue(eval1)
	s1.evalBroker.Enqueue(eval2)

	// Create a worker
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)

	// Attempt dequeue
	eval, token, waitIndex, shutdown := w.dequeueEvaluation(10 * time.Millisecond)
	must.False(t, shutdown, must.Sprint("should not be shutdown"))
	must.NotEq(t, token, "", must.Sprint("should get a token"))
	must.Eq(t, eval1.ModifyIndex, waitIndex, must.Sprint("bad wait index"))
	must.Eq(t, eval, eval1)

	// Update the modify index of the first eval
	must.NoError(t, s1.fsm.State().UpsertEvals(
		structs.MsgTypeTestSetup, 1500, []*structs.Evaluation{eval1}))

	// Send the Ack
	w.sendAck(eval1, token)

	// Attempt second dequeue; it should succeed because the 2nd eval has a
	// lower modify index than the snapshot used to schedule the 1st eval.
	// Normally this can only happen if the worker is on a follower that's
	// trailing behind in raft logs
	eval, token, waitIndex, shutdown = w.dequeueEvaluation(10 * time.Millisecond)

	must.False(t, shutdown, must.Sprint("should not be shutdown"))
	must.NotEq(t, token, "", must.Sprint("should get a token"))
	must.Eq(t, waitIndex, 2000, must.Sprint("bad wait index"))
	must.Eq(t, eval, eval2)
}

func TestWorker_dequeueEvaluation_paused(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Create the evaluation
	eval1 := mock.Eval()
	s1.evalBroker.Enqueue(eval1)

	// Create a worker
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)
	w.pauseCond = sync.NewCond(&w.pauseLock)

	// PAUSE the worker
	w.Pause()

	go func() {
		time.Sleep(100 * time.Millisecond)
		w.Resume()
	}()

	// Attempt dequeue
	start := time.Now()
	eval, token, waitIndex, shutdown := w.dequeueEvaluation(10 * time.Millisecond)
	if diff := time.Since(start); diff < 100*time.Millisecond {
		t.Fatalf("should have paused: %v", diff)
	}
	if shutdown {
		t.Fatalf("should not shutdown")
	}
	if token == "" {
		t.Fatalf("should get token")
	}
	if waitIndex != eval1.ModifyIndex {
		t.Fatalf("bad wait index; got %d; want %d", waitIndex, eval1.ModifyIndex)
	}

	// Ensure we get a sane eval
	if !reflect.DeepEqual(eval, eval1) {
		t.Fatalf("bad: %#v %#v", eval, eval1)
	}
}

func TestWorker_dequeueEvaluation_shutdown(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Create a worker
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)

	go func() {
		time.Sleep(10 * time.Millisecond)
		s1.Shutdown()
	}()

	// Attempt dequeue
	eval, _, _, shutdown := w.dequeueEvaluation(10 * time.Millisecond)
	if !shutdown {
		t.Fatalf("should shutdown")
	}

	// Ensure no eval was returned
	if eval != nil {
		t.Fatalf("bad: %#v", eval)
	}
}

func TestWorker_Shutdown(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)

	go func() {
		time.Sleep(10 * time.Millisecond)
		w.Stop()
	}()

	// Attempt dequeue
	eval, _, _, shutdown := w.dequeueEvaluation(10 * time.Millisecond)
	require.True(t, shutdown)
	require.Nil(t, eval)
}

func TestWorker_Shutdown_paused(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w, _ := NewWorker(s1.shutdownCtx, s1, poolArgs)

	w.Pause()

	// Pausing can take up to 500ms because of the blocking query timeout in
	// dequeueEvaluation.
	require.Eventually(t, w.IsPaused, 550*time.Millisecond, 10*time.Millisecond, "should pause")

	go func() {
		w.Stop()
	}()

	// Transitioning from paused to stopped should be very quick, but might not
	// be immediate.
	require.Eventually(t, w.IsStopped, 100*time.Millisecond, 10*time.Millisecond, "should stop when paused")
}

func TestWorker_sendAck(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Create the evaluation
	eval1 := mock.Eval()
	s1.evalBroker.Enqueue(eval1)

	// Create a worker
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)

	// Attempt dequeue
	eval, token, _, _ := w.dequeueEvaluation(10 * time.Millisecond)

	// Check the depth is 0, 1 unacked
	stats := s1.evalBroker.Stats()
	if stats.TotalReady != 0 || stats.TotalUnacked != 1 {
		t.Fatalf("bad: %#v", stats)
	}

	// Send the Nack
	w.sendNack(eval, token)

	// Check the depth is 1, nothing unacked; the nacked eval is re-enqueued
	// after a short delay, so poll for it
	testutil.WaitForResult(func() (bool, error) {
		stats = s1.evalBroker.Stats()
		if stats.TotalReady != 1 || stats.TotalUnacked != 0 {
			return false, fmt.Errorf("bad: %#v", stats)
		}
		return true, nil
	}, func(err error) {
		t.Fatal(err)
	})

	// Attempt dequeue
	eval, token, _, _ = w.dequeueEvaluation(10 * time.Millisecond)

	// Send the Ack
	w.sendAck(eval, token)

	// Check the depth is 0, nothing unacked
	stats = s1.evalBroker.Stats()
	if stats.TotalReady != 0 || stats.TotalUnacked != 0 {
		t.Fatalf("bad: %#v", stats)
	}
}

func TestWorker_runBackoff(t *testing.T) {
	ci.Parallel(t)

	srv, cleanupSrv := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupSrv()
	testutil.WaitForLeader(t, srv.RPC)

	eval1 := mock.Eval()
	eval1.ModifyIndex = 1000
	srv.evalBroker.Enqueue(eval1)
	must.Eq(t, 1, srv.evalBroker.Stats().TotalReady)

	// make a new context here so we can still check the broker's state after
	// we've shut down the worker
	workerCtx, workerCancel := context.WithCancel(srv.shutdownCtx)
	defer workerCancel()

	w := NewTestWorker(workerCtx, srv)
	doneCh := make(chan struct{})

	go func() {
		w.run(time.Millisecond)
		doneCh <- struct{}{}
	}()

	// We expect to be paused for 10ms + 1ms but otherwise can't be all that
	// precise here because of concurrency. But checking coverage for this test
	// shows we've covered the logic
	t1, cancelT1 := helper.NewSafeTimer(100 * time.Millisecond)
	defer cancelT1()
	select {
	case <-doneCh:
		t.Fatal("returned early")
	case <-t1.C:
	}

	workerCancel()
	<-doneCh
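
	// The eval should land in the broker's waiting set: its ModifyIndex (1000)
	// is far ahead of the test server's raft index, so the worker presumably
	// times out in snapshotMinIndex, nacks the eval, and backs off rather than
	// processing it.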
	must.Eq(t, 1, srv.evalBroker.Stats().TotalWaiting)
	must.Eq(t, 0, srv.evalBroker.Stats().TotalReady)
	must.Eq(t, 0, srv.evalBroker.Stats().TotalPending)
	must.Eq(t, 0, srv.evalBroker.Stats().TotalUnacked)
}

func TestWorker_waitForIndex(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Get the current index
	index := s1.raft.AppliedIndex()

	// Cause an increment
	errCh := make(chan error, 1)
	go func() {
		time.Sleep(10 * time.Millisecond)
		n := mock.Node()
		errCh <- s1.fsm.state.UpsertNode(structs.MsgTypeTestSetup, index+1, n)
	}()

	// Wait for a future index
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)
	snap, err := w.snapshotMinIndex(index+1, time.Second)
	require.NoError(t, err)
	require.NotNil(t, snap)

	// No error from upserting
	require.NoError(t, <-errCh)

	// Cause a timeout
	waitIndex := index + 100
	timeout := 10 * time.Millisecond
	snap, err = w.snapshotMinIndex(waitIndex, timeout)
	require.Nil(t, snap)
	require.EqualError(t, err,
		fmt.Sprintf("timed out after %s waiting for index=%d", timeout, waitIndex))
	require.True(t, errors.Is(err, context.DeadlineExceeded), "expect error to wrap DeadlineExceeded")
}

func TestWorker_invokeScheduler(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()

	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)
	eval := mock.Eval()
	eval.Type = "noop"

	snap, err := s1.fsm.state.Snapshot()
	require.NoError(t, err)

	err = w.invokeScheduler(snap, eval, uuid.Generate())
	require.NoError(t, err)
}

func TestWorker_SubmitPlan(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Register node
	node := mock.Node()
	testRegisterNode(t, s1, node)

	job := mock.Job()
	eval1 := mock.Eval()
	eval1.JobID = job.ID
	s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 1000, nil, job)
	s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1})

	// Enqueue the eval and dequeue it to get the broker token
	s1.evalBroker.Enqueue(eval1)

	evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if evalOut != eval1 {
		t.Fatalf("Bad eval")
	}

	// Create an allocation plan
	alloc := mock.Alloc()
	plan := &structs.Plan{
		Job:    job,
		EvalID: eval1.ID,
		NodeAllocation: map[string][]*structs.Allocation{
			node.ID: {alloc},
		},
	}

	// Attempt to submit a plan
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)
	w.evalToken = token
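
	// The dequeue token is attached to the plan as plan.EvalToken so the
	// leader can verify the submitter still holds the outstanding lease on the
	// eval before applying the plan.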

	result, state, err := w.SubmitPlan(plan)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Should have no update
	if state != nil {
		t.Fatalf("unexpected state update")
	}

	// Result should have allocated
	if result == nil {
		t.Fatalf("missing result")
	}

	if result.AllocIndex == 0 {
		t.Fatalf("Bad: %#v", result)
	}
	if len(result.NodeAllocation) != 1 {
		t.Fatalf("Bad: %#v", result)
	}
}

func TestWorker_SubmitPlanNormalizedAllocations(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
		c.Build = "1.4.0"
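		// Note: a Build at or above the minimum version for plan normalization
		// means SubmitPlan normalizes the stopped and preempted allocations to
		// ID-only stubs, which is what the assertions below check.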
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Register node
	node := mock.Node()
	testRegisterNode(t, s1, node)

	job := mock.Job()
	eval1 := mock.Eval()
	eval1.JobID = job.ID
	s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 0, nil, job)
	s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 0, []*structs.Evaluation{eval1})

	stoppedAlloc := mock.Alloc()
	preemptedAlloc := mock.Alloc()
	s1.fsm.State().UpsertAllocs(structs.MsgTypeTestSetup, 5, []*structs.Allocation{stoppedAlloc, preemptedAlloc})

	// Create an allocation plan
	plan := &structs.Plan{
		Job:             job,
		EvalID:          eval1.ID,
		NodeUpdate:      make(map[string][]*structs.Allocation),
		NodePreemptions: make(map[string][]*structs.Allocation),
	}
	desiredDescription := "desired desc"
	plan.AppendStoppedAlloc(stoppedAlloc, desiredDescription, structs.AllocClientStatusLost, "")
	preemptingAllocID := uuid.Generate()
	plan.AppendPreemptedAlloc(preemptedAlloc, preemptingAllocID)

	// Attempt to submit a plan
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)
	w.SubmitPlan(plan)

	assert.Equal(t, &structs.Allocation{
		ID:                    preemptedAlloc.ID,
		PreemptedByAllocation: preemptingAllocID,
	}, plan.NodePreemptions[preemptedAlloc.NodeID][0])
	assert.Equal(t, &structs.Allocation{
		ID:                 stoppedAlloc.ID,
		DesiredDescription: desiredDescription,
		ClientStatus:       structs.AllocClientStatusLost,
	}, plan.NodeUpdate[stoppedAlloc.NodeID][0])
}

func TestWorker_SubmitPlan_MissingNodeRefresh(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Register node
	node := mock.Node()
	testRegisterNode(t, s1, node)

	// Create the job
	job := mock.Job()
	s1.fsm.State().UpsertJob(structs.MsgTypeTestSetup, 1000, nil, job)

	// Enqueue the eval and dequeue it to get the broker token
	eval1 := mock.Eval()
	eval1.JobID = job.ID
	s1.evalBroker.Enqueue(eval1)

	evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if evalOut != eval1 {
		t.Fatalf("Bad eval")
	}

	// Create an allocation plan targeting an unregistered node
	node2 := mock.Node()
	alloc := mock.Alloc()
	plan := &structs.Plan{
		Job:    job,
		EvalID: eval1.ID,
		NodeAllocation: map[string][]*structs.Allocation{
			node2.ID: {alloc},
		},
	}

	// Attempt to submit a plan
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)
	w.evalToken = token

	result, state, err := w.SubmitPlan(plan)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Result should be non-nil
	if result == nil {
		t.Fatalf("missing result")
	}

	// Expect no allocation and a forced refresh
	if result.AllocIndex != 0 {
		t.Fatalf("Bad: %#v", result)
	}
	if result.RefreshIndex == 0 {
		t.Fatalf("Bad: %#v", result)
	}
	if len(result.NodeAllocation) != 0 {
		t.Fatalf("Bad: %#v", result)
	}

	// Should have a state update
	if state == nil {
		t.Fatalf("expected state update")
	}
}

func TestWorker_UpdateEval(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Register node
	node := mock.Node()
	testRegisterNode(t, s1, node)

	// Enqueue the eval and dequeue it to get the broker token
	eval1 := mock.Eval()
	s1.evalBroker.Enqueue(eval1)
	evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if evalOut != eval1 {
		t.Fatalf("Bad eval")
	}

	eval2 := evalOut.Copy()
	eval2.Status = structs.EvalStatusComplete

	// Attempt to update the eval
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)
	w.evalToken = token

	err = w.UpdateEval(eval2)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	ws := memdb.NewWatchSet()
	out, err := s1.fsm.State().EvalByID(ws, eval2.ID)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if out.Status != structs.EvalStatusComplete {
		t.Fatalf("bad: %v", out)
	}
	if out.SnapshotIndex != w.snapshotIndex {
		t.Fatalf("bad: %v", out)
	}
}

func TestWorker_CreateEval(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Register node
	node := mock.Node()
	testRegisterNode(t, s1, node)

	// Enqueue the eval and dequeue it to get the broker token
	eval1 := mock.Eval()
	s1.evalBroker.Enqueue(eval1)

	evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if evalOut != eval1 {
		t.Fatalf("Bad eval")
	}

	eval2 := mock.Eval()
	eval2.PreviousEval = eval1.ID

	// Attempt to create the eval
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)
	w.evalToken = token

	err = w.CreateEval(eval2)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	ws := memdb.NewWatchSet()
	out, err := s1.fsm.State().EvalByID(ws, eval2.ID)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if out.PreviousEval != eval1.ID {
		t.Fatalf("bad: %v", out)
	}
	if out.SnapshotIndex != w.snapshotIndex {
		t.Fatalf("bad: %v", out)
	}
}

func TestWorker_ReblockEval(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	// Create the blocked eval
	eval1 := mock.Eval()
	eval1.Status = structs.EvalStatusBlocked
	eval1.QueuedAllocations = map[string]int{"cache": 100}

	// Insert it into the state store
	if err := s1.fsm.State().UpsertEvals(structs.MsgTypeTestSetup, 1000, []*structs.Evaluation{eval1}); err != nil {
		t.Fatal(err)
	}

	// Create the job summary
	js := mock.JobSummary(eval1.JobID)
	tg := js.Summary["web"]
	tg.Queued = 100
	js.Summary["web"] = tg
	if err := s1.fsm.State().UpsertJobSummary(1001, js); err != nil {
		t.Fatal(err)
	}

	// Enqueue the eval and then dequeue
	s1.evalBroker.Enqueue(eval1)
	evalOut, token, err := s1.evalBroker.Dequeue([]string{eval1.Type}, time.Second)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if evalOut != eval1 {
		t.Fatalf("Bad eval")
	}

	eval2 := evalOut.Copy()
	eval2.QueuedAllocations = map[string]int{"web": 50}

	// Attempt to reblock the eval
	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()
	w := newWorker(s1.shutdownCtx, s1, poolArgs)
	w.evalToken = token

	err = w.ReblockEval(eval2)
	if err != nil {
		t.Fatalf("err: %v", err)
	}

	// Ack the eval
	w.sendAck(evalOut, token)

	// Check that it is blocked
	bStats := s1.blockedEvals.Stats()
	if bStats.TotalBlocked+bStats.TotalEscaped != 1 {
		t.Fatalf("ReblockEval didn't insert eval into the blocked eval tracker: %#v", bStats)
	}

	// Check that the eval was updated
	ws := memdb.NewWatchSet()
	eval, err := s1.fsm.State().EvalByID(ws, eval2.ID)
	if err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(eval.QueuedAllocations, eval2.QueuedAllocations) {
		t.Fatalf("expected: %#v, actual: %#v", eval2.QueuedAllocations, eval.QueuedAllocations)
	}

	// Check that the snapshot index was set properly by unblocking the eval and
	// then dequeuing.
	s1.blockedEvals.Unblock("foobar", 1000)
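	// ("foobar" is an arbitrary computed node class; presumably any class the
	// blocked-evals tracker hasn't seen yet is enough to move this eval back
	// to ready.)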

	reblockedEval, _, err := s1.evalBroker.Dequeue([]string{eval1.Type}, 1*time.Second)
	if err != nil {
		t.Fatalf("err: %v", err)
	}
	if reblockedEval == nil {
		t.Fatalf("Nil eval")
	}
	if reblockedEval.ID != eval1.ID {
		t.Fatalf("Bad eval")
	}

	// Check that the SnapshotIndex is set
	if reblockedEval.SnapshotIndex != w.snapshotIndex {
		t.Fatalf("incorrect snapshot index; got %d; want %d",
			reblockedEval.SnapshotIndex, w.snapshotIndex)
	}
}

func TestWorker_Info(t *testing.T) {
	ci.Parallel(t)

	s1, cleanupS1 := TestServer(t, func(c *Config) {
		c.NumSchedulers = 0
		c.EnabledSchedulers = []string{structs.JobTypeService}
	})
	defer cleanupS1()
	testutil.WaitForLeader(t, s1.RPC)

	poolArgs := getSchedulerWorkerPoolArgsFromConfigLocked(s1.config).Copy()

	// Create a worker
	w := newWorker(s1.shutdownCtx, s1, poolArgs)

	require.Equal(t, WorkerStarting, w.GetStatus())
	workerInfo := w.Info()
	require.Equal(t, WorkerStarting.String(), workerInfo.Status)
}

const (
	longWait = 100 * time.Millisecond
	tinyWait = 10 * time.Millisecond
)
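
// longWait and tinyWait bound the require.Eventually polling in the pause and
// stop state-machine tests below.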

func TestWorker_SetPause(t *testing.T) {
	ci.Parallel(t)
	logger := testlog.HCLogger(t)
	srv := &Server{
		logger:      logger,
		shutdownCtx: context.Background(),
	}
	args := SchedulerWorkerPoolArgs{
		EnabledSchedulers: []string{structs.JobTypeCore, structs.JobTypeBatch, structs.JobTypeSystem},
	}
	w := newWorker(context.Background(), srv, args)
	w._start(testWorkload)
	require.Eventually(t, w.IsStarted, longWait, tinyWait, "should have started")

	go func() {
		time.Sleep(tinyWait)
		w.Pause()
	}()
	require.Eventually(t, w.IsPaused, longWait, tinyWait, "should have paused")

	go func() {
		time.Sleep(tinyWait)
		w.Pause()
	}()
	require.Eventually(t, w.IsPaused, longWait, tinyWait, "pausing a paused worker should be okay")

	go func() {
		time.Sleep(tinyWait)
		w.Resume()
	}()
	require.Eventually(t, w.IsStarted, longWait, tinyWait, "should have resumed from pause")

	go func() {
		time.Sleep(tinyWait)
		w.Stop()
	}()
	require.Eventually(t, w.IsStopped, longWait, tinyWait, "should have shut down")
}

func TestWorker_SetPause_OutOfOrderEvents(t *testing.T) {
	ci.Parallel(t)
	logger := testlog.HCLogger(t)
	srv := &Server{
		logger:      logger,
		shutdownCtx: context.Background(),
	}
	args := SchedulerWorkerPoolArgs{
		EnabledSchedulers: []string{structs.JobTypeCore, structs.JobTypeBatch, structs.JobTypeSystem},
	}
	w := newWorker(context.Background(), srv, args)
	w._start(testWorkload)
	require.Eventually(t, w.IsStarted, longWait, tinyWait, "should have started")

	go func() {
		time.Sleep(tinyWait)
		w.Pause()
	}()
	require.Eventually(t, w.IsPaused, longWait, tinyWait, "should have paused")

	go func() {
		time.Sleep(tinyWait)
		w.Stop()
	}()
	require.Eventually(t, w.IsStopped, longWait, tinyWait, "stop from pause should have shut down")

	go func() {
		time.Sleep(tinyWait)
		w.Pause()
	}()
	require.Eventually(t, w.IsStopped, longWait, tinyWait, "pausing a stopped worker should stay stopped")
}

// _start is a test helper that starts a worker with an alternate workload.
func (w *Worker) _start(inFunc func(w *Worker)) {
	w.setStatus(WorkerStarting)
	go inFunc(w)
}

// testWorkload is a minimal stand-in that performs the same status-updating
// behaviors as the real workload.
func testWorkload(w *Worker) {
	defer w.markStopped()
	w.setStatuses(WorkerStarted, WorkloadRunning)
	w.logger.Debug("testWorkload running")
	for {
		// ensure state variables are happy after resuming.
		w.maybeWait()
		if w.workerShuttingDown() {
			w.logger.Debug("testWorkload stopped")
			return
		}
		// do some fake work
		time.Sleep(10 * time.Millisecond)
	}
}