github.com/bigcommerce/nomad@v0.9.3-bc/nomad/eval_endpoint.go (about)

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	metrics "github.com/armon/go-metrics"
     8  	log "github.com/hashicorp/go-hclog"
     9  	memdb "github.com/hashicorp/go-memdb"
    10  	multierror "github.com/hashicorp/go-multierror"
    11  
    12  	"github.com/hashicorp/nomad/acl"
    13  	"github.com/hashicorp/nomad/nomad/state"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  	"github.com/hashicorp/nomad/scheduler"
    16  )
    17  
    18  const (
    19  	// DefaultDequeueTimeout is used if no dequeue timeout is provided
    20  	DefaultDequeueTimeout = time.Second
    21  )
    22  
// Eval is the endpoint used for evaluation interactions. Each of its exported
// methods takes a request (args) and fills in a response (reply).
type Eval struct {
	// srv is the server the endpoint's methods forward requests through and
	// whose state store, eval broker and Raft log they operate on.
	srv *Server

	// logger is the endpoint's logger.
	logger log.Logger
}
    28  
    29  // GetEval is used to request information about a specific evaluation
    30  func (e *Eval) GetEval(args *structs.EvalSpecificRequest,
    31  	reply *structs.SingleEvalResponse) error {
    32  	if done, err := e.srv.forward("Eval.GetEval", args, args, reply); done {
    33  		return err
    34  	}
    35  	defer metrics.MeasureSince([]string{"nomad", "eval", "get_eval"}, time.Now())
    36  
    37  	// Check for read-job permissions
    38  	if aclObj, err := e.srv.ResolveToken(args.AuthToken); err != nil {
    39  		return err
    40  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
    41  		return structs.ErrPermissionDenied
    42  	}
    43  
    44  	// Setup the blocking query
    45  	opts := blockingOptions{
    46  		queryOpts: &args.QueryOptions,
    47  		queryMeta: &reply.QueryMeta,
    48  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
    49  			// Look for the job
    50  			out, err := state.EvalByID(ws, args.EvalID)
    51  			if err != nil {
    52  				return err
    53  			}
    54  
    55  			// Setup the output
    56  			reply.Eval = out
    57  			if out != nil {
    58  				reply.Index = out.ModifyIndex
    59  			} else {
    60  				// Use the last index that affected the nodes table
    61  				index, err := state.Index("evals")
    62  				if err != nil {
    63  					return err
    64  				}
    65  				reply.Index = index
    66  			}
    67  
    68  			// Set the query response
    69  			e.srv.setQueryMeta(&reply.QueryMeta)
    70  			return nil
    71  		}}
    72  	return e.srv.blockingRPC(&opts)
    73  }
    74  
    75  // Dequeue is used to dequeue a pending evaluation
    76  func (e *Eval) Dequeue(args *structs.EvalDequeueRequest,
    77  	reply *structs.EvalDequeueResponse) error {
    78  	if done, err := e.srv.forward("Eval.Dequeue", args, args, reply); done {
    79  		return err
    80  	}
    81  	defer metrics.MeasureSince([]string{"nomad", "eval", "dequeue"}, time.Now())
    82  
    83  	// Ensure there is at least one scheduler
    84  	if len(args.Schedulers) == 0 {
    85  		return fmt.Errorf("dequeue requires at least one scheduler type")
    86  	}
    87  
    88  	// Check that there isn't a scheduler version mismatch
    89  	if args.SchedulerVersion != scheduler.SchedulerVersion {
    90  		return fmt.Errorf("dequeue disallowed: calling scheduler version is %d; leader version is %d",
    91  			args.SchedulerVersion, scheduler.SchedulerVersion)
    92  	}
    93  
    94  	// Ensure there is a default timeout
    95  	if args.Timeout <= 0 {
    96  		args.Timeout = DefaultDequeueTimeout
    97  	}
    98  
    99  	// Attempt the dequeue
   100  	eval, token, err := e.srv.evalBroker.Dequeue(args.Schedulers, args.Timeout)
   101  	if err != nil {
   102  		return err
   103  	}
   104  
   105  	// Provide the output if any
   106  	if eval != nil {
   107  		// Get the index that the worker should wait until before scheduling.
   108  		waitIndex, err := e.getWaitIndex(eval.Namespace, eval.JobID, eval.ModifyIndex)
   109  		if err != nil {
   110  			var mErr multierror.Error
   111  			multierror.Append(&mErr, err)
   112  
   113  			// We have dequeued the evaluation but won't be returning it to the
   114  			// worker so Nack the eval.
   115  			if err := e.srv.evalBroker.Nack(eval.ID, token); err != nil {
   116  				multierror.Append(&mErr, err)
   117  			}
   118  
   119  			return &mErr
   120  		}
   121  
   122  		reply.Eval = eval
   123  		reply.Token = token
   124  		reply.WaitIndex = waitIndex
   125  	}
   126  
   127  	// Set the query response
   128  	e.srv.setQueryMeta(&reply.QueryMeta)
   129  	return nil
   130  }
   131  
   132  // getWaitIndex returns the wait index that should be used by the worker before
   133  // invoking the scheduler. The index should be the highest modify index of any
   134  // evaluation for the job. This prevents scheduling races for the same job when
   135  // there are blocked evaluations.
   136  func (e *Eval) getWaitIndex(namespace, job string, evalModifyIndex uint64) (uint64, error) {
   137  	snap, err := e.srv.State().Snapshot()
   138  	if err != nil {
   139  		return 0, err
   140  	}
   141  
   142  	evals, err := snap.EvalsByJob(nil, namespace, job)
   143  	if err != nil {
   144  		return 0, err
   145  	}
   146  
   147  	// Since dequeueing evals is concurrent with applying Raft messages to
   148  	// the state store, initialize to the currently dequeued eval's index
   149  	// in case it isn't in the snapshot used by EvalsByJob yet.
   150  	max := evalModifyIndex
   151  	for _, eval := range evals {
   152  		if max < eval.ModifyIndex {
   153  			max = eval.ModifyIndex
   154  		}
   155  	}
   156  
   157  	return max, nil
   158  }
   159  
   160  // Ack is used to acknowledge completion of a dequeued evaluation
   161  func (e *Eval) Ack(args *structs.EvalAckRequest,
   162  	reply *structs.GenericResponse) error {
   163  	if done, err := e.srv.forward("Eval.Ack", args, args, reply); done {
   164  		return err
   165  	}
   166  	defer metrics.MeasureSince([]string{"nomad", "eval", "ack"}, time.Now())
   167  
   168  	// Ack the EvalID
   169  	if err := e.srv.evalBroker.Ack(args.EvalID, args.Token); err != nil {
   170  		return err
   171  	}
   172  	return nil
   173  }
   174  
   175  // NAck is used to negative acknowledge completion of a dequeued evaluation
   176  func (e *Eval) Nack(args *structs.EvalAckRequest,
   177  	reply *structs.GenericResponse) error {
   178  	if done, err := e.srv.forward("Eval.Nack", args, args, reply); done {
   179  		return err
   180  	}
   181  	defer metrics.MeasureSince([]string{"nomad", "eval", "nack"}, time.Now())
   182  
   183  	// Nack the EvalID
   184  	if err := e.srv.evalBroker.Nack(args.EvalID, args.Token); err != nil {
   185  		return err
   186  	}
   187  	return nil
   188  }
   189  
   190  // Update is used to perform an update of an Eval if it is outstanding.
   191  func (e *Eval) Update(args *structs.EvalUpdateRequest,
   192  	reply *structs.GenericResponse) error {
   193  	if done, err := e.srv.forward("Eval.Update", args, args, reply); done {
   194  		return err
   195  	}
   196  	defer metrics.MeasureSince([]string{"nomad", "eval", "update"}, time.Now())
   197  
   198  	// Ensure there is only a single update with token
   199  	if len(args.Evals) != 1 {
   200  		return fmt.Errorf("only a single eval can be updated")
   201  	}
   202  	eval := args.Evals[0]
   203  
   204  	// Verify the evaluation is outstanding, and that the tokens match.
   205  	if err := e.srv.evalBroker.OutstandingReset(eval.ID, args.EvalToken); err != nil {
   206  		return err
   207  	}
   208  
   209  	// Update via Raft
   210  	_, index, err := e.srv.raftApply(structs.EvalUpdateRequestType, args)
   211  	if err != nil {
   212  		return err
   213  	}
   214  
   215  	// Update the index
   216  	reply.Index = index
   217  	return nil
   218  }
   219  
   220  // Create is used to make a new evaluation
   221  func (e *Eval) Create(args *structs.EvalUpdateRequest,
   222  	reply *structs.GenericResponse) error {
   223  	if done, err := e.srv.forward("Eval.Create", args, args, reply); done {
   224  		return err
   225  	}
   226  	defer metrics.MeasureSince([]string{"nomad", "eval", "create"}, time.Now())
   227  
   228  	// Ensure there is only a single update with token
   229  	if len(args.Evals) != 1 {
   230  		return fmt.Errorf("only a single eval can be created")
   231  	}
   232  	eval := args.Evals[0]
   233  
   234  	// Verify the parent evaluation is outstanding, and that the tokens match.
   235  	if err := e.srv.evalBroker.OutstandingReset(eval.PreviousEval, args.EvalToken); err != nil {
   236  		return err
   237  	}
   238  
   239  	// Look for the eval
   240  	snap, err := e.srv.fsm.State().Snapshot()
   241  	if err != nil {
   242  		return err
   243  	}
   244  
   245  	ws := memdb.NewWatchSet()
   246  	out, err := snap.EvalByID(ws, eval.ID)
   247  	if err != nil {
   248  		return err
   249  	}
   250  	if out != nil {
   251  		return fmt.Errorf("evaluation already exists")
   252  	}
   253  
   254  	// Update via Raft
   255  	_, index, err := e.srv.raftApply(structs.EvalUpdateRequestType, args)
   256  	if err != nil {
   257  		return err
   258  	}
   259  
   260  	// Update the index
   261  	reply.Index = index
   262  	return nil
   263  }
   264  
   265  // Reblock is used to reinsert an existing blocked evaluation into the blocked
   266  // evaluation tracker.
   267  func (e *Eval) Reblock(args *structs.EvalUpdateRequest, reply *structs.GenericResponse) error {
   268  	if done, err := e.srv.forward("Eval.Reblock", args, args, reply); done {
   269  		return err
   270  	}
   271  	defer metrics.MeasureSince([]string{"nomad", "eval", "reblock"}, time.Now())
   272  
   273  	// Ensure there is only a single update with token
   274  	if len(args.Evals) != 1 {
   275  		return fmt.Errorf("only a single eval can be reblocked")
   276  	}
   277  	eval := args.Evals[0]
   278  
   279  	// Verify the evaluation is outstanding, and that the tokens match.
   280  	if err := e.srv.evalBroker.OutstandingReset(eval.ID, args.EvalToken); err != nil {
   281  		return err
   282  	}
   283  
   284  	// Look for the eval
   285  	snap, err := e.srv.fsm.State().Snapshot()
   286  	if err != nil {
   287  		return err
   288  	}
   289  
   290  	ws := memdb.NewWatchSet()
   291  	out, err := snap.EvalByID(ws, eval.ID)
   292  	if err != nil {
   293  		return err
   294  	}
   295  	if out == nil {
   296  		return fmt.Errorf("evaluation does not exist")
   297  	}
   298  	if out.Status != structs.EvalStatusBlocked {
   299  		return fmt.Errorf("evaluation not blocked")
   300  	}
   301  
   302  	// Reblock the eval
   303  	e.srv.blockedEvals.Reblock(eval, args.EvalToken)
   304  	return nil
   305  }
   306  
   307  // Reap is used to cleanup dead evaluations and allocations
   308  func (e *Eval) Reap(args *structs.EvalDeleteRequest,
   309  	reply *structs.GenericResponse) error {
   310  	if done, err := e.srv.forward("Eval.Reap", args, args, reply); done {
   311  		return err
   312  	}
   313  	defer metrics.MeasureSince([]string{"nomad", "eval", "reap"}, time.Now())
   314  
   315  	// Update via Raft
   316  	_, index, err := e.srv.raftApply(structs.EvalDeleteRequestType, args)
   317  	if err != nil {
   318  		return err
   319  	}
   320  
   321  	// Update the index
   322  	reply.Index = index
   323  	return nil
   324  }
   325  
   326  // List is used to get a list of the evaluations in the system
   327  func (e *Eval) List(args *structs.EvalListRequest,
   328  	reply *structs.EvalListResponse) error {
   329  	if done, err := e.srv.forward("Eval.List", args, args, reply); done {
   330  		return err
   331  	}
   332  	defer metrics.MeasureSince([]string{"nomad", "eval", "list"}, time.Now())
   333  
   334  	// Check for read-job permissions
   335  	if aclObj, err := e.srv.ResolveToken(args.AuthToken); err != nil {
   336  		return err
   337  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
   338  		return structs.ErrPermissionDenied
   339  	}
   340  
   341  	// Setup the blocking query
   342  	opts := blockingOptions{
   343  		queryOpts: &args.QueryOptions,
   344  		queryMeta: &reply.QueryMeta,
   345  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   346  			// Scan all the evaluations
   347  			var err error
   348  			var iter memdb.ResultIterator
   349  			if prefix := args.QueryOptions.Prefix; prefix != "" {
   350  				iter, err = state.EvalsByIDPrefix(ws, args.RequestNamespace(), prefix)
   351  			} else {
   352  				iter, err = state.EvalsByNamespace(ws, args.RequestNamespace())
   353  			}
   354  			if err != nil {
   355  				return err
   356  			}
   357  
   358  			var evals []*structs.Evaluation
   359  			for {
   360  				raw := iter.Next()
   361  				if raw == nil {
   362  					break
   363  				}
   364  				eval := raw.(*structs.Evaluation)
   365  				evals = append(evals, eval)
   366  			}
   367  			reply.Evaluations = evals
   368  
   369  			// Use the last index that affected the jobs table
   370  			index, err := state.Index("evals")
   371  			if err != nil {
   372  				return err
   373  			}
   374  			reply.Index = index
   375  
   376  			// Set the query response
   377  			e.srv.setQueryMeta(&reply.QueryMeta)
   378  			return nil
   379  		}}
   380  	return e.srv.blockingRPC(&opts)
   381  }
   382  
   383  // Allocations is used to list the allocations for an evaluation
   384  func (e *Eval) Allocations(args *structs.EvalSpecificRequest,
   385  	reply *structs.EvalAllocationsResponse) error {
   386  	if done, err := e.srv.forward("Eval.Allocations", args, args, reply); done {
   387  		return err
   388  	}
   389  	defer metrics.MeasureSince([]string{"nomad", "eval", "allocations"}, time.Now())
   390  
   391  	// Check for read-job permissions
   392  	if aclObj, err := e.srv.ResolveToken(args.AuthToken); err != nil {
   393  		return err
   394  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
   395  		return structs.ErrPermissionDenied
   396  	}
   397  
   398  	// Setup the blocking query
   399  	opts := blockingOptions{
   400  		queryOpts: &args.QueryOptions,
   401  		queryMeta: &reply.QueryMeta,
   402  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   403  			// Capture the allocations
   404  			allocs, err := state.AllocsByEval(ws, args.EvalID)
   405  			if err != nil {
   406  				return err
   407  			}
   408  
   409  			// Convert to a stub
   410  			if len(allocs) > 0 {
   411  				reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
   412  				for _, alloc := range allocs {
   413  					reply.Allocations = append(reply.Allocations, alloc.Stub())
   414  				}
   415  			}
   416  
   417  			// Use the last index that affected the allocs table
   418  			index, err := state.Index("allocs")
   419  			if err != nil {
   420  				return err
   421  			}
   422  			reply.Index = index
   423  
   424  			// Set the query response
   425  			e.srv.setQueryMeta(&reply.QueryMeta)
   426  			return nil
   427  		}}
   428  	return e.srv.blockingRPC(&opts)
   429  }