github.com/manicqin/nomad@v0.9.5/nomad/eval_endpoint.go

github.com/manicqin/nomad@v0.9.5/nomad/eval_endpoint.go (about)

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	metrics "github.com/armon/go-metrics"
     8  	log "github.com/hashicorp/go-hclog"
     9  	memdb "github.com/hashicorp/go-memdb"
    10  	multierror "github.com/hashicorp/go-multierror"
    11  
    12  	"github.com/hashicorp/nomad/acl"
    13  	"github.com/hashicorp/nomad/nomad/state"
    14  	"github.com/hashicorp/nomad/nomad/structs"
    15  	"github.com/hashicorp/nomad/scheduler"
    16  )
    17  
    18  const (
    19  	// DefaultDequeueTimeout is used if no dequeue timeout is provided
    20  	DefaultDequeueTimeout = time.Second
    21  )
    22  
// Eval is the endpoint used for evaluation interactions. Its methods reach
// the server's state store, eval broker, blocked-evals tracker and Raft apply
// path through srv.
type Eval struct {
	srv    *Server
	logger log.Logger
}
    28  
    29  // GetEval is used to request information about a specific evaluation
    30  func (e *Eval) GetEval(args *structs.EvalSpecificRequest,
    31  	reply *structs.SingleEvalResponse) error {
    32  	if done, err := e.srv.forward("Eval.GetEval", args, args, reply); done {
    33  		return err
    34  	}
    35  	defer metrics.MeasureSince([]string{"nomad", "eval", "get_eval"}, time.Now())
    36  
    37  	// Check for read-job permissions before performing blocking query.
    38  	allowNsOp := acl.NamespaceValidator(acl.NamespaceCapabilityReadJob)
    39  	aclObj, err := e.srv.ResolveToken(args.AuthToken)
    40  	if err != nil {
    41  		return err
    42  	} else if !allowNsOp(aclObj, args.RequestNamespace()) {
    43  		return structs.ErrPermissionDenied
    44  	}
    45  
    46  	// Setup the blocking query
    47  	opts := blockingOptions{
    48  		queryOpts: &args.QueryOptions,
    49  		queryMeta: &reply.QueryMeta,
    50  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
    51  			// Look for the job
    52  			out, err := state.EvalByID(ws, args.EvalID)
    53  			if err != nil {
    54  				return err
    55  			}
    56  
    57  			// Setup the output
    58  			reply.Eval = out
    59  			if out != nil {
    60  				// Re-check namespace in case it differs from request.
    61  				if !allowNsOp(aclObj, out.Namespace) {
    62  					return structs.ErrPermissionDenied
    63  				}
    64  
    65  				reply.Index = out.ModifyIndex
    66  			} else {
    67  				// Use the last index that affected the nodes table
    68  				index, err := state.Index("evals")
    69  				if err != nil {
    70  					return err
    71  				}
    72  				reply.Index = index
    73  			}
    74  
    75  			// Set the query response
    76  			e.srv.setQueryMeta(&reply.QueryMeta)
    77  			return nil
    78  		}}
    79  	return e.srv.blockingRPC(&opts)
    80  }
    81  
    82  // Dequeue is used to dequeue a pending evaluation
    83  func (e *Eval) Dequeue(args *structs.EvalDequeueRequest,
    84  	reply *structs.EvalDequeueResponse) error {
    85  	if done, err := e.srv.forward("Eval.Dequeue", args, args, reply); done {
    86  		return err
    87  	}
    88  	defer metrics.MeasureSince([]string{"nomad", "eval", "dequeue"}, time.Now())
    89  
    90  	// Ensure there is at least one scheduler
    91  	if len(args.Schedulers) == 0 {
    92  		return fmt.Errorf("dequeue requires at least one scheduler type")
    93  	}
    94  
    95  	// Check that there isn't a scheduler version mismatch
    96  	if args.SchedulerVersion != scheduler.SchedulerVersion {
    97  		return fmt.Errorf("dequeue disallowed: calling scheduler version is %d; leader version is %d",
    98  			args.SchedulerVersion, scheduler.SchedulerVersion)
    99  	}
   100  
   101  	// Ensure there is a default timeout
   102  	if args.Timeout <= 0 {
   103  		args.Timeout = DefaultDequeueTimeout
   104  	}
   105  
   106  	// Attempt the dequeue
   107  	eval, token, err := e.srv.evalBroker.Dequeue(args.Schedulers, args.Timeout)
   108  	if err != nil {
   109  		return err
   110  	}
   111  
   112  	// Provide the output if any
   113  	if eval != nil {
   114  		// Get the index that the worker should wait until before scheduling.
   115  		waitIndex, err := e.getWaitIndex(eval.Namespace, eval.JobID, eval.ModifyIndex)
   116  		if err != nil {
   117  			var mErr multierror.Error
   118  			multierror.Append(&mErr, err)
   119  
   120  			// We have dequeued the evaluation but won't be returning it to the
   121  			// worker so Nack the eval.
   122  			if err := e.srv.evalBroker.Nack(eval.ID, token); err != nil {
   123  				multierror.Append(&mErr, err)
   124  			}
   125  
   126  			return &mErr
   127  		}
   128  
   129  		reply.Eval = eval
   130  		reply.Token = token
   131  		reply.WaitIndex = waitIndex
   132  	}
   133  
   134  	// Set the query response
   135  	e.srv.setQueryMeta(&reply.QueryMeta)
   136  	return nil
   137  }
   138  
   139  // getWaitIndex returns the wait index that should be used by the worker before
   140  // invoking the scheduler. The index should be the highest modify index of any
   141  // evaluation for the job. This prevents scheduling races for the same job when
   142  // there are blocked evaluations.
   143  func (e *Eval) getWaitIndex(namespace, job string, evalModifyIndex uint64) (uint64, error) {
   144  	snap, err := e.srv.State().Snapshot()
   145  	if err != nil {
   146  		return 0, err
   147  	}
   148  
   149  	evals, err := snap.EvalsByJob(nil, namespace, job)
   150  	if err != nil {
   151  		return 0, err
   152  	}
   153  
   154  	// Since dequeueing evals is concurrent with applying Raft messages to
   155  	// the state store, initialize to the currently dequeued eval's index
   156  	// in case it isn't in the snapshot used by EvalsByJob yet.
   157  	max := evalModifyIndex
   158  	for _, eval := range evals {
   159  		if max < eval.ModifyIndex {
   160  			max = eval.ModifyIndex
   161  		}
   162  	}
   163  
   164  	return max, nil
   165  }
   166  
   167  // Ack is used to acknowledge completion of a dequeued evaluation
   168  func (e *Eval) Ack(args *structs.EvalAckRequest,
   169  	reply *structs.GenericResponse) error {
   170  	if done, err := e.srv.forward("Eval.Ack", args, args, reply); done {
   171  		return err
   172  	}
   173  	defer metrics.MeasureSince([]string{"nomad", "eval", "ack"}, time.Now())
   174  
   175  	// Ack the EvalID
   176  	if err := e.srv.evalBroker.Ack(args.EvalID, args.Token); err != nil {
   177  		return err
   178  	}
   179  	return nil
   180  }
   181  
   182  // NAck is used to negative acknowledge completion of a dequeued evaluation
   183  func (e *Eval) Nack(args *structs.EvalAckRequest,
   184  	reply *structs.GenericResponse) error {
   185  	if done, err := e.srv.forward("Eval.Nack", args, args, reply); done {
   186  		return err
   187  	}
   188  	defer metrics.MeasureSince([]string{"nomad", "eval", "nack"}, time.Now())
   189  
   190  	// Nack the EvalID
   191  	if err := e.srv.evalBroker.Nack(args.EvalID, args.Token); err != nil {
   192  		return err
   193  	}
   194  	return nil
   195  }
   196  
   197  // Update is used to perform an update of an Eval if it is outstanding.
   198  func (e *Eval) Update(args *structs.EvalUpdateRequest,
   199  	reply *structs.GenericResponse) error {
   200  	if done, err := e.srv.forward("Eval.Update", args, args, reply); done {
   201  		return err
   202  	}
   203  	defer metrics.MeasureSince([]string{"nomad", "eval", "update"}, time.Now())
   204  
   205  	// Ensure there is only a single update with token
   206  	if len(args.Evals) != 1 {
   207  		return fmt.Errorf("only a single eval can be updated")
   208  	}
   209  	eval := args.Evals[0]
   210  
   211  	// Verify the evaluation is outstanding, and that the tokens match.
   212  	if err := e.srv.evalBroker.OutstandingReset(eval.ID, args.EvalToken); err != nil {
   213  		return err
   214  	}
   215  
   216  	// Update via Raft
   217  	_, index, err := e.srv.raftApply(structs.EvalUpdateRequestType, args)
   218  	if err != nil {
   219  		return err
   220  	}
   221  
   222  	// Update the index
   223  	reply.Index = index
   224  	return nil
   225  }
   226  
   227  // Create is used to make a new evaluation
   228  func (e *Eval) Create(args *structs.EvalUpdateRequest,
   229  	reply *structs.GenericResponse) error {
   230  	if done, err := e.srv.forward("Eval.Create", args, args, reply); done {
   231  		return err
   232  	}
   233  	defer metrics.MeasureSince([]string{"nomad", "eval", "create"}, time.Now())
   234  
   235  	// Ensure there is only a single update with token
   236  	if len(args.Evals) != 1 {
   237  		return fmt.Errorf("only a single eval can be created")
   238  	}
   239  	eval := args.Evals[0]
   240  
   241  	// Verify the parent evaluation is outstanding, and that the tokens match.
   242  	if err := e.srv.evalBroker.OutstandingReset(eval.PreviousEval, args.EvalToken); err != nil {
   243  		return err
   244  	}
   245  
   246  	// Look for the eval
   247  	snap, err := e.srv.fsm.State().Snapshot()
   248  	if err != nil {
   249  		return err
   250  	}
   251  
   252  	ws := memdb.NewWatchSet()
   253  	out, err := snap.EvalByID(ws, eval.ID)
   254  	if err != nil {
   255  		return err
   256  	}
   257  	if out != nil {
   258  		return fmt.Errorf("evaluation already exists")
   259  	}
   260  
   261  	// Update via Raft
   262  	_, index, err := e.srv.raftApply(structs.EvalUpdateRequestType, args)
   263  	if err != nil {
   264  		return err
   265  	}
   266  
   267  	// Update the index
   268  	reply.Index = index
   269  	return nil
   270  }
   271  
   272  // Reblock is used to reinsert an existing blocked evaluation into the blocked
   273  // evaluation tracker.
   274  func (e *Eval) Reblock(args *structs.EvalUpdateRequest, reply *structs.GenericResponse) error {
   275  	if done, err := e.srv.forward("Eval.Reblock", args, args, reply); done {
   276  		return err
   277  	}
   278  	defer metrics.MeasureSince([]string{"nomad", "eval", "reblock"}, time.Now())
   279  
   280  	// Ensure there is only a single update with token
   281  	if len(args.Evals) != 1 {
   282  		return fmt.Errorf("only a single eval can be reblocked")
   283  	}
   284  	eval := args.Evals[0]
   285  
   286  	// Verify the evaluation is outstanding, and that the tokens match.
   287  	if err := e.srv.evalBroker.OutstandingReset(eval.ID, args.EvalToken); err != nil {
   288  		return err
   289  	}
   290  
   291  	// Look for the eval
   292  	snap, err := e.srv.fsm.State().Snapshot()
   293  	if err != nil {
   294  		return err
   295  	}
   296  
   297  	ws := memdb.NewWatchSet()
   298  	out, err := snap.EvalByID(ws, eval.ID)
   299  	if err != nil {
   300  		return err
   301  	}
   302  	if out == nil {
   303  		return fmt.Errorf("evaluation does not exist")
   304  	}
   305  	if out.Status != structs.EvalStatusBlocked {
   306  		return fmt.Errorf("evaluation not blocked")
   307  	}
   308  
   309  	// Reblock the eval
   310  	e.srv.blockedEvals.Reblock(eval, args.EvalToken)
   311  	return nil
   312  }
   313  
   314  // Reap is used to cleanup dead evaluations and allocations
   315  func (e *Eval) Reap(args *structs.EvalDeleteRequest,
   316  	reply *structs.GenericResponse) error {
   317  	if done, err := e.srv.forward("Eval.Reap", args, args, reply); done {
   318  		return err
   319  	}
   320  	defer metrics.MeasureSince([]string{"nomad", "eval", "reap"}, time.Now())
   321  
   322  	// Update via Raft
   323  	_, index, err := e.srv.raftApply(structs.EvalDeleteRequestType, args)
   324  	if err != nil {
   325  		return err
   326  	}
   327  
   328  	// Update the index
   329  	reply.Index = index
   330  	return nil
   331  }
   332  
   333  // List is used to get a list of the evaluations in the system
   334  func (e *Eval) List(args *structs.EvalListRequest,
   335  	reply *structs.EvalListResponse) error {
   336  	if done, err := e.srv.forward("Eval.List", args, args, reply); done {
   337  		return err
   338  	}
   339  	defer metrics.MeasureSince([]string{"nomad", "eval", "list"}, time.Now())
   340  
   341  	// Check for read-job permissions
   342  	if aclObj, err := e.srv.ResolveToken(args.AuthToken); err != nil {
   343  		return err
   344  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
   345  		return structs.ErrPermissionDenied
   346  	}
   347  
   348  	// Setup the blocking query
   349  	opts := blockingOptions{
   350  		queryOpts: &args.QueryOptions,
   351  		queryMeta: &reply.QueryMeta,
   352  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   353  			// Scan all the evaluations
   354  			var err error
   355  			var iter memdb.ResultIterator
   356  			if prefix := args.QueryOptions.Prefix; prefix != "" {
   357  				iter, err = state.EvalsByIDPrefix(ws, args.RequestNamespace(), prefix)
   358  			} else {
   359  				iter, err = state.EvalsByNamespace(ws, args.RequestNamespace())
   360  			}
   361  			if err != nil {
   362  				return err
   363  			}
   364  
   365  			var evals []*structs.Evaluation
   366  			for {
   367  				raw := iter.Next()
   368  				if raw == nil {
   369  					break
   370  				}
   371  				eval := raw.(*structs.Evaluation)
   372  				evals = append(evals, eval)
   373  			}
   374  			reply.Evaluations = evals
   375  
   376  			// Use the last index that affected the jobs table
   377  			index, err := state.Index("evals")
   378  			if err != nil {
   379  				return err
   380  			}
   381  			reply.Index = index
   382  
   383  			// Set the query response
   384  			e.srv.setQueryMeta(&reply.QueryMeta)
   385  			return nil
   386  		}}
   387  	return e.srv.blockingRPC(&opts)
   388  }
   389  
   390  // Allocations is used to list the allocations for an evaluation
   391  func (e *Eval) Allocations(args *structs.EvalSpecificRequest,
   392  	reply *structs.EvalAllocationsResponse) error {
   393  	if done, err := e.srv.forward("Eval.Allocations", args, args, reply); done {
   394  		return err
   395  	}
   396  	defer metrics.MeasureSince([]string{"nomad", "eval", "allocations"}, time.Now())
   397  
   398  	// Check for read-job permissions
   399  	allowNsOp := acl.NamespaceValidator(acl.NamespaceCapabilityReadJob)
   400  	aclObj, err := e.srv.ResolveToken(args.AuthToken)
   401  	if err != nil {
   402  		return err
   403  	} else if !allowNsOp(aclObj, args.RequestNamespace()) {
   404  		return structs.ErrPermissionDenied
   405  	}
   406  
   407  	// Setup the blocking query
   408  	opts := blockingOptions{
   409  		queryOpts: &args.QueryOptions,
   410  		queryMeta: &reply.QueryMeta,
   411  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   412  			// Capture the allocations
   413  			allocs, err := state.AllocsByEval(ws, args.EvalID)
   414  			if err != nil {
   415  				return err
   416  			}
   417  
   418  			// Convert to a stub
   419  			if len(allocs) > 0 {
   420  				// Evaluations do not span namespaces so just check the
   421  				// first allocs namespace.
   422  				ns := allocs[0].Namespace
   423  				if ns != args.RequestNamespace() && !allowNsOp(aclObj, ns) {
   424  					return structs.ErrPermissionDenied
   425  				}
   426  
   427  				reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
   428  				for _, alloc := range allocs {
   429  					reply.Allocations = append(reply.Allocations, alloc.Stub())
   430  				}
   431  			}
   432  
   433  			// Use the last index that affected the allocs table
   434  			index, err := state.Index("allocs")
   435  			if err != nil {
   436  				return err
   437  			}
   438  			reply.Index = index
   439  
   440  			// Set the query response
   441  			e.srv.setQueryMeta(&reply.QueryMeta)
   442  			return nil
   443  		}}
   444  	return e.srv.blockingRPC(&opts)
   445  }