github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/nomad/job_endpoint.go (about)

     1  package nomad
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	"github.com/armon/go-metrics"
     8  	"github.com/hashicorp/go-memdb"
     9  	"github.com/hashicorp/go-multierror"
    10  	"github.com/hashicorp/nomad/client/driver"
    11  	"github.com/hashicorp/nomad/nomad/structs"
    12  	"github.com/hashicorp/nomad/nomad/watch"
    13  	"github.com/hashicorp/nomad/scheduler"
    14  )
    15  
// Job is the RPC endpoint used for job interactions. Its methods are invoked
// under the "Job." RPC name prefix (for example "Job.Register").
type Job struct {
	// srv is the server this endpoint uses for request forwarding, Raft
	// writes, state snapshots and logging.
	srv *Server
}
    20  
    21  // Register is used to upsert a job for scheduling
    22  func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegisterResponse) error {
    23  	if done, err := j.srv.forward("Job.Register", args, args, reply); done {
    24  		return err
    25  	}
    26  	defer metrics.MeasureSince([]string{"nomad", "job", "register"}, time.Now())
    27  
    28  	// Validate the arguments
    29  	if args.Job == nil {
    30  		return fmt.Errorf("missing job for registration")
    31  	}
    32  
    33  	// Initialize the job fields (sets defaults and any necessary init work).
    34  	args.Job.InitFields()
    35  
    36  	// Validate the job.
    37  	if err := validateJob(args.Job); err != nil {
    38  		return err
    39  	}
    40  
    41  	// Commit this update via Raft
    42  	_, index, err := j.srv.raftApply(structs.JobRegisterRequestType, args)
    43  	if err != nil {
    44  		j.srv.logger.Printf("[ERR] nomad.job: Register failed: %v", err)
    45  		return err
    46  	}
    47  
    48  	// Populate the reply with job information
    49  	reply.JobModifyIndex = index
    50  
    51  	// If the job is periodic, we don't create an eval.
    52  	if args.Job.IsPeriodic() {
    53  		return nil
    54  	}
    55  
    56  	// Create a new evaluation
    57  	eval := &structs.Evaluation{
    58  		ID:             structs.GenerateUUID(),
    59  		Priority:       args.Job.Priority,
    60  		Type:           args.Job.Type,
    61  		TriggeredBy:    structs.EvalTriggerJobRegister,
    62  		JobID:          args.Job.ID,
    63  		JobModifyIndex: index,
    64  		Status:         structs.EvalStatusPending,
    65  	}
    66  	update := &structs.EvalUpdateRequest{
    67  		Evals:        []*structs.Evaluation{eval},
    68  		WriteRequest: structs.WriteRequest{Region: args.Region},
    69  	}
    70  
    71  	// Commit this evaluation via Raft
    72  	// XXX: There is a risk of partial failure where the JobRegister succeeds
    73  	// but that the EvalUpdate does not.
    74  	_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
    75  	if err != nil {
    76  		j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err)
    77  		return err
    78  	}
    79  
    80  	// Populate the reply with eval information
    81  	reply.EvalID = eval.ID
    82  	reply.EvalCreateIndex = evalIndex
    83  	reply.Index = evalIndex
    84  	return nil
    85  }
    86  
    87  // Evaluate is used to force a job for re-evaluation
    88  func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegisterResponse) error {
    89  	if done, err := j.srv.forward("Job.Evaluate", args, args, reply); done {
    90  		return err
    91  	}
    92  	defer metrics.MeasureSince([]string{"nomad", "job", "evaluate"}, time.Now())
    93  
    94  	// Validate the arguments
    95  	if args.JobID == "" {
    96  		return fmt.Errorf("missing job ID for evaluation")
    97  	}
    98  
    99  	// Lookup the job
   100  	snap, err := j.srv.fsm.State().Snapshot()
   101  	if err != nil {
   102  		return err
   103  	}
   104  	job, err := snap.JobByID(args.JobID)
   105  	if err != nil {
   106  		return err
   107  	}
   108  	if job == nil {
   109  		return fmt.Errorf("job not found")
   110  	}
   111  
   112  	if job.IsPeriodic() {
   113  		return fmt.Errorf("can't evaluate periodic job")
   114  	}
   115  
   116  	// Create a new evaluation
   117  	eval := &structs.Evaluation{
   118  		ID:             structs.GenerateUUID(),
   119  		Priority:       job.Priority,
   120  		Type:           job.Type,
   121  		TriggeredBy:    structs.EvalTriggerJobRegister,
   122  		JobID:          job.ID,
   123  		JobModifyIndex: job.ModifyIndex,
   124  		Status:         structs.EvalStatusPending,
   125  	}
   126  	update := &structs.EvalUpdateRequest{
   127  		Evals:        []*structs.Evaluation{eval},
   128  		WriteRequest: structs.WriteRequest{Region: args.Region},
   129  	}
   130  
   131  	// Commit this evaluation via Raft
   132  	_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
   133  	if err != nil {
   134  		j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err)
   135  		return err
   136  	}
   137  
   138  	// Setup the reply
   139  	reply.EvalID = eval.ID
   140  	reply.EvalCreateIndex = evalIndex
   141  	reply.JobModifyIndex = job.ModifyIndex
   142  	reply.Index = evalIndex
   143  	return nil
   144  }
   145  
   146  // Deregister is used to remove a job the cluster.
   147  func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobDeregisterResponse) error {
   148  	if done, err := j.srv.forward("Job.Deregister", args, args, reply); done {
   149  		return err
   150  	}
   151  	defer metrics.MeasureSince([]string{"nomad", "job", "deregister"}, time.Now())
   152  
   153  	// Validate the arguments
   154  	if args.JobID == "" {
   155  		return fmt.Errorf("missing job ID for evaluation")
   156  	}
   157  
   158  	// Lookup the job
   159  	snap, err := j.srv.fsm.State().Snapshot()
   160  	if err != nil {
   161  		return err
   162  	}
   163  	job, err := snap.JobByID(args.JobID)
   164  	if err != nil {
   165  		return err
   166  	}
   167  
   168  	// Commit this update via Raft
   169  	_, index, err := j.srv.raftApply(structs.JobDeregisterRequestType, args)
   170  	if err != nil {
   171  		j.srv.logger.Printf("[ERR] nomad.job: Deregister failed: %v", err)
   172  		return err
   173  	}
   174  
   175  	// Populate the reply with job information
   176  	reply.JobModifyIndex = index
   177  
   178  	// If the job is periodic, we don't create an eval.
   179  	if job != nil && job.IsPeriodic() {
   180  		return nil
   181  	}
   182  
   183  	// Create a new evaluation
   184  	// XXX: The job priority / type is strange for this, since it's not a high
   185  	// priority even if the job was. The scheduler itself also doesn't matter,
   186  	// since all should be able to handle deregistration in the same way.
   187  	eval := &structs.Evaluation{
   188  		ID:             structs.GenerateUUID(),
   189  		Priority:       structs.JobDefaultPriority,
   190  		Type:           structs.JobTypeService,
   191  		TriggeredBy:    structs.EvalTriggerJobDeregister,
   192  		JobID:          args.JobID,
   193  		JobModifyIndex: index,
   194  		Status:         structs.EvalStatusPending,
   195  	}
   196  	update := &structs.EvalUpdateRequest{
   197  		Evals:        []*structs.Evaluation{eval},
   198  		WriteRequest: structs.WriteRequest{Region: args.Region},
   199  	}
   200  
   201  	// Commit this evaluation via Raft
   202  	_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
   203  	if err != nil {
   204  		j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err)
   205  		return err
   206  	}
   207  
   208  	// Populate the reply with eval information
   209  	reply.EvalID = eval.ID
   210  	reply.EvalCreateIndex = evalIndex
   211  	reply.Index = evalIndex
   212  	return nil
   213  }
   214  
   215  // GetJob is used to request information about a specific job
   216  func (j *Job) GetJob(args *structs.JobSpecificRequest,
   217  	reply *structs.SingleJobResponse) error {
   218  	if done, err := j.srv.forward("Job.GetJob", args, args, reply); done {
   219  		return err
   220  	}
   221  	defer metrics.MeasureSince([]string{"nomad", "job", "get_job"}, time.Now())
   222  
   223  	// Setup the blocking query
   224  	opts := blockingOptions{
   225  		queryOpts: &args.QueryOptions,
   226  		queryMeta: &reply.QueryMeta,
   227  		watch:     watch.NewItems(watch.Item{Job: args.JobID}),
   228  		run: func() error {
   229  
   230  			// Look for the job
   231  			snap, err := j.srv.fsm.State().Snapshot()
   232  			if err != nil {
   233  				return err
   234  			}
   235  			out, err := snap.JobByID(args.JobID)
   236  			if err != nil {
   237  				return err
   238  			}
   239  
   240  			// Setup the output
   241  			reply.Job = out
   242  			if out != nil {
   243  				reply.Index = out.ModifyIndex
   244  			} else {
   245  				// Use the last index that affected the nodes table
   246  				index, err := snap.Index("jobs")
   247  				if err != nil {
   248  					return err
   249  				}
   250  				reply.Index = index
   251  			}
   252  
   253  			// Set the query response
   254  			j.srv.setQueryMeta(&reply.QueryMeta)
   255  			return nil
   256  		}}
   257  	return j.srv.blockingRPC(&opts)
   258  }
   259  
   260  // List is used to list the jobs registered in the system
   261  func (j *Job) List(args *structs.JobListRequest,
   262  	reply *structs.JobListResponse) error {
   263  	if done, err := j.srv.forward("Job.List", args, args, reply); done {
   264  		return err
   265  	}
   266  	defer metrics.MeasureSince([]string{"nomad", "job", "list"}, time.Now())
   267  
   268  	// Setup the blocking query
   269  	opts := blockingOptions{
   270  		queryOpts: &args.QueryOptions,
   271  		queryMeta: &reply.QueryMeta,
   272  		watch:     watch.NewItems(watch.Item{Table: "jobs"}),
   273  		run: func() error {
   274  			// Capture all the jobs
   275  			snap, err := j.srv.fsm.State().Snapshot()
   276  			if err != nil {
   277  				return err
   278  			}
   279  			var iter memdb.ResultIterator
   280  			if prefix := args.QueryOptions.Prefix; prefix != "" {
   281  				iter, err = snap.JobsByIDPrefix(prefix)
   282  			} else {
   283  				iter, err = snap.Jobs()
   284  			}
   285  			if err != nil {
   286  				return err
   287  			}
   288  
   289  			var jobs []*structs.JobListStub
   290  			for {
   291  				raw := iter.Next()
   292  				if raw == nil {
   293  					break
   294  				}
   295  				job := raw.(*structs.Job)
   296  				jobs = append(jobs, job.Stub())
   297  			}
   298  			reply.Jobs = jobs
   299  
   300  			// Use the last index that affected the jobs table
   301  			index, err := snap.Index("jobs")
   302  			if err != nil {
   303  				return err
   304  			}
   305  			reply.Index = index
   306  
   307  			// Set the query response
   308  			j.srv.setQueryMeta(&reply.QueryMeta)
   309  			return nil
   310  		}}
   311  	return j.srv.blockingRPC(&opts)
   312  }
   313  
   314  // Allocations is used to list the allocations for a job
   315  func (j *Job) Allocations(args *structs.JobSpecificRequest,
   316  	reply *structs.JobAllocationsResponse) error {
   317  	if done, err := j.srv.forward("Job.Allocations", args, args, reply); done {
   318  		return err
   319  	}
   320  	defer metrics.MeasureSince([]string{"nomad", "job", "allocations"}, time.Now())
   321  
   322  	// Setup the blocking query
   323  	opts := blockingOptions{
   324  		queryOpts: &args.QueryOptions,
   325  		queryMeta: &reply.QueryMeta,
   326  		watch:     watch.NewItems(watch.Item{AllocJob: args.JobID}),
   327  		run: func() error {
   328  			// Capture the allocations
   329  			snap, err := j.srv.fsm.State().Snapshot()
   330  			if err != nil {
   331  				return err
   332  			}
   333  			allocs, err := snap.AllocsByJob(args.JobID)
   334  			if err != nil {
   335  				return err
   336  			}
   337  
   338  			// Convert to stubs
   339  			if len(allocs) > 0 {
   340  				reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
   341  				for _, alloc := range allocs {
   342  					reply.Allocations = append(reply.Allocations, alloc.Stub())
   343  				}
   344  			}
   345  
   346  			// Use the last index that affected the allocs table
   347  			index, err := snap.Index("allocs")
   348  			if err != nil {
   349  				return err
   350  			}
   351  			reply.Index = index
   352  
   353  			// Set the query response
   354  			j.srv.setQueryMeta(&reply.QueryMeta)
   355  			return nil
   356  
   357  		}}
   358  	return j.srv.blockingRPC(&opts)
   359  }
   360  
   361  // Evaluations is used to list the evaluations for a job
   362  func (j *Job) Evaluations(args *structs.JobSpecificRequest,
   363  	reply *structs.JobEvaluationsResponse) error {
   364  	if done, err := j.srv.forward("Job.Evaluations", args, args, reply); done {
   365  		return err
   366  	}
   367  	defer metrics.MeasureSince([]string{"nomad", "job", "evaluations"}, time.Now())
   368  
   369  	// Capture the evaluations
   370  	snap, err := j.srv.fsm.State().Snapshot()
   371  	if err != nil {
   372  		return err
   373  	}
   374  	reply.Evaluations, err = snap.EvalsByJob(args.JobID)
   375  	if err != nil {
   376  		return err
   377  	}
   378  
   379  	// Use the last index that affected the evals table
   380  	index, err := snap.Index("evals")
   381  	if err != nil {
   382  		return err
   383  	}
   384  	reply.Index = index
   385  
   386  	// Set the query response
   387  	j.srv.setQueryMeta(&reply.QueryMeta)
   388  	return nil
   389  }
   390  
   391  // Plan is used to cause a dry-run evaluation of the Job and return the results
   392  // with a potential diff containing annotations.
   393  func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse) error {
   394  	if done, err := j.srv.forward("Job.Plan", args, args, reply); done {
   395  		return err
   396  	}
   397  	defer metrics.MeasureSince([]string{"nomad", "job", "plan"}, time.Now())
   398  
   399  	// Validate the arguments
   400  	if args.Job == nil {
   401  		return fmt.Errorf("Job required for plan")
   402  	}
   403  
   404  	// Initialize the job fields (sets defaults and any necessary init work).
   405  	args.Job.InitFields()
   406  
   407  	// Validate the job.
   408  	if err := validateJob(args.Job); err != nil {
   409  		return err
   410  	}
   411  
   412  	// Acquire a snapshot of the state
   413  	snap, err := j.srv.fsm.State().Snapshot()
   414  	if err != nil {
   415  		return err
   416  	}
   417  
   418  	// Get the original job
   419  	oldJob, err := snap.JobByID(args.Job.ID)
   420  	if err != nil {
   421  		return err
   422  	}
   423  
   424  	var index uint64
   425  	if oldJob != nil {
   426  		index = oldJob.JobModifyIndex + 1
   427  	}
   428  
   429  	// Insert the updated Job into the snapshot
   430  	snap.UpsertJob(index, args.Job)
   431  
   432  	// Create an eval and mark it as requiring annotations and insert that as well
   433  	eval := &structs.Evaluation{
   434  		ID:             structs.GenerateUUID(),
   435  		Priority:       args.Job.Priority,
   436  		Type:           args.Job.Type,
   437  		TriggeredBy:    structs.EvalTriggerJobRegister,
   438  		JobID:          args.Job.ID,
   439  		JobModifyIndex: index,
   440  		Status:         structs.EvalStatusPending,
   441  		AnnotatePlan:   true,
   442  	}
   443  
   444  	// Create an in-memory Planner that returns no errors and stores the
   445  	// submitted plan and created evals.
   446  	planner := &scheduler.Harness{
   447  		State: &snap.StateStore,
   448  	}
   449  
   450  	// Create the scheduler and run it
   451  	sched, err := scheduler.NewScheduler(eval.Type, j.srv.logger, snap, planner)
   452  	if err != nil {
   453  		return err
   454  	}
   455  
   456  	if err := sched.Process(eval); err != nil {
   457  		return err
   458  	}
   459  
   460  	// Annotate and store the diff
   461  	if plans := len(planner.Plans); plans != 1 {
   462  		return fmt.Errorf("scheduler resulted in an unexpected number of plans: %d", plans)
   463  	}
   464  	annotations := planner.Plans[0].Annotations
   465  	if args.Diff {
   466  		jobDiff, err := oldJob.Diff(args.Job, true)
   467  		if err != nil {
   468  			return fmt.Errorf("failed to create job diff: %v", err)
   469  		}
   470  
   471  		if err := scheduler.Annotate(jobDiff, annotations); err != nil {
   472  			return fmt.Errorf("failed to annotate job diff: %v", err)
   473  		}
   474  		reply.Diff = jobDiff
   475  	}
   476  
   477  	reply.JobModifyIndex = index
   478  	reply.Annotations = annotations
   479  	reply.CreatedEvals = planner.CreateEvals
   480  	reply.Index = index
   481  	return nil
   482  }
   483  
   484  // validateJob validates a Job and task drivers and returns an error if there is
   485  // a validation problem or if the Job is of a type a user is not allowed to
   486  // submit.
   487  func validateJob(job *structs.Job) error {
   488  	validationErrors := new(multierror.Error)
   489  	if err := job.Validate(); err != nil {
   490  		multierror.Append(validationErrors, err)
   491  	}
   492  
   493  	// Validate the driver configurations.
   494  	for _, tg := range job.TaskGroups {
   495  		for _, task := range tg.Tasks {
   496  			d, err := driver.NewDriver(
   497  				task.Driver,
   498  				driver.NewEmptyDriverContext(),
   499  			)
   500  			if err != nil {
   501  				msg := "failed to create driver for task %q in group %q for validation: %v"
   502  				multierror.Append(validationErrors, fmt.Errorf(msg, tg.Name, task.Name, err))
   503  				continue
   504  			}
   505  
   506  			if err := d.Validate(task.Config); err != nil {
   507  				formatted := fmt.Errorf("group %q -> task %q -> config: %v", tg.Name, task.Name, err)
   508  				multierror.Append(validationErrors, formatted)
   509  			}
   510  		}
   511  	}
   512  
   513  	if job.Type == structs.JobTypeCore {
   514  		multierror.Append(validationErrors, fmt.Errorf("job type cannot be core"))
   515  	}
   516  
   517  	return validationErrors.ErrorOrNil()
   518  }