github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/nomad/job_endpoint.go (about) 1 package nomad 2 3 import ( 4 "fmt" 5 "time" 6 7 "github.com/armon/go-metrics" 8 "github.com/hashicorp/go-memdb" 9 "github.com/hashicorp/go-multierror" 10 "github.com/hashicorp/nomad/client/driver" 11 "github.com/hashicorp/nomad/nomad/structs" 12 "github.com/hashicorp/nomad/nomad/watch" 13 "github.com/hashicorp/nomad/scheduler" 14 ) 15 16 // Job endpoint is used for job interactions 17 type Job struct { 18 srv *Server 19 } 20 21 // Register is used to upsert a job for scheduling 22 func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegisterResponse) error { 23 if done, err := j.srv.forward("Job.Register", args, args, reply); done { 24 return err 25 } 26 defer metrics.MeasureSince([]string{"nomad", "job", "register"}, time.Now()) 27 28 // Validate the arguments 29 if args.Job == nil { 30 return fmt.Errorf("missing job for registration") 31 } 32 33 // Initialize the job fields (sets defaults and any necessary init work). 34 args.Job.InitFields() 35 36 // Validate the job. 37 if err := validateJob(args.Job); err != nil { 38 return err 39 } 40 41 // Commit this update via Raft 42 _, index, err := j.srv.raftApply(structs.JobRegisterRequestType, args) 43 if err != nil { 44 j.srv.logger.Printf("[ERR] nomad.job: Register failed: %v", err) 45 return err 46 } 47 48 // Populate the reply with job information 49 reply.JobModifyIndex = index 50 51 // If the job is periodic, we don't create an eval. 52 if args.Job.IsPeriodic() { 53 return nil 54 } 55 56 // Create a new evaluation 57 eval := &structs.Evaluation{ 58 ID: structs.GenerateUUID(), 59 Priority: args.Job.Priority, 60 Type: args.Job.Type, 61 TriggeredBy: structs.EvalTriggerJobRegister, 62 JobID: args.Job.ID, 63 JobModifyIndex: index, 64 Status: structs.EvalStatusPending, 65 } 66 update := &structs.EvalUpdateRequest{ 67 Evals: []*structs.Evaluation{eval}, 68 WriteRequest: structs.WriteRequest{Region: args.Region}, 69 } 70 71 // Commit this evaluation via Raft 72 // XXX: There is a risk of partial failure where the JobRegister succeeds 73 // but that the EvalUpdate does not. 74 _, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update) 75 if err != nil { 76 j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err) 77 return err 78 } 79 80 // Populate the reply with eval information 81 reply.EvalID = eval.ID 82 reply.EvalCreateIndex = evalIndex 83 reply.Index = evalIndex 84 return nil 85 } 86 87 // Evaluate is used to force a job for re-evaluation 88 func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegisterResponse) error { 89 if done, err := j.srv.forward("Job.Evaluate", args, args, reply); done { 90 return err 91 } 92 defer metrics.MeasureSince([]string{"nomad", "job", "evaluate"}, time.Now()) 93 94 // Validate the arguments 95 if args.JobID == "" { 96 return fmt.Errorf("missing job ID for evaluation") 97 } 98 99 // Lookup the job 100 snap, err := j.srv.fsm.State().Snapshot() 101 if err != nil { 102 return err 103 } 104 job, err := snap.JobByID(args.JobID) 105 if err != nil { 106 return err 107 } 108 if job == nil { 109 return fmt.Errorf("job not found") 110 } 111 112 if job.IsPeriodic() { 113 return fmt.Errorf("can't evaluate periodic job") 114 } 115 116 // Create a new evaluation 117 eval := &structs.Evaluation{ 118 ID: structs.GenerateUUID(), 119 Priority: job.Priority, 120 Type: job.Type, 121 TriggeredBy: structs.EvalTriggerJobRegister, 122 JobID: job.ID, 123 JobModifyIndex: job.ModifyIndex, 124 Status: structs.EvalStatusPending, 125 } 126 update := &structs.EvalUpdateRequest{ 127 Evals: []*structs.Evaluation{eval}, 128 WriteRequest: structs.WriteRequest{Region: args.Region}, 129 } 130 131 // Commit this evaluation via Raft 132 _, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update) 133 if err != nil { 134 j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err) 135 return err 136 } 137 138 // Setup the reply 139 reply.EvalID = eval.ID 140 reply.EvalCreateIndex = evalIndex 141 reply.JobModifyIndex = job.ModifyIndex 142 reply.Index = evalIndex 143 return nil 144 } 145 146 // Deregister is used to remove a job the cluster. 147 func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobDeregisterResponse) error { 148 if done, err := j.srv.forward("Job.Deregister", args, args, reply); done { 149 return err 150 } 151 defer metrics.MeasureSince([]string{"nomad", "job", "deregister"}, time.Now()) 152 153 // Validate the arguments 154 if args.JobID == "" { 155 return fmt.Errorf("missing job ID for evaluation") 156 } 157 158 // Lookup the job 159 snap, err := j.srv.fsm.State().Snapshot() 160 if err != nil { 161 return err 162 } 163 job, err := snap.JobByID(args.JobID) 164 if err != nil { 165 return err 166 } 167 168 // Commit this update via Raft 169 _, index, err := j.srv.raftApply(structs.JobDeregisterRequestType, args) 170 if err != nil { 171 j.srv.logger.Printf("[ERR] nomad.job: Deregister failed: %v", err) 172 return err 173 } 174 175 // Populate the reply with job information 176 reply.JobModifyIndex = index 177 178 // If the job is periodic, we don't create an eval. 179 if job != nil && job.IsPeriodic() { 180 return nil 181 } 182 183 // Create a new evaluation 184 // XXX: The job priority / type is strange for this, since it's not a high 185 // priority even if the job was. The scheduler itself also doesn't matter, 186 // since all should be able to handle deregistration in the same way. 187 eval := &structs.Evaluation{ 188 ID: structs.GenerateUUID(), 189 Priority: structs.JobDefaultPriority, 190 Type: structs.JobTypeService, 191 TriggeredBy: structs.EvalTriggerJobDeregister, 192 JobID: args.JobID, 193 JobModifyIndex: index, 194 Status: structs.EvalStatusPending, 195 } 196 update := &structs.EvalUpdateRequest{ 197 Evals: []*structs.Evaluation{eval}, 198 WriteRequest: structs.WriteRequest{Region: args.Region}, 199 } 200 201 // Commit this evaluation via Raft 202 _, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update) 203 if err != nil { 204 j.srv.logger.Printf("[ERR] nomad.job: Eval create failed: %v", err) 205 return err 206 } 207 208 // Populate the reply with eval information 209 reply.EvalID = eval.ID 210 reply.EvalCreateIndex = evalIndex 211 reply.Index = evalIndex 212 return nil 213 } 214 215 // GetJob is used to request information about a specific job 216 func (j *Job) GetJob(args *structs.JobSpecificRequest, 217 reply *structs.SingleJobResponse) error { 218 if done, err := j.srv.forward("Job.GetJob", args, args, reply); done { 219 return err 220 } 221 defer metrics.MeasureSince([]string{"nomad", "job", "get_job"}, time.Now()) 222 223 // Setup the blocking query 224 opts := blockingOptions{ 225 queryOpts: &args.QueryOptions, 226 queryMeta: &reply.QueryMeta, 227 watch: watch.NewItems(watch.Item{Job: args.JobID}), 228 run: func() error { 229 230 // Look for the job 231 snap, err := j.srv.fsm.State().Snapshot() 232 if err != nil { 233 return err 234 } 235 out, err := snap.JobByID(args.JobID) 236 if err != nil { 237 return err 238 } 239 240 // Setup the output 241 reply.Job = out 242 if out != nil { 243 reply.Index = out.ModifyIndex 244 } else { 245 // Use the last index that affected the nodes table 246 index, err := snap.Index("jobs") 247 if err != nil { 248 return err 249 } 250 reply.Index = index 251 } 252 253 // Set the query response 254 j.srv.setQueryMeta(&reply.QueryMeta) 255 return nil 256 }} 257 return j.srv.blockingRPC(&opts) 258 } 259 260 // List is used to list the jobs registered in the system 261 func (j *Job) List(args *structs.JobListRequest, 262 reply *structs.JobListResponse) error { 263 if done, err := j.srv.forward("Job.List", args, args, reply); done { 264 return err 265 } 266 defer metrics.MeasureSince([]string{"nomad", "job", "list"}, time.Now()) 267 268 // Setup the blocking query 269 opts := blockingOptions{ 270 queryOpts: &args.QueryOptions, 271 queryMeta: &reply.QueryMeta, 272 watch: watch.NewItems(watch.Item{Table: "jobs"}), 273 run: func() error { 274 // Capture all the jobs 275 snap, err := j.srv.fsm.State().Snapshot() 276 if err != nil { 277 return err 278 } 279 var iter memdb.ResultIterator 280 if prefix := args.QueryOptions.Prefix; prefix != "" { 281 iter, err = snap.JobsByIDPrefix(prefix) 282 } else { 283 iter, err = snap.Jobs() 284 } 285 if err != nil { 286 return err 287 } 288 289 var jobs []*structs.JobListStub 290 for { 291 raw := iter.Next() 292 if raw == nil { 293 break 294 } 295 job := raw.(*structs.Job) 296 jobs = append(jobs, job.Stub()) 297 } 298 reply.Jobs = jobs 299 300 // Use the last index that affected the jobs table 301 index, err := snap.Index("jobs") 302 if err != nil { 303 return err 304 } 305 reply.Index = index 306 307 // Set the query response 308 j.srv.setQueryMeta(&reply.QueryMeta) 309 return nil 310 }} 311 return j.srv.blockingRPC(&opts) 312 } 313 314 // Allocations is used to list the allocations for a job 315 func (j *Job) Allocations(args *structs.JobSpecificRequest, 316 reply *structs.JobAllocationsResponse) error { 317 if done, err := j.srv.forward("Job.Allocations", args, args, reply); done { 318 return err 319 } 320 defer metrics.MeasureSince([]string{"nomad", "job", "allocations"}, time.Now()) 321 322 // Setup the blocking query 323 opts := blockingOptions{ 324 queryOpts: &args.QueryOptions, 325 queryMeta: &reply.QueryMeta, 326 watch: watch.NewItems(watch.Item{AllocJob: args.JobID}), 327 run: func() error { 328 // Capture the allocations 329 snap, err := j.srv.fsm.State().Snapshot() 330 if err != nil { 331 return err 332 } 333 allocs, err := snap.AllocsByJob(args.JobID) 334 if err != nil { 335 return err 336 } 337 338 // Convert to stubs 339 if len(allocs) > 0 { 340 reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) 341 for _, alloc := range allocs { 342 reply.Allocations = append(reply.Allocations, alloc.Stub()) 343 } 344 } 345 346 // Use the last index that affected the allocs table 347 index, err := snap.Index("allocs") 348 if err != nil { 349 return err 350 } 351 reply.Index = index 352 353 // Set the query response 354 j.srv.setQueryMeta(&reply.QueryMeta) 355 return nil 356 357 }} 358 return j.srv.blockingRPC(&opts) 359 } 360 361 // Evaluations is used to list the evaluations for a job 362 func (j *Job) Evaluations(args *structs.JobSpecificRequest, 363 reply *structs.JobEvaluationsResponse) error { 364 if done, err := j.srv.forward("Job.Evaluations", args, args, reply); done { 365 return err 366 } 367 defer metrics.MeasureSince([]string{"nomad", "job", "evaluations"}, time.Now()) 368 369 // Capture the evaluations 370 snap, err := j.srv.fsm.State().Snapshot() 371 if err != nil { 372 return err 373 } 374 reply.Evaluations, err = snap.EvalsByJob(args.JobID) 375 if err != nil { 376 return err 377 } 378 379 // Use the last index that affected the evals table 380 index, err := snap.Index("evals") 381 if err != nil { 382 return err 383 } 384 reply.Index = index 385 386 // Set the query response 387 j.srv.setQueryMeta(&reply.QueryMeta) 388 return nil 389 } 390 391 // Plan is used to cause a dry-run evaluation of the Job and return the results 392 // with a potential diff containing annotations. 393 func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse) error { 394 if done, err := j.srv.forward("Job.Plan", args, args, reply); done { 395 return err 396 } 397 defer metrics.MeasureSince([]string{"nomad", "job", "plan"}, time.Now()) 398 399 // Validate the arguments 400 if args.Job == nil { 401 return fmt.Errorf("Job required for plan") 402 } 403 404 // Initialize the job fields (sets defaults and any necessary init work). 405 args.Job.InitFields() 406 407 // Validate the job. 408 if err := validateJob(args.Job); err != nil { 409 return err 410 } 411 412 // Acquire a snapshot of the state 413 snap, err := j.srv.fsm.State().Snapshot() 414 if err != nil { 415 return err 416 } 417 418 // Get the original job 419 oldJob, err := snap.JobByID(args.Job.ID) 420 if err != nil { 421 return err 422 } 423 424 var index uint64 425 if oldJob != nil { 426 index = oldJob.JobModifyIndex + 1 427 } 428 429 // Insert the updated Job into the snapshot 430 snap.UpsertJob(index, args.Job) 431 432 // Create an eval and mark it as requiring annotations and insert that as well 433 eval := &structs.Evaluation{ 434 ID: structs.GenerateUUID(), 435 Priority: args.Job.Priority, 436 Type: args.Job.Type, 437 TriggeredBy: structs.EvalTriggerJobRegister, 438 JobID: args.Job.ID, 439 JobModifyIndex: index, 440 Status: structs.EvalStatusPending, 441 AnnotatePlan: true, 442 } 443 444 // Create an in-memory Planner that returns no errors and stores the 445 // submitted plan and created evals. 446 planner := &scheduler.Harness{ 447 State: &snap.StateStore, 448 } 449 450 // Create the scheduler and run it 451 sched, err := scheduler.NewScheduler(eval.Type, j.srv.logger, snap, planner) 452 if err != nil { 453 return err 454 } 455 456 if err := sched.Process(eval); err != nil { 457 return err 458 } 459 460 // Annotate and store the diff 461 if plans := len(planner.Plans); plans != 1 { 462 return fmt.Errorf("scheduler resulted in an unexpected number of plans: %d", plans) 463 } 464 annotations := planner.Plans[0].Annotations 465 if args.Diff { 466 jobDiff, err := oldJob.Diff(args.Job, true) 467 if err != nil { 468 return fmt.Errorf("failed to create job diff: %v", err) 469 } 470 471 if err := scheduler.Annotate(jobDiff, annotations); err != nil { 472 return fmt.Errorf("failed to annotate job diff: %v", err) 473 } 474 reply.Diff = jobDiff 475 } 476 477 reply.JobModifyIndex = index 478 reply.Annotations = annotations 479 reply.CreatedEvals = planner.CreateEvals 480 reply.Index = index 481 return nil 482 } 483 484 // validateJob validates a Job and task drivers and returns an error if there is 485 // a validation problem or if the Job is of a type a user is not allowed to 486 // submit. 487 func validateJob(job *structs.Job) error { 488 validationErrors := new(multierror.Error) 489 if err := job.Validate(); err != nil { 490 multierror.Append(validationErrors, err) 491 } 492 493 // Validate the driver configurations. 494 for _, tg := range job.TaskGroups { 495 for _, task := range tg.Tasks { 496 d, err := driver.NewDriver( 497 task.Driver, 498 driver.NewEmptyDriverContext(), 499 ) 500 if err != nil { 501 msg := "failed to create driver for task %q in group %q for validation: %v" 502 multierror.Append(validationErrors, fmt.Errorf(msg, tg.Name, task.Name, err)) 503 continue 504 } 505 506 if err := d.Validate(task.Config); err != nil { 507 formatted := fmt.Errorf("group %q -> task %q -> config: %v", tg.Name, task.Name, err) 508 multierror.Append(validationErrors, formatted) 509 } 510 } 511 } 512 513 if job.Type == structs.JobTypeCore { 514 multierror.Append(validationErrors, fmt.Errorf("job type cannot be core")) 515 } 516 517 return validationErrors.ErrorOrNil() 518 }