github.com/adityamillind98/nomad@v0.11.8/nomad/job_endpoint.go (about) 1 package nomad 2 3 import ( 4 "context" 5 "fmt" 6 "sort" 7 "strings" 8 "time" 9 10 metrics "github.com/armon/go-metrics" 11 log "github.com/hashicorp/go-hclog" 12 memdb "github.com/hashicorp/go-memdb" 13 multierror "github.com/hashicorp/go-multierror" 14 15 "github.com/golang/snappy" 16 "github.com/hashicorp/consul/lib" 17 "github.com/pkg/errors" 18 19 "github.com/hashicorp/nomad/acl" 20 "github.com/hashicorp/nomad/helper" 21 "github.com/hashicorp/nomad/helper/uuid" 22 "github.com/hashicorp/nomad/nomad/state" 23 "github.com/hashicorp/nomad/nomad/structs" 24 "github.com/hashicorp/nomad/scheduler" 25 ) 26 27 const ( 28 // RegisterEnforceIndexErrPrefix is the prefix to use in errors caused by 29 // enforcing the job modify index during registers. 30 RegisterEnforceIndexErrPrefix = "Enforcing job modify index" 31 32 // DispatchPayloadSizeLimit is the maximum size of the uncompressed input 33 // data payload. 34 DispatchPayloadSizeLimit = 16 * 1024 35 ) 36 37 var ( 38 // allowRescheduleTransition is the transition that allows failed 39 // allocations to be force rescheduled. We create a one off 40 // variable to avoid creating a new object for every request. 
41 allowForceRescheduleTransition = &structs.DesiredTransition{ 42 ForceReschedule: helper.BoolToPtr(true), 43 } 44 ) 45 46 // Job endpoint is used for job interactions 47 type Job struct { 48 srv *Server 49 logger log.Logger 50 51 // builtin admission controllers 52 mutators []jobMutator 53 validators []jobValidator 54 } 55 56 // NewJobEndpoints creates a new job endpoint with builtin admission controllers 57 func NewJobEndpoints(s *Server) *Job { 58 return &Job{ 59 srv: s, 60 logger: s.logger.Named("job"), 61 mutators: []jobMutator{ 62 jobCanonicalizer{}, 63 jobConnectHook{}, 64 jobExposeCheckHook{}, 65 jobImpliedConstraints{}, 66 }, 67 validators: []jobValidator{ 68 jobConnectHook{}, 69 jobExposeCheckHook{}, 70 jobValidate{}, 71 }, 72 } 73 } 74 75 // Register is used to upsert a job for scheduling 76 func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegisterResponse) error { 77 if done, err := j.srv.forward("Job.Register", args, args, reply); done { 78 return err 79 } 80 defer metrics.MeasureSince([]string{"nomad", "job", "register"}, time.Now()) 81 82 // Validate the arguments 83 if args.Job == nil { 84 return fmt.Errorf("missing job for registration") 85 } 86 87 // defensive check; http layer and RPC requester should ensure namespaces are set consistently 88 if args.RequestNamespace() != args.Job.Namespace { 89 return fmt.Errorf("mismatched request namespace in request: %q, %q", args.RequestNamespace(), args.Job.Namespace) 90 } 91 92 // Run admission controllers 93 job, warnings, err := j.admissionControllers(args.Job) 94 if err != nil { 95 return err 96 } 97 args.Job = job 98 99 // Set the warning message 100 reply.Warnings = structs.MergeMultierrorWarnings(warnings...) 
101 102 // Check job submission permissions 103 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 104 return err 105 } else if aclObj != nil { 106 if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) { 107 return structs.ErrPermissionDenied 108 } 109 110 // Validate Volume Permissions 111 for _, tg := range args.Job.TaskGroups { 112 for _, vol := range tg.Volumes { 113 switch vol.Type { 114 case structs.VolumeTypeCSI: 115 if !allowCSIMount(aclObj, args.RequestNamespace()) { 116 return structs.ErrPermissionDenied 117 } 118 case structs.VolumeTypeHost: 119 // If a volume is readonly, then we allow access if the user has ReadOnly 120 // or ReadWrite access to the volume. Otherwise we only allow access if 121 // they have ReadWrite access. 122 if vol.ReadOnly { 123 if !aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadOnly) && 124 !aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadWrite) { 125 return structs.ErrPermissionDenied 126 } 127 } else { 128 if !aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadWrite) { 129 return structs.ErrPermissionDenied 130 } 131 } 132 default: 133 return structs.ErrPermissionDenied 134 } 135 } 136 137 for _, t := range tg.Tasks { 138 for _, vm := range t.VolumeMounts { 139 vol := tg.Volumes[vm.Volume] 140 if vm.PropagationMode == structs.VolumeMountPropagationBidirectional && 141 !aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadWrite) { 142 return structs.ErrPermissionDenied 143 } 144 } 145 146 if t.CSIPluginConfig != nil { 147 if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityCSIRegisterPlugin) { 148 return structs.ErrPermissionDenied 149 } 150 } 151 } 152 } 153 154 // Check if override is set and we do not have permissions 155 if args.PolicyOverride { 156 if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySentinelOverride) { 157 
j.logger.Warn("policy override attempted without permissions for job", "job", args.Job.ID) 158 return structs.ErrPermissionDenied 159 } 160 j.logger.Warn("policy override set for job", "job", args.Job.ID) 161 } 162 } 163 164 // Lookup the job 165 snap, err := j.srv.State().Snapshot() 166 if err != nil { 167 return err 168 } 169 ws := memdb.NewWatchSet() 170 existingJob, err := snap.JobByID(ws, args.RequestNamespace(), args.Job.ID) 171 if err != nil { 172 return err 173 } 174 175 // If EnforceIndex set, check it before trying to apply 176 if args.EnforceIndex { 177 jmi := args.JobModifyIndex 178 if existingJob != nil { 179 if jmi == 0 { 180 return fmt.Errorf("%s 0: job already exists", RegisterEnforceIndexErrPrefix) 181 } else if jmi != existingJob.JobModifyIndex { 182 return fmt.Errorf("%s %d: job exists with conflicting job modify index: %d", 183 RegisterEnforceIndexErrPrefix, jmi, existingJob.JobModifyIndex) 184 } 185 } else if jmi != 0 { 186 return fmt.Errorf("%s %d: job does not exist", RegisterEnforceIndexErrPrefix, jmi) 187 } 188 } 189 190 // Validate job transitions if its an update 191 if err := validateJobUpdate(existingJob, args.Job); err != nil { 192 return err 193 } 194 195 // Ensure that all scaling policies have an appropriate ID 196 if err := propagateScalingPolicyIDs(existingJob, args.Job); err != nil { 197 return err 198 } 199 200 // Ensure that the job has permissions for the requested Vault tokens 201 policies := args.Job.VaultPolicies() 202 if len(policies) != 0 { 203 vconf := j.srv.config.VaultConfig 204 if !vconf.IsEnabled() { 205 return fmt.Errorf("Vault not enabled and Vault policies requested") 206 } 207 208 // Have to check if the user has permissions 209 if !vconf.AllowsUnauthenticated() { 210 if args.Job.VaultToken == "" { 211 return fmt.Errorf("Vault policies requested but missing Vault Token") 212 } 213 214 vault := j.srv.vault 215 s, err := vault.LookupToken(context.Background(), args.Job.VaultToken) 216 if err != nil { 217 return err 
218 } 219 220 allowedPolicies, err := PoliciesFrom(s) 221 if err != nil { 222 return err 223 } 224 225 // If we are given a root token it can access all policies 226 if !lib.StrContains(allowedPolicies, "root") { 227 flatPolicies := structs.VaultPoliciesSet(policies) 228 subset, offending := helper.SliceStringIsSubset(allowedPolicies, flatPolicies) 229 if !subset { 230 return fmt.Errorf("Passed Vault Token doesn't allow access to the following policies: %s", 231 strings.Join(offending, ", ")) 232 } 233 } 234 } 235 } 236 237 // helper function that checks if the "operator token" supplied with the 238 // job has sufficient ACL permissions for establishing consul connect services 239 checkOperatorToken := func(task string) error { 240 if j.srv.config.ConsulConfig.AllowsUnauthenticated() { 241 // if consul.allow_unauthenticated is enabled (which is the default) 242 // just let the Job through without checking anything. 243 return nil 244 } 245 proxiedTask := strings.TrimPrefix(task, structs.ConnectProxyPrefix+"-") 246 ctx := context.Background() 247 if err := j.srv.consulACLs.CheckSIPolicy(ctx, proxiedTask, args.Job.ConsulToken); err != nil { 248 // not much in the way of exported error types, we could parse 249 // the content, but all errors are going to be failures anyway 250 return errors.Wrap(err, "operator token denied") 251 } 252 return nil 253 } 254 255 // Enforce that the operator has necessary Consul ACL permissions 256 for _, tg := range args.Job.ConnectTasks() { 257 for _, task := range tg { 258 if err := checkOperatorToken(task); err != nil { 259 return err 260 } 261 } 262 } 263 264 // Enforce Sentinel policies. Pass a copy of the job to prevent 265 // sentinel from altering it. 266 policyWarnings, err := j.enforceSubmitJob(args.PolicyOverride, args.Job.Copy()) 267 if err != nil { 268 return err 269 } 270 if policyWarnings != nil { 271 warnings = append(warnings, policyWarnings) 272 reply.Warnings = structs.MergeMultierrorWarnings(warnings...) 
273 } 274 275 // Clear the Vault token 276 args.Job.VaultToken = "" 277 278 // Clear the Consul token 279 args.Job.ConsulToken = "" 280 281 // Check if the job has changed at all 282 if existingJob == nil || existingJob.SpecChanged(args.Job) { 283 // Set the submit time 284 args.Job.SetSubmitTime() 285 286 // Commit this update via Raft 287 fsmErr, index, err := j.srv.raftApply(structs.JobRegisterRequestType, args) 288 if err, ok := fsmErr.(error); ok && err != nil { 289 j.logger.Error("registering job failed", "error", err, "fsm", true) 290 return err 291 } 292 if err != nil { 293 j.logger.Error("registering job failed", "error", err, "raft", true) 294 return err 295 } 296 297 // Populate the reply with job information 298 reply.JobModifyIndex = index 299 } else { 300 reply.JobModifyIndex = existingJob.JobModifyIndex 301 } 302 303 // If the job is periodic or parameterized, we don't create an eval. 304 if args.Job.IsPeriodic() || args.Job.IsParameterized() { 305 return nil 306 } 307 308 // Create a new evaluation 309 now := time.Now().UTC().UnixNano() 310 eval := &structs.Evaluation{ 311 ID: uuid.Generate(), 312 Namespace: args.RequestNamespace(), 313 Priority: args.Job.Priority, 314 Type: args.Job.Type, 315 TriggeredBy: structs.EvalTriggerJobRegister, 316 JobID: args.Job.ID, 317 JobModifyIndex: reply.JobModifyIndex, 318 Status: structs.EvalStatusPending, 319 CreateTime: now, 320 ModifyTime: now, 321 } 322 update := &structs.EvalUpdateRequest{ 323 Evals: []*structs.Evaluation{eval}, 324 WriteRequest: structs.WriteRequest{Region: args.Region}, 325 } 326 327 // Commit this evaluation via Raft 328 // XXX: There is a risk of partial failure where the JobRegister succeeds 329 // but that the EvalUpdate does not. 
330 _, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update) 331 if err != nil { 332 j.logger.Error("eval create failed", "error", err, "method", "register") 333 return err 334 } 335 336 // Populate the reply with eval information 337 reply.EvalID = eval.ID 338 reply.EvalCreateIndex = evalIndex 339 reply.Index = evalIndex 340 return nil 341 } 342 343 // propagateScalingPolicyIDs propagates scaling policy IDs from existing job 344 // to updated job, or generates random IDs in new job 345 func propagateScalingPolicyIDs(old, new *structs.Job) error { 346 347 oldIDs := make(map[string]string) 348 if old != nil { 349 // jobs currently only have scaling policies on task groups, so we can 350 // find correspondences using task group names 351 for _, p := range old.GetScalingPolicies() { 352 oldIDs[p.Target[structs.ScalingTargetGroup]] = p.ID 353 } 354 } 355 356 // ignore any existing ID in the policy, they should be empty 357 for _, p := range new.GetScalingPolicies() { 358 if id, ok := oldIDs[p.Target[structs.ScalingTargetGroup]]; ok { 359 p.ID = id 360 } else { 361 p.ID = uuid.Generate() 362 } 363 } 364 365 return nil 366 } 367 368 // getSignalConstraint builds a suitable constraint based on the required 369 // signals 370 func getSignalConstraint(signals []string) *structs.Constraint { 371 sort.Strings(signals) 372 return &structs.Constraint{ 373 Operand: structs.ConstraintSetContains, 374 LTarget: "${attr.os.signals}", 375 RTarget: strings.Join(signals, ","), 376 } 377 } 378 379 // Summary retrieves the summary of a job 380 func (j *Job) Summary(args *structs.JobSummaryRequest, 381 reply *structs.JobSummaryResponse) error { 382 383 if done, err := j.srv.forward("Job.Summary", args, args, reply); done { 384 return err 385 } 386 defer metrics.MeasureSince([]string{"nomad", "job_summary", "get_job_summary"}, time.Now()) 387 388 // Check for read-job permissions 389 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 390 return err 391 } 
else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { 392 return structs.ErrPermissionDenied 393 } 394 395 // Setup the blocking query 396 opts := blockingOptions{ 397 queryOpts: &args.QueryOptions, 398 queryMeta: &reply.QueryMeta, 399 run: func(ws memdb.WatchSet, state *state.StateStore) error { 400 // Look for job summary 401 out, err := state.JobSummaryByID(ws, args.RequestNamespace(), args.JobID) 402 if err != nil { 403 return err 404 } 405 406 // Setup the output 407 reply.JobSummary = out 408 if out != nil { 409 reply.Index = out.ModifyIndex 410 } else { 411 // Use the last index that affected the job_summary table 412 index, err := state.Index("job_summary") 413 if err != nil { 414 return err 415 } 416 reply.Index = index 417 } 418 419 // Set the query response 420 j.srv.setQueryMeta(&reply.QueryMeta) 421 return nil 422 }} 423 return j.srv.blockingRPC(&opts) 424 } 425 426 // Validate validates a job 427 func (j *Job) Validate(args *structs.JobValidateRequest, reply *structs.JobValidateResponse) error { 428 defer metrics.MeasureSince([]string{"nomad", "job", "validate"}, time.Now()) 429 430 // defensive check; http layer and RPC requester should ensure namespaces are set consistently 431 if args.RequestNamespace() != args.Job.Namespace { 432 return fmt.Errorf("mismatched request namespace in request: %q, %q", args.RequestNamespace(), args.Job.Namespace) 433 } 434 435 job, mutateWarnings, err := j.admissionMutators(args.Job) 436 if err != nil { 437 return err 438 } 439 args.Job = job 440 441 // Check for read-job permissions 442 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 443 return err 444 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { 445 return structs.ErrPermissionDenied 446 } 447 448 // Validate the job and capture any warnings 449 validateWarnings, err := j.admissionValidators(args.Job) 450 if err != nil { 451 if merr, ok := 
err.(*multierror.Error); ok { 452 for _, err := range merr.Errors { 453 reply.ValidationErrors = append(reply.ValidationErrors, err.Error()) 454 } 455 reply.Error = merr.Error() 456 } else { 457 reply.ValidationErrors = append(reply.ValidationErrors, err.Error()) 458 reply.Error = err.Error() 459 } 460 } 461 462 validateWarnings = append(validateWarnings, mutateWarnings...) 463 464 // Set the warning message 465 reply.Warnings = structs.MergeMultierrorWarnings(validateWarnings...) 466 reply.DriverConfigValidated = true 467 return nil 468 } 469 470 // Revert is used to revert the job to a prior version 471 func (j *Job) Revert(args *structs.JobRevertRequest, reply *structs.JobRegisterResponse) error { 472 if done, err := j.srv.forward("Job.Revert", args, args, reply); done { 473 return err 474 } 475 defer metrics.MeasureSince([]string{"nomad", "job", "revert"}, time.Now()) 476 477 // Check for submit-job permissions 478 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 479 return err 480 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) { 481 return structs.ErrPermissionDenied 482 } 483 484 // Validate the arguments 485 if args.JobID == "" { 486 return fmt.Errorf("missing job ID for revert") 487 } 488 489 // Lookup the job by version 490 snap, err := j.srv.fsm.State().Snapshot() 491 if err != nil { 492 return err 493 } 494 495 ws := memdb.NewWatchSet() 496 cur, err := snap.JobByID(ws, args.RequestNamespace(), args.JobID) 497 if err != nil { 498 return err 499 } 500 if cur == nil { 501 return fmt.Errorf("job %q not found", args.JobID) 502 } 503 if args.JobVersion == cur.Version { 504 return fmt.Errorf("can't revert to current version") 505 } 506 507 jobV, err := snap.JobByIDAndVersion(ws, args.RequestNamespace(), args.JobID, args.JobVersion) 508 if err != nil { 509 return err 510 } 511 if jobV == nil { 512 return fmt.Errorf("job %q in namespace %q at version %d not found", args.JobID, 
args.RequestNamespace(), args.JobVersion) 513 } 514 515 // Build the register request 516 revJob := jobV.Copy() 517 // Use Vault Token from revert request to perform registration of reverted job. 518 revJob.VaultToken = args.VaultToken 519 reg := &structs.JobRegisterRequest{ 520 Job: revJob, 521 WriteRequest: args.WriteRequest, 522 } 523 524 // If the request is enforcing the existing version do a check. 525 if args.EnforcePriorVersion != nil { 526 if cur.Version != *args.EnforcePriorVersion { 527 return fmt.Errorf("Current job has version %d; enforcing version %d", cur.Version, *args.EnforcePriorVersion) 528 } 529 530 reg.EnforceIndex = true 531 reg.JobModifyIndex = cur.JobModifyIndex 532 } 533 534 // Register the version. 535 return j.Register(reg, reply) 536 } 537 538 // Stable is used to mark the job version as stable 539 func (j *Job) Stable(args *structs.JobStabilityRequest, reply *structs.JobStabilityResponse) error { 540 if done, err := j.srv.forward("Job.Stable", args, args, reply); done { 541 return err 542 } 543 defer metrics.MeasureSince([]string{"nomad", "job", "stable"}, time.Now()) 544 545 // Check for read-job permissions 546 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 547 return err 548 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) { 549 return structs.ErrPermissionDenied 550 } 551 552 // Validate the arguments 553 if args.JobID == "" { 554 return fmt.Errorf("missing job ID for marking job as stable") 555 } 556 557 // Lookup the job by version 558 snap, err := j.srv.fsm.State().Snapshot() 559 if err != nil { 560 return err 561 } 562 563 ws := memdb.NewWatchSet() 564 jobV, err := snap.JobByIDAndVersion(ws, args.RequestNamespace(), args.JobID, args.JobVersion) 565 if err != nil { 566 return err 567 } 568 if jobV == nil { 569 return fmt.Errorf("job %q in namespace %q at version %d not found", args.JobID, args.RequestNamespace(), args.JobVersion) 570 } 571 572 // Commit 
this stability request via Raft 573 _, modifyIndex, err := j.srv.raftApply(structs.JobStabilityRequestType, args) 574 if err != nil { 575 j.logger.Error("submitting job stability request failed", "error", err) 576 return err 577 } 578 579 // Setup the reply 580 reply.Index = modifyIndex 581 return nil 582 } 583 584 // Evaluate is used to force a job for re-evaluation 585 func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegisterResponse) error { 586 if done, err := j.srv.forward("Job.Evaluate", args, args, reply); done { 587 return err 588 } 589 defer metrics.MeasureSince([]string{"nomad", "job", "evaluate"}, time.Now()) 590 591 // Check for read-job permissions 592 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 593 return err 594 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { 595 return structs.ErrPermissionDenied 596 } 597 598 // Validate the arguments 599 if args.JobID == "" { 600 return fmt.Errorf("missing job ID for evaluation") 601 } 602 603 // Lookup the job 604 snap, err := j.srv.fsm.State().Snapshot() 605 if err != nil { 606 return err 607 } 608 ws := memdb.NewWatchSet() 609 job, err := snap.JobByID(ws, args.RequestNamespace(), args.JobID) 610 if err != nil { 611 return err 612 } 613 if job == nil { 614 return fmt.Errorf("job not found") 615 } 616 617 if job.IsPeriodic() { 618 return fmt.Errorf("can't evaluate periodic job") 619 } else if job.IsParameterized() { 620 return fmt.Errorf("can't evaluate parameterized job") 621 } 622 623 forceRescheduleAllocs := make(map[string]*structs.DesiredTransition) 624 625 if args.EvalOptions.ForceReschedule { 626 // Find any failed allocs that could be force rescheduled 627 allocs, err := snap.AllocsByJob(ws, args.RequestNamespace(), args.JobID, false) 628 if err != nil { 629 return err 630 } 631 632 for _, alloc := range allocs { 633 taskGroup := job.LookupTaskGroup(alloc.TaskGroup) 634 // Forcing rescheduling is only 
allowed if task group has rescheduling enabled 635 if taskGroup == nil || !taskGroup.ReschedulePolicy.Enabled() { 636 continue 637 } 638 639 if alloc.NextAllocation == "" && alloc.ClientStatus == structs.AllocClientStatusFailed && !alloc.DesiredTransition.ShouldForceReschedule() { 640 forceRescheduleAllocs[alloc.ID] = allowForceRescheduleTransition 641 } 642 } 643 } 644 645 // Create a new evaluation 646 now := time.Now().UTC().UnixNano() 647 eval := &structs.Evaluation{ 648 ID: uuid.Generate(), 649 Namespace: args.RequestNamespace(), 650 Priority: job.Priority, 651 Type: job.Type, 652 TriggeredBy: structs.EvalTriggerJobRegister, 653 JobID: job.ID, 654 JobModifyIndex: job.ModifyIndex, 655 Status: structs.EvalStatusPending, 656 CreateTime: now, 657 ModifyTime: now, 658 } 659 660 // Create a AllocUpdateDesiredTransitionRequest request with the eval and any forced rescheduled allocs 661 updateTransitionReq := &structs.AllocUpdateDesiredTransitionRequest{ 662 Allocs: forceRescheduleAllocs, 663 Evals: []*structs.Evaluation{eval}, 664 } 665 _, evalIndex, err := j.srv.raftApply(structs.AllocUpdateDesiredTransitionRequestType, updateTransitionReq) 666 667 if err != nil { 668 j.logger.Error("eval create failed", "error", err, "method", "evaluate") 669 return err 670 } 671 672 // Setup the reply 673 reply.EvalID = eval.ID 674 reply.EvalCreateIndex = evalIndex 675 reply.JobModifyIndex = job.ModifyIndex 676 reply.Index = evalIndex 677 return nil 678 } 679 680 // Deregister is used to remove a job the cluster. 
681 func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobDeregisterResponse) error { 682 if done, err := j.srv.forward("Job.Deregister", args, args, reply); done { 683 return err 684 } 685 defer metrics.MeasureSince([]string{"nomad", "job", "deregister"}, time.Now()) 686 687 // Check for submit-job permissions 688 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 689 return err 690 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) { 691 return structs.ErrPermissionDenied 692 } 693 694 // Validate the arguments 695 if args.JobID == "" { 696 return fmt.Errorf("missing job ID for deregistering") 697 } 698 699 // Lookup the job 700 snap, err := j.srv.fsm.State().Snapshot() 701 if err != nil { 702 return err 703 } 704 ws := memdb.NewWatchSet() 705 job, err := snap.JobByID(ws, args.RequestNamespace(), args.JobID) 706 if err != nil { 707 return err 708 } 709 710 // For a job with volumes, find its volumes before deleting the job. 711 // Later we'll apply this raft. 712 volumesToGC := newCSIBatchRelease(j.srv, j.logger, 100) 713 if job != nil { 714 for _, tg := range job.TaskGroups { 715 for _, vol := range tg.Volumes { 716 if vol.Type == structs.VolumeTypeCSI { 717 volumesToGC.add(vol.Source, job.Namespace) 718 } 719 } 720 } 721 } 722 723 // Commit the job update via Raft 724 _, index, err := j.srv.raftApply(structs.JobDeregisterRequestType, args) 725 if err != nil { 726 j.logger.Error("deregister failed", "error", err) 727 return err 728 } 729 730 // Populate the reply with job information 731 reply.JobModifyIndex = index 732 733 // Make a raft apply to release the CSI volume claims of terminal allocs. 734 var result *multierror.Error 735 err = volumesToGC.apply() 736 if err != nil { 737 result = multierror.Append(result, err) 738 } 739 740 // If the job is periodic or parameterized, we don't create an eval. 
741 if job != nil && (job.IsPeriodic() || job.IsParameterized()) { 742 return nil 743 } 744 745 // Create a new evaluation 746 // XXX: The job priority / type is strange for this, since it's not a high 747 // priority even if the job was. 748 now := time.Now().UTC().UnixNano() 749 eval := &structs.Evaluation{ 750 ID: uuid.Generate(), 751 Namespace: args.RequestNamespace(), 752 Priority: structs.JobDefaultPriority, 753 Type: structs.JobTypeService, 754 TriggeredBy: structs.EvalTriggerJobDeregister, 755 JobID: args.JobID, 756 JobModifyIndex: index, 757 Status: structs.EvalStatusPending, 758 CreateTime: now, 759 ModifyTime: now, 760 } 761 update := &structs.EvalUpdateRequest{ 762 Evals: []*structs.Evaluation{eval}, 763 WriteRequest: structs.WriteRequest{Region: args.Region}, 764 } 765 766 // Commit this evaluation via Raft 767 _, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update) 768 if err != nil { 769 result = multierror.Append(result, err) 770 j.logger.Error("eval create failed", "error", err, "method", "deregister") 771 return result.ErrorOrNil() 772 } 773 774 // Populate the reply with eval information 775 reply.EvalID = eval.ID 776 reply.EvalCreateIndex = evalIndex 777 reply.Index = evalIndex 778 return result.ErrorOrNil() 779 } 780 781 // BatchDeregister is used to remove a set of jobs from the cluster. 
782 func (j *Job) BatchDeregister(args *structs.JobBatchDeregisterRequest, reply *structs.JobBatchDeregisterResponse) error { 783 if done, err := j.srv.forward("Job.BatchDeregister", args, args, reply); done { 784 return err 785 } 786 defer metrics.MeasureSince([]string{"nomad", "job", "batch_deregister"}, time.Now()) 787 788 // Resolve the ACL token 789 aclObj, err := j.srv.ResolveToken(args.AuthToken) 790 if err != nil { 791 return err 792 } 793 794 // Validate the arguments 795 if len(args.Jobs) == 0 { 796 return fmt.Errorf("given no jobs to deregister") 797 } 798 if len(args.Evals) != 0 { 799 return fmt.Errorf("evaluations should not be populated") 800 } 801 802 // Loop through checking for permissions 803 for jobNS := range args.Jobs { 804 // Check for submit-job permissions 805 if aclObj != nil && !aclObj.AllowNsOp(jobNS.Namespace, acl.NamespaceCapabilitySubmitJob) { 806 return structs.ErrPermissionDenied 807 } 808 } 809 810 // Grab a snapshot 811 snap, err := j.srv.fsm.State().Snapshot() 812 if err != nil { 813 return err 814 } 815 816 // Loop through to create evals 817 for jobNS, options := range args.Jobs { 818 if options == nil { 819 return fmt.Errorf("no deregister options provided for %v", jobNS) 820 } 821 822 job, err := snap.JobByID(nil, jobNS.Namespace, jobNS.ID) 823 if err != nil { 824 return err 825 } 826 827 // If the job is periodic or parameterized, we don't create an eval. 
828 if job != nil && (job.IsPeriodic() || job.IsParameterized()) { 829 continue 830 } 831 832 priority := structs.JobDefaultPriority 833 jtype := structs.JobTypeService 834 if job != nil { 835 priority = job.Priority 836 jtype = job.Type 837 } 838 839 // Create a new evaluation 840 now := time.Now().UTC().UnixNano() 841 eval := &structs.Evaluation{ 842 ID: uuid.Generate(), 843 Namespace: jobNS.Namespace, 844 Priority: priority, 845 Type: jtype, 846 TriggeredBy: structs.EvalTriggerJobDeregister, 847 JobID: jobNS.ID, 848 Status: structs.EvalStatusPending, 849 CreateTime: now, 850 ModifyTime: now, 851 } 852 args.Evals = append(args.Evals, eval) 853 } 854 855 // Commit this update via Raft 856 _, index, err := j.srv.raftApply(structs.JobBatchDeregisterRequestType, args) 857 if err != nil { 858 j.logger.Error("batch deregister failed", "error", err) 859 return err 860 } 861 862 reply.Index = index 863 return nil 864 } 865 866 // Scale is used to modify one of the scaling targets in the job 867 func (j *Job) Scale(args *structs.JobScaleRequest, reply *structs.JobRegisterResponse) error { 868 if done, err := j.srv.forward("Job.Scale", args, args, reply); done { 869 return err 870 } 871 defer metrics.MeasureSince([]string{"nomad", "job", "scale"}, time.Now()) 872 873 // Validate the arguments 874 namespace := args.Target[structs.ScalingTargetNamespace] 875 jobID := args.Target[structs.ScalingTargetJob] 876 groupName := args.Target[structs.ScalingTargetGroup] 877 if namespace != "" && namespace != args.RequestNamespace() { 878 return structs.NewErrRPCCoded(400, "namespace in payload did not match header") 879 } else if namespace == "" { 880 namespace = args.RequestNamespace() 881 } 882 if jobID != "" && jobID != args.JobID { 883 return fmt.Errorf("job ID in payload did not match URL") 884 } 885 if groupName == "" { 886 return structs.NewErrRPCCoded(400, "missing task group name for scaling action") 887 } 888 if args.Error && args.Count != nil { 889 return 
structs.NewErrRPCCoded(400, "scaling action should not contain count if error is true") 890 } 891 if args.Count != nil && *args.Count < 0 { 892 return structs.NewErrRPCCoded(400, "scaling action count can't be negative") 893 } 894 895 // Check for submit-job permissions 896 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 897 return err 898 } else if aclObj != nil { 899 hasScaleJob := aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityScaleJob) 900 hasSubmitJob := aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) 901 if !(hasScaleJob || hasSubmitJob) { 902 return structs.ErrPermissionDenied 903 } 904 } 905 906 // Lookup the job 907 snap, err := j.srv.fsm.State().Snapshot() 908 if err != nil { 909 return err 910 } 911 ws := memdb.NewWatchSet() 912 job, err := snap.JobByID(ws, namespace, args.JobID) 913 if err != nil { 914 return err 915 } 916 if job == nil { 917 return structs.NewErrRPCCoded(404, fmt.Sprintf("job %q not found", args.JobID)) 918 } 919 920 var found *structs.TaskGroup 921 for _, tg := range job.TaskGroups { 922 if groupName == tg.Name { 923 found = tg 924 break 925 } 926 } 927 if found == nil { 928 return structs.NewErrRPCCoded(400, 929 fmt.Sprintf("task group %q specified for scaling does not exist in job", groupName)) 930 } 931 932 now := time.Now().UTC().UnixNano() 933 934 // If the count is present, commit the job update via Raft 935 // for now, we'll do this even if count didn't change 936 if args.Count != nil { 937 truncCount := int(*args.Count) 938 if int64(truncCount) != *args.Count { 939 return structs.NewErrRPCCoded(400, 940 fmt.Sprintf("new scaling count is too large for TaskGroup.Count (int): %v", args.Count)) 941 } 942 found.Count = truncCount 943 944 registerReq := structs.JobRegisterRequest{ 945 Job: job, 946 EnforceIndex: true, 947 JobModifyIndex: job.ModifyIndex, 948 PolicyOverride: args.PolicyOverride, 949 WriteRequest: args.WriteRequest, 950 } 951 _, jobModifyIndex, err := 
j.srv.raftApply(structs.JobRegisterRequestType, registerReq) 952 if err != nil { 953 j.logger.Error("job register for scale failed", "error", err) 954 return err 955 } 956 reply.JobModifyIndex = jobModifyIndex 957 } else { 958 reply.JobModifyIndex = job.ModifyIndex 959 } 960 961 // Only create an eval for non-dispatch jobs and if the count was provided 962 // for now, we'll do this even if count didn't change 963 if !job.IsPeriodic() && !job.IsParameterized() && args.Count != nil { 964 eval := &structs.Evaluation{ 965 ID: uuid.Generate(), 966 Namespace: args.RequestNamespace(), 967 Priority: structs.JobDefaultPriority, 968 Type: structs.JobTypeService, 969 TriggeredBy: structs.EvalTriggerScaling, 970 JobID: args.JobID, 971 JobModifyIndex: reply.JobModifyIndex, 972 Status: structs.EvalStatusPending, 973 CreateTime: now, 974 ModifyTime: now, 975 } 976 update := &structs.EvalUpdateRequest{ 977 Evals: []*structs.Evaluation{eval}, 978 WriteRequest: structs.WriteRequest{Region: args.Region}, 979 } 980 981 // Commit this evaluation via Raft 982 _, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update) 983 if err != nil { 984 j.logger.Error("eval create failed", "error", err, "method", "scale") 985 return err 986 } 987 988 reply.EvalID = eval.ID 989 reply.EvalCreateIndex = evalIndex 990 } else { 991 reply.EvalID = "" 992 reply.EvalCreateIndex = 0 993 } 994 995 event := &structs.ScalingEventRequest{ 996 Namespace: job.Namespace, 997 JobID: job.ID, 998 TaskGroup: groupName, 999 ScalingEvent: &structs.ScalingEvent{ 1000 Time: now, 1001 Count: args.Count, 1002 Message: args.Message, 1003 Error: args.Error, 1004 Meta: args.Meta, 1005 }, 1006 } 1007 if reply.EvalID != "" { 1008 event.ScalingEvent.EvalID = &reply.EvalID 1009 } 1010 _, eventIndex, err := j.srv.raftApply(structs.ScalingEventRegisterRequestType, event) 1011 if err != nil { 1012 j.logger.Error("scaling event create failed", "error", err) 1013 return err 1014 } 1015 1016 reply.Index = eventIndex 1017 
j.srv.setQueryMeta(&reply.QueryMeta) 1018 return nil 1019 } 1020 1021 // GetJob is used to request information about a specific job 1022 func (j *Job) GetJob(args *structs.JobSpecificRequest, 1023 reply *structs.SingleJobResponse) error { 1024 if done, err := j.srv.forward("Job.GetJob", args, args, reply); done { 1025 return err 1026 } 1027 defer metrics.MeasureSince([]string{"nomad", "job", "get_job"}, time.Now()) 1028 1029 // Check for read-job permissions 1030 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 1031 return err 1032 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { 1033 return structs.ErrPermissionDenied 1034 } 1035 1036 // Setup the blocking query 1037 opts := blockingOptions{ 1038 queryOpts: &args.QueryOptions, 1039 queryMeta: &reply.QueryMeta, 1040 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1041 // Look for the job 1042 out, err := state.JobByID(ws, args.RequestNamespace(), args.JobID) 1043 if err != nil { 1044 return err 1045 } 1046 1047 // Setup the output 1048 reply.Job = out 1049 if out != nil { 1050 reply.Index = out.ModifyIndex 1051 } else { 1052 // Use the last index that affected the jobs table 1053 index, err := state.Index("jobs") 1054 if err != nil { 1055 return err 1056 } 1057 reply.Index = index 1058 } 1059 1060 // Set the query response 1061 j.srv.setQueryMeta(&reply.QueryMeta) 1062 return nil 1063 }} 1064 return j.srv.blockingRPC(&opts) 1065 } 1066 1067 // GetJobVersions is used to retrieve all tracked versions of a job.
1068 func (j *Job) GetJobVersions(args *structs.JobVersionsRequest, 1069 reply *structs.JobVersionsResponse) error { 1070 if done, err := j.srv.forward("Job.GetJobVersions", args, args, reply); done { 1071 return err 1072 } 1073 defer metrics.MeasureSince([]string{"nomad", "job", "get_job_versions"}, time.Now()) 1074 1075 // Check for read-job permissions 1076 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 1077 return err 1078 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { 1079 return structs.ErrPermissionDenied 1080 } 1081 1082 // Setup the blocking query 1083 opts := blockingOptions{ 1084 queryOpts: &args.QueryOptions, 1085 queryMeta: &reply.QueryMeta, 1086 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1087 // Look for the job 1088 out, err := state.JobVersionsByID(ws, args.RequestNamespace(), args.JobID) 1089 if err != nil { 1090 return err 1091 } 1092 1093 // Setup the output 1094 reply.Versions = out 1095 if len(out) != 0 { 1096 reply.Index = out[0].ModifyIndex 1097 1098 // Compute the diffs 1099 if args.Diffs { 1100 for i := 0; i < len(out)-1; i++ { 1101 old, new := out[i+1], out[i] 1102 d, err := old.Diff(new, true) 1103 if err != nil { 1104 return fmt.Errorf("failed to create job diff: %v", err) 1105 } 1106 reply.Diffs = append(reply.Diffs, d) 1107 } 1108 } 1109 } else { 1110 // Use the last index that affected the nodes table 1111 index, err := state.Index("job_version") 1112 if err != nil { 1113 return err 1114 } 1115 reply.Index = index 1116 } 1117 1118 // Set the query response 1119 j.srv.setQueryMeta(&reply.QueryMeta) 1120 return nil 1121 }} 1122 return j.srv.blockingRPC(&opts) 1123 } 1124 1125 // List is used to list the jobs registered in the system 1126 func (j *Job) List(args *structs.JobListRequest, 1127 reply *structs.JobListResponse) error { 1128 if done, err := j.srv.forward("Job.List", args, args, reply); done { 1129 return err 1130 } 1131 defer 
metrics.MeasureSince([]string{"nomad", "job", "list"}, time.Now()) 1132 1133 // Check for list-job permissions 1134 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 1135 return err 1136 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityListJobs) { 1137 return structs.ErrPermissionDenied 1138 } 1139 1140 // Setup the blocking query 1141 opts := blockingOptions{ 1142 queryOpts: &args.QueryOptions, 1143 queryMeta: &reply.QueryMeta, 1144 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1145 // Capture all the jobs 1146 var err error 1147 var iter memdb.ResultIterator 1148 if prefix := args.QueryOptions.Prefix; prefix != "" { 1149 iter, err = state.JobsByIDPrefix(ws, args.RequestNamespace(), prefix) 1150 } else { 1151 iter, err = state.JobsByNamespace(ws, args.RequestNamespace()) 1152 } 1153 if err != nil { 1154 return err 1155 } 1156 1157 var jobs []*structs.JobListStub 1158 for { 1159 raw := iter.Next() 1160 if raw == nil { 1161 break 1162 } 1163 job := raw.(*structs.Job) 1164 summary, err := state.JobSummaryByID(ws, args.RequestNamespace(), job.ID) 1165 if err != nil { 1166 return fmt.Errorf("unable to look up summary for job: %v", job.ID) 1167 } 1168 jobs = append(jobs, job.Stub(summary)) 1169 } 1170 reply.Jobs = jobs 1171 1172 // Use the last index that affected the jobs table or summary 1173 jindex, err := state.Index("jobs") 1174 if err != nil { 1175 return err 1176 } 1177 sindex, err := state.Index("job_summary") 1178 if err != nil { 1179 return err 1180 } 1181 reply.Index = helper.Uint64Max(jindex, sindex) 1182 1183 // Set the query response 1184 j.srv.setQueryMeta(&reply.QueryMeta) 1185 return nil 1186 }} 1187 return j.srv.blockingRPC(&opts) 1188 } 1189 1190 // Allocations is used to list the allocations for a job 1191 func (j *Job) Allocations(args *structs.JobSpecificRequest, 1192 reply *structs.JobAllocationsResponse) error { 1193 if done, err := j.srv.forward("Job.Allocations", args, 
args, reply); done { 1194 return err 1195 } 1196 defer metrics.MeasureSince([]string{"nomad", "job", "allocations"}, time.Now()) 1197 1198 // Check for read-job permissions 1199 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 1200 return err 1201 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { 1202 return structs.ErrPermissionDenied 1203 } 1204 1205 // Ensure JobID is set otherwise everything works and never returns 1206 // allocations which can hide bugs in request code. 1207 if args.JobID == "" { 1208 return fmt.Errorf("missing job ID") 1209 } 1210 1211 // Setup the blocking query 1212 opts := blockingOptions{ 1213 queryOpts: &args.QueryOptions, 1214 queryMeta: &reply.QueryMeta, 1215 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1216 // Capture the allocations 1217 allocs, err := state.AllocsByJob(ws, args.RequestNamespace(), args.JobID, args.All) 1218 if err != nil { 1219 return err 1220 } 1221 1222 // Convert to stubs 1223 if len(allocs) > 0 { 1224 reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs)) 1225 for _, alloc := range allocs { 1226 reply.Allocations = append(reply.Allocations, alloc.Stub()) 1227 } 1228 } 1229 1230 // Use the last index that affected the allocs table 1231 index, err := state.Index("allocs") 1232 if err != nil { 1233 return err 1234 } 1235 reply.Index = index 1236 1237 // Set the query response 1238 j.srv.setQueryMeta(&reply.QueryMeta) 1239 return nil 1240 1241 }} 1242 return j.srv.blockingRPC(&opts) 1243 } 1244 1245 // Evaluations is used to list the evaluations for a job 1246 func (j *Job) Evaluations(args *structs.JobSpecificRequest, 1247 reply *structs.JobEvaluationsResponse) error { 1248 if done, err := j.srv.forward("Job.Evaluations", args, args, reply); done { 1249 return err 1250 } 1251 defer metrics.MeasureSince([]string{"nomad", "job", "evaluations"}, time.Now()) 1252 1253 // Check for read-job permissions 1254 if aclObj, 
err := j.srv.ResolveToken(args.AuthToken); err != nil { 1255 return err 1256 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { 1257 return structs.ErrPermissionDenied 1258 } 1259 1260 // Setup the blocking query 1261 opts := blockingOptions{ 1262 queryOpts: &args.QueryOptions, 1263 queryMeta: &reply.QueryMeta, 1264 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1265 // Capture the evals 1266 var err error 1267 reply.Evaluations, err = state.EvalsByJob(ws, args.RequestNamespace(), args.JobID) 1268 if err != nil { 1269 return err 1270 } 1271 1272 // Use the last index that affected the evals table 1273 index, err := state.Index("evals") 1274 if err != nil { 1275 return err 1276 } 1277 reply.Index = index 1278 1279 // Set the query response 1280 j.srv.setQueryMeta(&reply.QueryMeta) 1281 return nil 1282 }} 1283 1284 return j.srv.blockingRPC(&opts) 1285 } 1286 1287 // Deployments is used to list the deployments for a job 1288 func (j *Job) Deployments(args *structs.JobSpecificRequest, 1289 reply *structs.DeploymentListResponse) error { 1290 if done, err := j.srv.forward("Job.Deployments", args, args, reply); done { 1291 return err 1292 } 1293 defer metrics.MeasureSince([]string{"nomad", "job", "deployments"}, time.Now()) 1294 1295 // Check for read-job permissions 1296 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 1297 return err 1298 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { 1299 return structs.ErrPermissionDenied 1300 } 1301 1302 // Setup the blocking query 1303 opts := blockingOptions{ 1304 queryOpts: &args.QueryOptions, 1305 queryMeta: &reply.QueryMeta, 1306 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1307 // Capture the deployments 1308 deploys, err := state.DeploymentsByJobID(ws, args.RequestNamespace(), args.JobID, args.All) 1309 if err != nil { 1310 return err 1311 } 1312 1313 // Use the last 
index that affected the deployment table 1314 index, err := state.Index("deployment") 1315 if err != nil { 1316 return err 1317 } 1318 reply.Index = index 1319 reply.Deployments = deploys 1320 1321 // Set the query response 1322 j.srv.setQueryMeta(&reply.QueryMeta) 1323 return nil 1324 1325 }} 1326 return j.srv.blockingRPC(&opts) 1327 } 1328 1329 // LatestDeployment is used to retrieve the latest deployment for a job 1330 func (j *Job) LatestDeployment(args *structs.JobSpecificRequest, 1331 reply *structs.SingleDeploymentResponse) error { 1332 if done, err := j.srv.forward("Job.LatestDeployment", args, args, reply); done { 1333 return err 1334 } 1335 defer metrics.MeasureSince([]string{"nomad", "job", "latest_deployment"}, time.Now()) 1336 1337 // Check for read-job permissions 1338 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 1339 return err 1340 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) { 1341 return structs.ErrPermissionDenied 1342 } 1343 1344 // Setup the blocking query 1345 opts := blockingOptions{ 1346 queryOpts: &args.QueryOptions, 1347 queryMeta: &reply.QueryMeta, 1348 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1349 // Capture the deployments 1350 deploys, err := state.DeploymentsByJobID(ws, args.RequestNamespace(), args.JobID, args.All) 1351 if err != nil { 1352 return err 1353 } 1354 1355 // Use the last index that affected the deployment table 1356 index, err := state.Index("deployment") 1357 if err != nil { 1358 return err 1359 } 1360 reply.Index = index 1361 if len(deploys) > 0 { 1362 sort.Slice(deploys, func(i, j int) bool { 1363 return deploys[i].CreateIndex > deploys[j].CreateIndex 1364 }) 1365 reply.Deployment = deploys[0] 1366 } 1367 1368 // Set the query response 1369 j.srv.setQueryMeta(&reply.QueryMeta) 1370 return nil 1371 1372 }} 1373 return j.srv.blockingRPC(&opts) 1374 } 1375 1376 // Plan is used to cause a dry-run evaluation of the Job and 
return the results 1377 // with a potential diff containing annotations. 1378 func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse) error { 1379 if done, err := j.srv.forward("Job.Plan", args, args, reply); done { 1380 return err 1381 } 1382 defer metrics.MeasureSince([]string{"nomad", "job", "plan"}, time.Now()) 1383 1384 // Validate the arguments 1385 if args.Job == nil { 1386 return fmt.Errorf("Job required for plan") 1387 } 1388 1389 // Run admission controllers 1390 job, warnings, err := j.admissionControllers(args.Job) 1391 if err != nil { 1392 return err 1393 } 1394 args.Job = job 1395 1396 // Set the warning message 1397 reply.Warnings = structs.MergeMultierrorWarnings(warnings...) 1398 1399 // Check job submission permissions, which we assume is the same for plan 1400 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 1401 return err 1402 } else if aclObj != nil { 1403 if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) { 1404 return structs.ErrPermissionDenied 1405 } 1406 // Check if override is set and we do not have permissions 1407 if args.PolicyOverride { 1408 if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySentinelOverride) { 1409 return structs.ErrPermissionDenied 1410 } 1411 } 1412 } 1413 1414 // Enforce Sentinel policies 1415 policyWarnings, err := j.enforceSubmitJob(args.PolicyOverride, args.Job) 1416 if err != nil { 1417 return err 1418 } 1419 if policyWarnings != nil { 1420 warnings = append(warnings, policyWarnings) 1421 reply.Warnings = structs.MergeMultierrorWarnings(warnings...) 
1422 } 1423 1424 // Acquire a snapshot of the state 1425 snap, err := j.srv.fsm.State().Snapshot() 1426 if err != nil { 1427 return err 1428 } 1429 1430 // Get the original job 1431 ws := memdb.NewWatchSet() 1432 oldJob, err := snap.JobByID(ws, args.RequestNamespace(), args.Job.ID) 1433 if err != nil { 1434 return err 1435 } 1436 1437 var index uint64 1438 var updatedIndex uint64 1439 1440 if oldJob != nil { 1441 index = oldJob.JobModifyIndex 1442 1443 // We want to reuse deployments where possible, so only insert the job if 1444 // it has changed or the job didn't exist 1445 if oldJob.SpecChanged(args.Job) { 1446 // Insert the updated Job into the snapshot 1447 updatedIndex = oldJob.JobModifyIndex + 1 1448 snap.UpsertJob(updatedIndex, args.Job) 1449 } 1450 } else if oldJob == nil { 1451 // Insert the updated Job into the snapshot 1452 snap.UpsertJob(100, args.Job) 1453 } 1454 1455 // Create an eval and mark it as requiring annotations and insert that as well 1456 now := time.Now().UTC().UnixNano() 1457 eval := &structs.Evaluation{ 1458 ID: uuid.Generate(), 1459 Namespace: args.RequestNamespace(), 1460 Priority: args.Job.Priority, 1461 Type: args.Job.Type, 1462 TriggeredBy: structs.EvalTriggerJobRegister, 1463 JobID: args.Job.ID, 1464 JobModifyIndex: updatedIndex, 1465 Status: structs.EvalStatusPending, 1466 AnnotatePlan: true, 1467 // Timestamps are added for consistency but this eval is never persisted 1468 CreateTime: now, 1469 ModifyTime: now, 1470 } 1471 1472 snap.UpsertEvals(100, []*structs.Evaluation{eval}) 1473 1474 // Create an in-memory Planner that returns no errors and stores the 1475 // submitted plan and created evals. 
1476 planner := &scheduler.Harness{ 1477 State: &snap.StateStore, 1478 } 1479 1480 // Create the scheduler and run it 1481 sched, err := scheduler.NewScheduler(eval.Type, j.logger, snap, planner) 1482 if err != nil { 1483 return err 1484 } 1485 1486 if err := sched.Process(eval); err != nil { 1487 return err 1488 } 1489 1490 // Annotate and store the diff 1491 if plans := len(planner.Plans); plans != 1 { 1492 return fmt.Errorf("scheduler resulted in an unexpected number of plans: %v", plans) 1493 } 1494 annotations := planner.Plans[0].Annotations 1495 if args.Diff { 1496 jobDiff, err := oldJob.Diff(args.Job, true) 1497 if err != nil { 1498 return fmt.Errorf("failed to create job diff: %v", err) 1499 } 1500 1501 if err := scheduler.Annotate(jobDiff, annotations); err != nil { 1502 return fmt.Errorf("failed to annotate job diff: %v", err) 1503 } 1504 reply.Diff = jobDiff 1505 } 1506 1507 // Grab the failures 1508 if len(planner.Evals) != 1 { 1509 return fmt.Errorf("scheduler resulted in an unexpected number of eval updates: %v", planner.Evals) 1510 } 1511 updatedEval := planner.Evals[0] 1512 1513 // If it is a periodic job calculate the next launch 1514 if args.Job.IsPeriodic() && args.Job.Periodic.Enabled { 1515 reply.NextPeriodicLaunch, err = args.Job.Periodic.Next(time.Now().In(args.Job.Periodic.GetLocation())) 1516 if err != nil { 1517 return fmt.Errorf("Failed to parse cron expression: %v", err) 1518 } 1519 } 1520 1521 reply.FailedTGAllocs = updatedEval.FailedTGAllocs 1522 reply.JobModifyIndex = index 1523 reply.Annotations = annotations 1524 reply.CreatedEvals = planner.CreateEvals 1525 reply.Index = index 1526 return nil 1527 } 1528 1529 // validateJobUpdate ensures updates to a job are valid. 
1530 func validateJobUpdate(old, new *structs.Job) error { 1531 // Validate Dispatch not set on new Jobs 1532 if old == nil { 1533 if new.Dispatched { 1534 return fmt.Errorf("job can't be submitted with 'Dispatched' set") 1535 } 1536 return nil 1537 } 1538 1539 // Type transitions are disallowed 1540 if old.Type != new.Type { 1541 return fmt.Errorf("cannot update job from type %q to %q", old.Type, new.Type) 1542 } 1543 1544 // Transitioning to/from periodic is disallowed 1545 if old.IsPeriodic() && !new.IsPeriodic() { 1546 return fmt.Errorf("cannot update periodic job to being non-periodic") 1547 } 1548 if new.IsPeriodic() && !old.IsPeriodic() { 1549 return fmt.Errorf("cannot update non-periodic job to being periodic") 1550 } 1551 1552 // Transitioning to/from parameterized is disallowed 1553 if old.IsParameterized() && !new.IsParameterized() { 1554 return fmt.Errorf("cannot update non-parameterized job to being parameterized") 1555 } 1556 if new.IsParameterized() && !old.IsParameterized() { 1557 return fmt.Errorf("cannot update parameterized job to being non-parameterized") 1558 } 1559 1560 if old.Dispatched != new.Dispatched { 1561 return fmt.Errorf("field 'Dispatched' is read-only") 1562 } 1563 1564 return nil 1565 } 1566 1567 // Dispatch a parameterized job. 
1568 func (j *Job) Dispatch(args *structs.JobDispatchRequest, reply *structs.JobDispatchResponse) error { 1569 if done, err := j.srv.forward("Job.Dispatch", args, args, reply); done { 1570 return err 1571 } 1572 defer metrics.MeasureSince([]string{"nomad", "job", "dispatch"}, time.Now()) 1573 1574 // Check for submit-job permissions 1575 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 1576 return err 1577 } else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityDispatchJob) { 1578 return structs.ErrPermissionDenied 1579 } 1580 1581 // Lookup the parameterized job 1582 if args.JobID == "" { 1583 return fmt.Errorf("missing parameterized job ID") 1584 } 1585 1586 snap, err := j.srv.fsm.State().Snapshot() 1587 if err != nil { 1588 return err 1589 } 1590 ws := memdb.NewWatchSet() 1591 parameterizedJob, err := snap.JobByID(ws, args.RequestNamespace(), args.JobID) 1592 if err != nil { 1593 return err 1594 } 1595 if parameterizedJob == nil { 1596 return fmt.Errorf("parameterized job not found") 1597 } 1598 1599 if !parameterizedJob.IsParameterized() { 1600 return fmt.Errorf("Specified job %q is not a parameterized job", args.JobID) 1601 } 1602 1603 if parameterizedJob.Stop { 1604 return fmt.Errorf("Specified job %q is stopped", args.JobID) 1605 } 1606 1607 // Validate the arguments 1608 if err := validateDispatchRequest(args, parameterizedJob); err != nil { 1609 return err 1610 } 1611 1612 // Derive the child job and commit it via Raft 1613 dispatchJob := parameterizedJob.Copy() 1614 dispatchJob.ID = structs.DispatchedID(parameterizedJob.ID, time.Now()) 1615 dispatchJob.ParentID = parameterizedJob.ID 1616 dispatchJob.Name = dispatchJob.ID 1617 dispatchJob.SetSubmitTime() 1618 dispatchJob.Dispatched = true 1619 1620 // Merge in the meta data 1621 for k, v := range args.Meta { 1622 if dispatchJob.Meta == nil { 1623 dispatchJob.Meta = make(map[string]string, len(args.Meta)) 1624 } 1625 dispatchJob.Meta[k] = v 1626 } 1627 
1628 // Compress the payload 1629 dispatchJob.Payload = snappy.Encode(nil, args.Payload) 1630 1631 regReq := &structs.JobRegisterRequest{ 1632 Job: dispatchJob, 1633 WriteRequest: args.WriteRequest, 1634 } 1635 1636 // Commit this update via Raft 1637 fsmErr, jobCreateIndex, err := j.srv.raftApply(structs.JobRegisterRequestType, regReq) 1638 if err, ok := fsmErr.(error); ok && err != nil { 1639 j.logger.Error("dispatched job register failed", "error", err, "fsm", true) 1640 return err 1641 } 1642 if err != nil { 1643 j.logger.Error("dispatched job register failed", "error", err, "raft", true) 1644 return err 1645 } 1646 1647 reply.JobCreateIndex = jobCreateIndex 1648 reply.DispatchedJobID = dispatchJob.ID 1649 reply.Index = jobCreateIndex 1650 1651 // If the job is periodic, we don't create an eval. 1652 if !dispatchJob.IsPeriodic() { 1653 // Create a new evaluation 1654 now := time.Now().UTC().UnixNano() 1655 eval := &structs.Evaluation{ 1656 ID: uuid.Generate(), 1657 Namespace: args.RequestNamespace(), 1658 Priority: dispatchJob.Priority, 1659 Type: dispatchJob.Type, 1660 TriggeredBy: structs.EvalTriggerJobRegister, 1661 JobID: dispatchJob.ID, 1662 JobModifyIndex: jobCreateIndex, 1663 Status: structs.EvalStatusPending, 1664 CreateTime: now, 1665 ModifyTime: now, 1666 } 1667 update := &structs.EvalUpdateRequest{ 1668 Evals: []*structs.Evaluation{eval}, 1669 WriteRequest: structs.WriteRequest{Region: args.Region}, 1670 } 1671 1672 // Commit this evaluation via Raft 1673 _, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update) 1674 if err != nil { 1675 j.logger.Error("eval create failed", "error", err, "method", "dispatch") 1676 return err 1677 } 1678 1679 // Setup the reply 1680 reply.EvalID = eval.ID 1681 reply.EvalCreateIndex = evalIndex 1682 reply.Index = evalIndex 1683 } 1684 1685 return nil 1686 } 1687 1688 // validateDispatchRequest returns whether the request is valid given the 1689 // parameterized job. 
1690 func validateDispatchRequest(req *structs.JobDispatchRequest, job *structs.Job) error { 1691 // Check the payload constraint is met 1692 hasInputData := len(req.Payload) != 0 1693 if job.ParameterizedJob.Payload == structs.DispatchPayloadRequired && !hasInputData { 1694 return fmt.Errorf("Payload is not provided but required by parameterized job") 1695 } else if job.ParameterizedJob.Payload == structs.DispatchPayloadForbidden && hasInputData { 1696 return fmt.Errorf("Payload provided but forbidden by parameterized job") 1697 } 1698 1699 // Check the payload doesn't exceed the size limit 1700 if l := len(req.Payload); l > DispatchPayloadSizeLimit { 1701 return fmt.Errorf("Payload exceeds maximum size; %d > %d", l, DispatchPayloadSizeLimit) 1702 } 1703 1704 // Check if the metadata is a set 1705 keys := make(map[string]struct{}, len(req.Meta)) 1706 for k := range keys { 1707 if _, ok := keys[k]; ok { 1708 return fmt.Errorf("Duplicate key %q in passed metadata", k) 1709 } 1710 keys[k] = struct{}{} 1711 } 1712 1713 required := helper.SliceStringToSet(job.ParameterizedJob.MetaRequired) 1714 optional := helper.SliceStringToSet(job.ParameterizedJob.MetaOptional) 1715 1716 // Check the metadata key constraints are met 1717 unpermitted := make(map[string]struct{}) 1718 for k := range req.Meta { 1719 _, req := required[k] 1720 _, opt := optional[k] 1721 if !req && !opt { 1722 unpermitted[k] = struct{}{} 1723 } 1724 } 1725 1726 if len(unpermitted) != 0 { 1727 flat := make([]string, 0, len(unpermitted)) 1728 for k := range unpermitted { 1729 flat = append(flat, k) 1730 } 1731 1732 return fmt.Errorf("Dispatch request included unpermitted metadata keys: %v", flat) 1733 } 1734 1735 missing := make(map[string]struct{}) 1736 for _, k := range job.ParameterizedJob.MetaRequired { 1737 if _, ok := req.Meta[k]; !ok { 1738 missing[k] = struct{}{} 1739 } 1740 } 1741 1742 if len(missing) != 0 { 1743 flat := make([]string, 0, len(missing)) 1744 for k := range missing { 1745 flat = 
append(flat, k) 1746 } 1747 1748 return fmt.Errorf("Dispatch did not provide required meta keys: %v", flat) 1749 } 1750 1751 return nil 1752 } 1753 1754 // ScaleStatus retrieves the scaling status for a job 1755 func (j *Job) ScaleStatus(args *structs.JobScaleStatusRequest, 1756 reply *structs.JobScaleStatusResponse) error { 1757 1758 if done, err := j.srv.forward("Job.ScaleStatus", args, args, reply); done { 1759 return err 1760 } 1761 defer metrics.MeasureSince([]string{"nomad", "job", "scale_status"}, time.Now()) 1762 1763 // Check for autoscaler permissions 1764 if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil { 1765 return err 1766 } else if aclObj != nil { 1767 hasReadJob := aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) 1768 hasReadJobScaling := aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJobScaling) 1769 if !(hasReadJob || hasReadJobScaling) { 1770 return structs.ErrPermissionDenied 1771 } 1772 } 1773 1774 // Setup the blocking query 1775 opts := blockingOptions{ 1776 queryOpts: &args.QueryOptions, 1777 queryMeta: &reply.QueryMeta, 1778 run: func(ws memdb.WatchSet, state *state.StateStore) error { 1779 1780 // We need the job and the job summary 1781 job, err := state.JobByID(ws, args.RequestNamespace(), args.JobID) 1782 if err != nil { 1783 return err 1784 } 1785 if job == nil { 1786 reply.JobScaleStatus = nil 1787 return nil 1788 } 1789 1790 events, eventsIndex, err := state.ScalingEventsByJob(ws, args.RequestNamespace(), args.JobID) 1791 if err != nil { 1792 return err 1793 } 1794 if events == nil { 1795 events = make(map[string][]*structs.ScalingEvent) 1796 } 1797 1798 var allocs []*structs.Allocation 1799 var allocsIndex uint64 1800 allocs, err = state.AllocsByJob(ws, job.Namespace, job.ID, false) 1801 if err != nil { 1802 return err 1803 } 1804 1805 // Setup the output 1806 reply.JobScaleStatus = &structs.JobScaleStatus{ 1807 JobID: job.ID, 1808 JobCreateIndex: job.CreateIndex, 1809 
JobModifyIndex: job.ModifyIndex, 1810 JobStopped: job.Stop, 1811 TaskGroups: make(map[string]*structs.TaskGroupScaleStatus), 1812 } 1813 1814 for _, tg := range job.TaskGroups { 1815 tgScale := &structs.TaskGroupScaleStatus{ 1816 Desired: tg.Count, 1817 } 1818 tgScale.Events = events[tg.Name] 1819 reply.JobScaleStatus.TaskGroups[tg.Name] = tgScale 1820 } 1821 1822 for _, alloc := range allocs { 1823 // TODO: ignore canaries until we figure out what we should do with canaries 1824 if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary { 1825 continue 1826 } 1827 if alloc.TerminalStatus() { 1828 continue 1829 } 1830 tgScale, ok := reply.JobScaleStatus.TaskGroups[alloc.TaskGroup] 1831 if !ok || tgScale == nil { 1832 continue 1833 } 1834 tgScale.Placed++ 1835 if alloc.ClientStatus == structs.AllocClientStatusRunning { 1836 tgScale.Running++ 1837 } 1838 if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.HasHealth() { 1839 if alloc.DeploymentStatus.IsHealthy() { 1840 tgScale.Healthy++ 1841 } else if alloc.DeploymentStatus.IsUnhealthy() { 1842 tgScale.Unhealthy++ 1843 } 1844 } 1845 if alloc.ModifyIndex > allocsIndex { 1846 allocsIndex = alloc.ModifyIndex 1847 } 1848 } 1849 1850 maxIndex := job.ModifyIndex 1851 if eventsIndex > maxIndex { 1852 maxIndex = eventsIndex 1853 } 1854 if allocsIndex > maxIndex { 1855 maxIndex = allocsIndex 1856 } 1857 reply.Index = maxIndex 1858 1859 // Set the query response 1860 j.srv.setQueryMeta(&reply.QueryMeta) 1861 return nil 1862 }} 1863 return j.srv.blockingRPC(&opts) 1864 }