github.com/adityamillind98/nomad@v0.11.8/nomad/job_endpoint.go (about)

     1  package nomad
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sort"
     7  	"strings"
     8  	"time"
     9  
    10  	metrics "github.com/armon/go-metrics"
    11  	log "github.com/hashicorp/go-hclog"
    12  	memdb "github.com/hashicorp/go-memdb"
    13  	multierror "github.com/hashicorp/go-multierror"
    14  
    15  	"github.com/golang/snappy"
    16  	"github.com/hashicorp/consul/lib"
    17  	"github.com/pkg/errors"
    18  
    19  	"github.com/hashicorp/nomad/acl"
    20  	"github.com/hashicorp/nomad/helper"
    21  	"github.com/hashicorp/nomad/helper/uuid"
    22  	"github.com/hashicorp/nomad/nomad/state"
    23  	"github.com/hashicorp/nomad/nomad/structs"
    24  	"github.com/hashicorp/nomad/scheduler"
    25  )
    26  
const (
	// RegisterEnforceIndexErrPrefix is the prefix to use in errors caused by
	// enforcing the job modify index during registers. Register places it at
	// the start of every error it returns when EnforceIndex is set and the
	// supplied JobModifyIndex does not match the state of the existing job.
	RegisterEnforceIndexErrPrefix = "Enforcing job modify index"

	// DispatchPayloadSizeLimit is the maximum size of the uncompressed input
	// data payload.
	// NOTE(review): the value is 16 * 1024, presumably bytes (16 KiB) --
	// confirm against the code that enforces the limit.
	DispatchPayloadSizeLimit = 16 * 1024
)
    36  
var (
	// allowForceRescheduleTransition is the transition that allows failed
	// allocations to be force rescheduled. We create a one off
	// variable to avoid creating a new object for every request.
	//
	// The same pointer is stored for every allocation that Evaluate marks for
	// a forced reschedule, so mutating it would affect all of those entries.
	allowForceRescheduleTransition = &structs.DesiredTransition{
		ForceReschedule: helper.BoolToPtr(true),
	}
)
    45  
// Job endpoint is used for job interactions
type Job struct {
	// srv is the server whose state, Raft log, and ACL resolution the
	// endpoint methods operate on.
	srv *Server
	// logger is the endpoint's logger; NewJobEndpoints sets it to the
	// server logger named "job".
	logger log.Logger

	// builtin admission controllers
	mutators   []jobMutator
	validators []jobValidator
}
    55  
    56  // NewJobEndpoints creates a new job endpoint with builtin admission controllers
    57  func NewJobEndpoints(s *Server) *Job {
    58  	return &Job{
    59  		srv:    s,
    60  		logger: s.logger.Named("job"),
    61  		mutators: []jobMutator{
    62  			jobCanonicalizer{},
    63  			jobConnectHook{},
    64  			jobExposeCheckHook{},
    65  			jobImpliedConstraints{},
    66  		},
    67  		validators: []jobValidator{
    68  			jobConnectHook{},
    69  			jobExposeCheckHook{},
    70  			jobValidate{},
    71  		},
    72  	}
    73  }
    74  
// Register is used to upsert a job for scheduling.
//
// If j.srv.forward reports the request as done, its error is returned
// directly. Otherwise the request goes through the following steps, in order,
// and the first failing step aborts the call:
//
//   - reject a nil job, or a job whose namespace differs from the request
//     namespace;
//   - run the admission controllers and replace args.Job with their result;
//   - when the token resolves to a non-nil ACL object, require submit-job on
//     the namespace, mount permissions for each task group volume, the CSI
//     register-plugin capability for tasks with a CSI plugin config, and the
//     sentinel-override capability when PolicyOverride is set;
//   - when EnforceIndex is set, compare JobModifyIndex with the existing job;
//   - validate the update against the existing job and assign scaling policy
//     IDs;
//   - when the job requests Vault policies, check them against the supplied
//     Vault token;
//   - check the Consul operator token for every Connect task;
//   - enforce Sentinel policies against a copy of the job;
//   - clear the Vault and Consul tokens, then commit the job via Raft if it is
//     new or its spec changed;
//   - unless the job is periodic or parameterized, create an evaluation via
//     Raft.
//
// On success reply holds the merged warnings and JobModifyIndex, plus EvalID,
// EvalCreateIndex, and Index when an evaluation was created.
func (j *Job) Register(args *structs.JobRegisterRequest, reply *structs.JobRegisterResponse) error {
	if done, err := j.srv.forward("Job.Register", args, args, reply); done {
		return err
	}
	defer metrics.MeasureSince([]string{"nomad", "job", "register"}, time.Now())

	// Validate the arguments
	if args.Job == nil {
		return fmt.Errorf("missing job for registration")
	}

	// defensive check; http layer and RPC requester should ensure namespaces are set consistently
	if args.RequestNamespace() != args.Job.Namespace {
		return fmt.Errorf("mismatched request namespace in request: %q, %q", args.RequestNamespace(), args.Job.Namespace)
	}

	// Run admission controllers
	job, warnings, err := j.admissionControllers(args.Job)
	if err != nil {
		return err
	}
	// From here on the request carries the job returned by the admission
	// controllers; this is also the job that is committed via Raft below.
	args.Job = job

	// Set the warning message
	reply.Warnings = structs.MergeMultierrorWarnings(warnings...)

	// Check job submission permissions. All checks in this branch are skipped
	// when the resolved ACL object is nil.
	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
		return err
	} else if aclObj != nil {
		if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
			return structs.ErrPermissionDenied
		}

		// Validate Volume Permissions
		for _, tg := range args.Job.TaskGroups {
			for _, vol := range tg.Volumes {
				switch vol.Type {
				case structs.VolumeTypeCSI:
					if !allowCSIMount(aclObj, args.RequestNamespace()) {
						return structs.ErrPermissionDenied
					}
				case structs.VolumeTypeHost:
					// If a volume is readonly, then we allow access if the user has ReadOnly
					// or ReadWrite access to the volume. Otherwise we only allow access if
					// they have ReadWrite access.
					if vol.ReadOnly {
						if !aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadOnly) &&
							!aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadWrite) {
							return structs.ErrPermissionDenied
						}
					} else {
						if !aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadWrite) {
							return structs.ErrPermissionDenied
						}
					}
				default:
					// Unknown volume types are denied rather than allowed.
					return structs.ErrPermissionDenied
				}
			}

			for _, t := range tg.Tasks {
				for _, vm := range t.VolumeMounts {
					// NOTE(review): vol is nil when the mount names a volume that is
					// not in tg.Volumes, and vol.Source would then panic. Presumably
					// the admission controllers reject such jobs earlier -- confirm.
					vol := tg.Volumes[vm.Volume]
					// Bidirectional propagation requires read-write access to the
					// volume source.
					if vm.PropagationMode == structs.VolumeMountPropagationBidirectional &&
						!aclObj.AllowHostVolumeOperation(vol.Source, acl.HostVolumeCapabilityMountReadWrite) {
						return structs.ErrPermissionDenied
					}
				}

				if t.CSIPluginConfig != nil {
					if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityCSIRegisterPlugin) {
						return structs.ErrPermissionDenied
					}
				}
			}
		}

		// Check if override is set and we do not have permissions
		if args.PolicyOverride {
			if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySentinelOverride) {
				j.logger.Warn("policy override attempted without permissions for job", "job", args.Job.ID)
				return structs.ErrPermissionDenied
			}
			j.logger.Warn("policy override set for job", "job", args.Job.ID)
		}
	}

	// Lookup the job
	snap, err := j.srv.State().Snapshot()
	if err != nil {
		return err
	}
	ws := memdb.NewWatchSet()
	existingJob, err := snap.JobByID(ws, args.RequestNamespace(), args.Job.ID)
	if err != nil {
		return err
	}

	// If EnforceIndex set, check it before trying to apply.
	// A JobModifyIndex of 0 means "the job must not exist yet"; any other
	// value must equal the existing job's JobModifyIndex.
	if args.EnforceIndex {
		jmi := args.JobModifyIndex
		if existingJob != nil {
			if jmi == 0 {
				return fmt.Errorf("%s 0: job already exists", RegisterEnforceIndexErrPrefix)
			} else if jmi != existingJob.JobModifyIndex {
				return fmt.Errorf("%s %d: job exists with conflicting job modify index: %d",
					RegisterEnforceIndexErrPrefix, jmi, existingJob.JobModifyIndex)
			}
		} else if jmi != 0 {
			return fmt.Errorf("%s %d: job does not exist", RegisterEnforceIndexErrPrefix, jmi)
		}
	}

	// Validate job transitions if it's an update
	if err := validateJobUpdate(existingJob, args.Job); err != nil {
		return err
	}

	// Ensure that all scaling policies have an appropriate ID
	if err := propagateScalingPolicyIDs(existingJob, args.Job); err != nil {
		return err
	}

	// Ensure that the job has permissions for the requested Vault tokens
	policies := args.Job.VaultPolicies()
	if len(policies) != 0 {
		vconf := j.srv.config.VaultConfig
		if !vconf.IsEnabled() {
			return fmt.Errorf("Vault not enabled and Vault policies requested")
		}

		// Have to check if the user has permissions
		if !vconf.AllowsUnauthenticated() {
			if args.Job.VaultToken == "" {
				return fmt.Errorf("Vault policies requested but missing Vault Token")
			}

			vault := j.srv.vault
			s, err := vault.LookupToken(context.Background(), args.Job.VaultToken)
			if err != nil {
				return err
			}

			allowedPolicies, err := PoliciesFrom(s)
			if err != nil {
				return err
			}

			// If we are given a root token it can access all policies
			if !lib.StrContains(allowedPolicies, "root") {
				flatPolicies := structs.VaultPoliciesSet(policies)
				subset, offending := helper.SliceStringIsSubset(allowedPolicies, flatPolicies)
				if !subset {
					return fmt.Errorf("Passed Vault Token doesn't allow access to the following policies: %s",
						strings.Join(offending, ", "))
				}
			}
		}
	}

	// helper function that checks if the "operator token" supplied with the
	// job has sufficient ACL permissions for establishing consul connect services
	checkOperatorToken := func(task string) error {
		if j.srv.config.ConsulConfig.AllowsUnauthenticated() {
			// if consul.allow_unauthenticated is enabled (which is the default)
			// just let the Job through without checking anything.
			return nil
		}
		// The policy check is made against the task name with the connect
		// proxy prefix (and the following "-") removed.
		proxiedTask := strings.TrimPrefix(task, structs.ConnectProxyPrefix+"-")
		ctx := context.Background()
		if err := j.srv.consulACLs.CheckSIPolicy(ctx, proxiedTask, args.Job.ConsulToken); err != nil {
			// not much in the way of exported error types, we could parse
			// the content, but all errors are going to be failures anyway
			return errors.Wrap(err, "operator token denied")
		}
		return nil
	}

	// Enforce that the operator has necessary Consul ACL permissions
	for _, tg := range args.Job.ConnectTasks() {
		for _, task := range tg {
			if err := checkOperatorToken(task); err != nil {
				return err
			}
		}
	}

	// Enforce Sentinel policies. Pass a copy of the job to prevent
	// sentinel from altering it.
	policyWarnings, err := j.enforceSubmitJob(args.PolicyOverride, args.Job.Copy())
	if err != nil {
		return err
	}
	if policyWarnings != nil {
		// Re-merge so the reply contains both the admission controller
		// warnings and the policy warnings.
		warnings = append(warnings, policyWarnings)
		reply.Warnings = structs.MergeMultierrorWarnings(warnings...)
	}

	// Clear the Vault token
	args.Job.VaultToken = ""

	// Clear the Consul token
	args.Job.ConsulToken = ""

	// Check if the job has changed at all. An unchanged job is not written
	// again; the reply then reports the existing JobModifyIndex.
	if existingJob == nil || existingJob.SpecChanged(args.Job) {
		// Set the submit time
		args.Job.SetSubmitTime()

		// Commit this update via Raft
		fsmErr, index, err := j.srv.raftApply(structs.JobRegisterRequestType, args)
		// The first return value is checked before err: if it holds a non-nil
		// error, that error is logged and returned.
		if err, ok := fsmErr.(error); ok && err != nil {
			j.logger.Error("registering job failed", "error", err, "fsm", true)
			return err
		}
		if err != nil {
			j.logger.Error("registering job failed", "error", err, "raft", true)
			return err
		}

		// Populate the reply with job information
		reply.JobModifyIndex = index
	} else {
		reply.JobModifyIndex = existingJob.JobModifyIndex
	}

	// If the job is periodic or parameterized, we don't create an eval.
	if args.Job.IsPeriodic() || args.Job.IsParameterized() {
		return nil
	}

	// Create a new evaluation
	now := time.Now().UTC().UnixNano()
	eval := &structs.Evaluation{
		ID:             uuid.Generate(),
		Namespace:      args.RequestNamespace(),
		Priority:       args.Job.Priority,
		Type:           args.Job.Type,
		TriggeredBy:    structs.EvalTriggerJobRegister,
		JobID:          args.Job.ID,
		JobModifyIndex: reply.JobModifyIndex,
		Status:         structs.EvalStatusPending,
		CreateTime:     now,
		ModifyTime:     now,
	}
	update := &structs.EvalUpdateRequest{
		Evals:        []*structs.Evaluation{eval},
		WriteRequest: structs.WriteRequest{Region: args.Region},
	}

	// Commit this evaluation via Raft
	// XXX: There is a risk of partial failure where the JobRegister succeeds
	// but that the EvalUpdate does not.
	// The first return value of raftApply is discarded here; only err is
	// inspected.
	_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
	if err != nil {
		j.logger.Error("eval create failed", "error", err, "method", "register")
		return err
	}

	// Populate the reply with eval information
	reply.EvalID = eval.ID
	reply.EvalCreateIndex = evalIndex
	reply.Index = evalIndex
	return nil
}
   342  
   343  // propagateScalingPolicyIDs propagates scaling policy IDs from existing job
   344  // to updated job, or generates random IDs in new job
   345  func propagateScalingPolicyIDs(old, new *structs.Job) error {
   346  
   347  	oldIDs := make(map[string]string)
   348  	if old != nil {
   349  		// jobs currently only have scaling policies on task groups, so we can
   350  		// find correspondences using task group names
   351  		for _, p := range old.GetScalingPolicies() {
   352  			oldIDs[p.Target[structs.ScalingTargetGroup]] = p.ID
   353  		}
   354  	}
   355  
   356  	// ignore any existing ID in the policy, they should be empty
   357  	for _, p := range new.GetScalingPolicies() {
   358  		if id, ok := oldIDs[p.Target[structs.ScalingTargetGroup]]; ok {
   359  			p.ID = id
   360  		} else {
   361  			p.ID = uuid.Generate()
   362  		}
   363  	}
   364  
   365  	return nil
   366  }
   367  
   368  // getSignalConstraint builds a suitable constraint based on the required
   369  // signals
   370  func getSignalConstraint(signals []string) *structs.Constraint {
   371  	sort.Strings(signals)
   372  	return &structs.Constraint{
   373  		Operand: structs.ConstraintSetContains,
   374  		LTarget: "${attr.os.signals}",
   375  		RTarget: strings.Join(signals, ","),
   376  	}
   377  }
   378  
   379  // Summary retrieves the summary of a job
   380  func (j *Job) Summary(args *structs.JobSummaryRequest,
   381  	reply *structs.JobSummaryResponse) error {
   382  
   383  	if done, err := j.srv.forward("Job.Summary", args, args, reply); done {
   384  		return err
   385  	}
   386  	defer metrics.MeasureSince([]string{"nomad", "job_summary", "get_job_summary"}, time.Now())
   387  
   388  	// Check for read-job permissions
   389  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
   390  		return err
   391  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
   392  		return structs.ErrPermissionDenied
   393  	}
   394  
   395  	// Setup the blocking query
   396  	opts := blockingOptions{
   397  		queryOpts: &args.QueryOptions,
   398  		queryMeta: &reply.QueryMeta,
   399  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
   400  			// Look for job summary
   401  			out, err := state.JobSummaryByID(ws, args.RequestNamespace(), args.JobID)
   402  			if err != nil {
   403  				return err
   404  			}
   405  
   406  			// Setup the output
   407  			reply.JobSummary = out
   408  			if out != nil {
   409  				reply.Index = out.ModifyIndex
   410  			} else {
   411  				// Use the last index that affected the job_summary table
   412  				index, err := state.Index("job_summary")
   413  				if err != nil {
   414  					return err
   415  				}
   416  				reply.Index = index
   417  			}
   418  
   419  			// Set the query response
   420  			j.srv.setQueryMeta(&reply.QueryMeta)
   421  			return nil
   422  		}}
   423  	return j.srv.blockingRPC(&opts)
   424  }
   425  
   426  // Validate validates a job
   427  func (j *Job) Validate(args *structs.JobValidateRequest, reply *structs.JobValidateResponse) error {
   428  	defer metrics.MeasureSince([]string{"nomad", "job", "validate"}, time.Now())
   429  
   430  	// defensive check; http layer and RPC requester should ensure namespaces are set consistently
   431  	if args.RequestNamespace() != args.Job.Namespace {
   432  		return fmt.Errorf("mismatched request namespace in request: %q, %q", args.RequestNamespace(), args.Job.Namespace)
   433  	}
   434  
   435  	job, mutateWarnings, err := j.admissionMutators(args.Job)
   436  	if err != nil {
   437  		return err
   438  	}
   439  	args.Job = job
   440  
   441  	// Check for read-job permissions
   442  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
   443  		return err
   444  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
   445  		return structs.ErrPermissionDenied
   446  	}
   447  
   448  	// Validate the job and capture any warnings
   449  	validateWarnings, err := j.admissionValidators(args.Job)
   450  	if err != nil {
   451  		if merr, ok := err.(*multierror.Error); ok {
   452  			for _, err := range merr.Errors {
   453  				reply.ValidationErrors = append(reply.ValidationErrors, err.Error())
   454  			}
   455  			reply.Error = merr.Error()
   456  		} else {
   457  			reply.ValidationErrors = append(reply.ValidationErrors, err.Error())
   458  			reply.Error = err.Error()
   459  		}
   460  	}
   461  
   462  	validateWarnings = append(validateWarnings, mutateWarnings...)
   463  
   464  	// Set the warning message
   465  	reply.Warnings = structs.MergeMultierrorWarnings(validateWarnings...)
   466  	reply.DriverConfigValidated = true
   467  	return nil
   468  }
   469  
   470  // Revert is used to revert the job to a prior version
   471  func (j *Job) Revert(args *structs.JobRevertRequest, reply *structs.JobRegisterResponse) error {
   472  	if done, err := j.srv.forward("Job.Revert", args, args, reply); done {
   473  		return err
   474  	}
   475  	defer metrics.MeasureSince([]string{"nomad", "job", "revert"}, time.Now())
   476  
   477  	// Check for submit-job permissions
   478  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
   479  		return err
   480  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
   481  		return structs.ErrPermissionDenied
   482  	}
   483  
   484  	// Validate the arguments
   485  	if args.JobID == "" {
   486  		return fmt.Errorf("missing job ID for revert")
   487  	}
   488  
   489  	// Lookup the job by version
   490  	snap, err := j.srv.fsm.State().Snapshot()
   491  	if err != nil {
   492  		return err
   493  	}
   494  
   495  	ws := memdb.NewWatchSet()
   496  	cur, err := snap.JobByID(ws, args.RequestNamespace(), args.JobID)
   497  	if err != nil {
   498  		return err
   499  	}
   500  	if cur == nil {
   501  		return fmt.Errorf("job %q not found", args.JobID)
   502  	}
   503  	if args.JobVersion == cur.Version {
   504  		return fmt.Errorf("can't revert to current version")
   505  	}
   506  
   507  	jobV, err := snap.JobByIDAndVersion(ws, args.RequestNamespace(), args.JobID, args.JobVersion)
   508  	if err != nil {
   509  		return err
   510  	}
   511  	if jobV == nil {
   512  		return fmt.Errorf("job %q in namespace %q at version %d not found", args.JobID, args.RequestNamespace(), args.JobVersion)
   513  	}
   514  
   515  	// Build the register request
   516  	revJob := jobV.Copy()
   517  	// Use Vault Token from revert request to perform registration of reverted job.
   518  	revJob.VaultToken = args.VaultToken
   519  	reg := &structs.JobRegisterRequest{
   520  		Job:          revJob,
   521  		WriteRequest: args.WriteRequest,
   522  	}
   523  
   524  	// If the request is enforcing the existing version do a check.
   525  	if args.EnforcePriorVersion != nil {
   526  		if cur.Version != *args.EnforcePriorVersion {
   527  			return fmt.Errorf("Current job has version %d; enforcing version %d", cur.Version, *args.EnforcePriorVersion)
   528  		}
   529  
   530  		reg.EnforceIndex = true
   531  		reg.JobModifyIndex = cur.JobModifyIndex
   532  	}
   533  
   534  	// Register the version.
   535  	return j.Register(reg, reply)
   536  }
   537  
   538  // Stable is used to mark the job version as stable
   539  func (j *Job) Stable(args *structs.JobStabilityRequest, reply *structs.JobStabilityResponse) error {
   540  	if done, err := j.srv.forward("Job.Stable", args, args, reply); done {
   541  		return err
   542  	}
   543  	defer metrics.MeasureSince([]string{"nomad", "job", "stable"}, time.Now())
   544  
   545  	// Check for read-job permissions
   546  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
   547  		return err
   548  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
   549  		return structs.ErrPermissionDenied
   550  	}
   551  
   552  	// Validate the arguments
   553  	if args.JobID == "" {
   554  		return fmt.Errorf("missing job ID for marking job as stable")
   555  	}
   556  
   557  	// Lookup the job by version
   558  	snap, err := j.srv.fsm.State().Snapshot()
   559  	if err != nil {
   560  		return err
   561  	}
   562  
   563  	ws := memdb.NewWatchSet()
   564  	jobV, err := snap.JobByIDAndVersion(ws, args.RequestNamespace(), args.JobID, args.JobVersion)
   565  	if err != nil {
   566  		return err
   567  	}
   568  	if jobV == nil {
   569  		return fmt.Errorf("job %q in namespace %q at version %d not found", args.JobID, args.RequestNamespace(), args.JobVersion)
   570  	}
   571  
   572  	// Commit this stability request via Raft
   573  	_, modifyIndex, err := j.srv.raftApply(structs.JobStabilityRequestType, args)
   574  	if err != nil {
   575  		j.logger.Error("submitting job stability request failed", "error", err)
   576  		return err
   577  	}
   578  
   579  	// Setup the reply
   580  	reply.Index = modifyIndex
   581  	return nil
   582  }
   583  
   584  // Evaluate is used to force a job for re-evaluation
   585  func (j *Job) Evaluate(args *structs.JobEvaluateRequest, reply *structs.JobRegisterResponse) error {
   586  	if done, err := j.srv.forward("Job.Evaluate", args, args, reply); done {
   587  		return err
   588  	}
   589  	defer metrics.MeasureSince([]string{"nomad", "job", "evaluate"}, time.Now())
   590  
   591  	// Check for read-job permissions
   592  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
   593  		return err
   594  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
   595  		return structs.ErrPermissionDenied
   596  	}
   597  
   598  	// Validate the arguments
   599  	if args.JobID == "" {
   600  		return fmt.Errorf("missing job ID for evaluation")
   601  	}
   602  
   603  	// Lookup the job
   604  	snap, err := j.srv.fsm.State().Snapshot()
   605  	if err != nil {
   606  		return err
   607  	}
   608  	ws := memdb.NewWatchSet()
   609  	job, err := snap.JobByID(ws, args.RequestNamespace(), args.JobID)
   610  	if err != nil {
   611  		return err
   612  	}
   613  	if job == nil {
   614  		return fmt.Errorf("job not found")
   615  	}
   616  
   617  	if job.IsPeriodic() {
   618  		return fmt.Errorf("can't evaluate periodic job")
   619  	} else if job.IsParameterized() {
   620  		return fmt.Errorf("can't evaluate parameterized job")
   621  	}
   622  
   623  	forceRescheduleAllocs := make(map[string]*structs.DesiredTransition)
   624  
   625  	if args.EvalOptions.ForceReschedule {
   626  		// Find any failed allocs that could be force rescheduled
   627  		allocs, err := snap.AllocsByJob(ws, args.RequestNamespace(), args.JobID, false)
   628  		if err != nil {
   629  			return err
   630  		}
   631  
   632  		for _, alloc := range allocs {
   633  			taskGroup := job.LookupTaskGroup(alloc.TaskGroup)
   634  			// Forcing rescheduling is only allowed if task group has rescheduling enabled
   635  			if taskGroup == nil || !taskGroup.ReschedulePolicy.Enabled() {
   636  				continue
   637  			}
   638  
   639  			if alloc.NextAllocation == "" && alloc.ClientStatus == structs.AllocClientStatusFailed && !alloc.DesiredTransition.ShouldForceReschedule() {
   640  				forceRescheduleAllocs[alloc.ID] = allowForceRescheduleTransition
   641  			}
   642  		}
   643  	}
   644  
   645  	// Create a new evaluation
   646  	now := time.Now().UTC().UnixNano()
   647  	eval := &structs.Evaluation{
   648  		ID:             uuid.Generate(),
   649  		Namespace:      args.RequestNamespace(),
   650  		Priority:       job.Priority,
   651  		Type:           job.Type,
   652  		TriggeredBy:    structs.EvalTriggerJobRegister,
   653  		JobID:          job.ID,
   654  		JobModifyIndex: job.ModifyIndex,
   655  		Status:         structs.EvalStatusPending,
   656  		CreateTime:     now,
   657  		ModifyTime:     now,
   658  	}
   659  
   660  	// Create a AllocUpdateDesiredTransitionRequest request with the eval and any forced rescheduled allocs
   661  	updateTransitionReq := &structs.AllocUpdateDesiredTransitionRequest{
   662  		Allocs: forceRescheduleAllocs,
   663  		Evals:  []*structs.Evaluation{eval},
   664  	}
   665  	_, evalIndex, err := j.srv.raftApply(structs.AllocUpdateDesiredTransitionRequestType, updateTransitionReq)
   666  
   667  	if err != nil {
   668  		j.logger.Error("eval create failed", "error", err, "method", "evaluate")
   669  		return err
   670  	}
   671  
   672  	// Setup the reply
   673  	reply.EvalID = eval.ID
   674  	reply.EvalCreateIndex = evalIndex
   675  	reply.JobModifyIndex = job.ModifyIndex
   676  	reply.Index = evalIndex
   677  	return nil
   678  }
   679  
   680  // Deregister is used to remove a job the cluster.
   681  func (j *Job) Deregister(args *structs.JobDeregisterRequest, reply *structs.JobDeregisterResponse) error {
   682  	if done, err := j.srv.forward("Job.Deregister", args, args, reply); done {
   683  		return err
   684  	}
   685  	defer metrics.MeasureSince([]string{"nomad", "job", "deregister"}, time.Now())
   686  
   687  	// Check for submit-job permissions
   688  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
   689  		return err
   690  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
   691  		return structs.ErrPermissionDenied
   692  	}
   693  
   694  	// Validate the arguments
   695  	if args.JobID == "" {
   696  		return fmt.Errorf("missing job ID for deregistering")
   697  	}
   698  
   699  	// Lookup the job
   700  	snap, err := j.srv.fsm.State().Snapshot()
   701  	if err != nil {
   702  		return err
   703  	}
   704  	ws := memdb.NewWatchSet()
   705  	job, err := snap.JobByID(ws, args.RequestNamespace(), args.JobID)
   706  	if err != nil {
   707  		return err
   708  	}
   709  
   710  	// For a job with volumes, find its volumes before deleting the job.
   711  	// Later we'll apply this raft.
   712  	volumesToGC := newCSIBatchRelease(j.srv, j.logger, 100)
   713  	if job != nil {
   714  		for _, tg := range job.TaskGroups {
   715  			for _, vol := range tg.Volumes {
   716  				if vol.Type == structs.VolumeTypeCSI {
   717  					volumesToGC.add(vol.Source, job.Namespace)
   718  				}
   719  			}
   720  		}
   721  	}
   722  
   723  	// Commit the job update via Raft
   724  	_, index, err := j.srv.raftApply(structs.JobDeregisterRequestType, args)
   725  	if err != nil {
   726  		j.logger.Error("deregister failed", "error", err)
   727  		return err
   728  	}
   729  
   730  	// Populate the reply with job information
   731  	reply.JobModifyIndex = index
   732  
   733  	// Make a raft apply to release the CSI volume claims of terminal allocs.
   734  	var result *multierror.Error
   735  	err = volumesToGC.apply()
   736  	if err != nil {
   737  		result = multierror.Append(result, err)
   738  	}
   739  
   740  	// If the job is periodic or parameterized, we don't create an eval.
   741  	if job != nil && (job.IsPeriodic() || job.IsParameterized()) {
   742  		return nil
   743  	}
   744  
   745  	// Create a new evaluation
   746  	// XXX: The job priority / type is strange for this, since it's not a high
   747  	// priority even if the job was.
   748  	now := time.Now().UTC().UnixNano()
   749  	eval := &structs.Evaluation{
   750  		ID:             uuid.Generate(),
   751  		Namespace:      args.RequestNamespace(),
   752  		Priority:       structs.JobDefaultPriority,
   753  		Type:           structs.JobTypeService,
   754  		TriggeredBy:    structs.EvalTriggerJobDeregister,
   755  		JobID:          args.JobID,
   756  		JobModifyIndex: index,
   757  		Status:         structs.EvalStatusPending,
   758  		CreateTime:     now,
   759  		ModifyTime:     now,
   760  	}
   761  	update := &structs.EvalUpdateRequest{
   762  		Evals:        []*structs.Evaluation{eval},
   763  		WriteRequest: structs.WriteRequest{Region: args.Region},
   764  	}
   765  
   766  	// Commit this evaluation via Raft
   767  	_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
   768  	if err != nil {
   769  		result = multierror.Append(result, err)
   770  		j.logger.Error("eval create failed", "error", err, "method", "deregister")
   771  		return result.ErrorOrNil()
   772  	}
   773  
   774  	// Populate the reply with eval information
   775  	reply.EvalID = eval.ID
   776  	reply.EvalCreateIndex = evalIndex
   777  	reply.Index = evalIndex
   778  	return result.ErrorOrNil()
   779  }
   780  
   781  // BatchDeregister is used to remove a set of jobs from the cluster.
   782  func (j *Job) BatchDeregister(args *structs.JobBatchDeregisterRequest, reply *structs.JobBatchDeregisterResponse) error {
   783  	if done, err := j.srv.forward("Job.BatchDeregister", args, args, reply); done {
   784  		return err
   785  	}
   786  	defer metrics.MeasureSince([]string{"nomad", "job", "batch_deregister"}, time.Now())
   787  
   788  	// Resolve the ACL token
   789  	aclObj, err := j.srv.ResolveToken(args.AuthToken)
   790  	if err != nil {
   791  		return err
   792  	}
   793  
   794  	// Validate the arguments
   795  	if len(args.Jobs) == 0 {
   796  		return fmt.Errorf("given no jobs to deregister")
   797  	}
   798  	if len(args.Evals) != 0 {
   799  		return fmt.Errorf("evaluations should not be populated")
   800  	}
   801  
   802  	// Loop through checking for permissions
   803  	for jobNS := range args.Jobs {
   804  		// Check for submit-job permissions
   805  		if aclObj != nil && !aclObj.AllowNsOp(jobNS.Namespace, acl.NamespaceCapabilitySubmitJob) {
   806  			return structs.ErrPermissionDenied
   807  		}
   808  	}
   809  
   810  	// Grab a snapshot
   811  	snap, err := j.srv.fsm.State().Snapshot()
   812  	if err != nil {
   813  		return err
   814  	}
   815  
   816  	// Loop through to create evals
   817  	for jobNS, options := range args.Jobs {
   818  		if options == nil {
   819  			return fmt.Errorf("no deregister options provided for %v", jobNS)
   820  		}
   821  
   822  		job, err := snap.JobByID(nil, jobNS.Namespace, jobNS.ID)
   823  		if err != nil {
   824  			return err
   825  		}
   826  
   827  		// If the job is periodic or parameterized, we don't create an eval.
   828  		if job != nil && (job.IsPeriodic() || job.IsParameterized()) {
   829  			continue
   830  		}
   831  
   832  		priority := structs.JobDefaultPriority
   833  		jtype := structs.JobTypeService
   834  		if job != nil {
   835  			priority = job.Priority
   836  			jtype = job.Type
   837  		}
   838  
   839  		// Create a new evaluation
   840  		now := time.Now().UTC().UnixNano()
   841  		eval := &structs.Evaluation{
   842  			ID:          uuid.Generate(),
   843  			Namespace:   jobNS.Namespace,
   844  			Priority:    priority,
   845  			Type:        jtype,
   846  			TriggeredBy: structs.EvalTriggerJobDeregister,
   847  			JobID:       jobNS.ID,
   848  			Status:      structs.EvalStatusPending,
   849  			CreateTime:  now,
   850  			ModifyTime:  now,
   851  		}
   852  		args.Evals = append(args.Evals, eval)
   853  	}
   854  
   855  	// Commit this update via Raft
   856  	_, index, err := j.srv.raftApply(structs.JobBatchDeregisterRequestType, args)
   857  	if err != nil {
   858  		j.logger.Error("batch deregister failed", "error", err)
   859  		return err
   860  	}
   861  
   862  	reply.Index = index
   863  	return nil
   864  }
   865  
   866  // Scale is used to modify one of the scaling targets in the job
   867  func (j *Job) Scale(args *structs.JobScaleRequest, reply *structs.JobRegisterResponse) error {
   868  	if done, err := j.srv.forward("Job.Scale", args, args, reply); done {
   869  		return err
   870  	}
   871  	defer metrics.MeasureSince([]string{"nomad", "job", "scale"}, time.Now())
   872  
   873  	// Validate the arguments
   874  	namespace := args.Target[structs.ScalingTargetNamespace]
   875  	jobID := args.Target[structs.ScalingTargetJob]
   876  	groupName := args.Target[structs.ScalingTargetGroup]
   877  	if namespace != "" && namespace != args.RequestNamespace() {
   878  		return structs.NewErrRPCCoded(400, "namespace in payload did not match header")
   879  	} else if namespace == "" {
   880  		namespace = args.RequestNamespace()
   881  	}
   882  	if jobID != "" && jobID != args.JobID {
   883  		return fmt.Errorf("job ID in payload did not match URL")
   884  	}
   885  	if groupName == "" {
   886  		return structs.NewErrRPCCoded(400, "missing task group name for scaling action")
   887  	}
   888  	if args.Error && args.Count != nil {
   889  		return structs.NewErrRPCCoded(400, "scaling action should not contain count if error is true")
   890  	}
   891  	if args.Count != nil && *args.Count < 0 {
   892  		return structs.NewErrRPCCoded(400, "scaling action count can't be negative")
   893  	}
   894  
   895  	// Check for submit-job permissions
   896  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
   897  		return err
   898  	} else if aclObj != nil {
   899  		hasScaleJob := aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityScaleJob)
   900  		hasSubmitJob := aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob)
   901  		if !(hasScaleJob || hasSubmitJob) {
   902  			return structs.ErrPermissionDenied
   903  		}
   904  	}
   905  
   906  	// Lookup the job
   907  	snap, err := j.srv.fsm.State().Snapshot()
   908  	if err != nil {
   909  		return err
   910  	}
   911  	ws := memdb.NewWatchSet()
   912  	job, err := snap.JobByID(ws, namespace, args.JobID)
   913  	if err != nil {
   914  		return err
   915  	}
   916  	if job == nil {
   917  		return structs.NewErrRPCCoded(404, fmt.Sprintf("job %q not found", args.JobID))
   918  	}
   919  
   920  	var found *structs.TaskGroup
   921  	for _, tg := range job.TaskGroups {
   922  		if groupName == tg.Name {
   923  			found = tg
   924  			break
   925  		}
   926  	}
   927  	if found == nil {
   928  		return structs.NewErrRPCCoded(400,
   929  			fmt.Sprintf("task group %q specified for scaling does not exist in job", groupName))
   930  	}
   931  
   932  	now := time.Now().UTC().UnixNano()
   933  
   934  	// If the count is present, commit the job update via Raft
   935  	// for now, we'll do this even if count didn't change
   936  	if args.Count != nil {
   937  		truncCount := int(*args.Count)
   938  		if int64(truncCount) != *args.Count {
   939  			return structs.NewErrRPCCoded(400,
   940  				fmt.Sprintf("new scaling count is too large for TaskGroup.Count (int): %v", args.Count))
   941  		}
   942  		found.Count = truncCount
   943  
   944  		registerReq := structs.JobRegisterRequest{
   945  			Job:            job,
   946  			EnforceIndex:   true,
   947  			JobModifyIndex: job.ModifyIndex,
   948  			PolicyOverride: args.PolicyOverride,
   949  			WriteRequest:   args.WriteRequest,
   950  		}
   951  		_, jobModifyIndex, err := j.srv.raftApply(structs.JobRegisterRequestType, registerReq)
   952  		if err != nil {
   953  			j.logger.Error("job register for scale failed", "error", err)
   954  			return err
   955  		}
   956  		reply.JobModifyIndex = jobModifyIndex
   957  	} else {
   958  		reply.JobModifyIndex = job.ModifyIndex
   959  	}
   960  
   961  	// Only create an eval for non-dispatch jobs and if the count was provided
   962  	// for now, we'll do this even if count didn't change
   963  	if !job.IsPeriodic() && !job.IsParameterized() && args.Count != nil {
   964  		eval := &structs.Evaluation{
   965  			ID:             uuid.Generate(),
   966  			Namespace:      args.RequestNamespace(),
   967  			Priority:       structs.JobDefaultPriority,
   968  			Type:           structs.JobTypeService,
   969  			TriggeredBy:    structs.EvalTriggerScaling,
   970  			JobID:          args.JobID,
   971  			JobModifyIndex: reply.JobModifyIndex,
   972  			Status:         structs.EvalStatusPending,
   973  			CreateTime:     now,
   974  			ModifyTime:     now,
   975  		}
   976  		update := &structs.EvalUpdateRequest{
   977  			Evals:        []*structs.Evaluation{eval},
   978  			WriteRequest: structs.WriteRequest{Region: args.Region},
   979  		}
   980  
   981  		// Commit this evaluation via Raft
   982  		_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
   983  		if err != nil {
   984  			j.logger.Error("eval create failed", "error", err, "method", "scale")
   985  			return err
   986  		}
   987  
   988  		reply.EvalID = eval.ID
   989  		reply.EvalCreateIndex = evalIndex
   990  	} else {
   991  		reply.EvalID = ""
   992  		reply.EvalCreateIndex = 0
   993  	}
   994  
   995  	event := &structs.ScalingEventRequest{
   996  		Namespace: job.Namespace,
   997  		JobID:     job.ID,
   998  		TaskGroup: groupName,
   999  		ScalingEvent: &structs.ScalingEvent{
  1000  			Time:    now,
  1001  			Count:   args.Count,
  1002  			Message: args.Message,
  1003  			Error:   args.Error,
  1004  			Meta:    args.Meta,
  1005  		},
  1006  	}
  1007  	if reply.EvalID != "" {
  1008  		event.ScalingEvent.EvalID = &reply.EvalID
  1009  	}
  1010  	_, eventIndex, err := j.srv.raftApply(structs.ScalingEventRegisterRequestType, event)
  1011  	if err != nil {
  1012  		j.logger.Error("scaling event create failed", "error", err)
  1013  		return err
  1014  	}
  1015  
  1016  	reply.Index = eventIndex
  1017  	j.srv.setQueryMeta(&reply.QueryMeta)
  1018  	return nil
  1019  }
  1020  
  1021  // GetJob is used to request information about a specific job
  1022  func (j *Job) GetJob(args *structs.JobSpecificRequest,
  1023  	reply *structs.SingleJobResponse) error {
  1024  	if done, err := j.srv.forward("Job.GetJob", args, args, reply); done {
  1025  		return err
  1026  	}
  1027  	defer metrics.MeasureSince([]string{"nomad", "job", "get_job"}, time.Now())
  1028  
  1029  	// Check for read-job permissions
  1030  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1031  		return err
  1032  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
  1033  		return structs.ErrPermissionDenied
  1034  	}
  1035  
  1036  	// Setup the blocking query
  1037  	opts := blockingOptions{
  1038  		queryOpts: &args.QueryOptions,
  1039  		queryMeta: &reply.QueryMeta,
  1040  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1041  			// Look for the job
  1042  			out, err := state.JobByID(ws, args.RequestNamespace(), args.JobID)
  1043  			if err != nil {
  1044  				return err
  1045  			}
  1046  
  1047  			// Setup the output
  1048  			reply.Job = out
  1049  			if out != nil {
  1050  				reply.Index = out.ModifyIndex
  1051  			} else {
  1052  				// Use the last index that affected the nodes table
  1053  				index, err := state.Index("jobs")
  1054  				if err != nil {
  1055  					return err
  1056  				}
  1057  				reply.Index = index
  1058  			}
  1059  
  1060  			// Set the query response
  1061  			j.srv.setQueryMeta(&reply.QueryMeta)
  1062  			return nil
  1063  		}}
  1064  	return j.srv.blockingRPC(&opts)
  1065  }
  1066  
  1067  // GetJobVersions is used to retrieve all tracked versions of a job.
  1068  func (j *Job) GetJobVersions(args *structs.JobVersionsRequest,
  1069  	reply *structs.JobVersionsResponse) error {
  1070  	if done, err := j.srv.forward("Job.GetJobVersions", args, args, reply); done {
  1071  		return err
  1072  	}
  1073  	defer metrics.MeasureSince([]string{"nomad", "job", "get_job_versions"}, time.Now())
  1074  
  1075  	// Check for read-job permissions
  1076  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1077  		return err
  1078  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
  1079  		return structs.ErrPermissionDenied
  1080  	}
  1081  
  1082  	// Setup the blocking query
  1083  	opts := blockingOptions{
  1084  		queryOpts: &args.QueryOptions,
  1085  		queryMeta: &reply.QueryMeta,
  1086  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1087  			// Look for the job
  1088  			out, err := state.JobVersionsByID(ws, args.RequestNamespace(), args.JobID)
  1089  			if err != nil {
  1090  				return err
  1091  			}
  1092  
  1093  			// Setup the output
  1094  			reply.Versions = out
  1095  			if len(out) != 0 {
  1096  				reply.Index = out[0].ModifyIndex
  1097  
  1098  				// Compute the diffs
  1099  				if args.Diffs {
  1100  					for i := 0; i < len(out)-1; i++ {
  1101  						old, new := out[i+1], out[i]
  1102  						d, err := old.Diff(new, true)
  1103  						if err != nil {
  1104  							return fmt.Errorf("failed to create job diff: %v", err)
  1105  						}
  1106  						reply.Diffs = append(reply.Diffs, d)
  1107  					}
  1108  				}
  1109  			} else {
  1110  				// Use the last index that affected the nodes table
  1111  				index, err := state.Index("job_version")
  1112  				if err != nil {
  1113  					return err
  1114  				}
  1115  				reply.Index = index
  1116  			}
  1117  
  1118  			// Set the query response
  1119  			j.srv.setQueryMeta(&reply.QueryMeta)
  1120  			return nil
  1121  		}}
  1122  	return j.srv.blockingRPC(&opts)
  1123  }
  1124  
  1125  // List is used to list the jobs registered in the system
  1126  func (j *Job) List(args *structs.JobListRequest,
  1127  	reply *structs.JobListResponse) error {
  1128  	if done, err := j.srv.forward("Job.List", args, args, reply); done {
  1129  		return err
  1130  	}
  1131  	defer metrics.MeasureSince([]string{"nomad", "job", "list"}, time.Now())
  1132  
  1133  	// Check for list-job permissions
  1134  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1135  		return err
  1136  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityListJobs) {
  1137  		return structs.ErrPermissionDenied
  1138  	}
  1139  
  1140  	// Setup the blocking query
  1141  	opts := blockingOptions{
  1142  		queryOpts: &args.QueryOptions,
  1143  		queryMeta: &reply.QueryMeta,
  1144  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1145  			// Capture all the jobs
  1146  			var err error
  1147  			var iter memdb.ResultIterator
  1148  			if prefix := args.QueryOptions.Prefix; prefix != "" {
  1149  				iter, err = state.JobsByIDPrefix(ws, args.RequestNamespace(), prefix)
  1150  			} else {
  1151  				iter, err = state.JobsByNamespace(ws, args.RequestNamespace())
  1152  			}
  1153  			if err != nil {
  1154  				return err
  1155  			}
  1156  
  1157  			var jobs []*structs.JobListStub
  1158  			for {
  1159  				raw := iter.Next()
  1160  				if raw == nil {
  1161  					break
  1162  				}
  1163  				job := raw.(*structs.Job)
  1164  				summary, err := state.JobSummaryByID(ws, args.RequestNamespace(), job.ID)
  1165  				if err != nil {
  1166  					return fmt.Errorf("unable to look up summary for job: %v", job.ID)
  1167  				}
  1168  				jobs = append(jobs, job.Stub(summary))
  1169  			}
  1170  			reply.Jobs = jobs
  1171  
  1172  			// Use the last index that affected the jobs table or summary
  1173  			jindex, err := state.Index("jobs")
  1174  			if err != nil {
  1175  				return err
  1176  			}
  1177  			sindex, err := state.Index("job_summary")
  1178  			if err != nil {
  1179  				return err
  1180  			}
  1181  			reply.Index = helper.Uint64Max(jindex, sindex)
  1182  
  1183  			// Set the query response
  1184  			j.srv.setQueryMeta(&reply.QueryMeta)
  1185  			return nil
  1186  		}}
  1187  	return j.srv.blockingRPC(&opts)
  1188  }
  1189  
  1190  // Allocations is used to list the allocations for a job
  1191  func (j *Job) Allocations(args *structs.JobSpecificRequest,
  1192  	reply *structs.JobAllocationsResponse) error {
  1193  	if done, err := j.srv.forward("Job.Allocations", args, args, reply); done {
  1194  		return err
  1195  	}
  1196  	defer metrics.MeasureSince([]string{"nomad", "job", "allocations"}, time.Now())
  1197  
  1198  	// Check for read-job permissions
  1199  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1200  		return err
  1201  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
  1202  		return structs.ErrPermissionDenied
  1203  	}
  1204  
  1205  	// Ensure JobID is set otherwise everything works and never returns
  1206  	// allocations which can hide bugs in request code.
  1207  	if args.JobID == "" {
  1208  		return fmt.Errorf("missing job ID")
  1209  	}
  1210  
  1211  	// Setup the blocking query
  1212  	opts := blockingOptions{
  1213  		queryOpts: &args.QueryOptions,
  1214  		queryMeta: &reply.QueryMeta,
  1215  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1216  			// Capture the allocations
  1217  			allocs, err := state.AllocsByJob(ws, args.RequestNamespace(), args.JobID, args.All)
  1218  			if err != nil {
  1219  				return err
  1220  			}
  1221  
  1222  			// Convert to stubs
  1223  			if len(allocs) > 0 {
  1224  				reply.Allocations = make([]*structs.AllocListStub, 0, len(allocs))
  1225  				for _, alloc := range allocs {
  1226  					reply.Allocations = append(reply.Allocations, alloc.Stub())
  1227  				}
  1228  			}
  1229  
  1230  			// Use the last index that affected the allocs table
  1231  			index, err := state.Index("allocs")
  1232  			if err != nil {
  1233  				return err
  1234  			}
  1235  			reply.Index = index
  1236  
  1237  			// Set the query response
  1238  			j.srv.setQueryMeta(&reply.QueryMeta)
  1239  			return nil
  1240  
  1241  		}}
  1242  	return j.srv.blockingRPC(&opts)
  1243  }
  1244  
  1245  // Evaluations is used to list the evaluations for a job
  1246  func (j *Job) Evaluations(args *structs.JobSpecificRequest,
  1247  	reply *structs.JobEvaluationsResponse) error {
  1248  	if done, err := j.srv.forward("Job.Evaluations", args, args, reply); done {
  1249  		return err
  1250  	}
  1251  	defer metrics.MeasureSince([]string{"nomad", "job", "evaluations"}, time.Now())
  1252  
  1253  	// Check for read-job permissions
  1254  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1255  		return err
  1256  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
  1257  		return structs.ErrPermissionDenied
  1258  	}
  1259  
  1260  	// Setup the blocking query
  1261  	opts := blockingOptions{
  1262  		queryOpts: &args.QueryOptions,
  1263  		queryMeta: &reply.QueryMeta,
  1264  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1265  			// Capture the evals
  1266  			var err error
  1267  			reply.Evaluations, err = state.EvalsByJob(ws, args.RequestNamespace(), args.JobID)
  1268  			if err != nil {
  1269  				return err
  1270  			}
  1271  
  1272  			// Use the last index that affected the evals table
  1273  			index, err := state.Index("evals")
  1274  			if err != nil {
  1275  				return err
  1276  			}
  1277  			reply.Index = index
  1278  
  1279  			// Set the query response
  1280  			j.srv.setQueryMeta(&reply.QueryMeta)
  1281  			return nil
  1282  		}}
  1283  
  1284  	return j.srv.blockingRPC(&opts)
  1285  }
  1286  
  1287  // Deployments is used to list the deployments for a job
  1288  func (j *Job) Deployments(args *structs.JobSpecificRequest,
  1289  	reply *structs.DeploymentListResponse) error {
  1290  	if done, err := j.srv.forward("Job.Deployments", args, args, reply); done {
  1291  		return err
  1292  	}
  1293  	defer metrics.MeasureSince([]string{"nomad", "job", "deployments"}, time.Now())
  1294  
  1295  	// Check for read-job permissions
  1296  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1297  		return err
  1298  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
  1299  		return structs.ErrPermissionDenied
  1300  	}
  1301  
  1302  	// Setup the blocking query
  1303  	opts := blockingOptions{
  1304  		queryOpts: &args.QueryOptions,
  1305  		queryMeta: &reply.QueryMeta,
  1306  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1307  			// Capture the deployments
  1308  			deploys, err := state.DeploymentsByJobID(ws, args.RequestNamespace(), args.JobID, args.All)
  1309  			if err != nil {
  1310  				return err
  1311  			}
  1312  
  1313  			// Use the last index that affected the deployment table
  1314  			index, err := state.Index("deployment")
  1315  			if err != nil {
  1316  				return err
  1317  			}
  1318  			reply.Index = index
  1319  			reply.Deployments = deploys
  1320  
  1321  			// Set the query response
  1322  			j.srv.setQueryMeta(&reply.QueryMeta)
  1323  			return nil
  1324  
  1325  		}}
  1326  	return j.srv.blockingRPC(&opts)
  1327  }
  1328  
  1329  // LatestDeployment is used to retrieve the latest deployment for a job
  1330  func (j *Job) LatestDeployment(args *structs.JobSpecificRequest,
  1331  	reply *structs.SingleDeploymentResponse) error {
  1332  	if done, err := j.srv.forward("Job.LatestDeployment", args, args, reply); done {
  1333  		return err
  1334  	}
  1335  	defer metrics.MeasureSince([]string{"nomad", "job", "latest_deployment"}, time.Now())
  1336  
  1337  	// Check for read-job permissions
  1338  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1339  		return err
  1340  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob) {
  1341  		return structs.ErrPermissionDenied
  1342  	}
  1343  
  1344  	// Setup the blocking query
  1345  	opts := blockingOptions{
  1346  		queryOpts: &args.QueryOptions,
  1347  		queryMeta: &reply.QueryMeta,
  1348  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1349  			// Capture the deployments
  1350  			deploys, err := state.DeploymentsByJobID(ws, args.RequestNamespace(), args.JobID, args.All)
  1351  			if err != nil {
  1352  				return err
  1353  			}
  1354  
  1355  			// Use the last index that affected the deployment table
  1356  			index, err := state.Index("deployment")
  1357  			if err != nil {
  1358  				return err
  1359  			}
  1360  			reply.Index = index
  1361  			if len(deploys) > 0 {
  1362  				sort.Slice(deploys, func(i, j int) bool {
  1363  					return deploys[i].CreateIndex > deploys[j].CreateIndex
  1364  				})
  1365  				reply.Deployment = deploys[0]
  1366  			}
  1367  
  1368  			// Set the query response
  1369  			j.srv.setQueryMeta(&reply.QueryMeta)
  1370  			return nil
  1371  
  1372  		}}
  1373  	return j.srv.blockingRPC(&opts)
  1374  }
  1375  
  1376  // Plan is used to cause a dry-run evaluation of the Job and return the results
  1377  // with a potential diff containing annotations.
  1378  func (j *Job) Plan(args *structs.JobPlanRequest, reply *structs.JobPlanResponse) error {
  1379  	if done, err := j.srv.forward("Job.Plan", args, args, reply); done {
  1380  		return err
  1381  	}
  1382  	defer metrics.MeasureSince([]string{"nomad", "job", "plan"}, time.Now())
  1383  
  1384  	// Validate the arguments
  1385  	if args.Job == nil {
  1386  		return fmt.Errorf("Job required for plan")
  1387  	}
  1388  
  1389  	// Run admission controllers
  1390  	job, warnings, err := j.admissionControllers(args.Job)
  1391  	if err != nil {
  1392  		return err
  1393  	}
  1394  	args.Job = job
  1395  
  1396  	// Set the warning message
  1397  	reply.Warnings = structs.MergeMultierrorWarnings(warnings...)
  1398  
  1399  	// Check job submission permissions, which we assume is the same for plan
  1400  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1401  		return err
  1402  	} else if aclObj != nil {
  1403  		if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySubmitJob) {
  1404  			return structs.ErrPermissionDenied
  1405  		}
  1406  		// Check if override is set and we do not have permissions
  1407  		if args.PolicyOverride {
  1408  			if !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilitySentinelOverride) {
  1409  				return structs.ErrPermissionDenied
  1410  			}
  1411  		}
  1412  	}
  1413  
  1414  	// Enforce Sentinel policies
  1415  	policyWarnings, err := j.enforceSubmitJob(args.PolicyOverride, args.Job)
  1416  	if err != nil {
  1417  		return err
  1418  	}
  1419  	if policyWarnings != nil {
  1420  		warnings = append(warnings, policyWarnings)
  1421  		reply.Warnings = structs.MergeMultierrorWarnings(warnings...)
  1422  	}
  1423  
  1424  	// Acquire a snapshot of the state
  1425  	snap, err := j.srv.fsm.State().Snapshot()
  1426  	if err != nil {
  1427  		return err
  1428  	}
  1429  
  1430  	// Get the original job
  1431  	ws := memdb.NewWatchSet()
  1432  	oldJob, err := snap.JobByID(ws, args.RequestNamespace(), args.Job.ID)
  1433  	if err != nil {
  1434  		return err
  1435  	}
  1436  
  1437  	var index uint64
  1438  	var updatedIndex uint64
  1439  
  1440  	if oldJob != nil {
  1441  		index = oldJob.JobModifyIndex
  1442  
  1443  		// We want to reuse deployments where possible, so only insert the job if
  1444  		// it has changed or the job didn't exist
  1445  		if oldJob.SpecChanged(args.Job) {
  1446  			// Insert the updated Job into the snapshot
  1447  			updatedIndex = oldJob.JobModifyIndex + 1
  1448  			snap.UpsertJob(updatedIndex, args.Job)
  1449  		}
  1450  	} else if oldJob == nil {
  1451  		// Insert the updated Job into the snapshot
  1452  		snap.UpsertJob(100, args.Job)
  1453  	}
  1454  
  1455  	// Create an eval and mark it as requiring annotations and insert that as well
  1456  	now := time.Now().UTC().UnixNano()
  1457  	eval := &structs.Evaluation{
  1458  		ID:             uuid.Generate(),
  1459  		Namespace:      args.RequestNamespace(),
  1460  		Priority:       args.Job.Priority,
  1461  		Type:           args.Job.Type,
  1462  		TriggeredBy:    structs.EvalTriggerJobRegister,
  1463  		JobID:          args.Job.ID,
  1464  		JobModifyIndex: updatedIndex,
  1465  		Status:         structs.EvalStatusPending,
  1466  		AnnotatePlan:   true,
  1467  		// Timestamps are added for consistency but this eval is never persisted
  1468  		CreateTime: now,
  1469  		ModifyTime: now,
  1470  	}
  1471  
  1472  	snap.UpsertEvals(100, []*structs.Evaluation{eval})
  1473  
  1474  	// Create an in-memory Planner that returns no errors and stores the
  1475  	// submitted plan and created evals.
  1476  	planner := &scheduler.Harness{
  1477  		State: &snap.StateStore,
  1478  	}
  1479  
  1480  	// Create the scheduler and run it
  1481  	sched, err := scheduler.NewScheduler(eval.Type, j.logger, snap, planner)
  1482  	if err != nil {
  1483  		return err
  1484  	}
  1485  
  1486  	if err := sched.Process(eval); err != nil {
  1487  		return err
  1488  	}
  1489  
  1490  	// Annotate and store the diff
  1491  	if plans := len(planner.Plans); plans != 1 {
  1492  		return fmt.Errorf("scheduler resulted in an unexpected number of plans: %v", plans)
  1493  	}
  1494  	annotations := planner.Plans[0].Annotations
  1495  	if args.Diff {
  1496  		jobDiff, err := oldJob.Diff(args.Job, true)
  1497  		if err != nil {
  1498  			return fmt.Errorf("failed to create job diff: %v", err)
  1499  		}
  1500  
  1501  		if err := scheduler.Annotate(jobDiff, annotations); err != nil {
  1502  			return fmt.Errorf("failed to annotate job diff: %v", err)
  1503  		}
  1504  		reply.Diff = jobDiff
  1505  	}
  1506  
  1507  	// Grab the failures
  1508  	if len(planner.Evals) != 1 {
  1509  		return fmt.Errorf("scheduler resulted in an unexpected number of eval updates: %v", planner.Evals)
  1510  	}
  1511  	updatedEval := planner.Evals[0]
  1512  
  1513  	// If it is a periodic job calculate the next launch
  1514  	if args.Job.IsPeriodic() && args.Job.Periodic.Enabled {
  1515  		reply.NextPeriodicLaunch, err = args.Job.Periodic.Next(time.Now().In(args.Job.Periodic.GetLocation()))
  1516  		if err != nil {
  1517  			return fmt.Errorf("Failed to parse cron expression: %v", err)
  1518  		}
  1519  	}
  1520  
  1521  	reply.FailedTGAllocs = updatedEval.FailedTGAllocs
  1522  	reply.JobModifyIndex = index
  1523  	reply.Annotations = annotations
  1524  	reply.CreatedEvals = planner.CreateEvals
  1525  	reply.Index = index
  1526  	return nil
  1527  }
  1528  
  1529  // validateJobUpdate ensures updates to a job are valid.
  1530  func validateJobUpdate(old, new *structs.Job) error {
  1531  	// Validate Dispatch not set on new Jobs
  1532  	if old == nil {
  1533  		if new.Dispatched {
  1534  			return fmt.Errorf("job can't be submitted with 'Dispatched' set")
  1535  		}
  1536  		return nil
  1537  	}
  1538  
  1539  	// Type transitions are disallowed
  1540  	if old.Type != new.Type {
  1541  		return fmt.Errorf("cannot update job from type %q to %q", old.Type, new.Type)
  1542  	}
  1543  
  1544  	// Transitioning to/from periodic is disallowed
  1545  	if old.IsPeriodic() && !new.IsPeriodic() {
  1546  		return fmt.Errorf("cannot update periodic job to being non-periodic")
  1547  	}
  1548  	if new.IsPeriodic() && !old.IsPeriodic() {
  1549  		return fmt.Errorf("cannot update non-periodic job to being periodic")
  1550  	}
  1551  
  1552  	// Transitioning to/from parameterized is disallowed
  1553  	if old.IsParameterized() && !new.IsParameterized() {
  1554  		return fmt.Errorf("cannot update non-parameterized job to being parameterized")
  1555  	}
  1556  	if new.IsParameterized() && !old.IsParameterized() {
  1557  		return fmt.Errorf("cannot update parameterized job to being non-parameterized")
  1558  	}
  1559  
  1560  	if old.Dispatched != new.Dispatched {
  1561  		return fmt.Errorf("field 'Dispatched' is read-only")
  1562  	}
  1563  
  1564  	return nil
  1565  }
  1566  
  1567  // Dispatch a parameterized job.
  1568  func (j *Job) Dispatch(args *structs.JobDispatchRequest, reply *structs.JobDispatchResponse) error {
  1569  	if done, err := j.srv.forward("Job.Dispatch", args, args, reply); done {
  1570  		return err
  1571  	}
  1572  	defer metrics.MeasureSince([]string{"nomad", "job", "dispatch"}, time.Now())
  1573  
  1574  	// Check for submit-job permissions
  1575  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1576  		return err
  1577  	} else if aclObj != nil && !aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityDispatchJob) {
  1578  		return structs.ErrPermissionDenied
  1579  	}
  1580  
  1581  	// Lookup the parameterized job
  1582  	if args.JobID == "" {
  1583  		return fmt.Errorf("missing parameterized job ID")
  1584  	}
  1585  
  1586  	snap, err := j.srv.fsm.State().Snapshot()
  1587  	if err != nil {
  1588  		return err
  1589  	}
  1590  	ws := memdb.NewWatchSet()
  1591  	parameterizedJob, err := snap.JobByID(ws, args.RequestNamespace(), args.JobID)
  1592  	if err != nil {
  1593  		return err
  1594  	}
  1595  	if parameterizedJob == nil {
  1596  		return fmt.Errorf("parameterized job not found")
  1597  	}
  1598  
  1599  	if !parameterizedJob.IsParameterized() {
  1600  		return fmt.Errorf("Specified job %q is not a parameterized job", args.JobID)
  1601  	}
  1602  
  1603  	if parameterizedJob.Stop {
  1604  		return fmt.Errorf("Specified job %q is stopped", args.JobID)
  1605  	}
  1606  
  1607  	// Validate the arguments
  1608  	if err := validateDispatchRequest(args, parameterizedJob); err != nil {
  1609  		return err
  1610  	}
  1611  
  1612  	// Derive the child job and commit it via Raft
  1613  	dispatchJob := parameterizedJob.Copy()
  1614  	dispatchJob.ID = structs.DispatchedID(parameterizedJob.ID, time.Now())
  1615  	dispatchJob.ParentID = parameterizedJob.ID
  1616  	dispatchJob.Name = dispatchJob.ID
  1617  	dispatchJob.SetSubmitTime()
  1618  	dispatchJob.Dispatched = true
  1619  
  1620  	// Merge in the meta data
  1621  	for k, v := range args.Meta {
  1622  		if dispatchJob.Meta == nil {
  1623  			dispatchJob.Meta = make(map[string]string, len(args.Meta))
  1624  		}
  1625  		dispatchJob.Meta[k] = v
  1626  	}
  1627  
  1628  	// Compress the payload
  1629  	dispatchJob.Payload = snappy.Encode(nil, args.Payload)
  1630  
  1631  	regReq := &structs.JobRegisterRequest{
  1632  		Job:          dispatchJob,
  1633  		WriteRequest: args.WriteRequest,
  1634  	}
  1635  
  1636  	// Commit this update via Raft
  1637  	fsmErr, jobCreateIndex, err := j.srv.raftApply(structs.JobRegisterRequestType, regReq)
  1638  	if err, ok := fsmErr.(error); ok && err != nil {
  1639  		j.logger.Error("dispatched job register failed", "error", err, "fsm", true)
  1640  		return err
  1641  	}
  1642  	if err != nil {
  1643  		j.logger.Error("dispatched job register failed", "error", err, "raft", true)
  1644  		return err
  1645  	}
  1646  
  1647  	reply.JobCreateIndex = jobCreateIndex
  1648  	reply.DispatchedJobID = dispatchJob.ID
  1649  	reply.Index = jobCreateIndex
  1650  
  1651  	// If the job is periodic, we don't create an eval.
  1652  	if !dispatchJob.IsPeriodic() {
  1653  		// Create a new evaluation
  1654  		now := time.Now().UTC().UnixNano()
  1655  		eval := &structs.Evaluation{
  1656  			ID:             uuid.Generate(),
  1657  			Namespace:      args.RequestNamespace(),
  1658  			Priority:       dispatchJob.Priority,
  1659  			Type:           dispatchJob.Type,
  1660  			TriggeredBy:    structs.EvalTriggerJobRegister,
  1661  			JobID:          dispatchJob.ID,
  1662  			JobModifyIndex: jobCreateIndex,
  1663  			Status:         structs.EvalStatusPending,
  1664  			CreateTime:     now,
  1665  			ModifyTime:     now,
  1666  		}
  1667  		update := &structs.EvalUpdateRequest{
  1668  			Evals:        []*structs.Evaluation{eval},
  1669  			WriteRequest: structs.WriteRequest{Region: args.Region},
  1670  		}
  1671  
  1672  		// Commit this evaluation via Raft
  1673  		_, evalIndex, err := j.srv.raftApply(structs.EvalUpdateRequestType, update)
  1674  		if err != nil {
  1675  			j.logger.Error("eval create failed", "error", err, "method", "dispatch")
  1676  			return err
  1677  		}
  1678  
  1679  		// Setup the reply
  1680  		reply.EvalID = eval.ID
  1681  		reply.EvalCreateIndex = evalIndex
  1682  		reply.Index = evalIndex
  1683  	}
  1684  
  1685  	return nil
  1686  }
  1687  
  1688  // validateDispatchRequest returns whether the request is valid given the
  1689  // parameterized job.
  1690  func validateDispatchRequest(req *structs.JobDispatchRequest, job *structs.Job) error {
  1691  	// Check the payload constraint is met
  1692  	hasInputData := len(req.Payload) != 0
  1693  	if job.ParameterizedJob.Payload == structs.DispatchPayloadRequired && !hasInputData {
  1694  		return fmt.Errorf("Payload is not provided but required by parameterized job")
  1695  	} else if job.ParameterizedJob.Payload == structs.DispatchPayloadForbidden && hasInputData {
  1696  		return fmt.Errorf("Payload provided but forbidden by parameterized job")
  1697  	}
  1698  
  1699  	// Check the payload doesn't exceed the size limit
  1700  	if l := len(req.Payload); l > DispatchPayloadSizeLimit {
  1701  		return fmt.Errorf("Payload exceeds maximum size; %d > %d", l, DispatchPayloadSizeLimit)
  1702  	}
  1703  
  1704  	// Check if the metadata is a set
  1705  	keys := make(map[string]struct{}, len(req.Meta))
  1706  	for k := range keys {
  1707  		if _, ok := keys[k]; ok {
  1708  			return fmt.Errorf("Duplicate key %q in passed metadata", k)
  1709  		}
  1710  		keys[k] = struct{}{}
  1711  	}
  1712  
  1713  	required := helper.SliceStringToSet(job.ParameterizedJob.MetaRequired)
  1714  	optional := helper.SliceStringToSet(job.ParameterizedJob.MetaOptional)
  1715  
  1716  	// Check the metadata key constraints are met
  1717  	unpermitted := make(map[string]struct{})
  1718  	for k := range req.Meta {
  1719  		_, req := required[k]
  1720  		_, opt := optional[k]
  1721  		if !req && !opt {
  1722  			unpermitted[k] = struct{}{}
  1723  		}
  1724  	}
  1725  
  1726  	if len(unpermitted) != 0 {
  1727  		flat := make([]string, 0, len(unpermitted))
  1728  		for k := range unpermitted {
  1729  			flat = append(flat, k)
  1730  		}
  1731  
  1732  		return fmt.Errorf("Dispatch request included unpermitted metadata keys: %v", flat)
  1733  	}
  1734  
  1735  	missing := make(map[string]struct{})
  1736  	for _, k := range job.ParameterizedJob.MetaRequired {
  1737  		if _, ok := req.Meta[k]; !ok {
  1738  			missing[k] = struct{}{}
  1739  		}
  1740  	}
  1741  
  1742  	if len(missing) != 0 {
  1743  		flat := make([]string, 0, len(missing))
  1744  		for k := range missing {
  1745  			flat = append(flat, k)
  1746  		}
  1747  
  1748  		return fmt.Errorf("Dispatch did not provide required meta keys: %v", flat)
  1749  	}
  1750  
  1751  	return nil
  1752  }
  1753  
  1754  // ScaleStatus retrieves the scaling status for a job
  1755  func (j *Job) ScaleStatus(args *structs.JobScaleStatusRequest,
  1756  	reply *structs.JobScaleStatusResponse) error {
  1757  
  1758  	if done, err := j.srv.forward("Job.ScaleStatus", args, args, reply); done {
  1759  		return err
  1760  	}
  1761  	defer metrics.MeasureSince([]string{"nomad", "job", "scale_status"}, time.Now())
  1762  
  1763  	// Check for autoscaler permissions
  1764  	if aclObj, err := j.srv.ResolveToken(args.AuthToken); err != nil {
  1765  		return err
  1766  	} else if aclObj != nil {
  1767  		hasReadJob := aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJob)
  1768  		hasReadJobScaling := aclObj.AllowNsOp(args.RequestNamespace(), acl.NamespaceCapabilityReadJobScaling)
  1769  		if !(hasReadJob || hasReadJobScaling) {
  1770  			return structs.ErrPermissionDenied
  1771  		}
  1772  	}
  1773  
  1774  	// Setup the blocking query
  1775  	opts := blockingOptions{
  1776  		queryOpts: &args.QueryOptions,
  1777  		queryMeta: &reply.QueryMeta,
  1778  		run: func(ws memdb.WatchSet, state *state.StateStore) error {
  1779  
  1780  			// We need the job and the job summary
  1781  			job, err := state.JobByID(ws, args.RequestNamespace(), args.JobID)
  1782  			if err != nil {
  1783  				return err
  1784  			}
  1785  			if job == nil {
  1786  				reply.JobScaleStatus = nil
  1787  				return nil
  1788  			}
  1789  
  1790  			events, eventsIndex, err := state.ScalingEventsByJob(ws, args.RequestNamespace(), args.JobID)
  1791  			if err != nil {
  1792  				return err
  1793  			}
  1794  			if events == nil {
  1795  				events = make(map[string][]*structs.ScalingEvent)
  1796  			}
  1797  
  1798  			var allocs []*structs.Allocation
  1799  			var allocsIndex uint64
  1800  			allocs, err = state.AllocsByJob(ws, job.Namespace, job.ID, false)
  1801  			if err != nil {
  1802  				return err
  1803  			}
  1804  
  1805  			// Setup the output
  1806  			reply.JobScaleStatus = &structs.JobScaleStatus{
  1807  				JobID:          job.ID,
  1808  				JobCreateIndex: job.CreateIndex,
  1809  				JobModifyIndex: job.ModifyIndex,
  1810  				JobStopped:     job.Stop,
  1811  				TaskGroups:     make(map[string]*structs.TaskGroupScaleStatus),
  1812  			}
  1813  
  1814  			for _, tg := range job.TaskGroups {
  1815  				tgScale := &structs.TaskGroupScaleStatus{
  1816  					Desired: tg.Count,
  1817  				}
  1818  				tgScale.Events = events[tg.Name]
  1819  				reply.JobScaleStatus.TaskGroups[tg.Name] = tgScale
  1820  			}
  1821  
  1822  			for _, alloc := range allocs {
  1823  				// TODO: ignore canaries until we figure out what we should do with canaries
  1824  				if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Canary {
  1825  					continue
  1826  				}
  1827  				if alloc.TerminalStatus() {
  1828  					continue
  1829  				}
  1830  				tgScale, ok := reply.JobScaleStatus.TaskGroups[alloc.TaskGroup]
  1831  				if !ok || tgScale == nil {
  1832  					continue
  1833  				}
  1834  				tgScale.Placed++
  1835  				if alloc.ClientStatus == structs.AllocClientStatusRunning {
  1836  					tgScale.Running++
  1837  				}
  1838  				if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.HasHealth() {
  1839  					if alloc.DeploymentStatus.IsHealthy() {
  1840  						tgScale.Healthy++
  1841  					} else if alloc.DeploymentStatus.IsUnhealthy() {
  1842  						tgScale.Unhealthy++
  1843  					}
  1844  				}
  1845  				if alloc.ModifyIndex > allocsIndex {
  1846  					allocsIndex = alloc.ModifyIndex
  1847  				}
  1848  			}
  1849  
  1850  			maxIndex := job.ModifyIndex
  1851  			if eventsIndex > maxIndex {
  1852  				maxIndex = eventsIndex
  1853  			}
  1854  			if allocsIndex > maxIndex {
  1855  				maxIndex = allocsIndex
  1856  			}
  1857  			reply.Index = maxIndex
  1858  
  1859  			// Set the query response
  1860  			j.srv.setQueryMeta(&reply.QueryMeta)
  1861  			return nil
  1862  		}}
  1863  	return j.srv.blockingRPC(&opts)
  1864  }