github.com/jrxfive/nomad@v0.6.1-0.20170802162750-1fef470e89bf/nomad/structs/structs.go (about)

     1  package structs
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/md5"
     6  	"crypto/sha1"
     7  	"crypto/sha256"
     8  	"crypto/sha512"
     9  	"encoding/hex"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"net"
    14  	"os"
    15  	"path/filepath"
    16  	"reflect"
    17  	"regexp"
    18  	"sort"
    19  	"strconv"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/gorhill/cronexpr"
    24  	"github.com/hashicorp/consul/api"
    25  	"github.com/hashicorp/go-multierror"
    26  	"github.com/hashicorp/go-version"
    27  	"github.com/hashicorp/nomad/helper"
    28  	"github.com/hashicorp/nomad/helper/args"
    29  	"github.com/mitchellh/copystructure"
    30  	"github.com/ugorji/go/codec"
    31  
    32  	hcodec "github.com/hashicorp/go-msgpack/codec"
    33  )
    34  
    35  var (
    36  	ErrNoLeader     = fmt.Errorf("No cluster leader")
    37  	ErrNoRegionPath = fmt.Errorf("No path to region")
    38  )
    39  
// MessageType identifies the kind of request a message carries. It is a
// uint8; the constant IgnoreUnknownTypeFlag (128) is defined separately as a
// flag that is set along with a MessageType.
type MessageType uint8

// The message types are numbered by iota starting at zero
// (NodeRegisterRequestType == 0 ... JobStabilityRequestType == 18), so each
// value is fixed by its position in this list. Inserting or reordering
// entries renumbers everything after the change; append new types at the end
// to keep existing values stable.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	// (sic) "Degister": the identifier is exported, so the spelling is kept.
	VaultAccessorDegisterRequestType
	ApplyPlanResultsRequestType
	DeploymentStatusUpdateRequestType
	DeploymentPromoteRequestType
	DeploymentAllocHealthRequestType
	DeploymentDeleteRequestType
	JobStabilityRequestType
)
    63  
const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients, to allow for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	// ProtocolVersion, APIMajorVersion and APIMinorVersion are string keys.
	// NOTE(review): presumably the keys of VersionResponse.Versions — confirm
	// against the Status.Version endpoint.
	// Do not confuse APIMajorVersion/APIMinorVersion (strings) with the
	// integer constants ApiMajorVersion/ApiMinorVersion above; the names
	// differ only in letter case.
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"

	// GetterMode* are string mode names.
	// NOTE(review): presumably the accepted artifact getter modes — confirm
	// against the code that validates them.
	GetterModeAny  = "any"
	GetterModeFile = "file"
	GetterModeDir  = "dir"
)
    91  
// RPCInfo is used to describe common information about a query.
// In this file QueryOptions and WriteRequest provide the implementations,
// and request types pick the interface up by embedding one of them.
type RPCInfo interface {
	// RequestRegion returns the region the request targets.
	RequestRegion() string
	// IsRead reports whether the request is a read (QueryOptions returns
	// true, WriteRequest returns false).
	IsRead() bool
	// AllowStaleRead reports whether a stale read is acceptable
	// (WriteRequest always returns false).
	AllowStaleRead() bool
}
    98  
    99  // QueryOptions is used to specify various flags for read queries
   100  type QueryOptions struct {
   101  	// The target region for this query
   102  	Region string
   103  
   104  	// If set, wait until query exceeds given index. Must be provided
   105  	// with MaxQueryTime.
   106  	MinQueryIndex uint64
   107  
   108  	// Provided with MinQueryIndex to wait for change.
   109  	MaxQueryTime time.Duration
   110  
   111  	// If set, any follower can service the request. Results
   112  	// may be arbitrarily stale.
   113  	AllowStale bool
   114  
   115  	// If set, used as prefix for resource list searches
   116  	Prefix string
   117  }
   118  
   119  func (q QueryOptions) RequestRegion() string {
   120  	return q.Region
   121  }
   122  
   123  // QueryOption only applies to reads, so always true
   124  func (q QueryOptions) IsRead() bool {
   125  	return true
   126  }
   127  
   128  func (q QueryOptions) AllowStaleRead() bool {
   129  	return q.AllowStale
   130  }
   131  
   132  type WriteRequest struct {
   133  	// The target region for this write
   134  	Region string
   135  }
   136  
   137  func (w WriteRequest) RequestRegion() string {
   138  	// The target region for this request
   139  	return w.Region
   140  }
   141  
   142  // WriteRequest only applies to writes, always false
   143  func (w WriteRequest) IsRead() bool {
   144  	return false
   145  }
   146  
   147  func (w WriteRequest) AllowStaleRead() bool {
   148  	return false
   149  }
   150  
// QueryMeta allows a query response to include potentially
// useful metadata about a query. Response types in this file embed it
// to expose these fields.
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is the time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}
   165  
// WriteMeta allows a write response to include potentially
// useful metadata about the write. Response types in this file embed it
// to expose the Index field.
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}
   172  
// NodeRegisterRequest is used for the Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	// Node is the node to register.
	Node *Node
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   179  
// NodeDeregisterRequest is used for the Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	// NodeID is the ID of the node to deregister.
	NodeID string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   186  
// NodeServerInfo is used in NodeUpdateResponse (its Servers field) to
// return Nomad server information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to
	Datacenter string
}
   205  
// NodeUpdateStatusRequest is used for the Node.UpdateStatus endpoint
// to update the status of a node.
// NOTE(review): Status is presumably one of the NodeStatus* constants
// (see ValidNodeStatus) — confirm at the endpoint.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   213  
// NodeUpdateDrainRequest is used for updating the drain status
// of the node identified by NodeID; Drain is the requested drain flag.
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   220  
// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	// NodeID is the ID of the node to re-evaluate.
	NodeID string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   226  
// NodeSpecificRequest is used when we just need to specify a target node.
// NOTE(review): SecretID presumably authenticates the caller as the node
// (compare Node.SecretID) — confirm at the endpoints that read it.
type NodeSpecificRequest struct {
	NodeID   string
	SecretID string
	// Embedded QueryOptions supplies Region and the read-query options and
	// marks this as a read (IsRead returns true).
	QueryOptions
}
   233  
// JobRegisterRequest is used for the Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	// Job is the job to register.
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current Jobs index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   247  
// JobDeregisterRequest is used for the Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	// JobID is the ID of the job to deregister.
	JobID string

	// Purge controls whether the deregister purges the job from the system or
	// whether the job is just marked as stopped and will be removed by the
	// garbage collector
	Purge bool

	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   260  
// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	// JobID is the ID of the job to re-evaluate.
	JobID string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   266  
// JobSpecificRequest is used when we just need to specify a target job.
// NOTE(review): AllAllocs is not interpreted anywhere in this part of the
// file; confirm its meaning at the endpoint that reads it.
type JobSpecificRequest struct {
	JobID     string
	AllAllocs bool
	// Embedded QueryOptions supplies Region and the read-query options and
	// marks this as a read (IsRead returns true).
	QueryOptions
}
   273  
// JobListRequest is used to parameterize a list request. It carries nothing
// beyond the embedded QueryOptions (Region, MinQueryIndex, MaxQueryTime,
// AllowStale, Prefix).
type JobListRequest struct {
	QueryOptions
}
   278  
// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
	Job  *Job
	Diff bool // Toggles an annotated diff
	// Embedded WriteRequest supplies the target Region; note that it makes
	// this dry-run request report IsRead() == false.
	WriteRequest
}
   286  
// JobSummaryRequest is used when we just need to get a specific job summary
type JobSummaryRequest struct {
	// JobID is the ID of the job whose summary is requested.
	JobID string
	// Embedded QueryOptions supplies Region and the read-query options and
	// marks this as a read (IsRead returns true).
	QueryOptions
}
   292  
// JobDispatchRequest is used to dispatch a job based on a parameterized job.
// NOTE(review): Payload and Meta are presumably handed to the dispatched
// job instance — confirm at the Job.Dispatch endpoint.
type JobDispatchRequest struct {
	JobID   string
	Payload []byte
	Meta    map[string]string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   300  
// JobValidateRequest is used to validate a job
type JobValidateRequest struct {
	// Job is the job to validate.
	Job *Job
	// Embedded WriteRequest supplies the target Region; note that it makes
	// this request report IsRead() == false.
	WriteRequest
}
   306  
// JobRevertRequest is used to revert a job to a prior version.
type JobRevertRequest struct {
	// JobID is the ID of the job being reverted
	JobID string

	// JobVersion is the version to revert to.
	JobVersion uint64

	// EnforcePriorVersion if set will enforce that the job is at the given
	// version before reverting. It is a pointer so that "not set" (nil) can
	// be told apart from version 0.
	EnforcePriorVersion *uint64

	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   321  
// JobStabilityRequest is used to mark a job as stable.
type JobStabilityRequest struct {
	// Job (ID and version) to set the stability on
	JobID      string
	JobVersion uint64

	// Set the stability
	Stable bool
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   332  
// JobStabilityResponse is the response when marking a job as stable. It
// carries only the embedded WriteMeta (the Index associated with the write).
type JobStabilityResponse struct {
	WriteMeta
}
   337  
// NodeListRequest is used to parameterize a list request. It carries nothing
// beyond the embedded QueryOptions (Region, MinQueryIndex, MaxQueryTime,
// AllowStale, Prefix).
type NodeListRequest struct {
	QueryOptions
}
   342  
// EvalUpdateRequest is used for upserting evaluations.
// NOTE(review): EvalToken is presumably the token issued when an evaluation
// was dequeued (compare EvalDequeueResponse.Token) — confirm at the endpoint.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   349  
// EvalDeleteRequest is used for deleting evaluations. Both fields are
// slices, so one request can name several items.
// NOTE(review): Evals and Allocs presumably hold evaluation IDs and
// allocation IDs to delete together — confirm where the request is applied.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   356  
// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	// EvalID is the ID of the target evaluation.
	EvalID string
	// Embedded QueryOptions supplies Region and the read-query options and
	// marks this as a read (IsRead returns true).
	QueryOptions
}
   362  
// EvalAckRequest is used to Ack/Nack a specific evaluation.
// NOTE(review): Token is presumably the token returned with the evaluation
// in EvalDequeueResponse — confirm at the endpoint.
type EvalAckRequest struct {
	EvalID string
	Token  string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   369  
// EvalDequeueRequest is used when we want to dequeue an evaluation.
// NOTE(review): Schedulers presumably lists the scheduler types the caller
// can serve and Timeout bounds how long the dequeue may wait — confirm at
// the Eval.Dequeue endpoint.
type EvalDequeueRequest struct {
	Schedulers       []string
	Timeout          time.Duration
	SchedulerVersion uint16
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   377  
// EvalListRequest is used to list the evaluations. It carries nothing beyond
// the embedded QueryOptions (Region, MinQueryIndex, MaxQueryTime,
// AllowStale, Prefix).
type EvalListRequest struct {
	QueryOptions
}
   382  
// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	// Plan is the allocation plan being submitted.
	Plan *Plan
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   388  
// ApplyPlanResultsRequest is used by the planner to apply a Raft transaction
// committing the result of a plan.
type ApplyPlanResultsRequest struct {
	// AllocUpdateRequest holds the allocation updates to be made by the
	// scheduler. Because it is embedded, its Alloc and Job fields and its
	// own embedded WriteRequest are promoted onto this type.
	AllocUpdateRequest

	// Deployment is the deployment created or updated as a result of a
	// scheduling event.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate
}
   405  
// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   419  
// AllocListRequest is used to request a list of allocations. It carries
// nothing beyond the embedded QueryOptions (Region, MinQueryIndex,
// MaxQueryTime, AllowStale, Prefix).
type AllocListRequest struct {
	QueryOptions
}
   424  
// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	// AllocID is the ID of the allocation being queried.
	AllocID string
	// Embedded QueryOptions supplies Region and the read-query options and
	// marks this as a read (IsRead returns true).
	QueryOptions
}
   430  
// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	// AllocIDs are the IDs of the allocations being queried.
	AllocIDs []string
	// Embedded QueryOptions supplies Region and the read-query options and
	// marks this as a read (IsRead returns true).
	QueryOptions
}
   436  
// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	// JobID is the ID of the periodic job to force.
	JobID string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   442  
// ServerMembersResponse has the list of servers in a cluster. Unlike most
// responses in this file it embeds neither QueryMeta nor WriteMeta.
// NOTE(review): ServerName, ServerRegion and ServerDC presumably describe
// the server that produced the response — confirm at the endpoint.
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
}
   450  
// ServerMember holds information about a Nomad server agent in a cluster.
// NOTE(review): the Protocol*/Delegate* Min/Max/Cur triples presumably mirror
// the version ranges reported by the membership (gossip) layer — confirm
// where this struct is populated.
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}
   465  
// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation.
// NOTE(review): NodeID and SecretID presumably identify and authenticate the
// requesting node — confirm at the endpoint.
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	// Embedded QueryOptions supplies Region and the read-query options; it
	// makes this request report IsRead() == true.
	QueryOptions
}
   475  
// VaultAccessorsRequest is used to operate on a set of Vault accessors.
// It embeds neither WriteRequest nor QueryOptions, so it has no Region and
// does not pick up the RPCInfo methods from them.
type VaultAccessorsRequest struct {
	// Accessors is the set of accessors to operate on.
	Accessors []*VaultAccessor
}
   480  
// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
// NOTE(review): the unit of CreationTTL is not shown here (presumably
// seconds) — confirm where the accessor is created.
type VaultAccessor struct {
	AllocID     string
	Task        string
	NodeID      string
	Accessor    string
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}
   493  
// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token
	Tasks map[string]string

	// Error stores any error that occurred. Errors are stored here so we can
	// communicate whether it is retriable
	Error *RecoverableError

	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   505  
// GenericRequest is used for requests where no
// specific information is needed. It carries only the embedded QueryOptions,
// which also marks it as a read (IsRead returns true).
type GenericRequest struct {
	QueryOptions
}
   511  
// DeploymentListRequest is used to list the deployments. It carries nothing
// beyond the embedded QueryOptions (Region, MinQueryIndex, MaxQueryTime,
// AllowStale, Prefix).
type DeploymentListRequest struct {
	QueryOptions
}
   516  
// DeploymentDeleteRequest is used for deleting deployments.
type DeploymentDeleteRequest struct {
	// Deployments lists the deployments to delete.
	// NOTE(review): presumably deployment IDs — confirm where it is applied.
	Deployments []string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   522  
// DeploymentStatusUpdateRequest is used to update the status of a deployment as
// well as optionally creating an evaluation atomically.
// NOTE(review): unlike most request types here, no WriteRequest is embedded —
// presumably because this is applied through Raft rather than sent as an RPC;
// confirm.
type DeploymentStatusUpdateRequest struct {
	// Eval, if set, is used to create an evaluation at the same time as
	// updating the status of a deployment.
	Eval *Evaluation

	// DeploymentUpdate is a status update to apply to the given
	// deployment.
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job
}
   539  
// DeploymentAllocHealthRequest is used to set the health of a set of
// allocations as part of a deployment.
type DeploymentAllocHealthRequest struct {
	// DeploymentID is the ID of the deployment the allocations belong to.
	DeploymentID string

	// Marks these allocations as healthy, allowing further allocations
	// to be rolled.
	HealthyAllocationIDs []string

	// Any unhealthy allocations fail the deployment
	UnhealthyAllocationIDs []string

	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   554  
// ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft
type ApplyDeploymentAllocHealthRequest struct {
	// The embedded DeploymentAllocHealthRequest promotes its fields
	// (DeploymentID, the healthy/unhealthy ID lists) and its WriteRequest.
	DeploymentAllocHealthRequest

	// An optional field to update the status of a deployment
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}
   570  
// DeploymentPromoteRequest is used to promote task groups in a deployment
type DeploymentPromoteRequest struct {
	// DeploymentID is the ID of the deployment to promote.
	DeploymentID string

	// All is to promote all task groups
	All bool

	// Groups is used to set the promotion status per task group
	Groups []string

	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   583  
// ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft
type ApplyDeploymentPromoteRequest struct {
	// The embedded DeploymentPromoteRequest promotes its fields
	// (DeploymentID, All, Groups) and its WriteRequest.
	DeploymentPromoteRequest

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}
   591  
// DeploymentPauseRequest is used to pause a deployment
type DeploymentPauseRequest struct {
	// DeploymentID is the ID of the deployment whose pause status is set.
	DeploymentID string

	// Pause sets the pause status
	Pause bool

	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   601  
// DeploymentSpecificRequest is used to make a request specific to a particular
// deployment
type DeploymentSpecificRequest struct {
	// DeploymentID is the ID of the target deployment.
	DeploymentID string
	// Embedded QueryOptions supplies Region and the read-query options and
	// marks this as a read (IsRead returns true).
	QueryOptions
}
   608  
// DeploymentFailRequest is used to fail a particular deployment
type DeploymentFailRequest struct {
	// DeploymentID is the ID of the deployment to fail.
	DeploymentID string
	// Embedded WriteRequest supplies the target Region and marks this as a
	// write (IsRead returns false).
	WriteRequest
}
   614  
// SingleDeploymentResponse is used to respond with a single deployment
type SingleDeploymentResponse struct {
	// Deployment is the returned deployment.
	Deployment *Deployment
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   620  
// GenericResponse is used to respond to a request where no
// specific response information is needed. It carries only the embedded
// WriteMeta (the Index associated with the write).
type GenericResponse struct {
	WriteMeta
}
   626  
// VersionResponse is used for the Status.Version response.
// NOTE(review): Versions is presumably keyed by the ProtocolVersion,
// APIMajorVersion and APIMinorVersion string constants — confirm at the
// Status.Version endpoint.
type VersionResponse struct {
	Build    string
	Versions map[string]int
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   633  
// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	// NOTE(review): this responds to a write yet embeds QueryMeta rather
	// than WriteMeta; changing it would alter the exported shape.
	QueryMeta
}
   646  
// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	// NOTE(review): this responds to a write yet embeds QueryMeta rather
	// than WriteMeta; changing it would alter the exported shape.
	QueryMeta
}
   654  
// JobValidateResponse is the response from a validate request. It embeds
// neither QueryMeta nor WriteMeta.
type JobValidateResponse struct {
	// DriverConfigValidated indicates whether the agent validated the driver
	// config
	DriverConfigValidated bool

	// ValidationErrors is a list of validation errors
	ValidationErrors []string

	// Error is a string version of any error that may have occurred
	Error string

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string
}
   671  
// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	// NOTE(review): HeartbeatTTL is presumably the interval within which the
	// node must heartbeat again — confirm at the endpoint.
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader.  If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response.  This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   694  
// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   702  
// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	// Allocs holds the full allocation objects.
	Allocs []*Allocation
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   708  
// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
	// Allocs maps a string key to a uint64.
	// NOTE(review): presumably allocation ID -> modify index, so a client
	// can tell which allocations changed — confirm at the endpoint.
	Allocs map[string]uint64
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   714  
// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	// Node is the returned node.
	Node *Node
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   720  
// NodeListResponse is used for a list request
type NodeListResponse struct {
	// Nodes holds list stubs (NodeListStub) rather than full Node objects.
	Nodes []*NodeListStub
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   726  
// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	// Job is the returned job.
	Job *Job
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   732  
// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
	// JobSummary is the returned summary.
	JobSummary *JobSummary
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   738  
// JobDispatchResponse reports the result of dispatching a job.
// NOTE(review): presumably the response paired with JobDispatchRequest;
// DispatchedJobID would then be the ID of the newly created job instance —
// confirm at the Job.Dispatch endpoint.
type JobDispatchResponse struct {
	DispatchedJobID string
	EvalID          string
	EvalCreateIndex uint64
	JobCreateIndex  uint64
	// Embedded WriteMeta reports the Index associated with the write.
	WriteMeta
}
   746  
// JobListResponse is used for a list request
type JobListResponse struct {
	// Jobs holds list stubs (JobListStub) rather than full Job objects.
	Jobs []*JobListStub
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   752  
// JobVersionsRequest is used to get a job's versions.
// NOTE(review): Diffs presumably asks for JobVersionsResponse.Diffs to be
// populated — confirm at the endpoint.
type JobVersionsRequest struct {
	JobID string
	Diffs bool
	// Embedded QueryOptions supplies Region and the read-query options and
	// marks this as a read (IsRead returns true).
	QueryOptions
}
   759  
// JobVersionsResponse is used for a job get versions request
type JobVersionsResponse struct {
	Versions []*Job
	Diffs    []*JobDiff
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   766  
// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	// NextPeriodicLaunch is the time (a time.Time instant, not a duration)
	// at which the job would be launched if submitted.
	NextPeriodicLaunch time.Time

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	// Embedded WriteMeta reports the Index associated with the write.
	WriteMeta
}
   798  
// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	// Alloc is the returned allocation.
	Alloc *Allocation
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   804  
// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
	// Allocs holds the full allocation objects.
	Allocs []*Allocation
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   810  
// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	// Allocations holds list stubs (AllocListStub) rather than full
	// Allocation objects.
	Allocations []*AllocListStub
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   816  
// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	// Evaluations holds the full evaluation objects.
	Evaluations []*Evaluation
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   822  
// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	// Eval is the returned evaluation.
	Eval *Evaluation
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   828  
// EvalDequeueResponse is used to return from a dequeue.
// NOTE(review): Token is presumably what the caller must present in a later
// EvalAckRequest for this evaluation — confirm at the endpoint.
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   835  
// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	// Result is the outcome of the submitted plan.
	Result *PlanResult
	// Embedded WriteMeta reports the Index associated with the write.
	WriteMeta
}
   841  
// AllocListResponse is used for a list request
type AllocListResponse struct {
	// Allocations holds list stubs (AllocListStub) rather than full
	// Allocation objects.
	Allocations []*AllocListStub
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   847  
// DeploymentListResponse is used for a list request
type DeploymentListResponse struct {
	// Deployments holds full Deployment objects (no stub type is used here).
	Deployments []*Deployment
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   853  
// EvalListResponse is used for a list request
type EvalListResponse struct {
	// Evaluations holds full Evaluation objects (no stub type is used here).
	Evaluations []*Evaluation
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   859  
// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	// Allocations holds list stubs (AllocListStub) rather than full
	// Allocation objects.
	Allocations []*AllocListStub
	// Embedded QueryMeta reports Index, LastContact and KnownLeader.
	QueryMeta
}
   865  
// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	// Embedded WriteMeta reports the Index associated with the write.
	WriteMeta
}
   872  
// DeploymentUpdateResponse is used to respond to a deployment change. The
// response will include the modify index of the deployment as well as details
// of any triggered evaluation.
type DeploymentUpdateResponse struct {
	EvalID                string
	EvalCreateIndex       uint64
	DeploymentModifyIndex uint64

	// RevertedJobVersion is the version the job was reverted to. If unset, the
	// job wasn't reverted. It is a pointer so that "unset" (nil) can be told
	// apart from version 0.
	RevertedJobVersion *uint64

	// Embedded WriteMeta reports the Index associated with the write.
	WriteMeta
}
   887  
// Node status values. These are the only statuses accepted by
// ValidNodeStatus, and ShouldDrainNode panics on any other value.
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)
   893  
   894  // ShouldDrainNode checks if a given node status should trigger an
   895  // evaluation. Some states don't require any further action.
   896  func ShouldDrainNode(status string) bool {
   897  	switch status {
   898  	case NodeStatusInit, NodeStatusReady:
   899  		return false
   900  	case NodeStatusDown:
   901  		return true
   902  	default:
   903  		panic(fmt.Sprintf("unhandled node status %s", status))
   904  	}
   905  }
   906  
   907  // ValidNodeStatus is used to check if a node status is valid
   908  func ValidNodeStatus(status string) bool {
   909  	switch status {
   910  	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
   911  		return true
   912  	default:
   913  		return false
   914  	}
   915  }
   916  
// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
	TLSEnabled bool

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may be to provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node. The known values are the NodeStatus* constants
	// (see ValidNodeStatus).
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// StatusUpdatedAt is the time stamp at which the state of the node was
	// updated.
	// NOTE(review): the unit/epoch of this int64 is not shown here
	// (presumably Unix seconds) — confirm where it is set.
	StatusUpdatedAt int64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
   995  
   996  // Ready returns if the node is ready for running allocations
   997  func (n *Node) Ready() bool {
   998  	return n.Status == NodeStatusReady && !n.Drain
   999  }
  1000  
  1001  func (n *Node) Copy() *Node {
  1002  	if n == nil {
  1003  		return nil
  1004  	}
  1005  	nn := new(Node)
  1006  	*nn = *n
  1007  	nn.Attributes = helper.CopyMapStringString(nn.Attributes)
  1008  	nn.Resources = nn.Resources.Copy()
  1009  	nn.Reserved = nn.Reserved.Copy()
  1010  	nn.Links = helper.CopyMapStringString(nn.Links)
  1011  	nn.Meta = helper.CopyMapStringString(nn.Meta)
  1012  	return nn
  1013  }
  1014  
  1015  // TerminalStatus returns if the current status is terminal and
  1016  // will no longer transition.
  1017  func (n *Node) TerminalStatus() bool {
  1018  	switch n.Status {
  1019  	case NodeStatusDown:
  1020  		return true
  1021  	default:
  1022  		return false
  1023  	}
  1024  }
  1025  
  1026  // Stub returns a summarized version of the node
  1027  func (n *Node) Stub() *NodeListStub {
  1028  	return &NodeListStub{
  1029  		ID:                n.ID,
  1030  		Datacenter:        n.Datacenter,
  1031  		Name:              n.Name,
  1032  		NodeClass:         n.NodeClass,
  1033  		Drain:             n.Drain,
  1034  		Status:            n.Status,
  1035  		StatusDescription: n.StatusDescription,
  1036  		CreateIndex:       n.CreateIndex,
  1037  		ModifyIndex:       n.ModifyIndex,
  1038  	}
  1039  }
  1040  
// NodeListStub is used to return a subset of node information
// for the node list. It is the type produced by Node.Stub.
type NodeListStub struct {
	ID                string
	Datacenter        string
	Name              string
	NodeClass         string
	Drain             bool
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}
  1054  
  1055  // Networks defined for a task on the Resources struct.
  1056  type Networks []*NetworkResource
  1057  
  1058  // Port assignment and IP for the given label or empty values.
  1059  func (ns Networks) Port(label string) (string, int) {
  1060  	for _, n := range ns {
  1061  		for _, p := range n.ReservedPorts {
  1062  			if p.Label == label {
  1063  				return n.IP, p.Value
  1064  			}
  1065  		}
  1066  		for _, p := range n.DynamicPorts {
  1067  			if p.Label == label {
  1068  				return n.IP, p.Value
  1069  			}
  1070  		}
  1071  	}
  1072  	return "", 0
  1073  }
  1074  
// Resources is used to define the resources available
// on a client
type Resources struct {
	// CPU is the amount of CPU. NOTE(review): the unit is not visible in this
	// part of the file — confirm before relying on it.
	CPU int

	// MemoryMB is the amount of memory, in megabytes going by the field name.
	MemoryMB int

	// DiskMB is the amount of disk in megabytes; DiskInBytes converts it to
	// bytes using BytesInMegabyte.
	DiskMB int

	// IOPS is the I/O operations figure for the resource set (presumably per
	// second, going by the name — confirm).
	IOPS int

	// Networks is the set of network resources.
	Networks Networks
}
  1084  
const (
	// BytesInMegabyte is the number of bytes in one megabyte (1024 * 1024).
	BytesInMegabyte = 1024 * 1024
)
  1088  
  1089  // DefaultResources returns the default resources for a task.
  1090  func DefaultResources() *Resources {
  1091  	return &Resources{
  1092  		CPU:      100,
  1093  		MemoryMB: 10,
  1094  		IOPS:     0,
  1095  	}
  1096  }
  1097  
  1098  // DiskInBytes returns the amount of disk resources in bytes.
  1099  func (r *Resources) DiskInBytes() int64 {
  1100  	return int64(r.DiskMB * BytesInMegabyte)
  1101  }
  1102  
  1103  // Merge merges this resource with another resource.
  1104  func (r *Resources) Merge(other *Resources) {
  1105  	if other.CPU != 0 {
  1106  		r.CPU = other.CPU
  1107  	}
  1108  	if other.MemoryMB != 0 {
  1109  		r.MemoryMB = other.MemoryMB
  1110  	}
  1111  	if other.DiskMB != 0 {
  1112  		r.DiskMB = other.DiskMB
  1113  	}
  1114  	if other.IOPS != 0 {
  1115  		r.IOPS = other.IOPS
  1116  	}
  1117  	if len(other.Networks) != 0 {
  1118  		r.Networks = other.Networks
  1119  	}
  1120  }
  1121  
  1122  func (r *Resources) Canonicalize() {
  1123  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
  1124  	// problems since we use reflect DeepEquals.
  1125  	if len(r.Networks) == 0 {
  1126  		r.Networks = nil
  1127  	}
  1128  
  1129  	for _, n := range r.Networks {
  1130  		n.Canonicalize()
  1131  	}
  1132  }
  1133  
  1134  // MeetsMinResources returns an error if the resources specified are less than
  1135  // the minimum allowed.
  1136  func (r *Resources) MeetsMinResources() error {
  1137  	var mErr multierror.Error
  1138  	if r.CPU < 20 {
  1139  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is 20; got %d", r.CPU))
  1140  	}
  1141  	if r.MemoryMB < 10 {
  1142  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is 10; got %d", r.MemoryMB))
  1143  	}
  1144  	if r.IOPS < 0 {
  1145  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is 0; got %d", r.IOPS))
  1146  	}
  1147  	for i, n := range r.Networks {
  1148  		if err := n.MeetsMinResources(); err != nil {
  1149  			mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err))
  1150  		}
  1151  	}
  1152  
  1153  	return mErr.ErrorOrNil()
  1154  }
  1155  
  1156  // Copy returns a deep copy of the resources
  1157  func (r *Resources) Copy() *Resources {
  1158  	if r == nil {
  1159  		return nil
  1160  	}
  1161  	newR := new(Resources)
  1162  	*newR = *r
  1163  	if r.Networks != nil {
  1164  		n := len(r.Networks)
  1165  		newR.Networks = make([]*NetworkResource, n)
  1166  		for i := 0; i < n; i++ {
  1167  			newR.Networks[i] = r.Networks[i].Copy()
  1168  		}
  1169  	}
  1170  	return newR
  1171  }
  1172  
  1173  // NetIndex finds the matching net index using device name
  1174  func (r *Resources) NetIndex(n *NetworkResource) int {
  1175  	for idx, net := range r.Networks {
  1176  		if net.Device == n.Device {
  1177  			return idx
  1178  		}
  1179  	}
  1180  	return -1
  1181  }
  1182  
  1183  // Superset checks if one set of resources is a superset
  1184  // of another. This ignores network resources, and the NetworkIndex
  1185  // should be used for that.
  1186  func (r *Resources) Superset(other *Resources) (bool, string) {
  1187  	if r.CPU < other.CPU {
  1188  		return false, "cpu exhausted"
  1189  	}
  1190  	if r.MemoryMB < other.MemoryMB {
  1191  		return false, "memory exhausted"
  1192  	}
  1193  	if r.DiskMB < other.DiskMB {
  1194  		return false, "disk exhausted"
  1195  	}
  1196  	if r.IOPS < other.IOPS {
  1197  		return false, "iops exhausted"
  1198  	}
  1199  	return true, ""
  1200  }
  1201  
  1202  // Add adds the resources of the delta to this, potentially
  1203  // returning an error if not possible.
  1204  func (r *Resources) Add(delta *Resources) error {
  1205  	if delta == nil {
  1206  		return nil
  1207  	}
  1208  	r.CPU += delta.CPU
  1209  	r.MemoryMB += delta.MemoryMB
  1210  	r.DiskMB += delta.DiskMB
  1211  	r.IOPS += delta.IOPS
  1212  
  1213  	for _, n := range delta.Networks {
  1214  		// Find the matching interface by IP or CIDR
  1215  		idx := r.NetIndex(n)
  1216  		if idx == -1 {
  1217  			r.Networks = append(r.Networks, n.Copy())
  1218  		} else {
  1219  			r.Networks[idx].Add(n)
  1220  		}
  1221  	}
  1222  	return nil
  1223  }
  1224  
  1225  func (r *Resources) GoString() string {
  1226  	return fmt.Sprintf("*%#v", *r)
  1227  }
  1228  
// Port pairs a port label with its numeric port value. It is the element type
// of NetworkResource.ReservedPorts and NetworkResource.DynamicPorts.
type Port struct {
	Label string
	Value int
}
  1233  
// NetworkResource is used to represent available network
// resources. Resources.NetIndex matches networks on Device.
type NetworkResource struct {
	Device        string // Name of the device
	CIDR          string // CIDR block of addresses
	IP            string // Host IP address
	MBits         int    // Throughput
	ReservedPorts []Port // Host Reserved ports
	DynamicPorts  []Port // Host Dynamically assigned ports
}
  1244  
  1245  func (n *NetworkResource) Canonicalize() {
  1246  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
  1247  	// problems since we use reflect DeepEquals.
  1248  	if len(n.ReservedPorts) == 0 {
  1249  		n.ReservedPorts = nil
  1250  	}
  1251  	if len(n.DynamicPorts) == 0 {
  1252  		n.DynamicPorts = nil
  1253  	}
  1254  }
  1255  
  1256  // MeetsMinResources returns an error if the resources specified are less than
  1257  // the minimum allowed.
  1258  func (n *NetworkResource) MeetsMinResources() error {
  1259  	var mErr multierror.Error
  1260  	if n.MBits < 1 {
  1261  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits))
  1262  	}
  1263  	return mErr.ErrorOrNil()
  1264  }
  1265  
  1266  // Copy returns a deep copy of the network resource
  1267  func (n *NetworkResource) Copy() *NetworkResource {
  1268  	if n == nil {
  1269  		return nil
  1270  	}
  1271  	newR := new(NetworkResource)
  1272  	*newR = *n
  1273  	if n.ReservedPorts != nil {
  1274  		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
  1275  		copy(newR.ReservedPorts, n.ReservedPorts)
  1276  	}
  1277  	if n.DynamicPorts != nil {
  1278  		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
  1279  		copy(newR.DynamicPorts, n.DynamicPorts)
  1280  	}
  1281  	return newR
  1282  }
  1283  
  1284  // Add adds the resources of the delta to this, potentially
  1285  // returning an error if not possible.
  1286  func (n *NetworkResource) Add(delta *NetworkResource) {
  1287  	if len(delta.ReservedPorts) > 0 {
  1288  		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
  1289  	}
  1290  	n.MBits += delta.MBits
  1291  	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
  1292  }
  1293  
  1294  func (n *NetworkResource) GoString() string {
  1295  	return fmt.Sprintf("*%#v", *n)
  1296  }
  1297  
  1298  // PortLabels returns a map of port labels to their assigned host ports.
  1299  func (n *NetworkResource) PortLabels() map[string]int {
  1300  	num := len(n.ReservedPorts) + len(n.DynamicPorts)
  1301  	labelValues := make(map[string]int, num)
  1302  	for _, port := range n.ReservedPorts {
  1303  		labelValues[port.Label] = port.Value
  1304  	}
  1305  	for _, port := range n.DynamicPorts {
  1306  		labelValues[port.Label] = port.Value
  1307  	}
  1308  	return labelValues
  1309  }
  1310  
const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)
  1319  
// Job status values.
const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluations and allocations are terminal
)
  1325  
const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if one is
	// not specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// CoreJobPriority is twice JobMaxPriority, ensuring it is higher than
	// any user specified job so that it gets priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2

	// JobTrackedVersions is the number of historic job versions that are
	// kept.
	JobTrackedVersions = 6
)
  1346  
// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling
// however.
type Job struct {
	// Stop marks whether the user has stopped the job. A stopped job will
	// have all created allocations stopped and acts as a way to stop a job
	// without purging it from the system. This allows existing allocs to be
	// queried and the job to be inspected as it is being killed.
	Stop bool

	// Region is the Nomad region that handles scheduling this job
	Region string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project
	ID string

	// ParentID is the unique identifier of the job that spawned this job.
	ParentID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// COMPAT: Remove in 0.7.0. Stagger is deprecated in 0.6.0.
	// Update is the job-level update strategy; Canonicalize copies a
	// pre-0.6.0 style block down to the task groups.
	Update UpdateStrategy

	// Periodic is used to define the interval the job is run at.
	Periodic *PeriodicConfig

	// ParameterizedJob is used to specify the job as a parameterized job
	// for dispatching.
	ParameterizedJob *ParameterizedJobConfig

	// Payload is the payload supplied when the job was dispatched.
	Payload []byte

	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// VaultToken is the Vault token that proves the submitter of the job has
	// access to the specified Vault policies. This field is only used to
	// transfer the token and is not stored after Job submission.
	VaultToken string

	// Job status
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Stable marks a job as stable. Stability is only defined on "service" and
	// "system" jobs. The stability of a job will be set automatically as part
	// of a deployment and can be manually set via APIs.
	Stable bool

	// Version is a monotonically increasing version number that is incremented
	// on each job register.
	Version uint64

	// SubmitTime is the time at which the job was submitted as a UnixNano in
	// UTC
	SubmitTime int64

	// Raft Indexes
	CreateIndex    uint64
	ModifyIndex    uint64
	JobModifyIndex uint64
}
  1444  
  1445  // Canonicalize is used to canonicalize fields in the Job. This should be called
  1446  // when registering a Job. A set of warnings are returned if the job was changed
  1447  // in anyway that the user should be made aware of.
  1448  func (j *Job) Canonicalize() (warnings error) {
  1449  	var mErr multierror.Error
  1450  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  1451  	// problems since we use reflect DeepEquals.
  1452  	if len(j.Meta) == 0 {
  1453  		j.Meta = nil
  1454  	}
  1455  
  1456  	for _, tg := range j.TaskGroups {
  1457  		tg.Canonicalize(j)
  1458  	}
  1459  
  1460  	if j.ParameterizedJob != nil {
  1461  		j.ParameterizedJob.Canonicalize()
  1462  	}
  1463  
  1464  	if j.Periodic != nil {
  1465  		j.Periodic.Canonicalize()
  1466  	}
  1467  
  1468  	// COMPAT: Remove in 0.7.0
  1469  	// Rewrite any job that has an update block with pre 0.6.0 syntax.
  1470  	jobHasOldUpdate := j.Update.Stagger > 0 && j.Update.MaxParallel > 0
  1471  	if jobHasOldUpdate && j.Type != JobTypeBatch {
  1472  		// Build an appropriate update block and copy it down to each task group
  1473  		base := DefaultUpdateStrategy.Copy()
  1474  		base.MaxParallel = j.Update.MaxParallel
  1475  		base.MinHealthyTime = j.Update.Stagger
  1476  
  1477  		// Add to each task group, modifying as needed
  1478  		upgraded := false
  1479  		l := len(j.TaskGroups)
  1480  		for _, tg := range j.TaskGroups {
  1481  			// The task group doesn't need upgrading if it has an update block with the new syntax
  1482  			u := tg.Update
  1483  			if u != nil && u.Stagger > 0 && u.MaxParallel > 0 &&
  1484  				u.HealthCheck != "" && u.MinHealthyTime > 0 && u.HealthyDeadline > 0 {
  1485  				continue
  1486  			}
  1487  
  1488  			upgraded = true
  1489  
  1490  			// The MaxParallel for the job should be 10% of the total count
  1491  			// unless there is just one task group then we can infer the old
  1492  			// max parallel should be the new
  1493  			tgu := base.Copy()
  1494  			if l != 1 {
  1495  				// RoundTo 10%
  1496  				var percent float64 = float64(tg.Count) * 0.1
  1497  				tgu.MaxParallel = int(percent + 0.5)
  1498  			}
  1499  
  1500  			// Safety guards
  1501  			if tgu.MaxParallel == 0 {
  1502  				tgu.MaxParallel = 1
  1503  			} else if tgu.MaxParallel > tg.Count {
  1504  				tgu.MaxParallel = tg.Count
  1505  			}
  1506  
  1507  			tg.Update = tgu
  1508  		}
  1509  
  1510  		if upgraded {
  1511  			w := "A best effort conversion to new update stanza introduced in v0.6.0 applied. " +
  1512  				"Please update upgrade stanza before v0.7.0."
  1513  			multierror.Append(&mErr, fmt.Errorf(w))
  1514  		}
  1515  	}
  1516  
  1517  	// Ensure that the batch job doesn't have new style or old style update
  1518  	// stanza. Unfortunately are scanning here because we have to deprecate over
  1519  	// a release so we can't check in the task group since that may be new style
  1520  	// but wouldn't capture the old style and we don't want to have duplicate
  1521  	// warnings.
  1522  	if j.Type == JobTypeBatch {
  1523  		displayWarning := jobHasOldUpdate
  1524  		j.Update.Stagger = 0
  1525  		j.Update.MaxParallel = 0
  1526  		j.Update.HealthCheck = ""
  1527  		j.Update.MinHealthyTime = 0
  1528  		j.Update.HealthyDeadline = 0
  1529  		j.Update.AutoRevert = false
  1530  		j.Update.Canary = 0
  1531  
  1532  		// Remove any update spec from the task groups
  1533  		for _, tg := range j.TaskGroups {
  1534  			if tg.Update != nil {
  1535  				displayWarning = true
  1536  				tg.Update = nil
  1537  			}
  1538  		}
  1539  
  1540  		if displayWarning {
  1541  			w := "Update stanza is disallowed for batch jobs since v0.6.0. " +
  1542  				"The update block has automatically been removed"
  1543  			multierror.Append(&mErr, fmt.Errorf(w))
  1544  		}
  1545  	}
  1546  
  1547  	return mErr.ErrorOrNil()
  1548  }
  1549  
  1550  // Copy returns a deep copy of the Job. It is expected that callers use recover.
  1551  // This job can panic if the deep copy failed as it uses reflection.
  1552  func (j *Job) Copy() *Job {
  1553  	if j == nil {
  1554  		return nil
  1555  	}
  1556  	nj := new(Job)
  1557  	*nj = *j
  1558  	nj.Datacenters = helper.CopySliceString(nj.Datacenters)
  1559  	nj.Constraints = CopySliceConstraints(nj.Constraints)
  1560  
  1561  	if j.TaskGroups != nil {
  1562  		tgs := make([]*TaskGroup, len(nj.TaskGroups))
  1563  		for i, tg := range nj.TaskGroups {
  1564  			tgs[i] = tg.Copy()
  1565  		}
  1566  		nj.TaskGroups = tgs
  1567  	}
  1568  
  1569  	nj.Periodic = nj.Periodic.Copy()
  1570  	nj.Meta = helper.CopyMapStringString(nj.Meta)
  1571  	nj.ParameterizedJob = nj.ParameterizedJob.Copy()
  1572  	return nj
  1573  }
  1574  
  1575  // Validate is used to sanity check a job input
  1576  func (j *Job) Validate() error {
  1577  	var mErr multierror.Error
  1578  
  1579  	if j.Region == "" {
  1580  		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
  1581  	}
  1582  	if j.ID == "" {
  1583  		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
  1584  	} else if strings.Contains(j.ID, " ") {
  1585  		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
  1586  	}
  1587  	if j.Name == "" {
  1588  		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
  1589  	}
  1590  	switch j.Type {
  1591  	case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem:
  1592  	case "":
  1593  		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
  1594  	default:
  1595  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type))
  1596  	}
  1597  	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
  1598  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
  1599  	}
  1600  	if len(j.Datacenters) == 0 {
  1601  		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
  1602  	}
  1603  	if len(j.TaskGroups) == 0 {
  1604  		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
  1605  	}
  1606  	for idx, constr := range j.Constraints {
  1607  		if err := constr.Validate(); err != nil {
  1608  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  1609  			mErr.Errors = append(mErr.Errors, outer)
  1610  		}
  1611  	}
  1612  
  1613  	// Check for duplicate task groups
  1614  	taskGroups := make(map[string]int)
  1615  	for idx, tg := range j.TaskGroups {
  1616  		if tg.Name == "" {
  1617  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
  1618  		} else if existing, ok := taskGroups[tg.Name]; ok {
  1619  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
  1620  		} else {
  1621  			taskGroups[tg.Name] = idx
  1622  		}
  1623  
  1624  		if j.Type == "system" && tg.Count > 1 {
  1625  			mErr.Errors = append(mErr.Errors,
  1626  				fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler",
  1627  					tg.Name, tg.Count))
  1628  		}
  1629  	}
  1630  
  1631  	// Validate the task group
  1632  	for _, tg := range j.TaskGroups {
  1633  		if err := tg.Validate(j); err != nil {
  1634  			outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err)
  1635  			mErr.Errors = append(mErr.Errors, outer)
  1636  		}
  1637  	}
  1638  
  1639  	// Validate periodic is only used with batch jobs.
  1640  	if j.IsPeriodic() && j.Periodic.Enabled {
  1641  		if j.Type != JobTypeBatch {
  1642  			mErr.Errors = append(mErr.Errors,
  1643  				fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch))
  1644  		}
  1645  
  1646  		if err := j.Periodic.Validate(); err != nil {
  1647  			mErr.Errors = append(mErr.Errors, err)
  1648  		}
  1649  	}
  1650  
  1651  	if j.IsParameterized() {
  1652  		if j.Type != JobTypeBatch {
  1653  			mErr.Errors = append(mErr.Errors,
  1654  				fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch))
  1655  		}
  1656  
  1657  		if err := j.ParameterizedJob.Validate(); err != nil {
  1658  			mErr.Errors = append(mErr.Errors, err)
  1659  		}
  1660  	}
  1661  
  1662  	return mErr.ErrorOrNil()
  1663  }
  1664  
  1665  // Warnings returns a list of warnings that may be from dubious settings or
  1666  // deprecation warnings.
  1667  func (j *Job) Warnings() error {
  1668  	var mErr multierror.Error
  1669  
  1670  	// Check the groups
  1671  	for _, tg := range j.TaskGroups {
  1672  		if err := tg.Warnings(j); err != nil {
  1673  			outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err)
  1674  			mErr.Errors = append(mErr.Errors, outer)
  1675  		}
  1676  	}
  1677  
  1678  	return mErr.ErrorOrNil()
  1679  }
  1680  
  1681  // LookupTaskGroup finds a task group by name
  1682  func (j *Job) LookupTaskGroup(name string) *TaskGroup {
  1683  	for _, tg := range j.TaskGroups {
  1684  		if tg.Name == name {
  1685  			return tg
  1686  		}
  1687  	}
  1688  	return nil
  1689  }
  1690  
  1691  // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined
  1692  // meta data for the task. When joining Job, Group and Task Meta, the precedence
  1693  // is by deepest scope (Task > Group > Job).
  1694  func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string {
  1695  	group := j.LookupTaskGroup(groupName)
  1696  	if group == nil {
  1697  		return nil
  1698  	}
  1699  
  1700  	task := group.LookupTask(taskName)
  1701  	if task == nil {
  1702  		return nil
  1703  	}
  1704  
  1705  	meta := helper.CopyMapStringString(task.Meta)
  1706  	if meta == nil {
  1707  		meta = make(map[string]string, len(group.Meta)+len(j.Meta))
  1708  	}
  1709  
  1710  	// Add the group specific meta
  1711  	for k, v := range group.Meta {
  1712  		if _, ok := meta[k]; !ok {
  1713  			meta[k] = v
  1714  		}
  1715  	}
  1716  
  1717  	// Add the job specific meta
  1718  	for k, v := range j.Meta {
  1719  		if _, ok := meta[k]; !ok {
  1720  			meta[k] = v
  1721  		}
  1722  	}
  1723  
  1724  	return meta
  1725  }
  1726  
  1727  // Stopped returns if a job is stopped.
  1728  func (j *Job) Stopped() bool {
  1729  	return j == nil || j.Stop
  1730  }
  1731  
  1732  // HasUpdateStrategy returns if any task group in the job has an update strategy
  1733  func (j *Job) HasUpdateStrategy() bool {
  1734  	for _, tg := range j.TaskGroups {
  1735  		if tg.Update != nil {
  1736  			return true
  1737  		}
  1738  	}
  1739  
  1740  	return false
  1741  }
  1742  
  1743  // Stub is used to return a summary of the job
  1744  func (j *Job) Stub(summary *JobSummary) *JobListStub {
  1745  	return &JobListStub{
  1746  		ID:                j.ID,
  1747  		ParentID:          j.ParentID,
  1748  		Name:              j.Name,
  1749  		Type:              j.Type,
  1750  		Priority:          j.Priority,
  1751  		Periodic:          j.IsPeriodic(),
  1752  		ParameterizedJob:  j.IsParameterized(),
  1753  		Stop:              j.Stop,
  1754  		Status:            j.Status,
  1755  		StatusDescription: j.StatusDescription,
  1756  		CreateIndex:       j.CreateIndex,
  1757  		ModifyIndex:       j.ModifyIndex,
  1758  		JobModifyIndex:    j.JobModifyIndex,
  1759  		SubmitTime:        j.SubmitTime,
  1760  		JobSummary:        summary,
  1761  	}
  1762  }
  1763  
  1764  // IsPeriodic returns whether a job is periodic.
  1765  func (j *Job) IsPeriodic() bool {
  1766  	return j.Periodic != nil
  1767  }
  1768  
  1769  // IsParameterized returns whether a job is parameterized job.
  1770  func (j *Job) IsParameterized() bool {
  1771  	return j.ParameterizedJob != nil
  1772  }
  1773  
  1774  // VaultPolicies returns the set of Vault policies per task group, per task
  1775  func (j *Job) VaultPolicies() map[string]map[string]*Vault {
  1776  	policies := make(map[string]map[string]*Vault, len(j.TaskGroups))
  1777  
  1778  	for _, tg := range j.TaskGroups {
  1779  		tgPolicies := make(map[string]*Vault, len(tg.Tasks))
  1780  
  1781  		for _, task := range tg.Tasks {
  1782  			if task.Vault == nil {
  1783  				continue
  1784  			}
  1785  
  1786  			tgPolicies[task.Name] = task.Vault
  1787  		}
  1788  
  1789  		if len(tgPolicies) != 0 {
  1790  			policies[tg.Name] = tgPolicies
  1791  		}
  1792  	}
  1793  
  1794  	return policies
  1795  }
  1796  
  1797  // RequiredSignals returns a mapping of task groups to tasks to their required
  1798  // set of signals
  1799  func (j *Job) RequiredSignals() map[string]map[string][]string {
  1800  	signals := make(map[string]map[string][]string)
  1801  
  1802  	for _, tg := range j.TaskGroups {
  1803  		for _, task := range tg.Tasks {
  1804  			// Use this local one as a set
  1805  			taskSignals := make(map[string]struct{})
  1806  
  1807  			// Check if the Vault change mode uses signals
  1808  			if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal {
  1809  				taskSignals[task.Vault.ChangeSignal] = struct{}{}
  1810  			}
  1811  
  1812  			// Check if any template change mode uses signals
  1813  			for _, t := range task.Templates {
  1814  				if t.ChangeMode != TemplateChangeModeSignal {
  1815  					continue
  1816  				}
  1817  
  1818  				taskSignals[t.ChangeSignal] = struct{}{}
  1819  			}
  1820  
  1821  			// Flatten and sort the signals
  1822  			l := len(taskSignals)
  1823  			if l == 0 {
  1824  				continue
  1825  			}
  1826  
  1827  			flat := make([]string, 0, l)
  1828  			for sig := range taskSignals {
  1829  				flat = append(flat, sig)
  1830  			}
  1831  
  1832  			sort.Strings(flat)
  1833  			tgSignals, ok := signals[tg.Name]
  1834  			if !ok {
  1835  				tgSignals = make(map[string][]string)
  1836  				signals[tg.Name] = tgSignals
  1837  			}
  1838  			tgSignals[task.Name] = flat
  1839  		}
  1840  
  1841  	}
  1842  
  1843  	return signals
  1844  }
  1845  
  1846  // SpecChanged determines if the functional specification has changed between
  1847  // two job versions.
  1848  func (j *Job) SpecChanged(new *Job) bool {
  1849  	if j == nil {
  1850  		return new != nil
  1851  	}
  1852  
  1853  	// Create a copy of the new job
  1854  	c := new.Copy()
  1855  
  1856  	// Update the new job so we can do a reflect
  1857  	c.Status = j.Status
  1858  	c.StatusDescription = j.StatusDescription
  1859  	c.Stable = j.Stable
  1860  	c.Version = j.Version
  1861  	c.CreateIndex = j.CreateIndex
  1862  	c.ModifyIndex = j.ModifyIndex
  1863  	c.JobModifyIndex = j.JobModifyIndex
  1864  	c.SubmitTime = j.SubmitTime
  1865  
  1866  	// Deep equals the jobs
  1867  	return !reflect.DeepEqual(j, c)
  1868  }
  1869  
  1870  func (j *Job) SetSubmitTime() {
  1871  	j.SubmitTime = time.Now().UTC().UnixNano()
  1872  }
  1873  
// JobListStub is used to return a subset of job information
// for the job list. It carries the job's identifying and status fields, its
// summary and its index values.
type JobListStub struct {
	ID                string
	ParentID          string
	Name              string
	Type              string
	Priority          int
	Periodic          bool
	ParameterizedJob  bool
	Stop              bool
	Status            string
	StatusDescription string
	JobSummary        *JobSummary
	CreateIndex       uint64
	ModifyIndex       uint64
	JobModifyIndex    uint64
	SubmitTime        int64
}
  1893  
// JobSummary summarizes the state of the allocations of a job
type JobSummary struct {
	JobID string

	// Summary contains the summary per task group for the Job, keyed by a
	// string (presumably the task group name; confirm against callers).
	Summary map[string]TaskGroupSummary

	// Children contains a summary for the children of this job.
	Children *JobChildrenSummary

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1908  
  1909  // Copy returns a new copy of JobSummary
  1910  func (js *JobSummary) Copy() *JobSummary {
  1911  	newJobSummary := new(JobSummary)
  1912  	*newJobSummary = *js
  1913  	newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary))
  1914  	for k, v := range js.Summary {
  1915  		newTGSummary[k] = v
  1916  	}
  1917  	newJobSummary.Summary = newTGSummary
  1918  	newJobSummary.Children = newJobSummary.Children.Copy()
  1919  	return newJobSummary
  1920  }
  1921  
  1922  // JobChildrenSummary contains the summary of children job statuses
  1923  type JobChildrenSummary struct {
  1924  	Pending int64
  1925  	Running int64
  1926  	Dead    int64
  1927  }
  1928  
  1929  // Copy returns a new copy of a JobChildrenSummary
  1930  func (jc *JobChildrenSummary) Copy() *JobChildrenSummary {
  1931  	if jc == nil {
  1932  		return nil
  1933  	}
  1934  
  1935  	njc := new(JobChildrenSummary)
  1936  	*njc = *jc
  1937  	return njc
  1938  }
  1939  
// TaskGroupSummary summarizes the state of all the allocations of a
// particular TaskGroup. Each field is presumably the number of allocations in
// the named state (confirm against the code that populates it).
type TaskGroupSummary struct {
	Queued   int
	Complete int
	Failed   int
	Running  int
	Starting int
	Lost     int
}
  1950  
// Accepted values for UpdateStrategy.HealthCheck.
const (
	// Checks uses any registered health check state in combination with task
	// states to determine if an allocation is healthy.
	UpdateStrategyHealthCheck_Checks = "checks"

	// TaskStates uses the task states of an allocation to determine if the
	// allocation is healthy.
	UpdateStrategyHealthCheck_TaskStates = "task_states"

	// Manual allows the operator to manually signal to Nomad when an
	// allocation is healthy. This allows more advanced health checking that is
	// outside of the scope of Nomad.
	UpdateStrategyHealthCheck_Manual = "manual"
)
  1965  
var (
	// DefaultUpdateStrategy provides a baseline that can be used to upgrade
	// jobs with the old policy or for populating field defaults. It is a
	// shared pointer, so use Copy before modifying it.
	DefaultUpdateStrategy = &UpdateStrategy{
		Stagger:         30 * time.Second,
		MaxParallel:     0,
		HealthCheck:     UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 5 * time.Minute,
		AutoRevert:      false,
		Canary:          0,
	}
)
  1979  
  1980  // UpdateStrategy is used to modify how updates are done
  1981  type UpdateStrategy struct {
  1982  	// Stagger is used to determine the rate at which allocations are migrated
  1983  	// due to down or draining nodes.
  1984  	Stagger time.Duration
  1985  
  1986  	// MaxParallel is how many updates can be done in parallel
  1987  	MaxParallel int
  1988  
  1989  	// HealthCheck specifies the mechanism in which allocations are marked
  1990  	// healthy or unhealthy as part of a deployment.
  1991  	HealthCheck string
  1992  
  1993  	// MinHealthyTime is the minimum time an allocation must be in the healthy
  1994  	// state before it is marked as healthy, unblocking more alllocations to be
  1995  	// rolled.
  1996  	MinHealthyTime time.Duration
  1997  
  1998  	// HealthyDeadline is the time in which an allocation must be marked as
  1999  	// healthy before it is automatically transistioned to unhealthy. This time
  2000  	// period doesn't count against the MinHealthyTime.
  2001  	HealthyDeadline time.Duration
  2002  
  2003  	// AutoRevert declares that if a deployment fails because of unhealthy
  2004  	// allocations, there should be an attempt to auto-revert the job to a
  2005  	// stable version.
  2006  	AutoRevert bool
  2007  
  2008  	// Canary is the number of canaries to deploy when a change to the task
  2009  	// group is detected.
  2010  	Canary int
  2011  }
  2012  
  2013  func (u *UpdateStrategy) Copy() *UpdateStrategy {
  2014  	if u == nil {
  2015  		return nil
  2016  	}
  2017  
  2018  	copy := new(UpdateStrategy)
  2019  	*copy = *u
  2020  	return copy
  2021  }
  2022  
  2023  func (u *UpdateStrategy) Validate() error {
  2024  	if u == nil {
  2025  		return nil
  2026  	}
  2027  
  2028  	var mErr multierror.Error
  2029  	switch u.HealthCheck {
  2030  	case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual:
  2031  	default:
  2032  		multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck))
  2033  	}
  2034  
  2035  	if u.MaxParallel < 0 {
  2036  		multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than zero: %d < 0", u.MaxParallel))
  2037  	}
  2038  	if u.Canary < 0 {
  2039  		multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary))
  2040  	}
  2041  	if u.MinHealthyTime < 0 {
  2042  		multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime))
  2043  	}
  2044  	if u.HealthyDeadline <= 0 {
  2045  		multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline))
  2046  	}
  2047  	if u.MinHealthyTime >= u.HealthyDeadline {
  2048  		multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v > %v", u.MinHealthyTime, u.HealthyDeadline))
  2049  	}
  2050  	if u.Stagger <= 0 {
  2051  		multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger))
  2052  	}
  2053  
  2054  	return mErr.ErrorOrNil()
  2055  }
  2056  
  2057  // TODO(alexdadgar): Remove once no longer used by the scheduler.
  2058  // Rolling returns if a rolling strategy should be used
  2059  func (u *UpdateStrategy) Rolling() bool {
  2060  	return u.Stagger > 0 && u.MaxParallel > 0
  2061  }
  2062  
// Accepted values for PeriodicConfig.SpecType.
const (
	// PeriodicSpecCron is used for a cron spec.
	PeriodicSpecCron = "cron"

	// PeriodicSpecTest is only used by unit tests. It is a sorted, comma
	// separated list of unix timestamps at which to launch.
	PeriodicSpecTest = "_internal_test"
)
  2071  
  2072  // Periodic defines the interval a job should be run at.
  2073  type PeriodicConfig struct {
  2074  	// Enabled determines if the job should be run periodically.
  2075  	Enabled bool
  2076  
  2077  	// Spec specifies the interval the job should be run as. It is parsed based
  2078  	// on the SpecType.
  2079  	Spec string
  2080  
  2081  	// SpecType defines the format of the spec.
  2082  	SpecType string
  2083  
  2084  	// ProhibitOverlap enforces that spawned jobs do not run in parallel.
  2085  	ProhibitOverlap bool
  2086  
  2087  	// TimeZone is the user specified string that determines the time zone to
  2088  	// launch against. The time zones must be specified from IANA Time Zone
  2089  	// database, such as "America/New_York".
  2090  	// Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
  2091  	// Reference: https://www.iana.org/time-zones
  2092  	TimeZone string
  2093  
  2094  	// location is the time zone to evaluate the launch time against
  2095  	location *time.Location
  2096  }
  2097  
  2098  func (p *PeriodicConfig) Copy() *PeriodicConfig {
  2099  	if p == nil {
  2100  		return nil
  2101  	}
  2102  	np := new(PeriodicConfig)
  2103  	*np = *p
  2104  	return np
  2105  }
  2106  
  2107  func (p *PeriodicConfig) Validate() error {
  2108  	if !p.Enabled {
  2109  		return nil
  2110  	}
  2111  
  2112  	var mErr multierror.Error
  2113  	if p.Spec == "" {
  2114  		multierror.Append(&mErr, fmt.Errorf("Must specify a spec"))
  2115  	}
  2116  
  2117  	// Check if we got a valid time zone
  2118  	if p.TimeZone != "" {
  2119  		if _, err := time.LoadLocation(p.TimeZone); err != nil {
  2120  			multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err))
  2121  		}
  2122  	}
  2123  
  2124  	switch p.SpecType {
  2125  	case PeriodicSpecCron:
  2126  		// Validate the cron spec
  2127  		if _, err := cronexpr.Parse(p.Spec); err != nil {
  2128  			multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err))
  2129  		}
  2130  	case PeriodicSpecTest:
  2131  		// No-op
  2132  	default:
  2133  		multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType))
  2134  	}
  2135  
  2136  	return mErr.ErrorOrNil()
  2137  }
  2138  
  2139  func (p *PeriodicConfig) Canonicalize() {
  2140  	// Load the location
  2141  	l, err := time.LoadLocation(p.TimeZone)
  2142  	if err != nil {
  2143  		p.location = time.UTC
  2144  	}
  2145  
  2146  	p.location = l
  2147  }
  2148  
  2149  // Next returns the closest time instant matching the spec that is after the
  2150  // passed time. If no matching instance exists, the zero value of time.Time is
  2151  // returned. The `time.Location` of the returned value matches that of the
  2152  // passed time.
  2153  func (p *PeriodicConfig) Next(fromTime time.Time) time.Time {
  2154  	switch p.SpecType {
  2155  	case PeriodicSpecCron:
  2156  		if e, err := cronexpr.Parse(p.Spec); err == nil {
  2157  			return e.Next(fromTime)
  2158  		}
  2159  	case PeriodicSpecTest:
  2160  		split := strings.Split(p.Spec, ",")
  2161  		if len(split) == 1 && split[0] == "" {
  2162  			return time.Time{}
  2163  		}
  2164  
  2165  		// Parse the times
  2166  		times := make([]time.Time, len(split))
  2167  		for i, s := range split {
  2168  			unix, err := strconv.Atoi(s)
  2169  			if err != nil {
  2170  				return time.Time{}
  2171  			}
  2172  
  2173  			times[i] = time.Unix(int64(unix), 0)
  2174  		}
  2175  
  2176  		// Find the next match
  2177  		for _, next := range times {
  2178  			if fromTime.Before(next) {
  2179  				return next
  2180  			}
  2181  		}
  2182  	}
  2183  
  2184  	return time.Time{}
  2185  }
  2186  
  2187  // GetLocation returns the location to use for determining the time zone to run
  2188  // the periodic job against.
  2189  func (p *PeriodicConfig) GetLocation() *time.Location {
  2190  	// Jobs pre 0.5.5 will not have this
  2191  	if p.location != nil {
  2192  		return p.location
  2193  	}
  2194  
  2195  	return time.UTC
  2196  }
  2197  
const (
	// PeriodicLaunchSuffix is the string appended to the periodic job's ID
	// when launching derived instances of it.
	PeriodicLaunchSuffix = "/periodic-"
)
  2203  
// PeriodicLaunch tracks the last launch time of a periodic job, together with
// the Raft indexes of the record.
type PeriodicLaunch struct {
	ID     string    // ID of the periodic job.
	Launch time.Time // The last launch time.

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  2213  
const (
	// Accepted values for ParameterizedJobConfig.Payload.
	DispatchPayloadForbidden = "forbidden"
	DispatchPayloadOptional  = "optional"
	DispatchPayloadRequired  = "required"

	// DispatchLaunchSuffix is the string appended to the parameterized job's ID
	// when dispatching instances of it.
	DispatchLaunchSuffix = "/dispatch-"
)
  2223  
// ParameterizedJobConfig is used to configure the parameterized job
type ParameterizedJobConfig struct {
	// Payload configures the payload requirements. It must be one of the
	// DispatchPayload* values.
	Payload string

	// MetaRequired is metadata keys that must be specified by the dispatcher
	MetaRequired []string

	// MetaOptional is metadata keys that may be specified by the dispatcher
	MetaOptional []string
}
  2235  
  2236  func (d *ParameterizedJobConfig) Validate() error {
  2237  	var mErr multierror.Error
  2238  	switch d.Payload {
  2239  	case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden:
  2240  	default:
  2241  		multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload))
  2242  	}
  2243  
  2244  	// Check that the meta configurations are disjoint sets
  2245  	disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional)
  2246  	if !disjoint {
  2247  		multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending))
  2248  	}
  2249  
  2250  	return mErr.ErrorOrNil()
  2251  }
  2252  
  2253  func (d *ParameterizedJobConfig) Canonicalize() {
  2254  	if d.Payload == "" {
  2255  		d.Payload = DispatchPayloadOptional
  2256  	}
  2257  }
  2258  
  2259  func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig {
  2260  	if d == nil {
  2261  		return nil
  2262  	}
  2263  	nd := new(ParameterizedJobConfig)
  2264  	*nd = *d
  2265  	nd.MetaOptional = helper.CopySliceString(nd.MetaOptional)
  2266  	nd.MetaRequired = helper.CopySliceString(nd.MetaRequired)
  2267  	return nd
  2268  }
  2269  
  2270  // DispatchedID returns an ID appropriate for a job dispatched against a
  2271  // particular parameterized job
  2272  func DispatchedID(templateID string, t time.Time) string {
  2273  	u := GenerateUUID()[:8]
  2274  	return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u)
  2275  }
  2276  
  2277  // DispatchPayloadConfig configures how a task gets its input from a job dispatch
  2278  type DispatchPayloadConfig struct {
  2279  	// File specifies a relative path to where the input data should be written
  2280  	File string
  2281  }
  2282  
  2283  func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig {
  2284  	if d == nil {
  2285  		return nil
  2286  	}
  2287  	nd := new(DispatchPayloadConfig)
  2288  	*nd = *d
  2289  	return nd
  2290  }
  2291  
  2292  func (d *DispatchPayloadConfig) Validate() error {
  2293  	// Verify the destination doesn't escape
  2294  	escaped, err := PathEscapesAllocDir("task/local/", d.File)
  2295  	if err != nil {
  2296  		return fmt.Errorf("invalid destination path: %v", err)
  2297  	} else if escaped {
  2298  		return fmt.Errorf("destination escapes allocation directory")
  2299  	}
  2300  
  2301  	return nil
  2302  }
  2303  
// Default restart policies handed out (as copies) by NewRestartPolicy.
var (
	// defaultServiceJobRestartPolicy is used for service and system jobs.
	defaultServiceJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 2,
		Interval: 1 * time.Minute,
		Mode:     RestartPolicyModeDelay,
	}
	// defaultBatchJobRestartPolicy is used for batch jobs.
	defaultBatchJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 15,
		Interval: 7 * 24 * time.Hour,
		Mode:     RestartPolicyModeDelay,
	}
)
  2318  
const (
	// RestartPolicyModeDelay causes an artificial delay until the next interval
	// is reached when the specified attempts have been reached in the interval.
	RestartPolicyModeDelay = "delay"

	// RestartPolicyModeFail causes a job to fail if the specified number of
	// attempts are reached within an interval.
	RestartPolicyModeFail = "fail"

	// RestartPolicyMinInterval is the minimum interval that is accepted for a
	// restart policy.
	RestartPolicyMinInterval = 5 * time.Second
)
  2332  
  2333  // RestartPolicy configures how Tasks are restarted when they crash or fail.
  2334  type RestartPolicy struct {
  2335  	// Attempts is the number of restart that will occur in an interval.
  2336  	Attempts int
  2337  
  2338  	// Interval is a duration in which we can limit the number of restarts
  2339  	// within.
  2340  	Interval time.Duration
  2341  
  2342  	// Delay is the time between a failure and a restart.
  2343  	Delay time.Duration
  2344  
  2345  	// Mode controls what happens when the task restarts more than attempt times
  2346  	// in an interval.
  2347  	Mode string
  2348  }
  2349  
  2350  func (r *RestartPolicy) Copy() *RestartPolicy {
  2351  	if r == nil {
  2352  		return nil
  2353  	}
  2354  	nrp := new(RestartPolicy)
  2355  	*nrp = *r
  2356  	return nrp
  2357  }
  2358  
  2359  func (r *RestartPolicy) Validate() error {
  2360  	var mErr multierror.Error
  2361  	switch r.Mode {
  2362  	case RestartPolicyModeDelay, RestartPolicyModeFail:
  2363  	default:
  2364  		multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode))
  2365  	}
  2366  
  2367  	// Check for ambiguous/confusing settings
  2368  	if r.Attempts == 0 && r.Mode != RestartPolicyModeFail {
  2369  		multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts))
  2370  	}
  2371  
  2372  	if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() {
  2373  		multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval))
  2374  	}
  2375  	if time.Duration(r.Attempts)*r.Delay > r.Interval {
  2376  		multierror.Append(&mErr,
  2377  			fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay))
  2378  	}
  2379  	return mErr.ErrorOrNil()
  2380  }
  2381  
  2382  func NewRestartPolicy(jobType string) *RestartPolicy {
  2383  	switch jobType {
  2384  	case JobTypeService, JobTypeSystem:
  2385  		rp := defaultServiceJobRestartPolicy
  2386  		return &rp
  2387  	case JobTypeBatch:
  2388  		rp := defaultBatchJobRestartPolicy
  2389  		return &rp
  2390  	}
  2391  	return nil
  2392  }
  2393  
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled.
	Count int

	// Update is used to control the update strategy for this task group
	Update *UpdateStrategy

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// RestartPolicy of a TaskGroup
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// EphemeralDisk is the disk resources that the task group requests
	EphemeralDisk *EphemeralDisk

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string
}
  2425  
  2426  func (tg *TaskGroup) Copy() *TaskGroup {
  2427  	if tg == nil {
  2428  		return nil
  2429  	}
  2430  	ntg := new(TaskGroup)
  2431  	*ntg = *tg
  2432  	ntg.Update = ntg.Update.Copy()
  2433  	ntg.Constraints = CopySliceConstraints(ntg.Constraints)
  2434  	ntg.RestartPolicy = ntg.RestartPolicy.Copy()
  2435  
  2436  	if tg.Tasks != nil {
  2437  		tasks := make([]*Task, len(ntg.Tasks))
  2438  		for i, t := range ntg.Tasks {
  2439  			tasks[i] = t.Copy()
  2440  		}
  2441  		ntg.Tasks = tasks
  2442  	}
  2443  
  2444  	ntg.Meta = helper.CopyMapStringString(ntg.Meta)
  2445  
  2446  	if tg.EphemeralDisk != nil {
  2447  		ntg.EphemeralDisk = tg.EphemeralDisk.Copy()
  2448  	}
  2449  	return ntg
  2450  }
  2451  
  2452  // Canonicalize is used to canonicalize fields in the TaskGroup.
  2453  func (tg *TaskGroup) Canonicalize(job *Job) {
  2454  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  2455  	// problems since we use reflect DeepEquals.
  2456  	if len(tg.Meta) == 0 {
  2457  		tg.Meta = nil
  2458  	}
  2459  
  2460  	// Set the default restart policy.
  2461  	if tg.RestartPolicy == nil {
  2462  		tg.RestartPolicy = NewRestartPolicy(job.Type)
  2463  	}
  2464  
  2465  	// Set a default ephemeral disk object if the user has not requested for one
  2466  	if tg.EphemeralDisk == nil {
  2467  		tg.EphemeralDisk = DefaultEphemeralDisk()
  2468  	}
  2469  
  2470  	for _, task := range tg.Tasks {
  2471  		task.Canonicalize(job, tg)
  2472  	}
  2473  
  2474  	// Add up the disk resources to EphemeralDisk. This is done so that users
  2475  	// are not required to move their disk attribute from resources to
  2476  	// EphemeralDisk section of the job spec in Nomad 0.5
  2477  	// COMPAT 0.4.1 -> 0.5
  2478  	// Remove in 0.6
  2479  	var diskMB int
  2480  	for _, task := range tg.Tasks {
  2481  		diskMB += task.Resources.DiskMB
  2482  	}
  2483  	if diskMB > 0 {
  2484  		tg.EphemeralDisk.SizeMB = diskMB
  2485  	}
  2486  }
  2487  
  2488  // Validate is used to sanity check a task group
  2489  func (tg *TaskGroup) Validate(j *Job) error {
  2490  	var mErr multierror.Error
  2491  	if tg.Name == "" {
  2492  		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
  2493  	}
  2494  	if tg.Count < 0 {
  2495  		mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative"))
  2496  	}
  2497  	if len(tg.Tasks) == 0 {
  2498  		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
  2499  	}
  2500  	for idx, constr := range tg.Constraints {
  2501  		if err := constr.Validate(); err != nil {
  2502  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  2503  			mErr.Errors = append(mErr.Errors, outer)
  2504  		}
  2505  	}
  2506  
  2507  	if tg.RestartPolicy != nil {
  2508  		if err := tg.RestartPolicy.Validate(); err != nil {
  2509  			mErr.Errors = append(mErr.Errors, err)
  2510  		}
  2511  	} else {
  2512  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
  2513  	}
  2514  
  2515  	if tg.EphemeralDisk != nil {
  2516  		if err := tg.EphemeralDisk.Validate(); err != nil {
  2517  			mErr.Errors = append(mErr.Errors, err)
  2518  		}
  2519  	} else {
  2520  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name))
  2521  	}
  2522  
  2523  	// Validate the update strategy
  2524  	if u := tg.Update; u != nil {
  2525  		switch j.Type {
  2526  		case JobTypeService, JobTypeSystem:
  2527  		default:
  2528  			// COMPAT: Enable in 0.7.0
  2529  			//mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type))
  2530  		}
  2531  		if err := u.Validate(); err != nil {
  2532  			mErr.Errors = append(mErr.Errors, err)
  2533  		}
  2534  	}
  2535  
  2536  	// Check for duplicate tasks, that there is only leader task if any,
  2537  	// and no duplicated static ports
  2538  	tasks := make(map[string]int)
  2539  	staticPorts := make(map[int]string)
  2540  	leaderTasks := 0
  2541  	for idx, task := range tg.Tasks {
  2542  		if task.Name == "" {
  2543  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
  2544  		} else if existing, ok := tasks[task.Name]; ok {
  2545  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
  2546  		} else {
  2547  			tasks[task.Name] = idx
  2548  		}
  2549  
  2550  		if task.Leader {
  2551  			leaderTasks++
  2552  		}
  2553  
  2554  		if task.Resources == nil {
  2555  			continue
  2556  		}
  2557  
  2558  		for _, net := range task.Resources.Networks {
  2559  			for _, port := range net.ReservedPorts {
  2560  				if other, ok := staticPorts[port.Value]; ok {
  2561  					err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
  2562  					mErr.Errors = append(mErr.Errors, err)
  2563  				} else {
  2564  					staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label)
  2565  				}
  2566  			}
  2567  		}
  2568  	}
  2569  
  2570  	if leaderTasks > 1 {
  2571  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader"))
  2572  	}
  2573  
  2574  	// Validate the tasks
  2575  	for _, task := range tg.Tasks {
  2576  		if err := task.Validate(tg.EphemeralDisk); err != nil {
  2577  			outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err)
  2578  			mErr.Errors = append(mErr.Errors, outer)
  2579  		}
  2580  	}
  2581  	return mErr.ErrorOrNil()
  2582  }
  2583  
  2584  // Warnings returns a list of warnings that may be from dubious settings or
  2585  // deprecation warnings.
  2586  func (tg *TaskGroup) Warnings(j *Job) error {
  2587  	var mErr multierror.Error
  2588  
  2589  	// Validate the update strategy
  2590  	if u := tg.Update; u != nil {
  2591  		// Check the counts are appropriate
  2592  		if u.MaxParallel > tg.Count {
  2593  			mErr.Errors = append(mErr.Errors,
  2594  				fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+
  2595  					"A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count))
  2596  		}
  2597  	}
  2598  
  2599  	return mErr.ErrorOrNil()
  2600  }
  2601  
  2602  // LookupTask finds a task by name
  2603  func (tg *TaskGroup) LookupTask(name string) *Task {
  2604  	for _, t := range tg.Tasks {
  2605  		if t.Name == name {
  2606  			return t
  2607  		}
  2608  	}
  2609  	return nil
  2610  }
  2611  
  2612  func (tg *TaskGroup) GoString() string {
  2613  	return fmt.Sprintf("*%#v", *tg)
  2614  }
  2615  
const (
	// Supported values for ServiceCheck.Type.
	ServiceCheckHTTP   = "http"
	ServiceCheckTCP    = "tcp"
	ServiceCheckScript = "script"

	// minCheckInterval is the minimum check interval permitted.  Consul
	// currently has its MinInterval set to 1s.  Mirror that here for
	// consistency.
	minCheckInterval = 1 * time.Second

	// minCheckTimeout is the minimum check timeout permitted for Consul
	// script TTL checks.
	minCheckTimeout = 1 * time.Second
)
  2630  
  2631  // The ServiceCheck data model represents the consul health check that
  2632  // Nomad registers for a Task
  2633  type ServiceCheck struct {
  2634  	Name          string        // Name of the check, defaults to id
  2635  	Type          string        // Type of the check - tcp, http, docker and script
  2636  	Command       string        // Command is the command to run for script checks
  2637  	Args          []string      // Args is a list of argumes for script checks
  2638  	Path          string        // path of the health check url for http type check
  2639  	Protocol      string        // Protocol to use if check is http, defaults to http
  2640  	PortLabel     string        // The port to use for tcp/http checks
  2641  	Interval      time.Duration // Interval of the check
  2642  	Timeout       time.Duration // Timeout of the response from the check before consul fails the check
  2643  	InitialStatus string        // Initial status of the check
  2644  	TLSSkipVerify bool          // Skip TLS verification when Protocol=https
  2645  }
  2646  
  2647  func (sc *ServiceCheck) Copy() *ServiceCheck {
  2648  	if sc == nil {
  2649  		return nil
  2650  	}
  2651  	nsc := new(ServiceCheck)
  2652  	*nsc = *sc
  2653  	return nsc
  2654  }
  2655  
  2656  func (sc *ServiceCheck) Canonicalize(serviceName string) {
  2657  	// Ensure empty slices are treated as null to avoid scheduling issues when
  2658  	// using DeepEquals.
  2659  	if len(sc.Args) == 0 {
  2660  		sc.Args = nil
  2661  	}
  2662  
  2663  	if sc.Name == "" {
  2664  		sc.Name = fmt.Sprintf("service: %q check", serviceName)
  2665  	}
  2666  }
  2667  
  2668  // validate a Service's ServiceCheck
  2669  func (sc *ServiceCheck) validate() error {
  2670  	switch strings.ToLower(sc.Type) {
  2671  	case ServiceCheckTCP:
  2672  		if sc.Timeout == 0 {
  2673  			return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval)
  2674  		} else if sc.Timeout < minCheckTimeout {
  2675  			return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
  2676  		}
  2677  	case ServiceCheckHTTP:
  2678  		if sc.Path == "" {
  2679  			return fmt.Errorf("http type must have a valid http path")
  2680  		}
  2681  
  2682  		if sc.Timeout == 0 {
  2683  			return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval)
  2684  		} else if sc.Timeout < minCheckTimeout {
  2685  			return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
  2686  		}
  2687  	case ServiceCheckScript:
  2688  		if sc.Command == "" {
  2689  			return fmt.Errorf("script type must have a valid script path")
  2690  		}
  2691  
  2692  		// TODO: enforce timeout on the Client side and reenable
  2693  		// validation.
  2694  	default:
  2695  		return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type)
  2696  	}
  2697  
  2698  	if sc.Interval == 0 {
  2699  		return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval)
  2700  	} else if sc.Interval < minCheckInterval {
  2701  		return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval)
  2702  	}
  2703  
  2704  	switch sc.InitialStatus {
  2705  	case "":
  2706  		// case api.HealthUnknown: TODO: Add when Consul releases 0.7.1
  2707  	case api.HealthPassing:
  2708  	case api.HealthWarning:
  2709  	case api.HealthCritical:
  2710  	default:
  2711  		return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical)
  2712  
  2713  	}
  2714  
  2715  	return nil
  2716  }
  2717  
  2718  // RequiresPort returns whether the service check requires the task has a port.
  2719  func (sc *ServiceCheck) RequiresPort() bool {
  2720  	switch sc.Type {
  2721  	case ServiceCheckHTTP, ServiceCheckTCP:
  2722  		return true
  2723  	default:
  2724  		return false
  2725  	}
  2726  }
  2727  
  2728  // Hash all ServiceCheck fields and the check's corresponding service ID to
  2729  // create an identifier. The identifier is not guaranteed to be unique as if
  2730  // the PortLabel is blank, the Service's PortLabel will be used after Hash is
  2731  // called.
  2732  func (sc *ServiceCheck) Hash(serviceID string) string {
  2733  	h := sha1.New()
  2734  	io.WriteString(h, serviceID)
  2735  	io.WriteString(h, sc.Name)
  2736  	io.WriteString(h, sc.Type)
  2737  	io.WriteString(h, sc.Command)
  2738  	io.WriteString(h, strings.Join(sc.Args, ""))
  2739  	io.WriteString(h, sc.Path)
  2740  	io.WriteString(h, sc.Protocol)
  2741  	io.WriteString(h, sc.PortLabel)
  2742  	io.WriteString(h, sc.Interval.String())
  2743  	io.WriteString(h, sc.Timeout.String())
  2744  	// Only include TLSSkipVerify if set to maintain ID stability with Nomad <0.6
  2745  	if sc.TLSSkipVerify {
  2746  		io.WriteString(h, "true")
  2747  	}
  2748  	return fmt.Sprintf("%x", h.Sum(nil))
  2749  }
  2750  
// Address modes that Service.Validate accepts for Service.AddressMode (the
// empty string is accepted as well).
const (
	AddressModeAuto   = "auto"
	AddressModeHost   = "host"
	AddressModeDriver = "driver"
)
  2756  
// Service represents a Consul service definition in Nomad.
type Service struct {
	// Name of the service registered with Consul. Consul defaults the
	// Name to ServiceID if not specified.  The Name if specified is used
	// as one of the seed values when generating a Consul ServiceID.
	// Canonicalize interpolates the JOB, TASKGROUP, TASK and BASE variables
	// into it.
	Name string

	// PortLabel is either the numeric port number or the `host:port`.
	// To specify the port number using the host's Consul Advertise
	// address, specify an empty host in the PortLabel (e.g. `:port`).
	PortLabel string

	// AddressMode specifies whether or not to use the host ip:port for
	// this service. Validate accepts the empty string, AddressModeAuto,
	// AddressModeHost and AddressModeDriver.
	AddressMode string

	Tags   []string        // List of tags for the service
	Checks []*ServiceCheck // List of checks associated with the service
}
  2776  
  2777  func (s *Service) Copy() *Service {
  2778  	if s == nil {
  2779  		return nil
  2780  	}
  2781  	ns := new(Service)
  2782  	*ns = *s
  2783  	ns.Tags = helper.CopySliceString(ns.Tags)
  2784  
  2785  	if s.Checks != nil {
  2786  		checks := make([]*ServiceCheck, len(ns.Checks))
  2787  		for i, c := range ns.Checks {
  2788  			checks[i] = c.Copy()
  2789  		}
  2790  		ns.Checks = checks
  2791  	}
  2792  
  2793  	return ns
  2794  }
  2795  
  2796  // Canonicalize interpolates values of Job, Task Group and Task in the Service
  2797  // Name. This also generates check names, service id and check ids.
  2798  func (s *Service) Canonicalize(job string, taskGroup string, task string) {
  2799  	// Ensure empty lists are treated as null to avoid scheduler issues when
  2800  	// using DeepEquals
  2801  	if len(s.Tags) == 0 {
  2802  		s.Tags = nil
  2803  	}
  2804  	if len(s.Checks) == 0 {
  2805  		s.Checks = nil
  2806  	}
  2807  
  2808  	s.Name = args.ReplaceEnv(s.Name, map[string]string{
  2809  		"JOB":       job,
  2810  		"TASKGROUP": taskGroup,
  2811  		"TASK":      task,
  2812  		"BASE":      fmt.Sprintf("%s-%s-%s", job, taskGroup, task),
  2813  	},
  2814  	)
  2815  
  2816  	for _, check := range s.Checks {
  2817  		check.Canonicalize(s.Name)
  2818  	}
  2819  }
  2820  
  2821  // Validate checks if the Check definition is valid
  2822  func (s *Service) Validate() error {
  2823  	var mErr multierror.Error
  2824  
  2825  	// Ensure the service name is valid per the below RFCs but make an exception
  2826  	// for our interpolation syntax
  2827  	// RFC-952 §1 (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1
  2828  	// (https://tools.ietf.org/html/rfc1123), and RFC-2782
  2829  	// (https://tools.ietf.org/html/rfc2782).
  2830  	re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9\$][a-zA-Z0-9\-\$\{\}\_\.]*[a-z0-9\}])$`)
  2831  	if !re.MatchString(s.Name) {
  2832  		mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q", s.Name))
  2833  	}
  2834  
  2835  	switch s.AddressMode {
  2836  	case "", AddressModeAuto, AddressModeHost, AddressModeDriver:
  2837  		// OK
  2838  	default:
  2839  		mErr.Errors = append(mErr.Errors, fmt.Errorf("service address_mode must be %q, %q, or %q; not %q", AddressModeAuto, AddressModeHost, AddressModeDriver, s.AddressMode))
  2840  	}
  2841  
  2842  	for _, c := range s.Checks {
  2843  		if s.PortLabel == "" && c.RequiresPort() {
  2844  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but the service %+q has no port", c.Name, s.Name))
  2845  			continue
  2846  		}
  2847  
  2848  		if err := c.validate(); err != nil {
  2849  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err))
  2850  		}
  2851  	}
  2852  	return mErr.ErrorOrNil()
  2853  }
  2854  
  2855  // ValidateName checks if the services Name is valid and should be called after
  2856  // the name has been interpolated
  2857  func (s *Service) ValidateName(name string) error {
  2858  	// Ensure the service name is valid per RFC-952 §1
  2859  	// (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1
  2860  	// (https://tools.ietf.org/html/rfc1123), and RFC-2782
  2861  	// (https://tools.ietf.org/html/rfc2782).
  2862  	re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`)
  2863  	if !re.MatchString(name) {
  2864  		return fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be no longer than 63 characters: %q", name)
  2865  	}
  2866  	return nil
  2867  }
  2868  
  2869  // Hash calculates the hash of the check based on it's content and the service
  2870  // which owns it
  2871  func (s *Service) Hash() string {
  2872  	h := sha1.New()
  2873  	io.WriteString(h, s.Name)
  2874  	io.WriteString(h, strings.Join(s.Tags, ""))
  2875  	io.WriteString(h, s.PortLabel)
  2876  	io.WriteString(h, s.AddressMode)
  2877  	return fmt.Sprintf("%x", h.Sum(nil))
  2878  }
  2879  
const (
	// DefaultKillTimeout is the default timeout between signaling a task it
	// will be killed and killing it. Task.Canonicalize applies it to any task
	// whose KillTimeout is zero.
	DefaultKillTimeout = 5 * time.Second
)
  2885  
// LogConfig provides configuration for log rotation. Both fields must be at
// least 1 to pass Validate.
type LogConfig struct {
	// MaxFiles is the maximum number of log files.
	MaxFiles int
	// MaxFileSizeMB is the maximum size of a log file, in megabytes.
	// Task.Validate treats MaxFiles * MaxFileSizeMB as the task's log storage
	// usage.
	MaxFileSizeMB int
}
  2891  
  2892  // DefaultLogConfig returns the default LogConfig values.
  2893  func DefaultLogConfig() *LogConfig {
  2894  	return &LogConfig{
  2895  		MaxFiles:      10,
  2896  		MaxFileSizeMB: 10,
  2897  	}
  2898  }
  2899  
  2900  // Validate returns an error if the log config specified are less than
  2901  // the minimum allowed.
  2902  func (l *LogConfig) Validate() error {
  2903  	var mErr multierror.Error
  2904  	if l.MaxFiles < 1 {
  2905  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
  2906  	}
  2907  	if l.MaxFileSizeMB < 1 {
  2908  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
  2909  	}
  2910  	return mErr.ErrorOrNil()
  2911  }
  2912  
// Task is typically a single process that is executed as part of a task
// group.
type Task struct {
	// Name of the task. Validate rejects empty names and names containing
	// slashes.
	Name string

	// Driver is used to control which driver is used
	Driver string

	// User is used to determine which user will run the task. It defaults to
	// the same user the Nomad client is being run as.
	User string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// List of service definitions exposed by the Task
	Services []*Service

	// Vault is used to define the set of Vault policies that this task should
	// have access to.
	Vault *Vault

	// Templates are the set of templates to be rendered for the task.
	Templates []*Template

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Resources is the resources needed by this task. Canonicalize fills in
	// DefaultResources when it is nil.
	Resources *Resources

	// DispatchPayload configures how the task retrieves its input from a dispatch
	DispatchPayload *DispatchPayloadConfig

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string

	// KillTimeout is the time between signaling a task that it will be
	// killed and killing it. Canonicalize replaces a zero value with
	// DefaultKillTimeout.
	KillTimeout time.Duration

	// LogConfig provides configuration for log rotation
	LogConfig *LogConfig

	// Artifacts is a list of artifacts to download and extract before running
	// the task.
	Artifacts []*TaskArtifact

	// Leader marks the task as the leader within the group. When the leader
	// task exits, other tasks will be gracefully terminated.
	Leader bool
}
  2970  
  2971  func (t *Task) Copy() *Task {
  2972  	if t == nil {
  2973  		return nil
  2974  	}
  2975  	nt := new(Task)
  2976  	*nt = *t
  2977  	nt.Env = helper.CopyMapStringString(nt.Env)
  2978  
  2979  	if t.Services != nil {
  2980  		services := make([]*Service, len(nt.Services))
  2981  		for i, s := range nt.Services {
  2982  			services[i] = s.Copy()
  2983  		}
  2984  		nt.Services = services
  2985  	}
  2986  
  2987  	nt.Constraints = CopySliceConstraints(nt.Constraints)
  2988  
  2989  	nt.Vault = nt.Vault.Copy()
  2990  	nt.Resources = nt.Resources.Copy()
  2991  	nt.Meta = helper.CopyMapStringString(nt.Meta)
  2992  	nt.DispatchPayload = nt.DispatchPayload.Copy()
  2993  
  2994  	if t.Artifacts != nil {
  2995  		artifacts := make([]*TaskArtifact, 0, len(t.Artifacts))
  2996  		for _, a := range nt.Artifacts {
  2997  			artifacts = append(artifacts, a.Copy())
  2998  		}
  2999  		nt.Artifacts = artifacts
  3000  	}
  3001  
  3002  	if i, err := copystructure.Copy(nt.Config); err != nil {
  3003  		panic(err.Error())
  3004  	} else {
  3005  		nt.Config = i.(map[string]interface{})
  3006  	}
  3007  
  3008  	if t.Templates != nil {
  3009  		templates := make([]*Template, len(t.Templates))
  3010  		for i, tmpl := range nt.Templates {
  3011  			templates[i] = tmpl.Copy()
  3012  		}
  3013  		nt.Templates = templates
  3014  	}
  3015  
  3016  	return nt
  3017  }
  3018  
  3019  // Canonicalize canonicalizes fields in the task.
  3020  func (t *Task) Canonicalize(job *Job, tg *TaskGroup) {
  3021  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  3022  	// problems since we use reflect DeepEquals.
  3023  	if len(t.Meta) == 0 {
  3024  		t.Meta = nil
  3025  	}
  3026  	if len(t.Config) == 0 {
  3027  		t.Config = nil
  3028  	}
  3029  	if len(t.Env) == 0 {
  3030  		t.Env = nil
  3031  	}
  3032  
  3033  	for _, service := range t.Services {
  3034  		service.Canonicalize(job.Name, tg.Name, t.Name)
  3035  	}
  3036  
  3037  	// If Resources are nil initialize them to defaults, otherwise canonicalize
  3038  	if t.Resources == nil {
  3039  		t.Resources = DefaultResources()
  3040  	} else {
  3041  		t.Resources.Canonicalize()
  3042  	}
  3043  
  3044  	// Set the default timeout if it is not specified.
  3045  	if t.KillTimeout == 0 {
  3046  		t.KillTimeout = DefaultKillTimeout
  3047  	}
  3048  
  3049  	if t.Vault != nil {
  3050  		t.Vault.Canonicalize()
  3051  	}
  3052  
  3053  	for _, template := range t.Templates {
  3054  		template.Canonicalize()
  3055  	}
  3056  }
  3057  
  3058  func (t *Task) GoString() string {
  3059  	return fmt.Sprintf("*%#v", *t)
  3060  }
  3061  
  3062  // Validate is used to sanity check a task
  3063  func (t *Task) Validate(ephemeralDisk *EphemeralDisk) error {
  3064  	var mErr multierror.Error
  3065  	if t.Name == "" {
  3066  		mErr.Errors = append(mErr.Errors, errors.New("Missing task name"))
  3067  	}
  3068  	if strings.ContainsAny(t.Name, `/\`) {
  3069  		// We enforce this so that when creating the directory on disk it will
  3070  		// not have any slashes.
  3071  		mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes"))
  3072  	}
  3073  	if t.Driver == "" {
  3074  		mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
  3075  	}
  3076  	if t.KillTimeout.Nanoseconds() < 0 {
  3077  		mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value"))
  3078  	}
  3079  
  3080  	// Validate the resources.
  3081  	if t.Resources == nil {
  3082  		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
  3083  	} else {
  3084  		if err := t.Resources.MeetsMinResources(); err != nil {
  3085  			mErr.Errors = append(mErr.Errors, err)
  3086  		}
  3087  
  3088  		// Ensure the task isn't asking for disk resources
  3089  		if t.Resources.DiskMB > 0 {
  3090  			mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level."))
  3091  		}
  3092  	}
  3093  
  3094  	// Validate the log config
  3095  	if t.LogConfig == nil {
  3096  		mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config"))
  3097  	} else if err := t.LogConfig.Validate(); err != nil {
  3098  		mErr.Errors = append(mErr.Errors, err)
  3099  	}
  3100  
  3101  	for idx, constr := range t.Constraints {
  3102  		if err := constr.Validate(); err != nil {
  3103  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  3104  			mErr.Errors = append(mErr.Errors, outer)
  3105  		}
  3106  
  3107  		switch constr.Operand {
  3108  		case ConstraintDistinctHosts, ConstraintDistinctProperty:
  3109  			outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand)
  3110  			mErr.Errors = append(mErr.Errors, outer)
  3111  		}
  3112  	}
  3113  
  3114  	// Validate Services
  3115  	if err := validateServices(t); err != nil {
  3116  		mErr.Errors = append(mErr.Errors, err)
  3117  	}
  3118  
  3119  	if t.LogConfig != nil && ephemeralDisk != nil {
  3120  		logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB)
  3121  		if ephemeralDisk.SizeMB <= logUsage {
  3122  			mErr.Errors = append(mErr.Errors,
  3123  				fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)",
  3124  					logUsage, ephemeralDisk.SizeMB))
  3125  		}
  3126  	}
  3127  
  3128  	for idx, artifact := range t.Artifacts {
  3129  		if err := artifact.Validate(); err != nil {
  3130  			outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err)
  3131  			mErr.Errors = append(mErr.Errors, outer)
  3132  		}
  3133  	}
  3134  
  3135  	if t.Vault != nil {
  3136  		if err := t.Vault.Validate(); err != nil {
  3137  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err))
  3138  		}
  3139  	}
  3140  
  3141  	destinations := make(map[string]int, len(t.Templates))
  3142  	for idx, tmpl := range t.Templates {
  3143  		if err := tmpl.Validate(); err != nil {
  3144  			outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err)
  3145  			mErr.Errors = append(mErr.Errors, outer)
  3146  		}
  3147  
  3148  		if other, ok := destinations[tmpl.DestPath]; ok {
  3149  			outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other)
  3150  			mErr.Errors = append(mErr.Errors, outer)
  3151  		} else {
  3152  			destinations[tmpl.DestPath] = idx + 1
  3153  		}
  3154  	}
  3155  
  3156  	// Validate the dispatch payload block if there
  3157  	if t.DispatchPayload != nil {
  3158  		if err := t.DispatchPayload.Validate(); err != nil {
  3159  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err))
  3160  		}
  3161  	}
  3162  
  3163  	return mErr.ErrorOrNil()
  3164  }
  3165  
  3166  // validateServices takes a task and validates the services within it are valid
  3167  // and reference ports that exist.
  3168  func validateServices(t *Task) error {
  3169  	var mErr multierror.Error
  3170  
  3171  	// Ensure that services don't ask for non-existent ports and their names are
  3172  	// unique.
  3173  	servicePorts := make(map[string][]string)
  3174  	knownServices := make(map[string]struct{})
  3175  	for i, service := range t.Services {
  3176  		if err := service.Validate(); err != nil {
  3177  			outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err)
  3178  			mErr.Errors = append(mErr.Errors, outer)
  3179  		}
  3180  
  3181  		// Ensure that services with the same name are not being registered for
  3182  		// the same port
  3183  		if _, ok := knownServices[service.Name+service.PortLabel]; ok {
  3184  			mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name))
  3185  		}
  3186  		knownServices[service.Name+service.PortLabel] = struct{}{}
  3187  
  3188  		if service.PortLabel != "" {
  3189  			servicePorts[service.PortLabel] = append(servicePorts[service.PortLabel], service.Name)
  3190  		}
  3191  
  3192  		// Ensure that check names are unique.
  3193  		knownChecks := make(map[string]struct{})
  3194  		for _, check := range service.Checks {
  3195  			if _, ok := knownChecks[check.Name]; ok {
  3196  				mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name))
  3197  			}
  3198  			knownChecks[check.Name] = struct{}{}
  3199  		}
  3200  	}
  3201  
  3202  	// Get the set of port labels.
  3203  	portLabels := make(map[string]struct{})
  3204  	if t.Resources != nil {
  3205  		for _, network := range t.Resources.Networks {
  3206  			ports := network.PortLabels()
  3207  			for portLabel, _ := range ports {
  3208  				portLabels[portLabel] = struct{}{}
  3209  			}
  3210  		}
  3211  	}
  3212  
  3213  	// Ensure all ports referenced in services exist.
  3214  	for servicePort, services := range servicePorts {
  3215  		_, ok := portLabels[servicePort]
  3216  		if !ok {
  3217  			joined := strings.Join(services, ", ")
  3218  			err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined)
  3219  			mErr.Errors = append(mErr.Errors, err)
  3220  		}
  3221  	}
  3222  
  3223  	// Ensure address mode is valid
  3224  	return mErr.ErrorOrNil()
  3225  }
  3226  
// Change modes accepted by Template.Validate for Template.ChangeMode.
const (
	// TemplateChangeModeNoop marks that no action should be taken if the
	// template is re-rendered
	TemplateChangeModeNoop = "noop"

	// TemplateChangeModeSignal marks that the task should be signaled if the
	// template is re-rendered. Template.Validate requires a ChangeSignal with
	// this mode.
	TemplateChangeModeSignal = "signal"

	// TemplateChangeModeRestart marks that the task should be restarted if the
	// template is re-rendered
	TemplateChangeModeRestart = "restart"
)
  3240  
var (
	// TemplateChangeModeInvalidError is the error for when an invalid change
	// mode is given. Template.Validate reports it when ChangeMode is not one
	// of the TemplateChangeMode* values.
	TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart")
)
  3246  
// Template represents a template configuration to be rendered for a given task
type Template struct {
	// SourcePath is the path to the template to be rendered. Validate
	// requires either SourcePath or EmbeddedTmpl to be set.
	SourcePath string

	// DestPath is the path to where the template should be rendered. It is
	// required and must not escape the allocation directory.
	DestPath string

	// EmbeddedTmpl stores the raw template. This is useful for smaller
	// templates where they are embedded in the job file rather than sent as
	// an artifact.
	EmbeddedTmpl string

	// ChangeMode indicates what should be done if the template is re-rendered
	ChangeMode string

	// ChangeSignal is the signal that should be sent if the change mode
	// requires it. Canonicalize upper-cases it.
	ChangeSignal string

	// Splay is used to avoid coordinated restarts of processes by applying a
	// random wait between 0 and the given splay value before signalling the
	// application of a change. Validate rejects negative values.
	Splay time.Duration

	// Perms is the permission the file should be written out with. When set,
	// it must parse as an octal number.
	Perms string

	// LeftDelim and RightDelim are optional configurations to control what
	// delimiter is utilized when parsing the template.
	LeftDelim  string
	RightDelim string

	// Envvars enables exposing the template as environment variables
	// instead of as a file. The template must be of the form:
	//
	//	VAR_NAME_1={{ key service/my-key }}
	//	VAR_NAME_2=raw string and {{ env "attr.kernel.name" }}
	//
	// Lines will be split on the initial "=" with the first part being the
	// key name and the second part the value.
	// Empty lines and lines starting with # will be ignored, but to avoid
	// escaping issues #s within lines will not be treated as comments.
	//
	// Validate rejects combining Envvars with the signal change mode.
	Envvars bool
}
  3291  
  3292  // DefaultTemplate returns a default template.
  3293  func DefaultTemplate() *Template {
  3294  	return &Template{
  3295  		ChangeMode: TemplateChangeModeRestart,
  3296  		Splay:      5 * time.Second,
  3297  		Perms:      "0644",
  3298  	}
  3299  }
  3300  
  3301  func (t *Template) Copy() *Template {
  3302  	if t == nil {
  3303  		return nil
  3304  	}
  3305  	copy := new(Template)
  3306  	*copy = *t
  3307  	return copy
  3308  }
  3309  
  3310  func (t *Template) Canonicalize() {
  3311  	if t.ChangeSignal != "" {
  3312  		t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
  3313  	}
  3314  }
  3315  
  3316  func (t *Template) Validate() error {
  3317  	var mErr multierror.Error
  3318  
  3319  	// Verify we have something to render
  3320  	if t.SourcePath == "" && t.EmbeddedTmpl == "" {
  3321  		multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template"))
  3322  	}
  3323  
  3324  	// Verify we can render somewhere
  3325  	if t.DestPath == "" {
  3326  		multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template"))
  3327  	}
  3328  
  3329  	// Verify the destination doesn't escape
  3330  	escaped, err := PathEscapesAllocDir("task", t.DestPath)
  3331  	if err != nil {
  3332  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
  3333  	} else if escaped {
  3334  		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
  3335  	}
  3336  
  3337  	// Verify a proper change mode
  3338  	switch t.ChangeMode {
  3339  	case TemplateChangeModeNoop, TemplateChangeModeRestart:
  3340  	case TemplateChangeModeSignal:
  3341  		if t.ChangeSignal == "" {
  3342  			multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal"))
  3343  		}
  3344  		if t.Envvars {
  3345  			multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates"))
  3346  		}
  3347  	default:
  3348  		multierror.Append(&mErr, TemplateChangeModeInvalidError)
  3349  	}
  3350  
  3351  	// Verify the splay is positive
  3352  	if t.Splay < 0 {
  3353  		multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value"))
  3354  	}
  3355  
  3356  	// Verify the permissions
  3357  	if t.Perms != "" {
  3358  		if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil {
  3359  			multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err))
  3360  		}
  3361  	}
  3362  
  3363  	return mErr.ErrorOrNil()
  3364  }
  3365  
// Set of possible states for a task, as stored in TaskState.State.
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)
  3372  
// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// The current state of the task (one of the TaskState* constants).
	State string

	// Failed marks a task as having failed
	Failed bool

	// Restarts is the number of times the task has restarted
	Restarts uint64

	// LastRestart is the time the task last restarted. It is updated each time the
	// task restarts
	LastRestart time.Time

	// StartedAt is the time the task is started. It is updated each time the
	// task starts
	StartedAt time.Time

	// FinishedAt is the time at which the task transitioned to dead and will
	// not be started again.
	FinishedAt time.Time

	// Series of task events that transition the state of the task. Successful
	// inspects the last entry.
	Events []*TaskEvent
}
  3400  
  3401  func (ts *TaskState) Copy() *TaskState {
  3402  	if ts == nil {
  3403  		return nil
  3404  	}
  3405  	copy := new(TaskState)
  3406  	*copy = *ts
  3407  
  3408  	if ts.Events != nil {
  3409  		copy.Events = make([]*TaskEvent, len(ts.Events))
  3410  		for i, e := range ts.Events {
  3411  			copy.Events[i] = e.Copy()
  3412  		}
  3413  	}
  3414  	return copy
  3415  }
  3416  
  3417  // Successful returns whether a task finished successfully.
  3418  func (ts *TaskState) Successful() bool {
  3419  	l := len(ts.Events)
  3420  	if ts.State != TaskStateDead || l == 0 {
  3421  		return false
  3422  	}
  3423  
  3424  	e := ts.Events[l-1]
  3425  	if e.Type != TaskTerminated {
  3426  		return false
  3427  	}
  3428  
  3429  	return e.ExitCode == 0
  3430  }
  3431  
// Task event types, as stored in TaskEvent.Type.
const (
	// TaskSetupFailure indicates that the task could not be started due to
	// a setup failure.
	TaskSetupFailure = "Setup Failure"

	// TaskDriverFailure indicates that the task could not be started due to a
	// failure in the driver.
	TaskDriverFailure = "Driver Failure"

	// TaskReceived signals that the task has been pulled by the client at the
	// given timestamp.
	TaskReceived = "Received"

	// TaskFailedValidation indicates the task was invalid and as such was not
	// run.
	TaskFailedValidation = "Failed Validation"

	// TaskStarted signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// TaskTerminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// TaskKilling indicates a kill signal has been sent to the task.
	TaskKilling = "Killing"

	// TaskKilled indicates a user has killed the task.
	TaskKilled = "Killed"

	// TaskRestarting indicates that task terminated and is being restarted.
	TaskRestarting = "Restarting"

	// TaskNotRestarting indicates that the task has failed and is not being
	// restarted because it has exceeded its restart policy.
	TaskNotRestarting = "Not Restarting"

	// TaskRestartSignal indicates that the task has been signalled to be
	// restarted
	TaskRestartSignal = "Restart Signaled"

	// TaskSignaling indicates that the task is being signalled.
	TaskSignaling = "Signaling"

	// TaskDownloadingArtifacts means the task is downloading the artifacts
	// specified in the task.
	TaskDownloadingArtifacts = "Downloading Artifacts"

	// TaskArtifactDownloadFailed indicates that downloading the artifacts
	// failed.
	TaskArtifactDownloadFailed = "Failed Artifact Download"

	// TaskBuildingTaskDir indicates that the task directory/chroot is being
	// built.
	TaskBuildingTaskDir = "Building Task Directory"

	// TaskSetup indicates the task runner is setting up the task environment
	TaskSetup = "Task Setup"

	// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
	// exceeded the requested disk resources.
	TaskDiskExceeded = "Disk Resources Exceeded"

	// TaskSiblingFailed indicates that a sibling task in the task group has
	// failed.
	TaskSiblingFailed = "Sibling Task Failed"

	// TaskDriverMessage is an informational event message emitted by
	// drivers such as when they're performing a long running action like
	// downloading an image.
	TaskDriverMessage = "Driver"

	// TaskLeaderDead indicates that the leader task within the task group
	// has finished.
	TaskLeaderDead = "Leader Task Dead"
)
  3507  
// TaskEvent is an event that affects the state of a task and contains
// metadata appropriate to the event's type. Only the fields relevant to a
// given Type are expected to be populated; the Set* methods fill them in and
// return the event so calls can be chained.
type TaskEvent struct {
	Type string
	Time int64 // Unix Nanosecond timestamp

	// FailsTask marks whether this event fails the task
	FailsTask bool

	// Restart fields.
	RestartReason string

	// Setup Failure fields.
	SetupError string

	// Driver Failure fields.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.
	ExitCode int    // The exit code of the task.
	Signal   int    // The signal that terminated the task.
	Message  string // A possible message explaining the termination of the task.

	// Killing fields
	KillTimeout time.Duration

	// Task Killed Fields.
	KillError string // Error killing the task.

	// KillReason is the reason the task was killed
	KillReason string

	// TaskRestarting fields.
	StartDelay int64 // The sleep period before restarting the task, in nanoseconds.

	// Artifact Download fields
	DownloadError string // Error downloading artifacts

	// Validation fields
	ValidationError string // Validation error

	// The maximum allowed task disk size.
	DiskLimit int64

	// Name of the sibling task that caused termination of the task that
	// the TaskEvent refers to.
	FailedSibling string

	// VaultError is the error from token renewal
	VaultError string

	// TaskSignalReason indicates the reason the task is being signalled.
	TaskSignalReason string

	// TaskSignal is the signal that was sent to the task
	TaskSignal string

	// DriverMessage indicates a driver action being taken.
	DriverMessage string
}
  3568  
  3569  func (te *TaskEvent) GoString() string {
  3570  	return fmt.Sprintf("%v at %v", te.Type, te.Time)
  3571  }
  3572  
// SetMessage sets the Message of the TaskEvent to msg, replacing any previous
// value, and returns the event so calls can be chained.
func (te *TaskEvent) SetMessage(msg string) *TaskEvent {
	te.Message = msg
	return te
}
  3578  
  3579  func (te *TaskEvent) Copy() *TaskEvent {
  3580  	if te == nil {
  3581  		return nil
  3582  	}
  3583  	copy := new(TaskEvent)
  3584  	*copy = *te
  3585  	return copy
  3586  }
  3587  
  3588  func NewTaskEvent(event string) *TaskEvent {
  3589  	return &TaskEvent{
  3590  		Type: event,
  3591  		Time: time.Now().UnixNano(),
  3592  	}
  3593  }
  3594  
  3595  // SetSetupError is used to store an error that occured while setting up the
  3596  // task
  3597  func (e *TaskEvent) SetSetupError(err error) *TaskEvent {
  3598  	if err != nil {
  3599  		e.SetupError = err.Error()
  3600  	}
  3601  	return e
  3602  }
  3603  
  3604  func (e *TaskEvent) SetFailsTask() *TaskEvent {
  3605  	e.FailsTask = true
  3606  	return e
  3607  }
  3608  
  3609  func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
  3610  	if err != nil {
  3611  		e.DriverError = err.Error()
  3612  	}
  3613  	return e
  3614  }
  3615  
  3616  func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
  3617  	e.ExitCode = c
  3618  	return e
  3619  }
  3620  
  3621  func (e *TaskEvent) SetSignal(s int) *TaskEvent {
  3622  	e.Signal = s
  3623  	return e
  3624  }
  3625  
  3626  func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
  3627  	if err != nil {
  3628  		e.Message = err.Error()
  3629  	}
  3630  	return e
  3631  }
  3632  
  3633  func (e *TaskEvent) SetKillError(err error) *TaskEvent {
  3634  	if err != nil {
  3635  		e.KillError = err.Error()
  3636  	}
  3637  	return e
  3638  }
  3639  
  3640  func (e *TaskEvent) SetKillReason(r string) *TaskEvent {
  3641  	e.KillReason = r
  3642  	return e
  3643  }
  3644  
  3645  func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent {
  3646  	e.StartDelay = int64(delay)
  3647  	return e
  3648  }
  3649  
  3650  func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent {
  3651  	e.RestartReason = reason
  3652  	return e
  3653  }
  3654  
  3655  func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent {
  3656  	e.TaskSignalReason = r
  3657  	return e
  3658  }
  3659  
  3660  func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent {
  3661  	e.TaskSignal = s.String()
  3662  	return e
  3663  }
  3664  
  3665  func (e *TaskEvent) SetDownloadError(err error) *TaskEvent {
  3666  	if err != nil {
  3667  		e.DownloadError = err.Error()
  3668  	}
  3669  	return e
  3670  }
  3671  
  3672  func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
  3673  	if err != nil {
  3674  		e.ValidationError = err.Error()
  3675  	}
  3676  	return e
  3677  }
  3678  
  3679  func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent {
  3680  	e.KillTimeout = timeout
  3681  	return e
  3682  }
  3683  
  3684  func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent {
  3685  	e.DiskLimit = limit
  3686  	return e
  3687  }
  3688  
  3689  func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent {
  3690  	e.FailedSibling = sibling
  3691  	return e
  3692  }
  3693  
  3694  func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent {
  3695  	if err != nil {
  3696  		e.VaultError = err.Error()
  3697  	}
  3698  	return e
  3699  }
  3700  
  3701  func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent {
  3702  	e.DriverMessage = m
  3703  	return e
  3704  }
  3705  
// TaskArtifact is an artifact to download before running the task.
type TaskArtifact struct {
	// GetterSource is the source to download an artifact using go-getter.
	// Validate rejects an empty source.
	GetterSource string

	// GetterOptions are options to use when downloading the artifact using
	// go-getter. Validate inspects the optional "checksum" entry, which must
	// be given as "type:value".
	GetterOptions map[string]string

	// GetterMode is the go-getter.ClientMode for fetching resources.
	// Defaults to "any" but can be set to "file" or "dir".
	GetterMode string

	// RelativeDest is the download destination given relative to the task's
	// directory.
	RelativeDest string
}
  3723  
  3724  func (ta *TaskArtifact) Copy() *TaskArtifact {
  3725  	if ta == nil {
  3726  		return nil
  3727  	}
  3728  	nta := new(TaskArtifact)
  3729  	*nta = *ta
  3730  	nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions)
  3731  	return nta
  3732  }
  3733  
  3734  func (ta *TaskArtifact) GoString() string {
  3735  	return fmt.Sprintf("%+v", ta)
  3736  }
  3737  
  3738  // PathEscapesAllocDir returns if the given path escapes the allocation
  3739  // directory. The prefix allows adding a prefix if the path will be joined, for
  3740  // example a "task/local" prefix may be provided if the path will be joined
  3741  // against that prefix.
  3742  func PathEscapesAllocDir(prefix, path string) (bool, error) {
  3743  	// Verify the destination doesn't escape the tasks directory
  3744  	alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/"))
  3745  	if err != nil {
  3746  		return false, err
  3747  	}
  3748  	abs, err := filepath.Abs(filepath.Join(alloc, prefix, path))
  3749  	if err != nil {
  3750  		return false, err
  3751  	}
  3752  	rel, err := filepath.Rel(alloc, abs)
  3753  	if err != nil {
  3754  		return false, err
  3755  	}
  3756  
  3757  	return strings.HasPrefix(rel, ".."), nil
  3758  }
  3759  
  3760  func (ta *TaskArtifact) Validate() error {
  3761  	// Verify the source
  3762  	var mErr multierror.Error
  3763  	if ta.GetterSource == "" {
  3764  		mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified"))
  3765  	}
  3766  
  3767  	switch ta.GetterMode {
  3768  	case "":
  3769  		// Default to any
  3770  		ta.GetterMode = GetterModeAny
  3771  	case GetterModeAny, GetterModeFile, GetterModeDir:
  3772  		// Ok
  3773  	default:
  3774  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s",
  3775  			ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir))
  3776  	}
  3777  
  3778  	escaped, err := PathEscapesAllocDir("task", ta.RelativeDest)
  3779  	if err != nil {
  3780  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
  3781  	} else if escaped {
  3782  		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
  3783  	}
  3784  
  3785  	// Verify the checksum
  3786  	if check, ok := ta.GetterOptions["checksum"]; ok {
  3787  		check = strings.TrimSpace(check)
  3788  		if check == "" {
  3789  			mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value cannot be empty"))
  3790  			return mErr.ErrorOrNil()
  3791  		}
  3792  
  3793  		parts := strings.Split(check, ":")
  3794  		if l := len(parts); l != 2 {
  3795  			mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check))
  3796  			return mErr.ErrorOrNil()
  3797  		}
  3798  
  3799  		checksumVal := parts[1]
  3800  		checksumBytes, err := hex.DecodeString(checksumVal)
  3801  		if err != nil {
  3802  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err))
  3803  			return mErr.ErrorOrNil()
  3804  		}
  3805  
  3806  		checksumType := parts[0]
  3807  		expectedLength := 0
  3808  		switch checksumType {
  3809  		case "md5":
  3810  			expectedLength = md5.Size
  3811  		case "sha1":
  3812  			expectedLength = sha1.Size
  3813  		case "sha256":
  3814  			expectedLength = sha256.Size
  3815  		case "sha512":
  3816  			expectedLength = sha512.Size
  3817  		default:
  3818  			mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType))
  3819  			return mErr.ErrorOrNil()
  3820  		}
  3821  
  3822  		if len(checksumBytes) != expectedLength {
  3823  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal))
  3824  			return mErr.ErrorOrNil()
  3825  		}
  3826  	}
  3827  
  3828  	return mErr.ErrorOrNil()
  3829  }
  3830  
// Named constraint operands. Constraint.Validate applies operand-specific
// checks to each of these in addition to the plain comparison operators.
const (
	ConstraintDistinctProperty = "distinct_property"
	ConstraintDistinctHosts    = "distinct_hosts"
	ConstraintRegex            = "regexp"
	ConstraintVersion          = "version"
	ConstraintSetContains      = "set_contains"
)
  3838  
// Constraint is used to restrict placement options.
type Constraint struct {
	LTarget string // Left-hand target
	RTarget string // Right-hand target
	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
	str     string // Memoized string, filled in lazily by String
}
  3846  
  3847  // Equal checks if two constraints are equal
  3848  func (c *Constraint) Equal(o *Constraint) bool {
  3849  	return c.LTarget == o.LTarget &&
  3850  		c.RTarget == o.RTarget &&
  3851  		c.Operand == o.Operand
  3852  }
  3853  
  3854  func (c *Constraint) Copy() *Constraint {
  3855  	if c == nil {
  3856  		return nil
  3857  	}
  3858  	nc := new(Constraint)
  3859  	*nc = *c
  3860  	return nc
  3861  }
  3862  
  3863  func (c *Constraint) String() string {
  3864  	if c.str != "" {
  3865  		return c.str
  3866  	}
  3867  	c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
  3868  	return c.str
  3869  }
  3870  
  3871  func (c *Constraint) Validate() error {
  3872  	var mErr multierror.Error
  3873  	if c.Operand == "" {
  3874  		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
  3875  	}
  3876  
  3877  	// requireLtarget specifies whether the constraint requires an LTarget to be
  3878  	// provided.
  3879  	requireLtarget := true
  3880  
  3881  	// Perform additional validation based on operand
  3882  	switch c.Operand {
  3883  	case ConstraintDistinctHosts:
  3884  		requireLtarget = false
  3885  		if c.RTarget != "" {
  3886  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct hosts constraint doesn't allow RTarget. Got %q", c.RTarget))
  3887  		}
  3888  		if c.LTarget != "" {
  3889  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct hosts constraint doesn't allow LTarget. Got %q", c.LTarget))
  3890  		}
  3891  	case ConstraintSetContains:
  3892  		if c.RTarget == "" {
  3893  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget"))
  3894  		}
  3895  	case ConstraintRegex:
  3896  		if _, err := regexp.Compile(c.RTarget); err != nil {
  3897  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
  3898  		}
  3899  	case ConstraintVersion:
  3900  		if _, err := version.NewConstraint(c.RTarget); err != nil {
  3901  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
  3902  		}
  3903  	case ConstraintDistinctProperty:
  3904  		// If a count is set, make sure it is convertible to a uint64
  3905  		if c.RTarget != "" {
  3906  			count, err := strconv.ParseUint(c.RTarget, 10, 64)
  3907  			if err != nil {
  3908  				mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err))
  3909  			} else if count < 1 {
  3910  				mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count))
  3911  			}
  3912  		}
  3913  	case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=":
  3914  		if c.RTarget == "" {
  3915  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand))
  3916  		}
  3917  	default:
  3918  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand))
  3919  	}
  3920  
  3921  	// Ensure we have an LTarget for the constraints that need one
  3922  	if requireLtarget && c.LTarget == "" {
  3923  		mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint"))
  3924  	}
  3925  
  3926  	return mErr.ErrorOrNil()
  3927  }
  3928  
// EphemeralDisk is an ephemeral disk object
type EphemeralDisk struct {
	// Sticky indicates whether the allocation is sticky to a node
	Sticky bool

	// SizeMB is the size of the local disk. Validate requires at least 10.
	SizeMB int

	// Migrate determines if Nomad client should migrate the allocation dir for
	// sticky allocations
	Migrate bool
}
  3941  
  3942  // DefaultEphemeralDisk returns a EphemeralDisk with default configurations
  3943  func DefaultEphemeralDisk() *EphemeralDisk {
  3944  	return &EphemeralDisk{
  3945  		SizeMB: 300,
  3946  	}
  3947  }
  3948  
  3949  // Validate validates EphemeralDisk
  3950  func (d *EphemeralDisk) Validate() error {
  3951  	if d.SizeMB < 10 {
  3952  		return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB)
  3953  	}
  3954  	return nil
  3955  }
  3956  
  3957  // Copy copies the EphemeralDisk struct and returns a new one
  3958  func (d *EphemeralDisk) Copy() *EphemeralDisk {
  3959  	ld := new(EphemeralDisk)
  3960  	*ld = *d
  3961  	return ld
  3962  }
  3963  
// Values accepted for Vault.ChangeMode; Vault.Validate rejects anything else.
const (
	// VaultChangeModeNoop takes no action when a new token is retrieved.
	VaultChangeModeNoop = "noop"

	// VaultChangeModeSignal signals the task when a new token is retrieved.
	VaultChangeModeSignal = "signal"

	// VaultChangeModeRestart restarts the task when a new token is retrieved.
	VaultChangeModeRestart = "restart"
)
  3974  
// Vault stores the set of permissions a task needs access to from Vault.
type Vault struct {
	// Policies is the set of policies that the task needs access to
	Policies []string

	// Env marks whether the Vault Token should be exposed as an environment
	// variable
	Env bool

	// ChangeMode is used to configure the task's behavior when the Vault
	// token changes because the original token could not be renewed in time.
	ChangeMode string

	// ChangeSignal is the signal sent to the task when a new token is
	// retrieved. This is only valid when using the signal change mode.
	ChangeSignal string
}
  3992  
  3993  func DefaultVaultBlock() *Vault {
  3994  	return &Vault{
  3995  		Env:        true,
  3996  		ChangeMode: VaultChangeModeRestart,
  3997  	}
  3998  }
  3999  
  4000  // Copy returns a copy of this Vault block.
  4001  func (v *Vault) Copy() *Vault {
  4002  	if v == nil {
  4003  		return nil
  4004  	}
  4005  
  4006  	nv := new(Vault)
  4007  	*nv = *v
  4008  	return nv
  4009  }
  4010  
  4011  func (v *Vault) Canonicalize() {
  4012  	if v.ChangeSignal != "" {
  4013  		v.ChangeSignal = strings.ToUpper(v.ChangeSignal)
  4014  	}
  4015  }
  4016  
  4017  // Validate returns if the Vault block is valid.
  4018  func (v *Vault) Validate() error {
  4019  	if v == nil {
  4020  		return nil
  4021  	}
  4022  
  4023  	var mErr multierror.Error
  4024  	if len(v.Policies) == 0 {
  4025  		multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty"))
  4026  	}
  4027  
  4028  	for _, p := range v.Policies {
  4029  		if p == "root" {
  4030  			multierror.Append(&mErr, fmt.Errorf("Can not specifiy \"root\" policy"))
  4031  		}
  4032  	}
  4033  
  4034  	switch v.ChangeMode {
  4035  	case VaultChangeModeSignal:
  4036  		if v.ChangeSignal == "" {
  4037  			multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal))
  4038  		}
  4039  	case VaultChangeModeNoop, VaultChangeModeRestart:
  4040  	default:
  4041  		multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode))
  4042  	}
  4043  
  4044  	return mErr.ErrorOrNil()
  4045  }
  4046  
const (
	// DeploymentStatuses are the various states a deployment can be in.
	// Deployment.Active treats running and paused as active and every other
	// status as terminal.
	DeploymentStatusRunning    = "running"
	DeploymentStatusPaused     = "paused"
	DeploymentStatusFailed     = "failed"
	DeploymentStatusSuccessful = "successful"
	DeploymentStatusCancelled  = "cancelled"

	// DeploymentStatusDescriptions are the various descriptions of the states a
	// deployment can be in.
	DeploymentStatusDescriptionRunning               = "Deployment is running"
	DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires promotion"
	DeploymentStatusDescriptionPaused                = "Deployment is paused"
	DeploymentStatusDescriptionSuccessful            = "Deployment completed successfully"
	DeploymentStatusDescriptionStoppedJob            = "Cancelled because job is stopped"
	DeploymentStatusDescriptionNewerJob              = "Cancelled due to newer version of job"
	DeploymentStatusDescriptionFailedAllocations     = "Failed due to unhealthy allocations"
	DeploymentStatusDescriptionFailedByUser          = "Deployment marked as failed"
)
  4066  
  4067  // DeploymentStatusDescriptionRollback is used to get the status description of
  4068  // a deployment when rolling back to an older job.
  4069  func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string {
  4070  	return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion)
  4071  }
  4072  
// Deployment is the object that represents a job deployment which is used to
// transition a job between versions.
type Deployment struct {
	// ID is a generated UUID for the deployment
	ID string

	// JobID is the job the deployment is created for
	JobID string

	// JobVersion is the version of the job at which the deployment is tracking
	JobVersion uint64

	// JobModifyIndex is the modify index of the job at which the deployment is tracking
	JobModifyIndex uint64

	// JobCreateIndex is the create index of the job which the deployment is
	// tracking. It is needed so that if the job gets stopped and rerun we can
	// present the correct list of deployments for the job and not old ones.
	JobCreateIndex uint64

	// TaskGroups is the set of task groups affected by the deployment and their
	// current deployment status.
	TaskGroups map[string]*DeploymentState

	// The status of the deployment
	Status string

	// StatusDescription allows a human readable description of the deployment
	// status.
	StatusDescription string

	CreateIndex uint64
	ModifyIndex uint64
}
  4107  
  4108  // NewDeployment creates a new deployment given the job.
  4109  func NewDeployment(job *Job) *Deployment {
  4110  	return &Deployment{
  4111  		ID:                GenerateUUID(),
  4112  		JobID:             job.ID,
  4113  		JobVersion:        job.Version,
  4114  		JobModifyIndex:    job.ModifyIndex,
  4115  		JobCreateIndex:    job.CreateIndex,
  4116  		Status:            DeploymentStatusRunning,
  4117  		StatusDescription: DeploymentStatusDescriptionRunning,
  4118  		TaskGroups:        make(map[string]*DeploymentState, len(job.TaskGroups)),
  4119  	}
  4120  }
  4121  
  4122  func (d *Deployment) Copy() *Deployment {
  4123  	if d == nil {
  4124  		return nil
  4125  	}
  4126  
  4127  	c := &Deployment{}
  4128  	*c = *d
  4129  
  4130  	c.TaskGroups = nil
  4131  	if l := len(d.TaskGroups); d.TaskGroups != nil {
  4132  		c.TaskGroups = make(map[string]*DeploymentState, l)
  4133  		for tg, s := range d.TaskGroups {
  4134  			c.TaskGroups[tg] = s.Copy()
  4135  		}
  4136  	}
  4137  
  4138  	return c
  4139  }
  4140  
  4141  // Active returns whether the deployment is active or terminal.
  4142  func (d *Deployment) Active() bool {
  4143  	switch d.Status {
  4144  	case DeploymentStatusRunning, DeploymentStatusPaused:
  4145  		return true
  4146  	default:
  4147  		return false
  4148  	}
  4149  }
  4150  
  4151  // GetID is a helper for getting the ID when the object may be nil
  4152  func (d *Deployment) GetID() string {
  4153  	if d == nil {
  4154  		return ""
  4155  	}
  4156  	return d.ID
  4157  }
  4158  
  4159  // HasPlacedCanaries returns whether the deployment has placed canaries
  4160  func (d *Deployment) HasPlacedCanaries() bool {
  4161  	if d == nil || len(d.TaskGroups) == 0 {
  4162  		return false
  4163  	}
  4164  	for _, group := range d.TaskGroups {
  4165  		if len(group.PlacedCanaries) != 0 {
  4166  			return true
  4167  		}
  4168  	}
  4169  	return false
  4170  }
  4171  
  4172  // RequiresPromotion returns whether the deployment requires promotion to
  4173  // continue
  4174  func (d *Deployment) RequiresPromotion() bool {
  4175  	if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning {
  4176  		return false
  4177  	}
  4178  	for _, group := range d.TaskGroups {
  4179  		if group.DesiredCanaries > 0 && !group.Promoted {
  4180  			return true
  4181  		}
  4182  	}
  4183  	return false
  4184  }
  4185  
  4186  func (d *Deployment) GoString() string {
  4187  	base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription)
  4188  	for group, state := range d.TaskGroups {
  4189  		base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state)
  4190  	}
  4191  	return base
  4192  }
  4193  
// DeploymentState tracks the state of a deployment for a given task group.
type DeploymentState struct {
	// AutoRevert marks whether the task group has indicated the job should be
	// reverted on failure
	AutoRevert bool

	// Promoted marks whether the canaries have been promoted
	Promoted bool

	// PlacedCanaries is the set of placed canary allocations
	PlacedCanaries []string

	// DesiredCanaries is the number of canaries that should be created.
	DesiredCanaries int

	// DesiredTotal is the total number of allocations that should be created as
	// part of the deployment.
	DesiredTotal int

	// PlacedAllocs is the number of allocations that have been placed
	PlacedAllocs int

	// HealthyAllocs is the number of allocations that have been marked healthy.
	HealthyAllocs int

	// UnhealthyAllocs is the number of allocations that have been marked as
	// unhealthy.
	UnhealthyAllocs int
}
  4222  
  4223  func (d *DeploymentState) GoString() string {
  4224  	base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal)
  4225  	base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries)
  4226  	base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries)
  4227  	base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted)
  4228  	base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs)
  4229  	base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs)
  4230  	base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs)
  4231  	base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert)
  4232  	return base
  4233  }
  4234  
  4235  func (d *DeploymentState) Copy() *DeploymentState {
  4236  	c := &DeploymentState{}
  4237  	*c = *d
  4238  	c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries)
  4239  	return c
  4240  }
  4241  
// DeploymentStatusUpdate is used to update the status of a given deployment.
// It carries the target deployment's ID together with the new status and its
// description.
type DeploymentStatusUpdate struct {
	// DeploymentID is the ID of the deployment to update
	DeploymentID string

	// Status is the new status of the deployment.
	Status string

	// StatusDescription is the new status description of the deployment.
	StatusDescription string
}
  4253  
// Values for Allocation.DesiredStatus. Allocation.TerminalStatus treats stop
// and evict as terminal.
const (
	AllocDesiredStatusRun   = "run"   // Allocation should run
	AllocDesiredStatusStop  = "stop"  // Allocation should stop
	AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted
)
  4259  
// Values for Allocation.ClientStatus. Allocation.Terminated treats complete,
// failed and lost as terminal.
const (
	AllocClientStatusPending  = "pending"
	AllocClientStatusRunning  = "running"
	AllocClientStatusComplete = "complete"
	AllocClientStatusFailed   = "failed"
	AllocClientStatusLost     = "lost"
)
  4267  
// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// ID of the allocation (UUID)
	ID string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// Job is the parent job of the task group being allocated.
	// This is copied at allocation time to avoid issues if the job
	// definition is updated.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// Resources is the total set of resources allocated as part
	// of this allocation of the task group.
	Resources *Resources

	// SharedResources are the resources that are shared by all the tasks in an
	// allocation
	SharedResources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources.
	TaskResources map[string]*Resources

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// Desired Status of the allocation on the client
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	// about the desired status
	DesiredDescription string

	// Status of the allocation on the client
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	// about the client status
	ClientDescription string

	// TaskStates stores the state of each task, keyed by task name.
	TaskStates map[string]*TaskState

	// PreviousAllocation is the allocation that this allocation is replacing
	PreviousAllocation string

	// DeploymentID identifies an allocation as being created from a
	// particular deployment
	DeploymentID string

	// DeploymentStatus captures the status of the allocation as part of the
	// given deployment
	DeploymentStatus *AllocDeploymentStatus

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64

	// AllocModifyIndex is not updated when the client updates allocations. This
	// lets the client pull only the allocs updated by the server.
	AllocModifyIndex uint64

	// CreateTime is the time the allocation has finished scheduling and been
	// verified by the plan applier.
	CreateTime int64
}
  4344  
  4345  // Index returns the index of the allocation. If the allocation is from a task
  4346  // group with count greater than 1, there will be multiple allocations for it.
  4347  func (a *Allocation) Index() uint {
  4348  	l := len(a.Name)
  4349  	prefix := len(a.JobID) + len(a.TaskGroup) + 2
  4350  	if l <= 3 || l <= prefix {
  4351  		return uint(0)
  4352  	}
  4353  
  4354  	strNum := a.Name[prefix : len(a.Name)-1]
  4355  	num, _ := strconv.Atoi(strNum)
  4356  	return uint(num)
  4357  }
  4358  
  4359  func (a *Allocation) Copy() *Allocation {
  4360  	return a.copyImpl(true)
  4361  }
  4362  
  4363  // Copy provides a copy of the allocation but doesn't deep copy the job
  4364  func (a *Allocation) CopySkipJob() *Allocation {
  4365  	return a.copyImpl(false)
  4366  }
  4367  
  4368  func (a *Allocation) copyImpl(job bool) *Allocation {
  4369  	if a == nil {
  4370  		return nil
  4371  	}
  4372  	na := new(Allocation)
  4373  	*na = *a
  4374  
  4375  	if job {
  4376  		na.Job = na.Job.Copy()
  4377  	}
  4378  
  4379  	na.Resources = na.Resources.Copy()
  4380  	na.SharedResources = na.SharedResources.Copy()
  4381  
  4382  	if a.TaskResources != nil {
  4383  		tr := make(map[string]*Resources, len(na.TaskResources))
  4384  		for task, resource := range na.TaskResources {
  4385  			tr[task] = resource.Copy()
  4386  		}
  4387  		na.TaskResources = tr
  4388  	}
  4389  
  4390  	na.Metrics = na.Metrics.Copy()
  4391  	na.DeploymentStatus = na.DeploymentStatus.Copy()
  4392  
  4393  	if a.TaskStates != nil {
  4394  		ts := make(map[string]*TaskState, len(na.TaskStates))
  4395  		for task, state := range na.TaskStates {
  4396  			ts[task] = state.Copy()
  4397  		}
  4398  		na.TaskStates = ts
  4399  	}
  4400  	return na
  4401  }
  4402  
  4403  // TerminalStatus returns if the desired or actual status is terminal and
  4404  // will no longer transition.
  4405  func (a *Allocation) TerminalStatus() bool {
  4406  	// First check the desired state and if that isn't terminal, check client
  4407  	// state.
  4408  	switch a.DesiredStatus {
  4409  	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
  4410  		return true
  4411  	default:
  4412  	}
  4413  
  4414  	switch a.ClientStatus {
  4415  	case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
  4416  		return true
  4417  	default:
  4418  		return false
  4419  	}
  4420  }
  4421  
  4422  // Terminated returns if the allocation is in a terminal state on a client.
  4423  func (a *Allocation) Terminated() bool {
  4424  	if a.ClientStatus == AllocClientStatusFailed ||
  4425  		a.ClientStatus == AllocClientStatusComplete ||
  4426  		a.ClientStatus == AllocClientStatusLost {
  4427  		return true
  4428  	}
  4429  	return false
  4430  }
  4431  
  4432  // RanSuccessfully returns whether the client has ran the allocation and all
  4433  // tasks finished successfully
  4434  func (a *Allocation) RanSuccessfully() bool {
  4435  	// Handle the case the client hasn't started the allocation.
  4436  	if len(a.TaskStates) == 0 {
  4437  		return false
  4438  	}
  4439  
  4440  	// Check to see if all the tasks finised successfully in the allocation
  4441  	allSuccess := true
  4442  	for _, state := range a.TaskStates {
  4443  		allSuccess = allSuccess && state.Successful()
  4444  	}
  4445  
  4446  	return allSuccess
  4447  }
  4448  
  4449  // ShouldMigrate returns if the allocation needs data migration
  4450  func (a *Allocation) ShouldMigrate() bool {
  4451  	if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict {
  4452  		return false
  4453  	}
  4454  
  4455  	tg := a.Job.LookupTaskGroup(a.TaskGroup)
  4456  
  4457  	// if the task group is nil or the ephemeral disk block isn't present then
  4458  	// we won't migrate
  4459  	if tg == nil || tg.EphemeralDisk == nil {
  4460  		return false
  4461  	}
  4462  
  4463  	// We won't migrate any data is the user hasn't enabled migration or the
  4464  	// disk is not marked as sticky
  4465  	if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky {
  4466  		return false
  4467  	}
  4468  
  4469  	return true
  4470  }
  4471  
  4472  // Stub returns a list stub for the allocation
  4473  func (a *Allocation) Stub() *AllocListStub {
  4474  	return &AllocListStub{
  4475  		ID:                 a.ID,
  4476  		EvalID:             a.EvalID,
  4477  		Name:               a.Name,
  4478  		NodeID:             a.NodeID,
  4479  		JobID:              a.JobID,
  4480  		JobVersion:         a.Job.Version,
  4481  		TaskGroup:          a.TaskGroup,
  4482  		DesiredStatus:      a.DesiredStatus,
  4483  		DesiredDescription: a.DesiredDescription,
  4484  		ClientStatus:       a.ClientStatus,
  4485  		ClientDescription:  a.ClientDescription,
  4486  		TaskStates:         a.TaskStates,
  4487  		DeploymentStatus:   a.DeploymentStatus,
  4488  		CreateIndex:        a.CreateIndex,
  4489  		ModifyIndex:        a.ModifyIndex,
  4490  		CreateTime:         a.CreateTime,
  4491  	}
  4492  }
  4493  
// AllocListStub is used to return a subset of alloc information. It is
// populated by Allocation.Stub; JobVersion comes from the allocation's Job
// and every other field mirrors the Allocation field of the same name.
type AllocListStub struct {
	ID                 string
	EvalID             string
	Name               string
	NodeID             string
	JobID              string
	JobVersion         uint64
	TaskGroup          string
	DesiredStatus      string
	DesiredDescription string
	ClientStatus       string
	ClientDescription  string
	TaskStates         map[string]*TaskState
	DeploymentStatus   *AllocDeploymentStatus
	CreateIndex        uint64
	ModifyIndex        uint64
	CreateTime         int64
}
  4513  
// AllocMetric is used to track various metrics while attempting
// to make an allocation. These are used to debug a job, or to better
// understand the pressure within the system.
//
// The map fields may be nil; the recording methods (FilterNode,
// ExhaustedNode, ScoreNode) allocate them on first use.
type AllocMetric struct {
	// NodesEvaluated is the number of nodes that were evaluated.
	// It is incremented by EvaluateNode.
	NodesEvaluated int

	// NodesFiltered is the number of nodes filtered due to a constraint.
	// It is incremented by FilterNode.
	NodesFiltered int

	// NodesAvailable is the number of nodes available for evaluation per DC.
	NodesAvailable map[string]int

	// ClassFiltered is the number of nodes filtered by class. It is keyed by
	// node class and only updated by FilterNode for nodes with a non-empty
	// class.
	ClassFiltered map[string]int

	// ConstraintFiltered is the number of failures caused by constraint. It
	// is keyed by the constraint string passed to FilterNode.
	ConstraintFiltered map[string]int

	// NodesExhausted is the number of nodes skipped due to being
	// exhausted of at least one resource. It is incremented by ExhaustedNode.
	NodesExhausted int

	// ClassExhausted is the number of nodes exhausted by class. It is keyed
	// by node class and only updated by ExhaustedNode for nodes with a
	// non-empty class.
	ClassExhausted map[string]int

	// DimensionExhausted provides the count by dimension or reason. It is
	// keyed by the dimension string passed to ExhaustedNode.
	DimensionExhausted map[string]int

	// Scores is the scores of the final few nodes remaining
	// for placement. The top score is typically selected. ScoreNode stores
	// entries under keys of the form "<node ID>.<name>".
	Scores map[string]float64

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}
  4557  
  4558  func (a *AllocMetric) Copy() *AllocMetric {
  4559  	if a == nil {
  4560  		return nil
  4561  	}
  4562  	na := new(AllocMetric)
  4563  	*na = *a
  4564  	na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable)
  4565  	na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered)
  4566  	na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered)
  4567  	na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted)
  4568  	na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted)
  4569  	na.Scores = helper.CopyMapStringFloat64(na.Scores)
  4570  	return na
  4571  }
  4572  
  4573  func (a *AllocMetric) EvaluateNode() {
  4574  	a.NodesEvaluated += 1
  4575  }
  4576  
  4577  func (a *AllocMetric) FilterNode(node *Node, constraint string) {
  4578  	a.NodesFiltered += 1
  4579  	if node != nil && node.NodeClass != "" {
  4580  		if a.ClassFiltered == nil {
  4581  			a.ClassFiltered = make(map[string]int)
  4582  		}
  4583  		a.ClassFiltered[node.NodeClass] += 1
  4584  	}
  4585  	if constraint != "" {
  4586  		if a.ConstraintFiltered == nil {
  4587  			a.ConstraintFiltered = make(map[string]int)
  4588  		}
  4589  		a.ConstraintFiltered[constraint] += 1
  4590  	}
  4591  }
  4592  
  4593  func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
  4594  	a.NodesExhausted += 1
  4595  	if node != nil && node.NodeClass != "" {
  4596  		if a.ClassExhausted == nil {
  4597  			a.ClassExhausted = make(map[string]int)
  4598  		}
  4599  		a.ClassExhausted[node.NodeClass] += 1
  4600  	}
  4601  	if dimension != "" {
  4602  		if a.DimensionExhausted == nil {
  4603  			a.DimensionExhausted = make(map[string]int)
  4604  		}
  4605  		a.DimensionExhausted[dimension] += 1
  4606  	}
  4607  }
  4608  
  4609  func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
  4610  	if a.Scores == nil {
  4611  		a.Scores = make(map[string]float64)
  4612  	}
  4613  	key := fmt.Sprintf("%s.%s", node.ID, name)
  4614  	a.Scores[key] = score
  4615  }
  4616  
// AllocDeploymentStatus captures the status of the allocation as part of the
// deployment. This can include things like if the allocation has been marked as
// healthy.
type AllocDeploymentStatus struct {
	// Healthy marks whether the allocation has been marked healthy or unhealthy
	// as part of a deployment. It is nil if the allocation has been marked
	// neither healthy nor unhealthy; IsHealthy and IsUnhealthy both return
	// false in that case.
	Healthy *bool

	// ModifyIndex is the raft index in which the deployment status was last
	// changed.
	ModifyIndex uint64
}
  4630  
  4631  // IsHealthy returns if the allocation is marked as healthy as part of a
  4632  // deployment
  4633  func (a *AllocDeploymentStatus) IsHealthy() bool {
  4634  	if a == nil {
  4635  		return false
  4636  	}
  4637  
  4638  	return a.Healthy != nil && *a.Healthy
  4639  }
  4640  
  4641  // IsUnhealthy returns if the allocation is marked as unhealthy as part of a
  4642  // deployment
  4643  func (a *AllocDeploymentStatus) IsUnhealthy() bool {
  4644  	if a == nil {
  4645  		return false
  4646  	}
  4647  
  4648  	return a.Healthy != nil && !*a.Healthy
  4649  }
  4650  
  4651  func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus {
  4652  	if a == nil {
  4653  		return nil
  4654  	}
  4655  
  4656  	c := new(AllocDeploymentStatus)
  4657  	*c = *a
  4658  
  4659  	if a.Healthy != nil {
  4660  		c.Healthy = helper.BoolToPtr(*a.Healthy)
  4661  	}
  4662  
  4663  	return c
  4664  }
  4665  
// Values of Evaluation.Status. Complete, failed and canceled are the terminal
// statuses (see Evaluation.TerminalStatus). Pending is the only status for
// which Evaluation.ShouldEnqueue returns true, and blocked is the only one
// for which Evaluation.ShouldBlock returns true. Note that the identifier
// EvalStatusCancelled is spelled with two l's while its value is "canceled".
const (
	EvalStatusBlocked   = "blocked"
	EvalStatusPending   = "pending"
	EvalStatusComplete  = "complete"
	EvalStatusFailed    = "failed"
	EvalStatusCancelled = "canceled"
)
  4673  
// Values of Evaluation.TriggeredBy, describing why an evaluation was created.
// EvalTriggerRollingUpdate is set by Evaluation.NextRollingEval and
// EvalTriggerFailedFollowUp by Evaluation.CreateFailedFollowUpEval; the
// remaining values are assigned outside this section of the file.
const (
	EvalTriggerJobRegister       = "job-register"
	EvalTriggerJobDeregister     = "job-deregister"
	EvalTriggerPeriodicJob       = "periodic-job"
	EvalTriggerNodeUpdate        = "node-update"
	EvalTriggerScheduled         = "scheduled"
	EvalTriggerRollingUpdate     = "rolling-update"
	EvalTriggerDeploymentWatcher = "deployment-watcher"
	EvalTriggerFailedFollowUp    = "failed-follow-up"
	EvalTriggerMaxPlans          = "max-plan-attempts"
)
  4685  
// Identifiers of the core jobs. Each of the first four names a garbage
// collection pass over one kind of object; CoreJobForceGC forces collection
// of all GCable objects.
const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
	// periodically scan garbage collectible jobs and check if both their
	// evaluations and allocations are terminal. If so, we delete these out of
	// the system.
	CoreJobJobGC = "job-gc"

	// CoreJobDeploymentGC is used for the garbage collection of eligible
	// deployments. We periodically scan garbage collectible deployments and
	// check if they are terminal. If so, we delete these out of the system.
	CoreJobDeploymentGC = "deployment-gc"

	// CoreJobForceGC is used to force garbage collection of all GCable objects.
	CoreJobForceGC = "force-gc"
)
  4712  
// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	// It holds one of the EvalTrigger* values.
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// DeploymentID is the ID of the deployment that triggered the evaluation.
	DeploymentID string

	// Status of the evaluation. It holds one of the EvalStatus* values.
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade.
	Wait time.Duration

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	PreviousEval string

	// BlockedEval is the evaluation ID for a created blocked eval. A
	// blocked eval will be created if all allocations could not be placed due
	// to constraints or lacking resources.
	BlockedEval string

	// FailedTGAllocs are task groups which have allocations that could not be
	// made, but the metrics are persisted so that the user can use the feedback
	// to determine the cause.
	FailedTGAllocs map[string]*AllocMetric

	// ClassEligibility tracks computed node classes that have been explicitly
	// marked as eligible or ineligible.
	ClassEligibility map[string]bool

	// EscapedComputedClass marks whether the job has constraints that are not
	// captured by computed node classes.
	EscapedComputedClass bool

	// AnnotatePlan triggers the scheduler to provide additional annotations
	// during the evaluation. This should not be set during normal operations.
	AnnotatePlan bool

	// QueuedAllocations is the number of unplaced allocations at the time the
	// evaluation was processed. The map is keyed by Task Group names.
	QueuedAllocations map[string]int

	// SnapshotIndex is the Raft index of the snapshot used to process the
	// evaluation. As such it will only be set once it has gone through the
	// scheduler.
	SnapshotIndex uint64

	// Raft Indexes: the Raft indexes at which the evaluation was created and
	// last modified.
	CreateIndex uint64
	ModifyIndex uint64
}
  4806  
  4807  // TerminalStatus returns if the current status is terminal and
  4808  // will no longer transition.
  4809  func (e *Evaluation) TerminalStatus() bool {
  4810  	switch e.Status {
  4811  	case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled:
  4812  		return true
  4813  	default:
  4814  		return false
  4815  	}
  4816  }
  4817  
  4818  func (e *Evaluation) GoString() string {
  4819  	return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID)
  4820  }
  4821  
  4822  func (e *Evaluation) Copy() *Evaluation {
  4823  	if e == nil {
  4824  		return nil
  4825  	}
  4826  	ne := new(Evaluation)
  4827  	*ne = *e
  4828  
  4829  	// Copy ClassEligibility
  4830  	if e.ClassEligibility != nil {
  4831  		classes := make(map[string]bool, len(e.ClassEligibility))
  4832  		for class, elig := range e.ClassEligibility {
  4833  			classes[class] = elig
  4834  		}
  4835  		ne.ClassEligibility = classes
  4836  	}
  4837  
  4838  	// Copy FailedTGAllocs
  4839  	if e.FailedTGAllocs != nil {
  4840  		failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs))
  4841  		for tg, metric := range e.FailedTGAllocs {
  4842  			failedTGs[tg] = metric.Copy()
  4843  		}
  4844  		ne.FailedTGAllocs = failedTGs
  4845  	}
  4846  
  4847  	// Copy queued allocations
  4848  	if e.QueuedAllocations != nil {
  4849  		queuedAllocations := make(map[string]int, len(e.QueuedAllocations))
  4850  		for tg, num := range e.QueuedAllocations {
  4851  			queuedAllocations[tg] = num
  4852  		}
  4853  		ne.QueuedAllocations = queuedAllocations
  4854  	}
  4855  
  4856  	return ne
  4857  }
  4858  
  4859  // ShouldEnqueue checks if a given evaluation should be enqueued into the
  4860  // eval_broker
  4861  func (e *Evaluation) ShouldEnqueue() bool {
  4862  	switch e.Status {
  4863  	case EvalStatusPending:
  4864  		return true
  4865  	case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled:
  4866  		return false
  4867  	default:
  4868  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  4869  	}
  4870  }
  4871  
  4872  // ShouldBlock checks if a given evaluation should be entered into the blocked
  4873  // eval tracker.
  4874  func (e *Evaluation) ShouldBlock() bool {
  4875  	switch e.Status {
  4876  	case EvalStatusBlocked:
  4877  		return true
  4878  	case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled:
  4879  		return false
  4880  	default:
  4881  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  4882  	}
  4883  }
  4884  
  4885  // MakePlan is used to make a plan from the given evaluation
  4886  // for a given Job
  4887  func (e *Evaluation) MakePlan(j *Job) *Plan {
  4888  	p := &Plan{
  4889  		EvalID:         e.ID,
  4890  		Priority:       e.Priority,
  4891  		Job:            j,
  4892  		NodeUpdate:     make(map[string][]*Allocation),
  4893  		NodeAllocation: make(map[string][]*Allocation),
  4894  	}
  4895  	if j != nil {
  4896  		p.AllAtOnce = j.AllAtOnce
  4897  	}
  4898  	return p
  4899  }
  4900  
  4901  // NextRollingEval creates an evaluation to followup this eval for rolling updates
  4902  func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
  4903  	return &Evaluation{
  4904  		ID:             GenerateUUID(),
  4905  		Priority:       e.Priority,
  4906  		Type:           e.Type,
  4907  		TriggeredBy:    EvalTriggerRollingUpdate,
  4908  		JobID:          e.JobID,
  4909  		JobModifyIndex: e.JobModifyIndex,
  4910  		Status:         EvalStatusPending,
  4911  		Wait:           wait,
  4912  		PreviousEval:   e.ID,
  4913  	}
  4914  }
  4915  
  4916  // CreateBlockedEval creates a blocked evaluation to followup this eval to place any
  4917  // failed allocations. It takes the classes marked explicitly eligible or
  4918  // ineligible and whether the job has escaped computed node classes.
  4919  func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation {
  4920  	return &Evaluation{
  4921  		ID:                   GenerateUUID(),
  4922  		Priority:             e.Priority,
  4923  		Type:                 e.Type,
  4924  		TriggeredBy:          e.TriggeredBy,
  4925  		JobID:                e.JobID,
  4926  		JobModifyIndex:       e.JobModifyIndex,
  4927  		Status:               EvalStatusBlocked,
  4928  		PreviousEval:         e.ID,
  4929  		ClassEligibility:     classEligibility,
  4930  		EscapedComputedClass: escaped,
  4931  	}
  4932  }
  4933  
  4934  // CreateFailedFollowUpEval creates a follow up evaluation when the current one
  4935  // has been marked as failed becasue it has hit the delivery limit and will not
  4936  // be retried by the eval_broker.
  4937  func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation {
  4938  	return &Evaluation{
  4939  		ID:             GenerateUUID(),
  4940  		Priority:       e.Priority,
  4941  		Type:           e.Type,
  4942  		TriggeredBy:    EvalTriggerFailedFollowUp,
  4943  		JobID:          e.JobID,
  4944  		JobModifyIndex: e.JobModifyIndex,
  4945  		Status:         EvalStatusPending,
  4946  		Wait:           wait,
  4947  		PreviousEval:   e.ID,
  4948  	}
  4949  }
  4950  
// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// Job is the parent job of all the allocations in the Plan.
	// Since a Plan only involves a single Job, we can reduce the size
	// of the plan by only including it once.
	Job *Job

	// NodeUpdate contains all the allocations for each node. For each node,
	// this is a list of the allocations to update to either stop or evict.
	// The map is keyed by node ID; entries are added by AppendUpdate and
	// removed by PopUpdate.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	// The map is keyed by node ID; entries are added by AppendAlloc.
	NodeAllocation map[string][]*Allocation

	// Annotations contains annotations by the scheduler to be used by operators
	// to understand the decisions made by the scheduler.
	Annotations *PlanAnnotations

	// Deployment is the deployment created or updated by the scheduler that
	// should be applied by the planner.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate
}
  5000  
  5001  // AppendUpdate marks the allocation for eviction. The clientStatus of the
  5002  // allocation may be optionally set by passing in a non-empty value.
  5003  func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) {
  5004  	newAlloc := new(Allocation)
  5005  	*newAlloc = *alloc
  5006  
  5007  	// If the job is not set in the plan we are deregistering a job so we
  5008  	// extract the job from the allocation.
  5009  	if p.Job == nil && newAlloc.Job != nil {
  5010  		p.Job = newAlloc.Job
  5011  	}
  5012  
  5013  	// Normalize the job
  5014  	newAlloc.Job = nil
  5015  
  5016  	// Strip the resources as it can be rebuilt.
  5017  	newAlloc.Resources = nil
  5018  
  5019  	newAlloc.DesiredStatus = desiredStatus
  5020  	newAlloc.DesiredDescription = desiredDesc
  5021  
  5022  	if clientStatus != "" {
  5023  		newAlloc.ClientStatus = clientStatus
  5024  	}
  5025  
  5026  	node := alloc.NodeID
  5027  	existing := p.NodeUpdate[node]
  5028  	p.NodeUpdate[node] = append(existing, newAlloc)
  5029  }
  5030  
  5031  func (p *Plan) PopUpdate(alloc *Allocation) {
  5032  	existing := p.NodeUpdate[alloc.NodeID]
  5033  	n := len(existing)
  5034  	if n > 0 && existing[n-1].ID == alloc.ID {
  5035  		existing = existing[:n-1]
  5036  		if len(existing) > 0 {
  5037  			p.NodeUpdate[alloc.NodeID] = existing
  5038  		} else {
  5039  			delete(p.NodeUpdate, alloc.NodeID)
  5040  		}
  5041  	}
  5042  }
  5043  
  5044  func (p *Plan) AppendAlloc(alloc *Allocation) {
  5045  	node := alloc.NodeID
  5046  	existing := p.NodeAllocation[node]
  5047  	p.NodeAllocation[node] = append(existing, alloc)
  5048  }
  5049  
  5050  // IsNoOp checks if this plan would do nothing
  5051  func (p *Plan) IsNoOp() bool {
  5052  	return len(p.NodeUpdate) == 0 &&
  5053  		len(p.NodeAllocation) == 0 &&
  5054  		p.Deployment == nil &&
  5055  		len(p.DeploymentUpdates) == 0
  5056  }
  5057  
// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	// FullCommit compares it, node by node, against the submitted plan's
	// NodeAllocation.
	NodeAllocation map[string][]*Allocation

	// Deployment is the deployment that was committed.
	Deployment *Deployment

	// DeploymentUpdates is the set of deployment updates that were committed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// over committed) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}
  5082  
  5083  // IsNoOp checks if this plan result would do nothing
  5084  func (p *PlanResult) IsNoOp() bool {
  5085  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 &&
  5086  		len(p.DeploymentUpdates) == 0 && p.Deployment == nil
  5087  }
  5088  
  5089  // FullCommit is used to check if all the allocations in a plan
  5090  // were committed as part of the result. Returns if there was
  5091  // a match, and the number of expected and actual allocations.
  5092  func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
  5093  	expected := 0
  5094  	actual := 0
  5095  	for name, allocList := range plan.NodeAllocation {
  5096  		didAlloc, _ := p.NodeAllocation[name]
  5097  		expected += len(allocList)
  5098  		actual += len(didAlloc)
  5099  	}
  5100  	return actual == expected, expected, actual
  5101  }
  5102  
// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators.
type PlanAnnotations struct {
	// DesiredTGUpdates is the set of desired updates per task group.
	// NOTE(review): the map is presumably keyed by task group name; confirm
	// against the scheduler code that fills it.
	DesiredTGUpdates map[string]*DesiredUpdates
}
  5109  
  5110  // DesiredUpdates is the set of changes the scheduler would like to make given
  5111  // sufficient resources and cluster capacity.
  5112  type DesiredUpdates struct {
  5113  	Ignore            uint64
  5114  	Place             uint64
  5115  	Migrate           uint64
  5116  	Stop              uint64
  5117  	InPlaceUpdate     uint64
  5118  	DestructiveUpdate uint64
  5119  	Canary            uint64
  5120  }
  5121  
  5122  func (d *DesiredUpdates) GoString() string {
  5123  	return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
  5124  		d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
  5125  }
  5126  
// MsgpackHandle is a shared handle for encoding/decoding of structs. It is
// the handle used by Encode and Decode, and is configured with RawToString
// enabled.
var MsgpackHandle = func() *codec.MsgpackHandle {
	h := &codec.MsgpackHandle{RawToString: true}

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
	return h
}()
  5137  
var (
	// JsonHandle and JsonHandlePretty are the codec handles to JSON encode
	// structs. The pretty handle will add indents for easier human consumption.
	// Both handles set HTMLCharsAsIs; the pretty handle additionally sets
	// Indent to 4.
	JsonHandle = &codec.JsonHandle{
		HTMLCharsAsIs: true,
	}
	JsonHandlePretty = &codec.JsonHandle{
		HTMLCharsAsIs: true,
		Indent:        4,
	}
)
  5149  
// HashiMsgpackHandle is a msgpack handle configured the same way as
// MsgpackHandle (RawToString enabled, maps decoded into a nil interface{}
// as map[string]interface{}), but built from the hashicorp/go-msgpack codec
// package (imported as hcodec) instead of ugorji/go codec.
var HashiMsgpackHandle = func() *hcodec.MsgpackHandle {
	h := &hcodec.MsgpackHandle{RawToString: true}

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
	return h
}()
  5159  
  5160  // Decode is used to decode a MsgPack encoded object
  5161  func Decode(buf []byte, out interface{}) error {
  5162  	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
  5163  }
  5164  
  5165  // Encode is used to encode a MsgPack object with type prefix
  5166  func Encode(t MessageType, msg interface{}) ([]byte, error) {
  5167  	var buf bytes.Buffer
  5168  	buf.WriteByte(uint8(t))
  5169  	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
  5170  	return buf.Bytes(), err
  5171  }
  5172  
// KeyringResponse is a unified key response and can be used for install,
// remove, use, as well as listing key queries.
// NOTE(review): the meaning of the map keys and of the Keys counts is not
// visible in this file section; confirm against the serf keyring code.
type KeyringResponse struct {
	Messages map[string]string
	Keys     map[string]int
	NumNodes int
}
  5180  
// KeyringRequest is the request object for serf key operations.
type KeyringRequest struct {
	Key string
}
  5185  
  5186  // RecoverableError wraps an error and marks whether it is recoverable and could
  5187  // be retried or it is fatal.
  5188  type RecoverableError struct {
  5189  	Err         string
  5190  	Recoverable bool
  5191  }
  5192  
  5193  // NewRecoverableError is used to wrap an error and mark it as recoverable or
  5194  // not.
  5195  func NewRecoverableError(e error, recoverable bool) error {
  5196  	if e == nil {
  5197  		return nil
  5198  	}
  5199  
  5200  	return &RecoverableError{
  5201  		Err:         e.Error(),
  5202  		Recoverable: recoverable,
  5203  	}
  5204  }
  5205  
  5206  // WrapRecoverable wraps an existing error in a new RecoverableError with a new
  5207  // message. If the error was recoverable before the returned error is as well;
  5208  // otherwise it is unrecoverable.
  5209  func WrapRecoverable(msg string, err error) error {
  5210  	return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)}
  5211  }
  5212  
  5213  func (r *RecoverableError) Error() string {
  5214  	return r.Err
  5215  }
  5216  
  5217  func (r *RecoverableError) IsRecoverable() bool {
  5218  	return r.Recoverable
  5219  }
  5220  
  5221  // Recoverable is an interface for errors to implement to indicate whether or
  5222  // not they are fatal or recoverable.
  5223  type Recoverable interface {
  5224  	error
  5225  	IsRecoverable() bool
  5226  }
  5227  
  5228  // IsRecoverable returns true if error is a RecoverableError with
  5229  // Recoverable=true. Otherwise false is returned.
  5230  func IsRecoverable(e error) bool {
  5231  	if re, ok := e.(Recoverable); ok {
  5232  		return re.IsRecoverable()
  5233  	}
  5234  	return false
  5235  }