github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/nomad/structs/structs.go (about)

     1  package structs
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/md5"
     6  	"crypto/sha1"
     7  	"crypto/sha256"
     8  	"crypto/sha512"
     9  	"encoding/hex"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"net"
    14  	"os"
    15  	"path/filepath"
    16  	"reflect"
    17  	"regexp"
    18  	"sort"
    19  	"strconv"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/gorhill/cronexpr"
    24  	"github.com/hashicorp/consul/api"
    25  	"github.com/hashicorp/go-multierror"
    26  	"github.com/hashicorp/go-version"
    27  	"github.com/hashicorp/nomad/helper"
    28  	"github.com/hashicorp/nomad/helper/args"
    29  	"github.com/mitchellh/copystructure"
    30  	"github.com/ugorji/go/codec"
    31  
    32  	hcodec "github.com/hashicorp/go-msgpack/codec"
    33  )
    34  
    35  var (
    36  	ErrNoLeader     = fmt.Errorf("No cluster leader")
    37  	ErrNoRegionPath = fmt.Errorf("No path to region")
    38  )
    39  
// MessageType is the single-byte identifier of a message kind.
type MessageType uint8

// The message types below are numbered from zero in declaration order via
// iota, so inserting or reordering entries changes the value of every entry
// that follows.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	VaultAccessorDegisterRequestType
	ApplyPlanResultsRequestType
	DeploymentStatusUpdateRequestType
	DeploymentPromoteRequestType
	DeploymentAllocHealthRequestType
	DeploymentDeleteRequestType
	JobStabilityRequestType
)
    63  
const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients, to allow for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	// String identifiers for the protocol version and the API major/minor
	// versions.
	// NOTE(review): presumably the keys of VersionResponse.Versions — confirm
	// against the code that builds that map.
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"

	// String values naming the getter modes "any", "file" and "dir".
	// NOTE(review): presumably the modes accepted when fetching artifacts —
	// confirm against the users of these constants.
	GetterModeAny  = "any"
	GetterModeFile = "file"
	GetterModeDir  = "dir"
)
    91  
// RPCInfo describes the information common to RPC requests: the region the
// request targets, whether it is a read, and whether a stale read is allowed.
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
}
    98  
    99  // QueryOptions is used to specify various flags for read queries
   100  type QueryOptions struct {
   101  	// The target region for this query
   102  	Region string
   103  
   104  	// If set, wait until query exceeds given index. Must be provided
   105  	// with MaxQueryTime.
   106  	MinQueryIndex uint64
   107  
   108  	// Provided with MinQueryIndex to wait for change.
   109  	MaxQueryTime time.Duration
   110  
   111  	// If set, any follower can service the request. Results
   112  	// may be arbitrarily stale.
   113  	AllowStale bool
   114  
   115  	// If set, used as prefix for resource list searches
   116  	Prefix string
   117  }
   118  
   119  func (q QueryOptions) RequestRegion() string {
   120  	return q.Region
   121  }
   122  
   123  // QueryOption only applies to reads, so always true
   124  func (q QueryOptions) IsRead() bool {
   125  	return true
   126  }
   127  
   128  func (q QueryOptions) AllowStaleRead() bool {
   129  	return q.AllowStale
   130  }
   131  
   132  type WriteRequest struct {
   133  	// The target region for this write
   134  	Region string
   135  }
   136  
   137  func (w WriteRequest) RequestRegion() string {
   138  	// The target region for this request
   139  	return w.Region
   140  }
   141  
   142  // WriteRequest only applies to writes, always false
   143  func (w WriteRequest) IsRead() bool {
   144  	return false
   145  }
   146  
   147  func (w WriteRequest) AllowStaleRead() bool {
   148  	return false
   149  }
   150  
// QueryMeta allows a query response to include potentially
// useful metadata about a query.
type QueryMeta struct {
	// Index is the index associated with the read.
	Index uint64

	// LastContact is, if AllowStale is used, the time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// KnownLeader indicates if there is a known leader node.
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write.
type WriteMeta struct {
	// Index is the index associated with the write.
	Index uint64
}
   172  
// NodeRegisterRequest is used for the Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}

// NodeDeregisterRequest is used for the Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports.
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports.
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to.
	Datacenter string
}

// NodeUpdateStatusRequest is used for the Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain status of a node.
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node.
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node.
type NodeSpecificRequest struct {
	NodeID   string
	SecretID string
	QueryOptions
}
   233  
// JobRegisterRequest is used for the Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current job's index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	WriteRequest
}

// JobDeregisterRequest is used for the Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string

	// Purge controls whether the deregister purges the job from the system or
	// whether the job is just marked as stopped and will be removed by the
	// garbage collector.
	Purge bool

	WriteRequest
}

// JobEvaluateRequest is used when we just need to re-evaluate a target job.
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}

// JobSpecificRequest is used when we just need to specify a target job.
type JobSpecificRequest struct {
	JobID     string
	AllAllocs bool
	QueryOptions
}

// JobListRequest is used to parameterize a list request.
type JobListRequest struct {
	QueryOptions
}

// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
	Job  *Job
	Diff bool // Toggles an annotated diff
	WriteRequest
}

// JobSummaryRequest is used when we just need to get a specific job summary.
type JobSummaryRequest struct {
	JobID string
	QueryOptions
}

// JobDispatchRequest is used to dispatch a job based on a parameterized job.
type JobDispatchRequest struct {
	JobID   string
	Payload []byte
	Meta    map[string]string
	WriteRequest
}

// JobValidateRequest is used to validate a job.
type JobValidateRequest struct {
	Job *Job
	WriteRequest
}
   306  
// JobRevertRequest is used to revert a job to a prior version.
type JobRevertRequest struct {
	// JobID is the ID of the job being reverted.
	JobID string

	// JobVersion is the version to revert to.
	JobVersion uint64

	// EnforcePriorVersion, if set, will enforce that the job is at the given
	// version before reverting.
	EnforcePriorVersion *uint64

	WriteRequest
}

// JobStabilityRequest is used to mark a job as stable.
type JobStabilityRequest struct {
	// Job and version to set the stability on.
	JobID      string
	JobVersion uint64

	// Stable is the stability value to set.
	Stable bool
	WriteRequest
}

// JobStabilityResponse is the response when marking a job as stable.
type JobStabilityResponse struct {
	WriteMeta
}

// NodeListRequest is used to parameterize a list request.
type NodeListRequest struct {
	QueryOptions
}
   342  
// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalDeleteRequest is used for deleting evaluations. It carries a list of
// evaluation entries and a list of allocation entries, both as strings.
// NOTE(review): presumably these are IDs — confirm against the handler.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation.
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation.
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation.
type EvalDequeueRequest struct {
	Schedulers       []string
	Timeout          time.Duration
	SchedulerVersion uint16
	WriteRequest
}

// EvalListRequest is used to list the evaluations.
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader.
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}
   388  
// ApplyPlanResultsRequest is used by the planner to apply a Raft transaction
// committing the result of a plan.
type ApplyPlanResultsRequest struct {
	// AllocUpdateRequest holds the allocation updates to be made by the
	// scheduler.
	AllocUpdateRequest

	// Deployment is the deployment created or updated as a result of a
	// scheduling event.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction.
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign.
	Alloc []*Allocation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	WriteRequest
}

// AllocListRequest is used to request a list of allocations.
type AllocListRequest struct {
	QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation.
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// AllocsGetRequest is used to query a set of allocations.
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}
   442  
// ServerMembersResponse has the list of servers in a cluster, together with a
// server name, region and datacenter.
// NOTE(review): presumably ServerName/ServerRegion/ServerDC describe the
// responding server — confirm against the code that fills them in.
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
}

// ServerMember holds information about a Nomad server agent in a cluster:
// its name, address and port, tags, status, and the minimum, maximum and
// current values of its protocol and delegate fields.
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}
   465  
// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// listed tasks in the given allocation.
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	QueryOptions
}

// VaultAccessorsRequest is used to operate on a set of Vault accessors.
type VaultAccessorsRequest struct {
	Accessors []*VaultAccessor
}

// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
	AllocID     string
	Task        string
	NodeID      string
	Accessor    string
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}

// DeriveVaultTokenResponse returns the wrapped tokens for each requested task.
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token.
	Tasks map[string]string

	// Error stores any error that occurred. Errors are stored here so we can
	// communicate whether it is retriable.
	Error *RecoverableError

	QueryMeta
}
   505  
// GenericRequest is used for requests where no
// specific information is needed.
type GenericRequest struct {
	QueryOptions
}

// DeploymentListRequest is used to list the deployments.
type DeploymentListRequest struct {
	QueryOptions
}

// DeploymentDeleteRequest is used for deleting deployments.
// NOTE(review): Deployments is presumably a list of deployment IDs — confirm
// against the handler.
type DeploymentDeleteRequest struct {
	Deployments []string
	WriteRequest
}
   522  
// DeploymentStatusUpdateRequest is used to update the status of a deployment as
// well as optionally creating an evaluation atomically.
type DeploymentStatusUpdateRequest struct {
	// Eval, if set, is used to create an evaluation at the same time as
	// updating the status of a deployment.
	Eval *Evaluation

	// DeploymentUpdate is a status update to apply to the given
	// deployment.
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job
}

// DeploymentAllocHealthRequest is used to set the health of a set of
// allocations as part of a deployment.
type DeploymentAllocHealthRequest struct {
	DeploymentID string

	// HealthyAllocationIDs marks these allocations as healthy, allowing
	// further allocations to be rolled.
	HealthyAllocationIDs []string

	// UnhealthyAllocationIDs lists unhealthy allocations; any unhealthy
	// allocation fails the deployment.
	UnhealthyAllocationIDs []string

	WriteRequest
}

// ApplyDeploymentAllocHealthRequest is used to apply an alloc health request
// via Raft.
type ApplyDeploymentAllocHealthRequest struct {
	DeploymentAllocHealthRequest

	// DeploymentUpdate is an optional field to update the status of a
	// deployment.
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job

	// Eval is an optional evaluation to create after promoting the canaries.
	// NOTE(review): this wording talks about promotion although the request
	// is about allocation health — confirm when the evaluation is created.
	Eval *Evaluation
}
   570  
// DeploymentPromoteRequest is used to promote task groups in a deployment.
type DeploymentPromoteRequest struct {
	DeploymentID string

	// All is to promote all task groups.
	All bool

	// Groups is used to set the promotion status per task group.
	Groups []string

	WriteRequest
}

// ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft.
type ApplyDeploymentPromoteRequest struct {
	DeploymentPromoteRequest

	// Eval is an optional evaluation to create after promoting the canaries.
	Eval *Evaluation
}

// DeploymentPauseRequest is used to pause a deployment.
type DeploymentPauseRequest struct {
	DeploymentID string

	// Pause sets the pause status.
	Pause bool

	WriteRequest
}

// DeploymentSpecificRequest is used to make a request specific to a particular
// deployment.
type DeploymentSpecificRequest struct {
	DeploymentID string
	QueryOptions
}

// DeploymentFailRequest is used to fail a particular deployment.
type DeploymentFailRequest struct {
	DeploymentID string
	WriteRequest
}

// SingleDeploymentResponse is used to respond with a single deployment.
type SingleDeploymentResponse struct {
	Deployment *Deployment
	QueryMeta
}
   620  
// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

// VersionResponse is used for the Status.Version response.
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration.
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration.
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// JobValidateResponse is the response from a validate request.
type JobValidateResponse struct {
	// DriverConfigValidated indicates whether the agent validated the driver
	// config.
	DriverConfigValidated bool

	// ValidationErrors is a list of validation errors.
	ValidationErrors []string

	// Error is a string version of any error that may have occurred.
	Error string

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string
}
   671  
// NodeUpdateResponse is used to respond to a node update.
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader.  If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response.  This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update.
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeAllocsResponse is used to return allocs for a single node.
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// NodeClientAllocsResponse is used to return alloc metadata for a single
// node, as a map from string keys to uint64 values.
// NOTE(review): presumably allocation ID to an index — confirm against the
// code that builds the map.
type NodeClientAllocsResponse struct {
	Allocs map[string]uint64
	QueryMeta
}

// SingleNodeResponse is used to return a single node.
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a list request.
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}
   726  
// SingleJobResponse is used to return a single job.
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobSummaryResponse is used to return a single job summary.
type JobSummaryResponse struct {
	JobSummary *JobSummary
	QueryMeta
}

// JobDispatchResponse carries the dispatched job ID, an evaluation ID, and the
// evaluation and job create indexes.
// NOTE(review): presumably the response to a JobDispatchRequest — confirm
// against the dispatch endpoint.
type JobDispatchResponse struct {
	DispatchedJobID string
	EvalID          string
	EvalCreateIndex uint64
	JobCreateIndex  uint64
	WriteMeta
}

// JobListResponse is used for a list request.
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// JobVersionsRequest is used to get a job's versions.
type JobVersionsRequest struct {
	JobID string
	Diffs bool
	QueryOptions
}

// JobVersionsResponse is used for a job get versions request.
type JobVersionsResponse struct {
	Versions []*Job
	Diffs    []*JobDiff
	QueryMeta
}
   766  
// JobPlanResponse is used to respond to a job plan request.
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy.
	Diff *JobDiff

	// NextPeriodicLaunch is the time at which the job would next be launched
	// if submitted.
	// NOTE(review): the field is a time.Time, so this is presumably an instant
	// rather than a duration — confirm against the code that sets it.
	NextPeriodicLaunch time.Time

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	WriteMeta
}
   798  
// SingleAllocResponse is used to return a single allocation.
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// AllocsGetResponse is used to return a set of allocations.
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job.
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job.
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation.
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue. It carries an
// evaluation together with a token.
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string
	QueryMeta
}

// PlanResponse is used to return from a PlanRequest.
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}
   841  
// AllocListResponse is used for an allocation list request.
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// DeploymentListResponse is used for a deployment list request.
type DeploymentListResponse struct {
	Deployments []*Deployment
	QueryMeta
}

// EvalListResponse is used for an evaluation list request.
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation.
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch.
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}

// DeploymentUpdateResponse is used to respond to a deployment change. The
// response will include the modify index of the deployment as well as details
// of any triggered evaluation.
type DeploymentUpdateResponse struct {
	EvalID                string
	EvalCreateIndex       uint64
	DeploymentModifyIndex uint64

	// RevertedJobVersion is the version the job was reverted to. If unset, the
	// job wasn't reverted.
	RevertedJobVersion *uint64

	WriteMeta
}
   887  
   888  const (
   889  	NodeStatusInit  = "initializing"
   890  	NodeStatusReady = "ready"
   891  	NodeStatusDown  = "down"
   892  )
   893  
   894  // ShouldDrainNode checks if a given node status should trigger an
   895  // evaluation. Some states don't require any further action.
   896  func ShouldDrainNode(status string) bool {
   897  	switch status {
   898  	case NodeStatusInit, NodeStatusReady:
   899  		return false
   900  	case NodeStatusDown:
   901  		return true
   902  	default:
   903  		panic(fmt.Sprintf("unhandled node status %s", status))
   904  	}
   905  }
   906  
   907  // ValidNodeStatus is used to check if a node status is valid
   908  func ValidNodeStatus(status string) bool {
   909  	switch status {
   910  	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
   911  		return true
   912  	default:
   913  		return false
   914  	}
   915  }
   916  
// Node is a representation of a schedulable client node.
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
	TLSEnabled bool

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may be to provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// StatusUpdatedAt is the time stamp at which the state of the node was
	// updated.
	// NOTE(review): the unit of this int64 is not visible here — confirm
	// against the code that sets it.
	StatusUpdatedAt int64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
   995  
   996  // Ready returns if the node is ready for running allocations
   997  func (n *Node) Ready() bool {
   998  	return n.Status == NodeStatusReady && !n.Drain
   999  }
  1000  
  1001  func (n *Node) Copy() *Node {
  1002  	if n == nil {
  1003  		return nil
  1004  	}
  1005  	nn := new(Node)
  1006  	*nn = *n
  1007  	nn.Attributes = helper.CopyMapStringString(nn.Attributes)
  1008  	nn.Resources = nn.Resources.Copy()
  1009  	nn.Reserved = nn.Reserved.Copy()
  1010  	nn.Links = helper.CopyMapStringString(nn.Links)
  1011  	nn.Meta = helper.CopyMapStringString(nn.Meta)
  1012  	return nn
  1013  }
  1014  
  1015  // TerminalStatus returns if the current status is terminal and
  1016  // will no longer transition.
  1017  func (n *Node) TerminalStatus() bool {
  1018  	switch n.Status {
  1019  	case NodeStatusDown:
  1020  		return true
  1021  	default:
  1022  		return false
  1023  	}
  1024  }
  1025  
  1026  // Stub returns a summarized version of the node
  1027  func (n *Node) Stub() *NodeListStub {
  1028  	return &NodeListStub{
  1029  		ID:                n.ID,
  1030  		Datacenter:        n.Datacenter,
  1031  		Name:              n.Name,
  1032  		NodeClass:         n.NodeClass,
  1033  		Drain:             n.Drain,
  1034  		Status:            n.Status,
  1035  		StatusDescription: n.StatusDescription,
  1036  		CreateIndex:       n.CreateIndex,
  1037  		ModifyIndex:       n.ModifyIndex,
  1038  	}
  1039  }
  1040  
// NodeListStub is used to return a subset of node information
// for the node list. It is the type produced by Node.Stub and carries the
// node's identity, class, drain flag, status and Raft indexes.
type NodeListStub struct {
	ID                string
	Datacenter        string
	Name              string
	NodeClass         string
	Drain             bool
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}
  1054  
// Networks is the list of network resources defined for a task on the
// Resources struct.
type Networks []*NetworkResource
  1057  
  1058  // Port assignment and IP for the given label or empty values.
  1059  func (ns Networks) Port(label string) (string, int) {
  1060  	for _, n := range ns {
  1061  		for _, p := range n.ReservedPorts {
  1062  			if p.Label == label {
  1063  				return n.IP, p.Value
  1064  			}
  1065  		}
  1066  		for _, p := range n.DynamicPorts {
  1067  			if p.Label == label {
  1068  				return n.IP, p.Value
  1069  			}
  1070  		}
  1071  	}
  1072  	return "", 0
  1073  }
  1074  
// Resources is used to define the resources available
// on a client
//
// DiskMB is expressed in megabytes (DiskInBytes converts it using
// BytesInMegabyte); MemoryMB is presumably megabytes as well, going by its
// name. The unit of CPU is not stated in this part of the file -- TODO
// confirm. Networks lists the network resources that belong to this set.
type Resources struct {
	CPU      int
	MemoryMB int
	DiskMB   int
	IOPS     int
	Networks Networks
}
  1084  
const (
	// BytesInMegabyte is the number of bytes in one megabyte (1024 * 1024).
	BytesInMegabyte = 1024 * 1024
)
  1088  
  1089  // DefaultResources returns the default resources for a task.
  1090  func DefaultResources() *Resources {
  1091  	return &Resources{
  1092  		CPU:      100,
  1093  		MemoryMB: 10,
  1094  		IOPS:     0,
  1095  	}
  1096  }
  1097  
  1098  // DiskInBytes returns the amount of disk resources in bytes.
  1099  func (r *Resources) DiskInBytes() int64 {
  1100  	return int64(r.DiskMB * BytesInMegabyte)
  1101  }
  1102  
  1103  // Merge merges this resource with another resource.
  1104  func (r *Resources) Merge(other *Resources) {
  1105  	if other.CPU != 0 {
  1106  		r.CPU = other.CPU
  1107  	}
  1108  	if other.MemoryMB != 0 {
  1109  		r.MemoryMB = other.MemoryMB
  1110  	}
  1111  	if other.DiskMB != 0 {
  1112  		r.DiskMB = other.DiskMB
  1113  	}
  1114  	if other.IOPS != 0 {
  1115  		r.IOPS = other.IOPS
  1116  	}
  1117  	if len(other.Networks) != 0 {
  1118  		r.Networks = other.Networks
  1119  	}
  1120  }
  1121  
  1122  func (r *Resources) Canonicalize() {
  1123  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
  1124  	// problems since we use reflect DeepEquals.
  1125  	if len(r.Networks) == 0 {
  1126  		r.Networks = nil
  1127  	}
  1128  
  1129  	for _, n := range r.Networks {
  1130  		n.Canonicalize()
  1131  	}
  1132  }
  1133  
  1134  // MeetsMinResources returns an error if the resources specified are less than
  1135  // the minimum allowed.
  1136  func (r *Resources) MeetsMinResources() error {
  1137  	var mErr multierror.Error
  1138  	if r.CPU < 20 {
  1139  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is 20; got %d", r.CPU))
  1140  	}
  1141  	if r.MemoryMB < 10 {
  1142  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is 10; got %d", r.MemoryMB))
  1143  	}
  1144  	if r.IOPS < 0 {
  1145  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is 0; got %d", r.IOPS))
  1146  	}
  1147  	for i, n := range r.Networks {
  1148  		if err := n.MeetsMinResources(); err != nil {
  1149  			mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err))
  1150  		}
  1151  	}
  1152  
  1153  	return mErr.ErrorOrNil()
  1154  }
  1155  
  1156  // Copy returns a deep copy of the resources
  1157  func (r *Resources) Copy() *Resources {
  1158  	if r == nil {
  1159  		return nil
  1160  	}
  1161  	newR := new(Resources)
  1162  	*newR = *r
  1163  	if r.Networks != nil {
  1164  		n := len(r.Networks)
  1165  		newR.Networks = make([]*NetworkResource, n)
  1166  		for i := 0; i < n; i++ {
  1167  			newR.Networks[i] = r.Networks[i].Copy()
  1168  		}
  1169  	}
  1170  	return newR
  1171  }
  1172  
  1173  // NetIndex finds the matching net index using device name
  1174  func (r *Resources) NetIndex(n *NetworkResource) int {
  1175  	for idx, net := range r.Networks {
  1176  		if net.Device == n.Device {
  1177  			return idx
  1178  		}
  1179  	}
  1180  	return -1
  1181  }
  1182  
  1183  // Superset checks if one set of resources is a superset
  1184  // of another. This ignores network resources, and the NetworkIndex
  1185  // should be used for that.
  1186  func (r *Resources) Superset(other *Resources) (bool, string) {
  1187  	if r.CPU < other.CPU {
  1188  		return false, "cpu exhausted"
  1189  	}
  1190  	if r.MemoryMB < other.MemoryMB {
  1191  		return false, "memory exhausted"
  1192  	}
  1193  	if r.DiskMB < other.DiskMB {
  1194  		return false, "disk exhausted"
  1195  	}
  1196  	if r.IOPS < other.IOPS {
  1197  		return false, "iops exhausted"
  1198  	}
  1199  	return true, ""
  1200  }
  1201  
  1202  // Add adds the resources of the delta to this, potentially
  1203  // returning an error if not possible.
  1204  func (r *Resources) Add(delta *Resources) error {
  1205  	if delta == nil {
  1206  		return nil
  1207  	}
  1208  	r.CPU += delta.CPU
  1209  	r.MemoryMB += delta.MemoryMB
  1210  	r.DiskMB += delta.DiskMB
  1211  	r.IOPS += delta.IOPS
  1212  
  1213  	for _, n := range delta.Networks {
  1214  		// Find the matching interface by IP or CIDR
  1215  		idx := r.NetIndex(n)
  1216  		if idx == -1 {
  1217  			r.Networks = append(r.Networks, n.Copy())
  1218  		} else {
  1219  			r.Networks[idx].Add(n)
  1220  		}
  1221  	}
  1222  	return nil
  1223  }
  1224  
  1225  func (r *Resources) GoString() string {
  1226  	return fmt.Sprintf("*%#v", *r)
  1227  }
  1228  
// Port pairs a label with a port number. NetworkResource stores its reserved
// and dynamic ports as slices of Port, and lookups by label return Value.
type Port struct {
	Label string
	Value int
}
  1233  
// NetworkResource is used to represent available network
// resources. Device is the key Resources.NetIndex uses to match two network
// resources against each other.
type NetworkResource struct {
	Device        string // Name of the device
	CIDR          string // CIDR block of addresses
	IP            string // Host IP address
	MBits         int    // Throughput
	ReservedPorts []Port // Host Reserved ports
	DynamicPorts  []Port // Host Dynamically assigned ports
}
  1244  
  1245  func (n *NetworkResource) Canonicalize() {
  1246  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
  1247  	// problems since we use reflect DeepEquals.
  1248  	if len(n.ReservedPorts) == 0 {
  1249  		n.ReservedPorts = nil
  1250  	}
  1251  	if len(n.DynamicPorts) == 0 {
  1252  		n.DynamicPorts = nil
  1253  	}
  1254  }
  1255  
  1256  // MeetsMinResources returns an error if the resources specified are less than
  1257  // the minimum allowed.
  1258  func (n *NetworkResource) MeetsMinResources() error {
  1259  	var mErr multierror.Error
  1260  	if n.MBits < 1 {
  1261  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits))
  1262  	}
  1263  	return mErr.ErrorOrNil()
  1264  }
  1265  
  1266  // Copy returns a deep copy of the network resource
  1267  func (n *NetworkResource) Copy() *NetworkResource {
  1268  	if n == nil {
  1269  		return nil
  1270  	}
  1271  	newR := new(NetworkResource)
  1272  	*newR = *n
  1273  	if n.ReservedPorts != nil {
  1274  		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
  1275  		copy(newR.ReservedPorts, n.ReservedPorts)
  1276  	}
  1277  	if n.DynamicPorts != nil {
  1278  		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
  1279  		copy(newR.DynamicPorts, n.DynamicPorts)
  1280  	}
  1281  	return newR
  1282  }
  1283  
  1284  // Add adds the resources of the delta to this, potentially
  1285  // returning an error if not possible.
  1286  func (n *NetworkResource) Add(delta *NetworkResource) {
  1287  	if len(delta.ReservedPorts) > 0 {
  1288  		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
  1289  	}
  1290  	n.MBits += delta.MBits
  1291  	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
  1292  }
  1293  
  1294  func (n *NetworkResource) GoString() string {
  1295  	return fmt.Sprintf("*%#v", *n)
  1296  }
  1297  
  1298  // PortLabels returns a map of port labels to their assigned host ports.
  1299  func (n *NetworkResource) PortLabels() map[string]int {
  1300  	num := len(n.ReservedPorts) + len(n.DynamicPorts)
  1301  	labelValues := make(map[string]int, num)
  1302  	for _, port := range n.ReservedPorts {
  1303  		labelValues[port.Label] = port.Value
  1304  	}
  1305  	for _, port := range n.DynamicPorts {
  1306  		labelValues[port.Label] = port.Value
  1307  	}
  1308  	return labelValues
  1309  }
  1310  
// Job types accepted in Job.Type.
const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)
  1319  
// Job statuses.
const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluations and allocations are terminal
)
  1325  
const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if one is not
	// specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// CoreJobPriority is kept higher than any user
	// specified job so that it gets priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2

	// JobTrackedVersions is the number of historic job versions that are
	// kept.
	JobTrackedVersions = 6
)
  1346  
// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling
// however.
type Job struct {
	// Stop marks whether the user has stopped the job. A stopped job will
	// have all created allocations stopped and acts as a way to stop a job
	// without purging it from the system. This allows existing allocs to be
	// queried and the job to be inspected as it is being killed.
	Stop bool

	// Region is the Nomad region that handles scheduling this job
	Region string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project
	ID string

	// ParentID is the unique identifier of the job that spawned this job.
	ParentID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// Update is the job-level update strategy. Canonicalize converts a
	// pre-0.6.0 block (Stagger and MaxParallel set) into per-task-group
	// update strategies.
	// COMPAT: Remove in 0.7.0. Stagger is deprecated in 0.6.0.
	Update UpdateStrategy

	// Periodic is used to define the interval the job is run at.
	Periodic *PeriodicConfig

	// ParameterizedJob is used to specify the job as a parameterized job
	// for dispatching.
	ParameterizedJob *ParameterizedJobConfig

	// Payload is the payload supplied when the job was dispatched.
	Payload []byte

	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// VaultToken is the Vault token that proves the submitter of the job has
	// access to the specified Vault policies. This field is only used to
	// transfer the token and is not stored after Job submission.
	VaultToken string

	// Job status
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Stable marks a job as stable. Stability is only defined on "service" and
	// "system" jobs. The stability of a job will be set automatically as part
	// of a deployment and can be manually set via APIs.
	Stable bool

	// Version is a monotonically increasing version number that is incremented
	// on each job register.
	Version uint64

	// SubmitTime is the time at which the job was submitted as a UnixNano in
	// UTC
	SubmitTime int64

	// Raft Indexes
	CreateIndex    uint64
	ModifyIndex    uint64
	JobModifyIndex uint64
}
  1444  
  1445  // Canonicalize is used to canonicalize fields in the Job. This should be called
  1446  // when registering a Job. A set of warnings are returned if the job was changed
  1447  // in anyway that the user should be made aware of.
  1448  func (j *Job) Canonicalize() (warnings error) {
  1449  	var mErr multierror.Error
  1450  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  1451  	// problems since we use reflect DeepEquals.
  1452  	if len(j.Meta) == 0 {
  1453  		j.Meta = nil
  1454  	}
  1455  
  1456  	for _, tg := range j.TaskGroups {
  1457  		tg.Canonicalize(j)
  1458  	}
  1459  
  1460  	if j.ParameterizedJob != nil {
  1461  		j.ParameterizedJob.Canonicalize()
  1462  	}
  1463  
  1464  	if j.Periodic != nil {
  1465  		j.Periodic.Canonicalize()
  1466  	}
  1467  
  1468  	// COMPAT: Remove in 0.7.0
  1469  	// Rewrite any job that has an update block with pre 0.6.0 syntax.
  1470  	jobHasOldUpdate := j.Update.Stagger > 0 && j.Update.MaxParallel > 0
  1471  	if jobHasOldUpdate && j.Type != JobTypeBatch {
  1472  		// Build an appropriate update block and copy it down to each task group
  1473  		base := DefaultUpdateStrategy.Copy()
  1474  		base.MaxParallel = j.Update.MaxParallel
  1475  		base.MinHealthyTime = j.Update.Stagger
  1476  
  1477  		// Add to each task group, modifying as needed
  1478  		upgraded := false
  1479  		l := len(j.TaskGroups)
  1480  		for _, tg := range j.TaskGroups {
  1481  			// The task group doesn't need upgrading if it has an update block with the new syntax
  1482  			u := tg.Update
  1483  			if u != nil && u.Stagger > 0 && u.MaxParallel > 0 &&
  1484  				u.HealthCheck != "" && u.MinHealthyTime > 0 && u.HealthyDeadline > 0 {
  1485  				continue
  1486  			}
  1487  
  1488  			upgraded = true
  1489  
  1490  			// The MaxParallel for the job should be 10% of the total count
  1491  			// unless there is just one task group then we can infer the old
  1492  			// max parallel should be the new
  1493  			tgu := base.Copy()
  1494  			if l != 1 {
  1495  				// RoundTo 10%
  1496  				var percent float64 = float64(tg.Count) * 0.1
  1497  				tgu.MaxParallel = int(percent + 0.5)
  1498  			}
  1499  
  1500  			// Safety guards
  1501  			if tgu.MaxParallel == 0 {
  1502  				tgu.MaxParallel = 1
  1503  			} else if tgu.MaxParallel > tg.Count {
  1504  				tgu.MaxParallel = tg.Count
  1505  			}
  1506  
  1507  			tg.Update = tgu
  1508  		}
  1509  
  1510  		if upgraded {
  1511  			w := "A best effort conversion to new update stanza introduced in v0.6.0 applied. " +
  1512  				"Please update upgrade stanza before v0.7.0."
  1513  			multierror.Append(&mErr, fmt.Errorf(w))
  1514  		}
  1515  	}
  1516  
  1517  	// Ensure that the batch job doesn't have new style or old style update
  1518  	// stanza. Unfortunately are scanning here because we have to deprecate over
  1519  	// a release so we can't check in the task group since that may be new style
  1520  	// but wouldn't capture the old style and we don't want to have duplicate
  1521  	// warnings.
  1522  	if j.Type == JobTypeBatch {
  1523  		displayWarning := jobHasOldUpdate
  1524  		j.Update.Stagger = 0
  1525  		j.Update.MaxParallel = 0
  1526  		j.Update.HealthCheck = ""
  1527  		j.Update.MinHealthyTime = 0
  1528  		j.Update.HealthyDeadline = 0
  1529  		j.Update.AutoRevert = false
  1530  		j.Update.Canary = 0
  1531  
  1532  		// Remove any update spec from the task groups
  1533  		for _, tg := range j.TaskGroups {
  1534  			if tg.Update != nil {
  1535  				displayWarning = true
  1536  				tg.Update = nil
  1537  			}
  1538  		}
  1539  
  1540  		if displayWarning {
  1541  			w := "Update stanza is disallowed for batch jobs since v0.6.0. " +
  1542  				"The update block has automatically been removed"
  1543  			multierror.Append(&mErr, fmt.Errorf(w))
  1544  		}
  1545  	}
  1546  
  1547  	return mErr.ErrorOrNil()
  1548  }
  1549  
  1550  // Copy returns a deep copy of the Job. It is expected that callers use recover.
  1551  // This job can panic if the deep copy failed as it uses reflection.
  1552  func (j *Job) Copy() *Job {
  1553  	if j == nil {
  1554  		return nil
  1555  	}
  1556  	nj := new(Job)
  1557  	*nj = *j
  1558  	nj.Datacenters = helper.CopySliceString(nj.Datacenters)
  1559  	nj.Constraints = CopySliceConstraints(nj.Constraints)
  1560  
  1561  	if j.TaskGroups != nil {
  1562  		tgs := make([]*TaskGroup, len(nj.TaskGroups))
  1563  		for i, tg := range nj.TaskGroups {
  1564  			tgs[i] = tg.Copy()
  1565  		}
  1566  		nj.TaskGroups = tgs
  1567  	}
  1568  
  1569  	nj.Periodic = nj.Periodic.Copy()
  1570  	nj.Meta = helper.CopyMapStringString(nj.Meta)
  1571  	nj.ParameterizedJob = nj.ParameterizedJob.Copy()
  1572  	return nj
  1573  }
  1574  
  1575  // Validate is used to sanity check a job input
  1576  func (j *Job) Validate() error {
  1577  	var mErr multierror.Error
  1578  
  1579  	if j.Region == "" {
  1580  		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
  1581  	}
  1582  	if j.ID == "" {
  1583  		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
  1584  	} else if strings.Contains(j.ID, " ") {
  1585  		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
  1586  	}
  1587  	if j.Name == "" {
  1588  		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
  1589  	}
  1590  	switch j.Type {
  1591  	case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem:
  1592  	case "":
  1593  		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
  1594  	default:
  1595  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type))
  1596  	}
  1597  	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
  1598  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
  1599  	}
  1600  	if len(j.Datacenters) == 0 {
  1601  		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
  1602  	}
  1603  	if len(j.TaskGroups) == 0 {
  1604  		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
  1605  	}
  1606  	for idx, constr := range j.Constraints {
  1607  		if err := constr.Validate(); err != nil {
  1608  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  1609  			mErr.Errors = append(mErr.Errors, outer)
  1610  		}
  1611  	}
  1612  
  1613  	// Check for duplicate task groups
  1614  	taskGroups := make(map[string]int)
  1615  	for idx, tg := range j.TaskGroups {
  1616  		if tg.Name == "" {
  1617  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
  1618  		} else if existing, ok := taskGroups[tg.Name]; ok {
  1619  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
  1620  		} else {
  1621  			taskGroups[tg.Name] = idx
  1622  		}
  1623  
  1624  		if j.Type == "system" && tg.Count > 1 {
  1625  			mErr.Errors = append(mErr.Errors,
  1626  				fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler",
  1627  					tg.Name, tg.Count))
  1628  		}
  1629  	}
  1630  
  1631  	// Validate the task group
  1632  	for _, tg := range j.TaskGroups {
  1633  		if err := tg.Validate(j); err != nil {
  1634  			outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err)
  1635  			mErr.Errors = append(mErr.Errors, outer)
  1636  		}
  1637  	}
  1638  
  1639  	// Validate periodic is only used with batch jobs.
  1640  	if j.IsPeriodic() && j.Periodic.Enabled {
  1641  		if j.Type != JobTypeBatch {
  1642  			mErr.Errors = append(mErr.Errors,
  1643  				fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch))
  1644  		}
  1645  
  1646  		if err := j.Periodic.Validate(); err != nil {
  1647  			mErr.Errors = append(mErr.Errors, err)
  1648  		}
  1649  	}
  1650  
  1651  	if j.IsParameterized() {
  1652  		if j.Type != JobTypeBatch {
  1653  			mErr.Errors = append(mErr.Errors,
  1654  				fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch))
  1655  		}
  1656  
  1657  		if err := j.ParameterizedJob.Validate(); err != nil {
  1658  			mErr.Errors = append(mErr.Errors, err)
  1659  		}
  1660  	}
  1661  
  1662  	return mErr.ErrorOrNil()
  1663  }
  1664  
  1665  // Warnings returns a list of warnings that may be from dubious settings or
  1666  // deprecation warnings.
  1667  func (j *Job) Warnings() error {
  1668  	var mErr multierror.Error
  1669  
  1670  	// Check the groups
  1671  	for _, tg := range j.TaskGroups {
  1672  		if err := tg.Warnings(j); err != nil {
  1673  			outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err)
  1674  			mErr.Errors = append(mErr.Errors, outer)
  1675  		}
  1676  	}
  1677  
  1678  	return mErr.ErrorOrNil()
  1679  }
  1680  
  1681  // LookupTaskGroup finds a task group by name
  1682  func (j *Job) LookupTaskGroup(name string) *TaskGroup {
  1683  	for _, tg := range j.TaskGroups {
  1684  		if tg.Name == name {
  1685  			return tg
  1686  		}
  1687  	}
  1688  	return nil
  1689  }
  1690  
  1691  // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined
  1692  // meta data for the task. When joining Job, Group and Task Meta, the precedence
  1693  // is by deepest scope (Task > Group > Job).
  1694  func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string {
  1695  	group := j.LookupTaskGroup(groupName)
  1696  	if group == nil {
  1697  		return nil
  1698  	}
  1699  
  1700  	task := group.LookupTask(taskName)
  1701  	if task == nil {
  1702  		return nil
  1703  	}
  1704  
  1705  	meta := helper.CopyMapStringString(task.Meta)
  1706  	if meta == nil {
  1707  		meta = make(map[string]string, len(group.Meta)+len(j.Meta))
  1708  	}
  1709  
  1710  	// Add the group specific meta
  1711  	for k, v := range group.Meta {
  1712  		if _, ok := meta[k]; !ok {
  1713  			meta[k] = v
  1714  		}
  1715  	}
  1716  
  1717  	// Add the job specific meta
  1718  	for k, v := range j.Meta {
  1719  		if _, ok := meta[k]; !ok {
  1720  			meta[k] = v
  1721  		}
  1722  	}
  1723  
  1724  	return meta
  1725  }
  1726  
  1727  // Stopped returns if a job is stopped.
  1728  func (j *Job) Stopped() bool {
  1729  	return j == nil || j.Stop
  1730  }
  1731  
  1732  // HasUpdateStrategy returns if any task group in the job has an update strategy
  1733  func (j *Job) HasUpdateStrategy() bool {
  1734  	for _, tg := range j.TaskGroups {
  1735  		if tg.Update != nil {
  1736  			return true
  1737  		}
  1738  	}
  1739  
  1740  	return false
  1741  }
  1742  
  1743  // Stub is used to return a summary of the job
  1744  func (j *Job) Stub(summary *JobSummary) *JobListStub {
  1745  	return &JobListStub{
  1746  		ID:                j.ID,
  1747  		ParentID:          j.ParentID,
  1748  		Name:              j.Name,
  1749  		Type:              j.Type,
  1750  		Priority:          j.Priority,
  1751  		Periodic:          j.IsPeriodic(),
  1752  		ParameterizedJob:  j.IsParameterized(),
  1753  		Stop:              j.Stop,
  1754  		Status:            j.Status,
  1755  		StatusDescription: j.StatusDescription,
  1756  		CreateIndex:       j.CreateIndex,
  1757  		ModifyIndex:       j.ModifyIndex,
  1758  		JobModifyIndex:    j.JobModifyIndex,
  1759  		SubmitTime:        j.SubmitTime,
  1760  		JobSummary:        summary,
  1761  	}
  1762  }
  1763  
  1764  // IsPeriodic returns whether a job is periodic.
  1765  func (j *Job) IsPeriodic() bool {
  1766  	return j.Periodic != nil
  1767  }
  1768  
  1769  // IsParameterized returns whether a job is parameterized job.
  1770  func (j *Job) IsParameterized() bool {
  1771  	return j.ParameterizedJob != nil
  1772  }
  1773  
  1774  // VaultPolicies returns the set of Vault policies per task group, per task
  1775  func (j *Job) VaultPolicies() map[string]map[string]*Vault {
  1776  	policies := make(map[string]map[string]*Vault, len(j.TaskGroups))
  1777  
  1778  	for _, tg := range j.TaskGroups {
  1779  		tgPolicies := make(map[string]*Vault, len(tg.Tasks))
  1780  
  1781  		for _, task := range tg.Tasks {
  1782  			if task.Vault == nil {
  1783  				continue
  1784  			}
  1785  
  1786  			tgPolicies[task.Name] = task.Vault
  1787  		}
  1788  
  1789  		if len(tgPolicies) != 0 {
  1790  			policies[tg.Name] = tgPolicies
  1791  		}
  1792  	}
  1793  
  1794  	return policies
  1795  }
  1796  
  1797  // RequiredSignals returns a mapping of task groups to tasks to their required
  1798  // set of signals
  1799  func (j *Job) RequiredSignals() map[string]map[string][]string {
  1800  	signals := make(map[string]map[string][]string)
  1801  
  1802  	for _, tg := range j.TaskGroups {
  1803  		for _, task := range tg.Tasks {
  1804  			// Use this local one as a set
  1805  			taskSignals := make(map[string]struct{})
  1806  
  1807  			// Check if the Vault change mode uses signals
  1808  			if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal {
  1809  				taskSignals[task.Vault.ChangeSignal] = struct{}{}
  1810  			}
  1811  
  1812  			// Check if any template change mode uses signals
  1813  			for _, t := range task.Templates {
  1814  				if t.ChangeMode != TemplateChangeModeSignal {
  1815  					continue
  1816  				}
  1817  
  1818  				taskSignals[t.ChangeSignal] = struct{}{}
  1819  			}
  1820  
  1821  			// Flatten and sort the signals
  1822  			l := len(taskSignals)
  1823  			if l == 0 {
  1824  				continue
  1825  			}
  1826  
  1827  			flat := make([]string, 0, l)
  1828  			for sig := range taskSignals {
  1829  				flat = append(flat, sig)
  1830  			}
  1831  
  1832  			sort.Strings(flat)
  1833  			tgSignals, ok := signals[tg.Name]
  1834  			if !ok {
  1835  				tgSignals = make(map[string][]string)
  1836  				signals[tg.Name] = tgSignals
  1837  			}
  1838  			tgSignals[task.Name] = flat
  1839  		}
  1840  
  1841  	}
  1842  
  1843  	return signals
  1844  }
  1845  
  1846  // SpecChanged determines if the functional specification has changed between
  1847  // two job versions.
  1848  func (j *Job) SpecChanged(new *Job) bool {
  1849  	if j == nil {
  1850  		return new != nil
  1851  	}
  1852  
  1853  	// Create a copy of the new job
  1854  	c := new.Copy()
  1855  
  1856  	// Update the new job so we can do a reflect
  1857  	c.Status = j.Status
  1858  	c.StatusDescription = j.StatusDescription
  1859  	c.Stable = j.Stable
  1860  	c.Version = j.Version
  1861  	c.CreateIndex = j.CreateIndex
  1862  	c.ModifyIndex = j.ModifyIndex
  1863  	c.JobModifyIndex = j.JobModifyIndex
  1864  	c.SubmitTime = j.SubmitTime
  1865  
  1866  	// Deep equals the jobs
  1867  	return !reflect.DeepEqual(j, c)
  1868  }
  1869  
  1870  func (j *Job) SetSubmitTime() {
  1871  	j.SubmitTime = time.Now().UTC().UnixNano()
  1872  }
  1873  
// JobListStub is used to return a subset of job information
// for the job list. Alongside the identifying and status fields it carries a
// pointer to the job's summary.
// NOTE(review): the fields presumably mirror the same-named fields on Job;
// confirm against the code that builds the stub.
type JobListStub struct {
	ID                string
	ParentID          string
	Name              string
	Type              string
	Priority          int
	Periodic          bool
	ParameterizedJob  bool
	Stop              bool
	Status            string
	StatusDescription string
	JobSummary        *JobSummary
	CreateIndex       uint64
	ModifyIndex       uint64
	JobModifyIndex    uint64
	SubmitTime        int64
}
  1893  
// JobSummary summarizes the state of the allocations of a job
type JobSummary struct {
	// JobID identifies the job being summarized.
	JobID string

	// Summary contains the summary per task group for the Job
	Summary map[string]TaskGroupSummary

	// Children contains a summary for the children of this job.
	Children *JobChildrenSummary

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1908  
  1909  // Copy returns a new copy of JobSummary
  1910  func (js *JobSummary) Copy() *JobSummary {
  1911  	newJobSummary := new(JobSummary)
  1912  	*newJobSummary = *js
  1913  	newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary))
  1914  	for k, v := range js.Summary {
  1915  		newTGSummary[k] = v
  1916  	}
  1917  	newJobSummary.Summary = newTGSummary
  1918  	newJobSummary.Children = newJobSummary.Children.Copy()
  1919  	return newJobSummary
  1920  }
  1921  
// JobChildrenSummary contains the summary of children job statuses, as one
// counter per status.
type JobChildrenSummary struct {
	Pending int64
	Running int64
	Dead    int64
}
  1928  
  1929  // Copy returns a new copy of a JobChildrenSummary
  1930  func (jc *JobChildrenSummary) Copy() *JobChildrenSummary {
  1931  	if jc == nil {
  1932  		return nil
  1933  	}
  1934  
  1935  	njc := new(JobChildrenSummary)
  1936  	*njc = *jc
  1937  	return njc
  1938  }
  1939  
// TaskGroupSummary summarizes the state of all the allocations of a
// particular TaskGroup, as one counter per allocation state.
type TaskGroupSummary struct {
	Queued   int
	Complete int
	Failed   int
	Running  int
	Starting int
	Lost     int
}
  1950  
// Accepted values for UpdateStrategy.HealthCheck.
const (
	// Checks uses any registered health check state in combination with task
	// states to determine if an allocation is healthy.
	UpdateStrategyHealthCheck_Checks = "checks"

	// TaskStates uses the task states of an allocation to determine if the
	// allocation is healthy.
	UpdateStrategyHealthCheck_TaskStates = "task_states"

	// Manual allows the operator to manually signal to Nomad when an
	// allocation is healthy. This allows more advanced health checking that is
	// outside of the scope of Nomad.
	UpdateStrategyHealthCheck_Manual = "manual"
)
  1965  
var (
	// DefaultUpdateStrategy provides a baseline that can be used to upgrade
	// jobs with the old policy or for populating field defaults.
	//
	// MaxParallel is zero here, so Rolling() reports false for this baseline
	// until a caller sets a positive MaxParallel.
	DefaultUpdateStrategy = &UpdateStrategy{
		Stagger:         30 * time.Second,
		MaxParallel:     0,
		HealthCheck:     UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 5 * time.Minute,
		AutoRevert:      false,
		Canary:          0,
	}
)
  1979  
// UpdateStrategy is used to modify how updates are done
type UpdateStrategy struct {
	// Stagger is used to determine the rate at which allocations are migrated
	// due to down or draining nodes.
	Stagger time.Duration

	// MaxParallel is how many updates can be done in parallel
	MaxParallel int

	// HealthCheck specifies the mechanism in which allocations are marked
	// healthy or unhealthy as part of a deployment. Validate accepts only the
	// UpdateStrategyHealthCheck_* values.
	HealthCheck string

	// MinHealthyTime is the minimum time an allocation must be in the healthy
	// state before it is marked as healthy, unblocking more allocations to be
	// rolled.
	MinHealthyTime time.Duration

	// HealthyDeadline is the time in which an allocation must be marked as
	// healthy before it is automatically transitioned to unhealthy. This time
	// period doesn't count against the MinHealthyTime.
	HealthyDeadline time.Duration

	// AutoRevert declares that if a deployment fails because of unhealthy
	// allocations, there should be an attempt to auto-revert the job to a
	// stable version.
	AutoRevert bool

	// Canary is the number of canaries to deploy when a change to the task
	// group is detected.
	Canary int
}
  2012  
  2013  func (u *UpdateStrategy) Copy() *UpdateStrategy {
  2014  	if u == nil {
  2015  		return nil
  2016  	}
  2017  
  2018  	copy := new(UpdateStrategy)
  2019  	*copy = *u
  2020  	return copy
  2021  }
  2022  
  2023  func (u *UpdateStrategy) Validate() error {
  2024  	if u == nil {
  2025  		return nil
  2026  	}
  2027  
  2028  	var mErr multierror.Error
  2029  	switch u.HealthCheck {
  2030  	case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual:
  2031  	default:
  2032  		multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck))
  2033  	}
  2034  
  2035  	if u.MaxParallel < 0 {
  2036  		multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than zero: %d < 0", u.MaxParallel))
  2037  	}
  2038  	if u.Canary < 0 {
  2039  		multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary))
  2040  	}
  2041  	if u.MinHealthyTime < 0 {
  2042  		multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime))
  2043  	}
  2044  	if u.HealthyDeadline <= 0 {
  2045  		multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline))
  2046  	}
  2047  	if u.Stagger <= 0 {
  2048  		multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger))
  2049  	}
  2050  
  2051  	return mErr.ErrorOrNil()
  2052  }
  2053  
  2054  // TODO(alexdadgar): Remove once no longer used by the scheduler.
  2055  // Rolling returns if a rolling strategy should be used
  2056  func (u *UpdateStrategy) Rolling() bool {
  2057  	return u.Stagger > 0 && u.MaxParallel > 0
  2058  }
  2059  
// Accepted values for PeriodicConfig.SpecType.
const (
	// PeriodicSpecCron is used for a cron spec.
	PeriodicSpecCron = "cron"

	// PeriodicSpecTest is only used by unit tests. It is a sorted, comma
	// separated list of unix timestamps at which to launch.
	PeriodicSpecTest = "_internal_test"
)
  2068  
// PeriodicConfig defines the interval a job should be run at.
type PeriodicConfig struct {
	// Enabled determines if the job should be run periodically.
	Enabled bool

	// Spec specifies the interval the job should be run as. It is parsed based
	// on the SpecType.
	Spec string

	// SpecType defines the format of the spec.
	SpecType string

	// ProhibitOverlap enforces that spawned jobs do not run in parallel.
	ProhibitOverlap bool

	// TimeZone is the user specified string that determines the time zone to
	// launch against. The time zones must be specified from IANA Time Zone
	// database, such as "America/New_York".
	// Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
	// Reference: https://www.iana.org/time-zones
	TimeZone string

	// location is the time zone to evaluate the launch time against. It is
	// populated by Canonicalize; GetLocation falls back to UTC while it is
	// nil.
	location *time.Location
}
  2094  
  2095  func (p *PeriodicConfig) Copy() *PeriodicConfig {
  2096  	if p == nil {
  2097  		return nil
  2098  	}
  2099  	np := new(PeriodicConfig)
  2100  	*np = *p
  2101  	return np
  2102  }
  2103  
  2104  func (p *PeriodicConfig) Validate() error {
  2105  	if !p.Enabled {
  2106  		return nil
  2107  	}
  2108  
  2109  	var mErr multierror.Error
  2110  	if p.Spec == "" {
  2111  		multierror.Append(&mErr, fmt.Errorf("Must specify a spec"))
  2112  	}
  2113  
  2114  	// Check if we got a valid time zone
  2115  	if p.TimeZone != "" {
  2116  		if _, err := time.LoadLocation(p.TimeZone); err != nil {
  2117  			multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err))
  2118  		}
  2119  	}
  2120  
  2121  	switch p.SpecType {
  2122  	case PeriodicSpecCron:
  2123  		// Validate the cron spec
  2124  		if _, err := cronexpr.Parse(p.Spec); err != nil {
  2125  			multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err))
  2126  		}
  2127  	case PeriodicSpecTest:
  2128  		// No-op
  2129  	default:
  2130  		multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType))
  2131  	}
  2132  
  2133  	return mErr.ErrorOrNil()
  2134  }
  2135  
  2136  func (p *PeriodicConfig) Canonicalize() {
  2137  	// Load the location
  2138  	l, err := time.LoadLocation(p.TimeZone)
  2139  	if err != nil {
  2140  		p.location = time.UTC
  2141  	}
  2142  
  2143  	p.location = l
  2144  }
  2145  
  2146  // Next returns the closest time instant matching the spec that is after the
  2147  // passed time. If no matching instance exists, the zero value of time.Time is
  2148  // returned. The `time.Location` of the returned value matches that of the
  2149  // passed time.
  2150  func (p *PeriodicConfig) Next(fromTime time.Time) time.Time {
  2151  	switch p.SpecType {
  2152  	case PeriodicSpecCron:
  2153  		if e, err := cronexpr.Parse(p.Spec); err == nil {
  2154  			return e.Next(fromTime)
  2155  		}
  2156  	case PeriodicSpecTest:
  2157  		split := strings.Split(p.Spec, ",")
  2158  		if len(split) == 1 && split[0] == "" {
  2159  			return time.Time{}
  2160  		}
  2161  
  2162  		// Parse the times
  2163  		times := make([]time.Time, len(split))
  2164  		for i, s := range split {
  2165  			unix, err := strconv.Atoi(s)
  2166  			if err != nil {
  2167  				return time.Time{}
  2168  			}
  2169  
  2170  			times[i] = time.Unix(int64(unix), 0)
  2171  		}
  2172  
  2173  		// Find the next match
  2174  		for _, next := range times {
  2175  			if fromTime.Before(next) {
  2176  				return next
  2177  			}
  2178  		}
  2179  	}
  2180  
  2181  	return time.Time{}
  2182  }
  2183  
  2184  // GetLocation returns the location to use for determining the time zone to run
  2185  // the periodic job against.
  2186  func (p *PeriodicConfig) GetLocation() *time.Location {
  2187  	// Jobs pre 0.5.5 will not have this
  2188  	if p.location != nil {
  2189  		return p.location
  2190  	}
  2191  
  2192  	return time.UTC
  2193  }
  2194  
const (
	// PeriodicLaunchSuffix is the string appended to the periodic job's ID
	// when launching derived instances of it.
	PeriodicLaunchSuffix = "/periodic-"
)
  2200  
// PeriodicLaunch tracks the last launch time of a periodic job.
type PeriodicLaunch struct {
	ID     string    // ID of the periodic job.
	Launch time.Time // The last launch time.

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  2210  
const (
	// Accepted values for ParameterizedJobConfig.Payload.
	DispatchPayloadForbidden = "forbidden"
	DispatchPayloadOptional  = "optional"
	DispatchPayloadRequired  = "required"

	// DispatchLaunchSuffix is the string appended to the parameterized job's ID
	// when dispatching instances of it.
	DispatchLaunchSuffix = "/dispatch-"
)
  2220  
// ParameterizedJobConfig is used to configure the parameterized job
type ParameterizedJobConfig struct {
	// Payload configures the payload requirements. Validate accepts only the
	// DispatchPayload* values.
	Payload string

	// MetaRequired is metadata keys that must be specified by the dispatcher
	MetaRequired []string

	// MetaOptional is metadata keys that may be specified by the dispatcher
	MetaOptional []string
}
  2232  
  2233  func (d *ParameterizedJobConfig) Validate() error {
  2234  	var mErr multierror.Error
  2235  	switch d.Payload {
  2236  	case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden:
  2237  	default:
  2238  		multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload))
  2239  	}
  2240  
  2241  	// Check that the meta configurations are disjoint sets
  2242  	disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional)
  2243  	if !disjoint {
  2244  		multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending))
  2245  	}
  2246  
  2247  	return mErr.ErrorOrNil()
  2248  }
  2249  
  2250  func (d *ParameterizedJobConfig) Canonicalize() {
  2251  	if d.Payload == "" {
  2252  		d.Payload = DispatchPayloadOptional
  2253  	}
  2254  }
  2255  
  2256  func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig {
  2257  	if d == nil {
  2258  		return nil
  2259  	}
  2260  	nd := new(ParameterizedJobConfig)
  2261  	*nd = *d
  2262  	nd.MetaOptional = helper.CopySliceString(nd.MetaOptional)
  2263  	nd.MetaRequired = helper.CopySliceString(nd.MetaRequired)
  2264  	return nd
  2265  }
  2266  
  2267  // DispatchedID returns an ID appropriate for a job dispatched against a
  2268  // particular parameterized job
  2269  func DispatchedID(templateID string, t time.Time) string {
  2270  	u := GenerateUUID()[:8]
  2271  	return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u)
  2272  }
  2273  
// DispatchPayloadConfig configures how a task gets its input from a job dispatch
type DispatchPayloadConfig struct {
	// File specifies a relative path to where the input data should be
	// written. Validate checks it against the "task/local/" prefix.
	File string
}
  2279  
  2280  func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig {
  2281  	if d == nil {
  2282  		return nil
  2283  	}
  2284  	nd := new(DispatchPayloadConfig)
  2285  	*nd = *d
  2286  	return nd
  2287  }
  2288  
  2289  func (d *DispatchPayloadConfig) Validate() error {
  2290  	// Verify the destination doesn't escape
  2291  	escaped, err := PathEscapesAllocDir("task/local/", d.File)
  2292  	if err != nil {
  2293  		return fmt.Errorf("invalid destination path: %v", err)
  2294  	} else if escaped {
  2295  		return fmt.Errorf("destination escapes allocation directory")
  2296  	}
  2297  
  2298  	return nil
  2299  }
  2300  
var (
	// defaultServiceJobRestartPolicy is the restart policy NewRestartPolicy
	// hands out (as a copy) for service and system jobs.
	defaultServiceJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 2,
		Interval: 1 * time.Minute,
		Mode:     RestartPolicyModeDelay,
	}
	// defaultBatchJobRestartPolicy is the restart policy NewRestartPolicy
	// hands out (as a copy) for batch jobs.
	defaultBatchJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 15,
		Interval: 7 * 24 * time.Hour,
		Mode:     RestartPolicyModeDelay,
	}
)
  2315  
const (
	// RestartPolicyModeDelay causes an artificial delay until the next interval
	// is reached when the specified attempts have been reached in the interval.
	RestartPolicyModeDelay = "delay"

	// RestartPolicyModeFail causes a job to fail if the specified number of
	// attempts are reached within an interval.
	RestartPolicyModeFail = "fail"

	// RestartPolicyMinInterval is the minimum interval that is accepted for a
	// restart policy.
	RestartPolicyMinInterval = 5 * time.Second
)
  2329  
// RestartPolicy configures how Tasks are restarted when they crash or fail.
type RestartPolicy struct {
	// Attempts is the number of restarts that will occur in an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of restarts
	// within. Validate rejects values below RestartPolicyMinInterval.
	Interval time.Duration

	// Delay is the time between a failure and a restart.
	Delay time.Duration

	// Mode controls what happens when the task restarts more than attempt times
	// in an interval. Validate accepts only the RestartPolicyMode* values.
	Mode string
}
  2346  
  2347  func (r *RestartPolicy) Copy() *RestartPolicy {
  2348  	if r == nil {
  2349  		return nil
  2350  	}
  2351  	nrp := new(RestartPolicy)
  2352  	*nrp = *r
  2353  	return nrp
  2354  }
  2355  
  2356  func (r *RestartPolicy) Validate() error {
  2357  	var mErr multierror.Error
  2358  	switch r.Mode {
  2359  	case RestartPolicyModeDelay, RestartPolicyModeFail:
  2360  	default:
  2361  		multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode))
  2362  	}
  2363  
  2364  	// Check for ambiguous/confusing settings
  2365  	if r.Attempts == 0 && r.Mode != RestartPolicyModeFail {
  2366  		multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts))
  2367  	}
  2368  
  2369  	if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() {
  2370  		multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval))
  2371  	}
  2372  	if time.Duration(r.Attempts)*r.Delay > r.Interval {
  2373  		multierror.Append(&mErr,
  2374  			fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay))
  2375  	}
  2376  	return mErr.ErrorOrNil()
  2377  }
  2378  
  2379  func NewRestartPolicy(jobType string) *RestartPolicy {
  2380  	switch jobType {
  2381  	case JobTypeService, JobTypeSystem:
  2382  		rp := defaultServiceJobRestartPolicy
  2383  		return &rp
  2384  	case JobTypeBatch:
  2385  		rp := defaultBatchJobRestartPolicy
  2386  		return &rp
  2387  	}
  2388  	return nil
  2389  }
  2390  
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled.
	Count int

	// Update is used to control the update strategy for this task group
	Update *UpdateStrategy

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// RestartPolicy of a TaskGroup
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// EphemeralDisk is the disk resources that the task group requests
	EphemeralDisk *EphemeralDisk

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string
}
  2422  
  2423  func (tg *TaskGroup) Copy() *TaskGroup {
  2424  	if tg == nil {
  2425  		return nil
  2426  	}
  2427  	ntg := new(TaskGroup)
  2428  	*ntg = *tg
  2429  	ntg.Update = ntg.Update.Copy()
  2430  	ntg.Constraints = CopySliceConstraints(ntg.Constraints)
  2431  	ntg.RestartPolicy = ntg.RestartPolicy.Copy()
  2432  
  2433  	if tg.Tasks != nil {
  2434  		tasks := make([]*Task, len(ntg.Tasks))
  2435  		for i, t := range ntg.Tasks {
  2436  			tasks[i] = t.Copy()
  2437  		}
  2438  		ntg.Tasks = tasks
  2439  	}
  2440  
  2441  	ntg.Meta = helper.CopyMapStringString(ntg.Meta)
  2442  
  2443  	if tg.EphemeralDisk != nil {
  2444  		ntg.EphemeralDisk = tg.EphemeralDisk.Copy()
  2445  	}
  2446  	return ntg
  2447  }
  2448  
  2449  // Canonicalize is used to canonicalize fields in the TaskGroup.
  2450  func (tg *TaskGroup) Canonicalize(job *Job) {
  2451  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  2452  	// problems since we use reflect DeepEquals.
  2453  	if len(tg.Meta) == 0 {
  2454  		tg.Meta = nil
  2455  	}
  2456  
  2457  	// Set the default restart policy.
  2458  	if tg.RestartPolicy == nil {
  2459  		tg.RestartPolicy = NewRestartPolicy(job.Type)
  2460  	}
  2461  
  2462  	// Set a default ephemeral disk object if the user has not requested for one
  2463  	if tg.EphemeralDisk == nil {
  2464  		tg.EphemeralDisk = DefaultEphemeralDisk()
  2465  	}
  2466  
  2467  	for _, task := range tg.Tasks {
  2468  		task.Canonicalize(job, tg)
  2469  	}
  2470  
  2471  	// Add up the disk resources to EphemeralDisk. This is done so that users
  2472  	// are not required to move their disk attribute from resources to
  2473  	// EphemeralDisk section of the job spec in Nomad 0.5
  2474  	// COMPAT 0.4.1 -> 0.5
  2475  	// Remove in 0.6
  2476  	var diskMB int
  2477  	for _, task := range tg.Tasks {
  2478  		diskMB += task.Resources.DiskMB
  2479  	}
  2480  	if diskMB > 0 {
  2481  		tg.EphemeralDisk.SizeMB = diskMB
  2482  	}
  2483  }
  2484  
// Validate is used to sanity check a task group. Every problem found is
// collected and returned as a single combined error; nil is returned when no
// problem is found. The job is consulted only for its type.
func (tg *TaskGroup) Validate(j *Job) error {
	var mErr multierror.Error
	if tg.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
	}
	if tg.Count < 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative"))
	}
	if len(tg.Tasks) == 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
	}
	// Constraints are reported with a 1-based position.
	for idx, constr := range tg.Constraints {
		if err := constr.Validate(); err != nil {
			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	// A restart policy is mandatory and must itself be valid.
	if tg.RestartPolicy != nil {
		if err := tg.RestartPolicy.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	} else {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
	}

	// An ephemeral disk is mandatory and must itself be valid.
	if tg.EphemeralDisk != nil {
		if err := tg.EphemeralDisk.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	} else {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name))
	}

	// Validate the update strategy
	if u := tg.Update; u != nil {
		// The job type check is currently a no-op: the rejection for other
		// job types is commented out until the COMPAT window closes.
		switch j.Type {
		case JobTypeService, JobTypeSystem:
		default:
			// COMPAT: Enable in 0.7.0
			//mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type))
		}
		if err := u.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Check for duplicate tasks, that there is only leader task if any,
	// and no duplicated static ports
	tasks := make(map[string]int)       // task name -> index of first definition
	staticPorts := make(map[int]string) // port value -> "task:label" that reserved it
	leaderTasks := 0
	for idx, task := range tg.Tasks {
		if task.Name == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
		} else if existing, ok := tasks[task.Name]; ok {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
		} else {
			tasks[task.Name] = idx
		}

		if task.Leader {
			leaderTasks++
		}

		// Without resources there are no ports to check.
		if task.Resources == nil {
			continue
		}

		// Reserved ports must be unique across all tasks in the group.
		for _, net := range task.Resources.Networks {
			for _, port := range net.ReservedPorts {
				if other, ok := staticPorts[port.Value]; ok {
					err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
					mErr.Errors = append(mErr.Errors, err)
				} else {
					staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label)
				}
			}
		}
	}

	if leaderTasks > 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader"))
	}

	// Validate the tasks
	for _, task := range tg.Tasks {
		if err := task.Validate(tg.EphemeralDisk); err != nil {
			outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}
	return mErr.ErrorOrNil()
}
  2580  
  2581  // Warnings returns a list of warnings that may be from dubious settings or
  2582  // deprecation warnings.
  2583  func (tg *TaskGroup) Warnings(j *Job) error {
  2584  	var mErr multierror.Error
  2585  
  2586  	// Validate the update strategy
  2587  	if u := tg.Update; u != nil {
  2588  		// Check the counts are appropriate
  2589  		if u.MaxParallel > tg.Count {
  2590  			mErr.Errors = append(mErr.Errors,
  2591  				fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+
  2592  					"A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count))
  2593  		}
  2594  	}
  2595  
  2596  	return mErr.ErrorOrNil()
  2597  }
  2598  
  2599  // LookupTask finds a task by name
  2600  func (tg *TaskGroup) LookupTask(name string) *Task {
  2601  	for _, t := range tg.Tasks {
  2602  		if t.Name == name {
  2603  			return t
  2604  		}
  2605  	}
  2606  	return nil
  2607  }
  2608  
  2609  func (tg *TaskGroup) GoString() string {
  2610  	return fmt.Sprintf("*%#v", *tg)
  2611  }
  2612  
const (
	// Check types accepted by ServiceCheck.validate.
	ServiceCheckHTTP   = "http"
	ServiceCheckTCP    = "tcp"
	ServiceCheckScript = "script"

	// minCheckInterval is the minimum check interval permitted.  Consul
	// currently has its MinInterval set to 1s.  Mirror that here for
	// consistency.
	minCheckInterval = 1 * time.Second

	// minCheckTimeout is the minimum check timeout permitted for Consul
	// script TTL checks.
	// NOTE(review): ServiceCheck.validate currently applies this minimum to
	// tcp and http checks, while the script timeout validation is disabled.
	minCheckTimeout = 1 * time.Second
)
  2627  
// The ServiceCheck data model represents the consul health check that
// Nomad registers for a Task
type ServiceCheck struct {
	Name          string        // Name of the check, defaults to a name derived from the service name
	Type          string        // Type of the check - tcp, http or script
	Command       string        // Command is the command to run for script checks
	Args          []string      // Args is a list of arguments for script checks
	Path          string        // path of the health check url for http type check
	Protocol      string        // Protocol to use if check is http, defaults to http
	PortLabel     string        // The port to use for tcp/http checks
	Interval      time.Duration // Interval of the check
	Timeout       time.Duration // Timeout of the response from the check before consul fails the check
	InitialStatus string        // Initial status of the check
	TLSSkipVerify bool          // Skip TLS verification when Protocol=https
}
  2643  
  2644  func (sc *ServiceCheck) Copy() *ServiceCheck {
  2645  	if sc == nil {
  2646  		return nil
  2647  	}
  2648  	nsc := new(ServiceCheck)
  2649  	*nsc = *sc
  2650  	return nsc
  2651  }
  2652  
  2653  func (sc *ServiceCheck) Canonicalize(serviceName string) {
  2654  	// Ensure empty slices are treated as null to avoid scheduling issues when
  2655  	// using DeepEquals.
  2656  	if len(sc.Args) == 0 {
  2657  		sc.Args = nil
  2658  	}
  2659  
  2660  	if sc.Name == "" {
  2661  		sc.Name = fmt.Sprintf("service: %q check", serviceName)
  2662  	}
  2663  }
  2664  
  2665  // validate a Service's ServiceCheck
  2666  func (sc *ServiceCheck) validate() error {
  2667  	switch strings.ToLower(sc.Type) {
  2668  	case ServiceCheckTCP:
  2669  		if sc.Timeout == 0 {
  2670  			return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval)
  2671  		} else if sc.Timeout < minCheckTimeout {
  2672  			return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
  2673  		}
  2674  	case ServiceCheckHTTP:
  2675  		if sc.Path == "" {
  2676  			return fmt.Errorf("http type must have a valid http path")
  2677  		}
  2678  
  2679  		if sc.Timeout == 0 {
  2680  			return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval)
  2681  		} else if sc.Timeout < minCheckTimeout {
  2682  			return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
  2683  		}
  2684  	case ServiceCheckScript:
  2685  		if sc.Command == "" {
  2686  			return fmt.Errorf("script type must have a valid script path")
  2687  		}
  2688  
  2689  		// TODO: enforce timeout on the Client side and reenable
  2690  		// validation.
  2691  	default:
  2692  		return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type)
  2693  	}
  2694  
  2695  	if sc.Interval == 0 {
  2696  		return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval)
  2697  	} else if sc.Interval < minCheckInterval {
  2698  		return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval)
  2699  	}
  2700  
  2701  	switch sc.InitialStatus {
  2702  	case "":
  2703  		// case api.HealthUnknown: TODO: Add when Consul releases 0.7.1
  2704  	case api.HealthPassing:
  2705  	case api.HealthWarning:
  2706  	case api.HealthCritical:
  2707  	default:
  2708  		return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical)
  2709  
  2710  	}
  2711  
  2712  	return nil
  2713  }
  2714  
  2715  // RequiresPort returns whether the service check requires the task has a port.
  2716  func (sc *ServiceCheck) RequiresPort() bool {
  2717  	switch sc.Type {
  2718  	case ServiceCheckHTTP, ServiceCheckTCP:
  2719  		return true
  2720  	default:
  2721  		return false
  2722  	}
  2723  }
  2724  
  2725  // Hash all ServiceCheck fields and the check's corresponding service ID to
  2726  // create an identifier. The identifier is not guaranteed to be unique as if
  2727  // the PortLabel is blank, the Service's PortLabel will be used after Hash is
  2728  // called.
  2729  func (sc *ServiceCheck) Hash(serviceID string) string {
  2730  	h := sha1.New()
  2731  	io.WriteString(h, serviceID)
  2732  	io.WriteString(h, sc.Name)
  2733  	io.WriteString(h, sc.Type)
  2734  	io.WriteString(h, sc.Command)
  2735  	io.WriteString(h, strings.Join(sc.Args, ""))
  2736  	io.WriteString(h, sc.Path)
  2737  	io.WriteString(h, sc.Protocol)
  2738  	io.WriteString(h, sc.PortLabel)
  2739  	io.WriteString(h, sc.Interval.String())
  2740  	io.WriteString(h, sc.Timeout.String())
  2741  	// Only include TLSSkipVerify if set to maintain ID stability with Nomad <0.6
  2742  	if sc.TLSSkipVerify {
  2743  		io.WriteString(h, "true")
  2744  	}
  2745  	return fmt.Sprintf("%x", h.Sum(nil))
  2746  }
  2747  
// Address modes accepted for Service.AddressMode. Service.Validate also
// accepts the empty string.
const (
	AddressModeAuto   = "auto"
	AddressModeHost   = "host"
	AddressModeDriver = "driver"
)
  2753  
// Service represents a Consul service definition in Nomad.
type Service struct {
	// Name of the service registered with Consul. Consul defaults the
	// Name to ServiceID if not specified. The Name, if specified, is used
	// as one of the seed values when generating a Consul ServiceID.
	Name string

	// PortLabel is either the numeric port number or the `host:port`.
	// To specify the port number using the host's Consul Advertise
	// address, specify an empty host in the PortLabel (e.g. `:port`).
	PortLabel string

	// AddressMode specifies whether or not to use the host ip:port for
	// this service. Validate accepts the AddressMode* constants or the
	// empty string.
	AddressMode string

	Tags   []string        // List of tags for the service
	Checks []*ServiceCheck // List of checks associated with the service
}
  2773  
  2774  func (s *Service) Copy() *Service {
  2775  	if s == nil {
  2776  		return nil
  2777  	}
  2778  	ns := new(Service)
  2779  	*ns = *s
  2780  	ns.Tags = helper.CopySliceString(ns.Tags)
  2781  
  2782  	if s.Checks != nil {
  2783  		checks := make([]*ServiceCheck, len(ns.Checks))
  2784  		for i, c := range ns.Checks {
  2785  			checks[i] = c.Copy()
  2786  		}
  2787  		ns.Checks = checks
  2788  	}
  2789  
  2790  	return ns
  2791  }
  2792  
  2793  // Canonicalize interpolates values of Job, Task Group and Task in the Service
  2794  // Name. This also generates check names, service id and check ids.
  2795  func (s *Service) Canonicalize(job string, taskGroup string, task string) {
  2796  	// Ensure empty lists are treated as null to avoid scheduler issues when
  2797  	// using DeepEquals
  2798  	if len(s.Tags) == 0 {
  2799  		s.Tags = nil
  2800  	}
  2801  	if len(s.Checks) == 0 {
  2802  		s.Checks = nil
  2803  	}
  2804  
  2805  	s.Name = args.ReplaceEnv(s.Name, map[string]string{
  2806  		"JOB":       job,
  2807  		"TASKGROUP": taskGroup,
  2808  		"TASK":      task,
  2809  		"BASE":      fmt.Sprintf("%s-%s-%s", job, taskGroup, task),
  2810  	},
  2811  	)
  2812  
  2813  	for _, check := range s.Checks {
  2814  		check.Canonicalize(s.Name)
  2815  	}
  2816  }
  2817  
  2818  // Validate checks if the Check definition is valid
  2819  func (s *Service) Validate() error {
  2820  	var mErr multierror.Error
  2821  
  2822  	// Ensure the service name is valid per the below RFCs but make an exception
  2823  	// for our interpolation syntax
  2824  	// RFC-952 §1 (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1
  2825  	// (https://tools.ietf.org/html/rfc1123), and RFC-2782
  2826  	// (https://tools.ietf.org/html/rfc2782).
  2827  	re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9\$][a-zA-Z0-9\-\$\{\}\_\.]*[a-z0-9\}])$`)
  2828  	if !re.MatchString(s.Name) {
  2829  		mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q", s.Name))
  2830  	}
  2831  
  2832  	switch s.AddressMode {
  2833  	case "", AddressModeAuto, AddressModeHost, AddressModeDriver:
  2834  		// OK
  2835  	default:
  2836  		mErr.Errors = append(mErr.Errors, fmt.Errorf("service address_mode must be %q, %q, or %q; not %q", AddressModeAuto, AddressModeHost, AddressModeDriver, s.AddressMode))
  2837  	}
  2838  
  2839  	for _, c := range s.Checks {
  2840  		if s.PortLabel == "" && c.RequiresPort() {
  2841  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but the service %+q has no port", c.Name, s.Name))
  2842  			continue
  2843  		}
  2844  
  2845  		if err := c.validate(); err != nil {
  2846  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err))
  2847  		}
  2848  	}
  2849  	return mErr.ErrorOrNil()
  2850  }
  2851  
  2852  // ValidateName checks if the services Name is valid and should be called after
  2853  // the name has been interpolated
  2854  func (s *Service) ValidateName(name string) error {
  2855  	// Ensure the service name is valid per RFC-952 §1
  2856  	// (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1
  2857  	// (https://tools.ietf.org/html/rfc1123), and RFC-2782
  2858  	// (https://tools.ietf.org/html/rfc2782).
  2859  	re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`)
  2860  	if !re.MatchString(name) {
  2861  		return fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be no longer than 63 characters: %q", name)
  2862  	}
  2863  	return nil
  2864  }
  2865  
  2866  // Hash calculates the hash of the check based on it's content and the service
  2867  // which owns it
  2868  func (s *Service) Hash() string {
  2869  	h := sha1.New()
  2870  	io.WriteString(h, s.Name)
  2871  	io.WriteString(h, strings.Join(s.Tags, ""))
  2872  	io.WriteString(h, s.PortLabel)
  2873  	io.WriteString(h, s.AddressMode)
  2874  	return fmt.Sprintf("%x", h.Sum(nil))
  2875  }
  2876  
const (
	// DefaultKillTimeout is the default timeout between signaling a task it
	// will be killed and killing it. Task.Canonicalize applies it when a
	// task's KillTimeout is zero.
	DefaultKillTimeout = 5 * time.Second
)
  2882  
// LogConfig provides configuration for log rotation.
type LogConfig struct {
	// MaxFiles bounds the number of log files; Validate requires at least 1.
	MaxFiles int
	// MaxFileSizeMB bounds the size of a log file in MB; Validate requires
	// at least 1.
	MaxFileSizeMB int
}
  2888  
  2889  // DefaultLogConfig returns the default LogConfig values.
  2890  func DefaultLogConfig() *LogConfig {
  2891  	return &LogConfig{
  2892  		MaxFiles:      10,
  2893  		MaxFileSizeMB: 10,
  2894  	}
  2895  }
  2896  
  2897  // Validate returns an error if the log config specified are less than
  2898  // the minimum allowed.
  2899  func (l *LogConfig) Validate() error {
  2900  	var mErr multierror.Error
  2901  	if l.MaxFiles < 1 {
  2902  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
  2903  	}
  2904  	if l.MaxFileSizeMB < 1 {
  2905  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
  2906  	}
  2907  	return mErr.ErrorOrNil()
  2908  }
  2909  
// Task is a single unit of work, typically a process, that is executed as
// part of a task group.
type Task struct {
	// Name of the task
	Name string

	// Driver is used to control which driver is used
	Driver string

	// User is used to determine which user will run the task. It defaults to
	// the same user the Nomad client is being run as.
	User string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// List of service definitions exposed by the Task
	Services []*Service

	// Vault is used to define the set of Vault policies that this task should
	// have access to.
	Vault *Vault

	// Templates are the set of templates to be rendered for the task.
	Templates []*Template

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Resources is the resources needed by this task
	Resources *Resources

	// DispatchPayload configures how the task retrieves its input from a dispatch
	DispatchPayload *DispatchPayloadConfig

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string

	// KillTimeout is the time between signaling a task that it will be
	// killed and killing it. Canonicalize replaces a zero value with
	// DefaultKillTimeout.
	KillTimeout time.Duration

	// LogConfig provides configuration for log rotation
	LogConfig *LogConfig

	// Artifacts is a list of artifacts to download and extract before running
	// the task.
	Artifacts []*TaskArtifact

	// Leader marks the task as the leader within the group. When the leader
	// task exits, other tasks will be gracefully terminated.
	Leader bool
}
  2967  
  2968  func (t *Task) Copy() *Task {
  2969  	if t == nil {
  2970  		return nil
  2971  	}
  2972  	nt := new(Task)
  2973  	*nt = *t
  2974  	nt.Env = helper.CopyMapStringString(nt.Env)
  2975  
  2976  	if t.Services != nil {
  2977  		services := make([]*Service, len(nt.Services))
  2978  		for i, s := range nt.Services {
  2979  			services[i] = s.Copy()
  2980  		}
  2981  		nt.Services = services
  2982  	}
  2983  
  2984  	nt.Constraints = CopySliceConstraints(nt.Constraints)
  2985  
  2986  	nt.Vault = nt.Vault.Copy()
  2987  	nt.Resources = nt.Resources.Copy()
  2988  	nt.Meta = helper.CopyMapStringString(nt.Meta)
  2989  	nt.DispatchPayload = nt.DispatchPayload.Copy()
  2990  
  2991  	if t.Artifacts != nil {
  2992  		artifacts := make([]*TaskArtifact, 0, len(t.Artifacts))
  2993  		for _, a := range nt.Artifacts {
  2994  			artifacts = append(artifacts, a.Copy())
  2995  		}
  2996  		nt.Artifacts = artifacts
  2997  	}
  2998  
  2999  	if i, err := copystructure.Copy(nt.Config); err != nil {
  3000  		panic(err.Error())
  3001  	} else {
  3002  		nt.Config = i.(map[string]interface{})
  3003  	}
  3004  
  3005  	if t.Templates != nil {
  3006  		templates := make([]*Template, len(t.Templates))
  3007  		for i, tmpl := range nt.Templates {
  3008  			templates[i] = tmpl.Copy()
  3009  		}
  3010  		nt.Templates = templates
  3011  	}
  3012  
  3013  	return nt
  3014  }
  3015  
  3016  // Canonicalize canonicalizes fields in the task.
  3017  func (t *Task) Canonicalize(job *Job, tg *TaskGroup) {
  3018  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  3019  	// problems since we use reflect DeepEquals.
  3020  	if len(t.Meta) == 0 {
  3021  		t.Meta = nil
  3022  	}
  3023  	if len(t.Config) == 0 {
  3024  		t.Config = nil
  3025  	}
  3026  	if len(t.Env) == 0 {
  3027  		t.Env = nil
  3028  	}
  3029  
  3030  	for _, service := range t.Services {
  3031  		service.Canonicalize(job.Name, tg.Name, t.Name)
  3032  	}
  3033  
  3034  	// If Resources are nil initialize them to defaults, otherwise canonicalize
  3035  	if t.Resources == nil {
  3036  		t.Resources = DefaultResources()
  3037  	} else {
  3038  		t.Resources.Canonicalize()
  3039  	}
  3040  
  3041  	// Set the default timeout if it is not specified.
  3042  	if t.KillTimeout == 0 {
  3043  		t.KillTimeout = DefaultKillTimeout
  3044  	}
  3045  
  3046  	if t.Vault != nil {
  3047  		t.Vault.Canonicalize()
  3048  	}
  3049  
  3050  	for _, template := range t.Templates {
  3051  		template.Canonicalize()
  3052  	}
  3053  }
  3054  
  3055  func (t *Task) GoString() string {
  3056  	return fmt.Sprintf("*%#v", *t)
  3057  }
  3058  
// Validate is used to sanity check a task. It checks the task's name, driver,
// kill timeout, resources, log config, constraints, services, artifacts,
// Vault block, templates and dispatch payload, collecting every problem found
// rather than stopping at the first. ephemeralDisk, when non-nil, is the disk
// the task's log storage must fit within. It returns nil if the task is valid.
func (t *Task) Validate(ephemeralDisk *EphemeralDisk) error {
	var mErr multierror.Error
	if t.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task name"))
	}
	if strings.ContainsAny(t.Name, `/\`) {
		// We enforce this so that when creating the directory on disk it will
		// not have any slashes.
		mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes"))
	}
	if t.Driver == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
	}
	// Only negative timeouts are rejected; zero passes this check.
	if t.KillTimeout.Nanoseconds() < 0 {
		mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value"))
	}

	// Validate the resources.
	if t.Resources == nil {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
	} else {
		if err := t.Resources.MeetsMinResources(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}

		// Ensure the task isn't asking for disk resources
		if t.Resources.DiskMB > 0 {
			mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level."))
		}
	}

	// Validate the log config
	if t.LogConfig == nil {
		mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config"))
	} else if err := t.LogConfig.Validate(); err != nil {
		mErr.Errors = append(mErr.Errors, err)
	}

	// Constraints are reported with 1-based positions.
	for idx, constr := range t.Constraints {
		if err := constr.Validate(); err != nil {
			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}

		// These operands are not allowed on task-level constraints.
		switch constr.Operand {
		case ConstraintDistinctHosts, ConstraintDistinctProperty:
			outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	// Validate Services
	if err := validateServices(t); err != nil {
		mErr.Errors = append(mErr.Errors, err)
	}

	// The worst-case log usage (files * size per file) must be strictly
	// smaller than the ephemeral disk.
	if t.LogConfig != nil && ephemeralDisk != nil {
		logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB)
		if ephemeralDisk.SizeMB <= logUsage {
			mErr.Errors = append(mErr.Errors,
				fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)",
					logUsage, ephemeralDisk.SizeMB))
		}
	}

	for idx, artifact := range t.Artifacts {
		if err := artifact.Validate(); err != nil {
			outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	if t.Vault != nil {
		if err := t.Vault.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err))
		}
	}

	// destinations maps each template DestPath to the 1-based position of the
	// first template using it, so later duplicates can name the earlier one.
	destinations := make(map[string]int, len(t.Templates))
	for idx, tmpl := range t.Templates {
		if err := tmpl.Validate(); err != nil {
			outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}

		if other, ok := destinations[tmpl.DestPath]; ok {
			outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other)
			mErr.Errors = append(mErr.Errors, outer)
		} else {
			destinations[tmpl.DestPath] = idx + 1
		}
	}

	// Validate the dispatch payload block if there
	if t.DispatchPayload != nil {
		if err := t.DispatchPayload.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err))
		}
	}

	return mErr.ErrorOrNil()
}
  3162  
  3163  // validateServices takes a task and validates the services within it are valid
  3164  // and reference ports that exist.
  3165  func validateServices(t *Task) error {
  3166  	var mErr multierror.Error
  3167  
  3168  	// Ensure that services don't ask for non-existent ports and their names are
  3169  	// unique.
  3170  	servicePorts := make(map[string][]string)
  3171  	knownServices := make(map[string]struct{})
  3172  	for i, service := range t.Services {
  3173  		if err := service.Validate(); err != nil {
  3174  			outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err)
  3175  			mErr.Errors = append(mErr.Errors, outer)
  3176  		}
  3177  
  3178  		// Ensure that services with the same name are not being registered for
  3179  		// the same port
  3180  		if _, ok := knownServices[service.Name+service.PortLabel]; ok {
  3181  			mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name))
  3182  		}
  3183  		knownServices[service.Name+service.PortLabel] = struct{}{}
  3184  
  3185  		if service.PortLabel != "" {
  3186  			servicePorts[service.PortLabel] = append(servicePorts[service.PortLabel], service.Name)
  3187  		}
  3188  
  3189  		// Ensure that check names are unique.
  3190  		knownChecks := make(map[string]struct{})
  3191  		for _, check := range service.Checks {
  3192  			if _, ok := knownChecks[check.Name]; ok {
  3193  				mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name))
  3194  			}
  3195  			knownChecks[check.Name] = struct{}{}
  3196  		}
  3197  	}
  3198  
  3199  	// Get the set of port labels.
  3200  	portLabels := make(map[string]struct{})
  3201  	if t.Resources != nil {
  3202  		for _, network := range t.Resources.Networks {
  3203  			ports := network.PortLabels()
  3204  			for portLabel, _ := range ports {
  3205  				portLabels[portLabel] = struct{}{}
  3206  			}
  3207  		}
  3208  	}
  3209  
  3210  	// Ensure all ports referenced in services exist.
  3211  	for servicePort, services := range servicePorts {
  3212  		_, ok := portLabels[servicePort]
  3213  		if !ok {
  3214  			joined := strings.Join(services, ", ")
  3215  			err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined)
  3216  			mErr.Errors = append(mErr.Errors, err)
  3217  		}
  3218  	}
  3219  
  3220  	// Ensure address mode is valid
  3221  	return mErr.ErrorOrNil()
  3222  }
  3223  
// Change modes accepted for Template.ChangeMode.
const (
	// TemplateChangeModeNoop marks that no action should be taken if the
	// template is re-rendered
	TemplateChangeModeNoop = "noop"

	// TemplateChangeModeSignal marks that the task should be signaled if the
	// template is re-rendered
	TemplateChangeModeSignal = "signal"

	// TemplateChangeModeRestart marks that the task should be restarted if the
	// template is re-rendered
	TemplateChangeModeRestart = "restart"
)
  3237  
var (
	// TemplateChangeModeInvalidError is the error reported by
	// Template.Validate when the change mode is not one of the
	// TemplateChangeMode* constants.
	TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart")
)
  3243  
// Template represents a template configuration to be rendered for a given task
type Template struct {
	// SourcePath is the path to the template to be rendered
	SourcePath string

	// DestPath is the path to where the template should be rendered
	DestPath string

	// EmbeddedTmpl stores the raw template. This is useful for smaller
	// templates where they are embedded in the job file rather than sent as
	// an artifact
	EmbeddedTmpl string

	// ChangeMode indicates what should be done if the template is re-rendered
	ChangeMode string

	// ChangeSignal is the signal that should be sent if the change mode
	// requires it.
	ChangeSignal string

	// Splay is used to avoid coordinated restarts of processes by applying a
	// random wait between 0 and the given splay value before signalling the
	// application of a change
	Splay time.Duration

	// Perms is the permission the file should be written out with, given as
	// an octal string.
	Perms string

	// LeftDelim and RightDelim are optional configurations to control what
	// delimiter is utilized when parsing the template.
	LeftDelim  string
	RightDelim string

	// Envvars enables exposing the template as environment variables
	// instead of as a file. The template must be of the form:
	//
	//	VAR_NAME_1={{ key service/my-key }}
	//	VAR_NAME_2=raw string and {{ env "attr.kernel.name" }}
	//
	// Lines will be split on the initial "=" with the first part being the
	// key name and the second part the value.
	// Empty lines and lines starting with # will be ignored, but to avoid
	// escaping issues #s within lines will not be treated as comments.
	Envvars bool
}
  3288  
  3289  // DefaultTemplate returns a default template.
  3290  func DefaultTemplate() *Template {
  3291  	return &Template{
  3292  		ChangeMode: TemplateChangeModeRestart,
  3293  		Splay:      5 * time.Second,
  3294  		Perms:      "0644",
  3295  	}
  3296  }
  3297  
  3298  func (t *Template) Copy() *Template {
  3299  	if t == nil {
  3300  		return nil
  3301  	}
  3302  	copy := new(Template)
  3303  	*copy = *t
  3304  	return copy
  3305  }
  3306  
  3307  func (t *Template) Canonicalize() {
  3308  	if t.ChangeSignal != "" {
  3309  		t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
  3310  	}
  3311  }
  3312  
  3313  func (t *Template) Validate() error {
  3314  	var mErr multierror.Error
  3315  
  3316  	// Verify we have something to render
  3317  	if t.SourcePath == "" && t.EmbeddedTmpl == "" {
  3318  		multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template"))
  3319  	}
  3320  
  3321  	// Verify we can render somewhere
  3322  	if t.DestPath == "" {
  3323  		multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template"))
  3324  	}
  3325  
  3326  	// Verify the destination doesn't escape
  3327  	escaped, err := PathEscapesAllocDir("task", t.DestPath)
  3328  	if err != nil {
  3329  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
  3330  	} else if escaped {
  3331  		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
  3332  	}
  3333  
  3334  	// Verify a proper change mode
  3335  	switch t.ChangeMode {
  3336  	case TemplateChangeModeNoop, TemplateChangeModeRestart:
  3337  	case TemplateChangeModeSignal:
  3338  		if t.ChangeSignal == "" {
  3339  			multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal"))
  3340  		}
  3341  	default:
  3342  		multierror.Append(&mErr, TemplateChangeModeInvalidError)
  3343  	}
  3344  
  3345  	// Verify the splay is positive
  3346  	if t.Splay < 0 {
  3347  		multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value"))
  3348  	}
  3349  
  3350  	// Verify the permissions
  3351  	if t.Perms != "" {
  3352  		if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil {
  3353  			multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err))
  3354  		}
  3355  	}
  3356  
  3357  	return mErr.ErrorOrNil()
  3358  }
  3359  
// Set of possible states for a task, as stored in TaskState.State.
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)
  3366  
// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// The current state of the task.
	State string

	// Failed marks a task as having failed
	Failed bool

	// Restarts is the number of times the task has restarted
	Restarts uint64

	// LastRestart is the time the task last restarted. It is updated each time the
	// task restarts
	LastRestart time.Time

	// StartedAt is the time the task is started. It is updated each time the
	// task starts
	StartedAt time.Time

	// FinishedAt is the time at which the task transitioned to dead and will
	// not be started again.
	FinishedAt time.Time

	// Series of task events that transition the state of the task.
	Events []*TaskEvent
}
  3394  
  3395  func (ts *TaskState) Copy() *TaskState {
  3396  	if ts == nil {
  3397  		return nil
  3398  	}
  3399  	copy := new(TaskState)
  3400  	*copy = *ts
  3401  
  3402  	if ts.Events != nil {
  3403  		copy.Events = make([]*TaskEvent, len(ts.Events))
  3404  		for i, e := range ts.Events {
  3405  			copy.Events[i] = e.Copy()
  3406  		}
  3407  	}
  3408  	return copy
  3409  }
  3410  
  3411  // Successful returns whether a task finished successfully.
  3412  func (ts *TaskState) Successful() bool {
  3413  	l := len(ts.Events)
  3414  	if ts.State != TaskStateDead || l == 0 {
  3415  		return false
  3416  	}
  3417  
  3418  	e := ts.Events[l-1]
  3419  	if e.Type != TaskTerminated {
  3420  		return false
  3421  	}
  3422  
  3423  	return e.ExitCode == 0
  3424  }
  3425  
// Task event types, as stored in TaskEvent.Type.
const (
	// TaskSetupFailure indicates that the task could not be started due to
	// a setup failure.
	TaskSetupFailure = "Setup Failure"

	// TaskDriverFailure indicates that the task could not be started due to a
	// failure in the driver.
	TaskDriverFailure = "Driver Failure"

	// TaskReceived signals that the task has been pulled by the client at the
	// given timestamp.
	TaskReceived = "Received"

	// TaskFailedValidation indicates the task was invalid and as such was not
	// run.
	TaskFailedValidation = "Failed Validation"

	// TaskStarted signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// TaskTerminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// TaskKilling indicates a kill signal has been sent to the task.
	TaskKilling = "Killing"

	// TaskKilled indicates a user has killed the task.
	TaskKilled = "Killed"

	// TaskRestarting indicates that task terminated and is being restarted.
	TaskRestarting = "Restarting"

	// TaskNotRestarting indicates that the task has failed and is not being
	// restarted because it has exceeded its restart policy.
	TaskNotRestarting = "Not Restarting"

	// TaskRestartSignal indicates that the task has been signalled to be
	// restarted
	TaskRestartSignal = "Restart Signaled"

	// TaskSignaling indicates that the task is being signalled.
	TaskSignaling = "Signaling"

	// TaskDownloadingArtifacts means the task is downloading the artifacts
	// specified in the task.
	TaskDownloadingArtifacts = "Downloading Artifacts"

	// TaskArtifactDownloadFailed indicates that downloading the artifacts
	// failed.
	TaskArtifactDownloadFailed = "Failed Artifact Download"

	// TaskBuildingTaskDir indicates that the task directory/chroot is being
	// built.
	TaskBuildingTaskDir = "Building Task Directory"

	// TaskSetup indicates the task runner is setting up the task environment
	TaskSetup = "Task Setup"

	// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
	// exceeded the requested disk resources.
	TaskDiskExceeded = "Disk Resources Exceeded"

	// TaskSiblingFailed indicates that a sibling task in the task group has
	// failed.
	TaskSiblingFailed = "Sibling Task Failed"

	// TaskDriverMessage is an informational event message emitted by
	// drivers such as when they're performing a long running action like
	// downloading an image.
	TaskDriverMessage = "Driver"

	// TaskLeaderDead indicates that the leader task within the task group
	// has finished.
	TaskLeaderDead = "Leader Task Dead"
)
  3501  
// TaskEvent is an event that affects the state of a task and contains
// metadata appropriate to the event's type.
type TaskEvent struct {
	Type string // One of the Task* event type constants.
	Time int64  // Unix Nanosecond timestamp

	// FailsTask marks whether this event fails the task
	FailsTask bool

	// Restart fields.
	RestartReason string

	// Setup Failure fields.
	SetupError string

	// Driver Failure fields.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.
	ExitCode int    // The exit code of the task.
	Signal   int    // The signal that terminated the task.
	Message  string // A possible message explaining the termination of the task.

	// Killing fields
	KillTimeout time.Duration

	// Task Killed Fields.
	KillError string // Error killing the task.

	// KillReason is the reason the task was killed
	KillReason string

	// TaskRestarting fields.
	StartDelay int64 // The sleep period before restarting the task in unix nanoseconds.

	// Artifact Download fields
	DownloadError string // Error downloading artifacts

	// Validation fields
	ValidationError string // Validation error

	// The maximum allowed task disk size.
	DiskLimit int64

	// Name of the sibling task that caused termination of the task that
	// the TaskEvent refers to.
	FailedSibling string

	// VaultError is the error from token renewal
	VaultError string

	// TaskSignalReason indicates the reason the task is being signalled.
	TaskSignalReason string

	// TaskSignal is the signal that was sent to the task
	TaskSignal string

	// DriverMessage indicates a driver action being taken.
	DriverMessage string
}
  3562  
  3563  func (te *TaskEvent) GoString() string {
  3564  	return fmt.Sprintf("%v at %v", te.Type, te.Time)
  3565  }
  3566  
// SetMessage sets the Message field of the TaskEvent and returns the event so
// that calls can be chained.
func (te *TaskEvent) SetMessage(msg string) *TaskEvent {
	te.Message = msg
	return te
}
  3572  
  3573  func (te *TaskEvent) Copy() *TaskEvent {
  3574  	if te == nil {
  3575  		return nil
  3576  	}
  3577  	copy := new(TaskEvent)
  3578  	*copy = *te
  3579  	return copy
  3580  }
  3581  
  3582  func NewTaskEvent(event string) *TaskEvent {
  3583  	return &TaskEvent{
  3584  		Type: event,
  3585  		Time: time.Now().UnixNano(),
  3586  	}
  3587  }
  3588  
  3589  // SetSetupError is used to store an error that occured while setting up the
  3590  // task
  3591  func (e *TaskEvent) SetSetupError(err error) *TaskEvent {
  3592  	if err != nil {
  3593  		e.SetupError = err.Error()
  3594  	}
  3595  	return e
  3596  }
  3597  
  3598  func (e *TaskEvent) SetFailsTask() *TaskEvent {
  3599  	e.FailsTask = true
  3600  	return e
  3601  }
  3602  
  3603  func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
  3604  	if err != nil {
  3605  		e.DriverError = err.Error()
  3606  	}
  3607  	return e
  3608  }
  3609  
  3610  func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
  3611  	e.ExitCode = c
  3612  	return e
  3613  }
  3614  
  3615  func (e *TaskEvent) SetSignal(s int) *TaskEvent {
  3616  	e.Signal = s
  3617  	return e
  3618  }
  3619  
  3620  func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
  3621  	if err != nil {
  3622  		e.Message = err.Error()
  3623  	}
  3624  	return e
  3625  }
  3626  
  3627  func (e *TaskEvent) SetKillError(err error) *TaskEvent {
  3628  	if err != nil {
  3629  		e.KillError = err.Error()
  3630  	}
  3631  	return e
  3632  }
  3633  
  3634  func (e *TaskEvent) SetKillReason(r string) *TaskEvent {
  3635  	e.KillReason = r
  3636  	return e
  3637  }
  3638  
  3639  func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent {
  3640  	e.StartDelay = int64(delay)
  3641  	return e
  3642  }
  3643  
  3644  func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent {
  3645  	e.RestartReason = reason
  3646  	return e
  3647  }
  3648  
  3649  func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent {
  3650  	e.TaskSignalReason = r
  3651  	return e
  3652  }
  3653  
  3654  func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent {
  3655  	e.TaskSignal = s.String()
  3656  	return e
  3657  }
  3658  
  3659  func (e *TaskEvent) SetDownloadError(err error) *TaskEvent {
  3660  	if err != nil {
  3661  		e.DownloadError = err.Error()
  3662  	}
  3663  	return e
  3664  }
  3665  
  3666  func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
  3667  	if err != nil {
  3668  		e.ValidationError = err.Error()
  3669  	}
  3670  	return e
  3671  }
  3672  
  3673  func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent {
  3674  	e.KillTimeout = timeout
  3675  	return e
  3676  }
  3677  
  3678  func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent {
  3679  	e.DiskLimit = limit
  3680  	return e
  3681  }
  3682  
  3683  func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent {
  3684  	e.FailedSibling = sibling
  3685  	return e
  3686  }
  3687  
  3688  func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent {
  3689  	if err != nil {
  3690  		e.VaultError = err.Error()
  3691  	}
  3692  	return e
  3693  }
  3694  
  3695  func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent {
  3696  	e.DriverMessage = m
  3697  	return e
  3698  }
  3699  
// TaskArtifact is an artifact to download before running the task. It
// describes where to fetch the artifact from, how to fetch it, and where to
// place it relative to the task's directory.
type TaskArtifact struct {
	// GetterSource is the source to download an artifact using go-getter.
	// Validate rejects an empty source.
	GetterSource string

	// GetterOptions are options to use when downloading the artifact using
	// go-getter. Validate inspects the "checksum" option when it is present.
	GetterOptions map[string]string

	// GetterMode is the go-getter.ClientMode for fetching resources.
	// Defaults to "any" but can be set to "file" or "dir".
	GetterMode string

	// RelativeDest is the download destination given relative to the task's
	// directory.
	RelativeDest string
}
  3717  
  3718  func (ta *TaskArtifact) Copy() *TaskArtifact {
  3719  	if ta == nil {
  3720  		return nil
  3721  	}
  3722  	nta := new(TaskArtifact)
  3723  	*nta = *ta
  3724  	nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions)
  3725  	return nta
  3726  }
  3727  
// GoString returns the artifact formatted with the %+v verb, i.e. with field
// names included.
func (ta *TaskArtifact) GoString() string {
	return fmt.Sprintf("%+v", ta)
}
  3731  
  3732  // PathEscapesAllocDir returns if the given path escapes the allocation
  3733  // directory. The prefix allows adding a prefix if the path will be joined, for
  3734  // example a "task/local" prefix may be provided if the path will be joined
  3735  // against that prefix.
  3736  func PathEscapesAllocDir(prefix, path string) (bool, error) {
  3737  	// Verify the destination doesn't escape the tasks directory
  3738  	alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/"))
  3739  	if err != nil {
  3740  		return false, err
  3741  	}
  3742  	abs, err := filepath.Abs(filepath.Join(alloc, prefix, path))
  3743  	if err != nil {
  3744  		return false, err
  3745  	}
  3746  	rel, err := filepath.Rel(alloc, abs)
  3747  	if err != nil {
  3748  		return false, err
  3749  	}
  3750  
  3751  	return strings.HasPrefix(rel, ".."), nil
  3752  }
  3753  
  3754  func (ta *TaskArtifact) Validate() error {
  3755  	// Verify the source
  3756  	var mErr multierror.Error
  3757  	if ta.GetterSource == "" {
  3758  		mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified"))
  3759  	}
  3760  
  3761  	switch ta.GetterMode {
  3762  	case "":
  3763  		// Default to any
  3764  		ta.GetterMode = GetterModeAny
  3765  	case GetterModeAny, GetterModeFile, GetterModeDir:
  3766  		// Ok
  3767  	default:
  3768  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s",
  3769  			ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir))
  3770  	}
  3771  
  3772  	escaped, err := PathEscapesAllocDir("task", ta.RelativeDest)
  3773  	if err != nil {
  3774  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
  3775  	} else if escaped {
  3776  		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
  3777  	}
  3778  
  3779  	// Verify the checksum
  3780  	if check, ok := ta.GetterOptions["checksum"]; ok {
  3781  		check = strings.TrimSpace(check)
  3782  		if check == "" {
  3783  			mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value cannot be empty"))
  3784  			return mErr.ErrorOrNil()
  3785  		}
  3786  
  3787  		parts := strings.Split(check, ":")
  3788  		if l := len(parts); l != 2 {
  3789  			mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check))
  3790  			return mErr.ErrorOrNil()
  3791  		}
  3792  
  3793  		checksumVal := parts[1]
  3794  		checksumBytes, err := hex.DecodeString(checksumVal)
  3795  		if err != nil {
  3796  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err))
  3797  			return mErr.ErrorOrNil()
  3798  		}
  3799  
  3800  		checksumType := parts[0]
  3801  		expectedLength := 0
  3802  		switch checksumType {
  3803  		case "md5":
  3804  			expectedLength = md5.Size
  3805  		case "sha1":
  3806  			expectedLength = sha1.Size
  3807  		case "sha256":
  3808  			expectedLength = sha256.Size
  3809  		case "sha512":
  3810  			expectedLength = sha512.Size
  3811  		default:
  3812  			mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType))
  3813  			return mErr.ErrorOrNil()
  3814  		}
  3815  
  3816  		if len(checksumBytes) != expectedLength {
  3817  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal))
  3818  			return mErr.ErrorOrNil()
  3819  		}
  3820  	}
  3821  
  3822  	return mErr.ErrorOrNil()
  3823  }
  3824  
// Named constraint operands. Constraint.Validate performs extra checks on the
// right-hand target when the operand is ConstraintRegex or ConstraintVersion.
const (
	ConstraintDistinctProperty = "distinct_property"
	ConstraintDistinctHosts    = "distinct_hosts"
	ConstraintRegex            = "regexp"
	ConstraintVersion          = "version"
	ConstraintSetContains      = "set_contains"
)
  3832  
  3833  // Constraints are used to restrict placement options.
  3834  type Constraint struct {
  3835  	LTarget string // Left-hand target
  3836  	RTarget string // Right-hand target
  3837  	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
  3838  	str     string // Memoized string
  3839  }
  3840  
  3841  // Equal checks if two constraints are equal
  3842  func (c *Constraint) Equal(o *Constraint) bool {
  3843  	return c.LTarget == o.LTarget &&
  3844  		c.RTarget == o.RTarget &&
  3845  		c.Operand == o.Operand
  3846  }
  3847  
  3848  func (c *Constraint) Copy() *Constraint {
  3849  	if c == nil {
  3850  		return nil
  3851  	}
  3852  	nc := new(Constraint)
  3853  	*nc = *c
  3854  	return nc
  3855  }
  3856  
  3857  func (c *Constraint) String() string {
  3858  	if c.str != "" {
  3859  		return c.str
  3860  	}
  3861  	c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
  3862  	return c.str
  3863  }
  3864  
  3865  func (c *Constraint) Validate() error {
  3866  	var mErr multierror.Error
  3867  	if c.Operand == "" {
  3868  		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
  3869  	}
  3870  
  3871  	// Perform additional validation based on operand
  3872  	switch c.Operand {
  3873  	case ConstraintRegex:
  3874  		if _, err := regexp.Compile(c.RTarget); err != nil {
  3875  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
  3876  		}
  3877  	case ConstraintVersion:
  3878  		if _, err := version.NewConstraint(c.RTarget); err != nil {
  3879  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
  3880  		}
  3881  	}
  3882  	return mErr.ErrorOrNil()
  3883  }
  3884  
  3885  // EphemeralDisk is an ephemeral disk object
  3886  type EphemeralDisk struct {
  3887  	// Sticky indicates whether the allocation is sticky to a node
  3888  	Sticky bool
  3889  
  3890  	// SizeMB is the size of the local disk
  3891  	SizeMB int
  3892  
  3893  	// Migrate determines if Nomad client should migrate the allocation dir for
  3894  	// sticky allocations
  3895  	Migrate bool
  3896  }
  3897  
  3898  // DefaultEphemeralDisk returns a EphemeralDisk with default configurations
  3899  func DefaultEphemeralDisk() *EphemeralDisk {
  3900  	return &EphemeralDisk{
  3901  		SizeMB: 300,
  3902  	}
  3903  }
  3904  
  3905  // Validate validates EphemeralDisk
  3906  func (d *EphemeralDisk) Validate() error {
  3907  	if d.SizeMB < 10 {
  3908  		return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB)
  3909  	}
  3910  	return nil
  3911  }
  3912  
  3913  // Copy copies the EphemeralDisk struct and returns a new one
  3914  func (d *EphemeralDisk) Copy() *EphemeralDisk {
  3915  	ld := new(EphemeralDisk)
  3916  	*ld = *d
  3917  	return ld
  3918  }
  3919  
  3920  const (
  3921  	// VaultChangeModeNoop takes no action when a new token is retrieved.
  3922  	VaultChangeModeNoop = "noop"
  3923  
  3924  	// VaultChangeModeSignal signals the task when a new token is retrieved.
  3925  	VaultChangeModeSignal = "signal"
  3926  
  3927  	// VaultChangeModeRestart restarts the task when a new token is retrieved.
  3928  	VaultChangeModeRestart = "restart"
  3929  )
  3930  
  3931  // Vault stores the set of premissions a task needs access to from Vault.
  3932  type Vault struct {
  3933  	// Policies is the set of policies that the task needs access to
  3934  	Policies []string
  3935  
  3936  	// Env marks whether the Vault Token should be exposed as an environment
  3937  	// variable
  3938  	Env bool
  3939  
  3940  	// ChangeMode is used to configure the task's behavior when the Vault
  3941  	// token changes because the original token could not be renewed in time.
  3942  	ChangeMode string
  3943  
  3944  	// ChangeSignal is the signal sent to the task when a new token is
  3945  	// retrieved. This is only valid when using the signal change mode.
  3946  	ChangeSignal string
  3947  }
  3948  
  3949  func DefaultVaultBlock() *Vault {
  3950  	return &Vault{
  3951  		Env:        true,
  3952  		ChangeMode: VaultChangeModeRestart,
  3953  	}
  3954  }
  3955  
  3956  // Copy returns a copy of this Vault block.
  3957  func (v *Vault) Copy() *Vault {
  3958  	if v == nil {
  3959  		return nil
  3960  	}
  3961  
  3962  	nv := new(Vault)
  3963  	*nv = *v
  3964  	return nv
  3965  }
  3966  
  3967  func (v *Vault) Canonicalize() {
  3968  	if v.ChangeSignal != "" {
  3969  		v.ChangeSignal = strings.ToUpper(v.ChangeSignal)
  3970  	}
  3971  }
  3972  
  3973  // Validate returns if the Vault block is valid.
  3974  func (v *Vault) Validate() error {
  3975  	if v == nil {
  3976  		return nil
  3977  	}
  3978  
  3979  	var mErr multierror.Error
  3980  	if len(v.Policies) == 0 {
  3981  		multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty"))
  3982  	}
  3983  
  3984  	for _, p := range v.Policies {
  3985  		if p == "root" {
  3986  			multierror.Append(&mErr, fmt.Errorf("Can not specifiy \"root\" policy"))
  3987  		}
  3988  	}
  3989  
  3990  	switch v.ChangeMode {
  3991  	case VaultChangeModeSignal:
  3992  		if v.ChangeSignal == "" {
  3993  			multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal))
  3994  		}
  3995  	case VaultChangeModeNoop, VaultChangeModeRestart:
  3996  	default:
  3997  		multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode))
  3998  	}
  3999  
  4000  	return mErr.ErrorOrNil()
  4001  }
  4002  
const (
	// DeploymentStatuses are the various states a deployment can be in.
	// Deployment.Active treats running and paused as active; every other
	// status is considered terminal.
	DeploymentStatusRunning    = "running"
	DeploymentStatusPaused     = "paused"
	DeploymentStatusFailed     = "failed"
	DeploymentStatusSuccessful = "successful"
	DeploymentStatusCancelled  = "cancelled"

	// DeploymentStatusDescriptions are the various descriptions of the states a
	// deployment can be in.
	DeploymentStatusDescriptionRunning               = "Deployment is running"
	DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires promotion"
	DeploymentStatusDescriptionPaused                = "Deployment is paused"
	DeploymentStatusDescriptionSuccessful            = "Deployment completed successfully"
	DeploymentStatusDescriptionStoppedJob            = "Cancelled because job is stopped"
	DeploymentStatusDescriptionNewerJob              = "Cancelled due to newer version of job"
	DeploymentStatusDescriptionFailedAllocations     = "Failed due to unhealthy allocations"
	DeploymentStatusDescriptionFailedByUser          = "Deployment marked as failed"
)
  4022  
  4023  // DeploymentStatusDescriptionRollback is used to get the status description of
  4024  // a deployment when rolling back to an older job.
  4025  func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string {
  4026  	return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion)
  4027  }
  4028  
// Deployment is the object that represents a job deployment which is used to
// transition a job between versions.
type Deployment struct {
	// ID is a generated UUID for the deployment
	ID string

	// JobID is the job the deployment is created for
	JobID string

	// JobVersion is the version of the job at which the deployment is tracking
	JobVersion uint64

	// JobModifyIndex is the modify index of the job at which the deployment is tracking
	JobModifyIndex uint64

	// JobCreateIndex is the create index of the job which the deployment is
	// tracking. It is needed so that if the job gets stopped and reran we can
	// present the correct list of deployments for the job and not old ones.
	JobCreateIndex uint64

	// TaskGroups is the set of task groups affected by the deployment and their
	// current deployment status, keyed by task group name.
	TaskGroups map[string]*DeploymentState

	// The status of the deployment
	Status string

	// StatusDescription allows a human readable description of the deployment
	// status.
	StatusDescription string

	// CreateIndex and ModifyIndex are not set by NewDeployment.
	// NOTE(review): presumably Raft indexes, as on Allocation - confirm.
	CreateIndex uint64
	ModifyIndex uint64
}
  4063  
  4064  // NewDeployment creates a new deployment given the job.
  4065  func NewDeployment(job *Job) *Deployment {
  4066  	return &Deployment{
  4067  		ID:                GenerateUUID(),
  4068  		JobID:             job.ID,
  4069  		JobVersion:        job.Version,
  4070  		JobModifyIndex:    job.ModifyIndex,
  4071  		JobCreateIndex:    job.CreateIndex,
  4072  		Status:            DeploymentStatusRunning,
  4073  		StatusDescription: DeploymentStatusDescriptionRunning,
  4074  		TaskGroups:        make(map[string]*DeploymentState, len(job.TaskGroups)),
  4075  	}
  4076  }
  4077  
  4078  func (d *Deployment) Copy() *Deployment {
  4079  	if d == nil {
  4080  		return nil
  4081  	}
  4082  
  4083  	c := &Deployment{}
  4084  	*c = *d
  4085  
  4086  	c.TaskGroups = nil
  4087  	if l := len(d.TaskGroups); d.TaskGroups != nil {
  4088  		c.TaskGroups = make(map[string]*DeploymentState, l)
  4089  		for tg, s := range d.TaskGroups {
  4090  			c.TaskGroups[tg] = s.Copy()
  4091  		}
  4092  	}
  4093  
  4094  	return c
  4095  }
  4096  
  4097  // Active returns whether the deployment is active or terminal.
  4098  func (d *Deployment) Active() bool {
  4099  	switch d.Status {
  4100  	case DeploymentStatusRunning, DeploymentStatusPaused:
  4101  		return true
  4102  	default:
  4103  		return false
  4104  	}
  4105  }
  4106  
  4107  // GetID is a helper for getting the ID when the object may be nil
  4108  func (d *Deployment) GetID() string {
  4109  	if d == nil {
  4110  		return ""
  4111  	}
  4112  	return d.ID
  4113  }
  4114  
  4115  // HasPlacedCanaries returns whether the deployment has placed canaries
  4116  func (d *Deployment) HasPlacedCanaries() bool {
  4117  	if d == nil || len(d.TaskGroups) == 0 {
  4118  		return false
  4119  	}
  4120  	for _, group := range d.TaskGroups {
  4121  		if len(group.PlacedCanaries) != 0 {
  4122  			return true
  4123  		}
  4124  	}
  4125  	return false
  4126  }
  4127  
  4128  // RequiresPromotion returns whether the deployment requires promotion to
  4129  // continue
  4130  func (d *Deployment) RequiresPromotion() bool {
  4131  	if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning {
  4132  		return false
  4133  	}
  4134  	for _, group := range d.TaskGroups {
  4135  		if group.DesiredCanaries > 0 && !group.Promoted {
  4136  			return true
  4137  		}
  4138  	}
  4139  	return false
  4140  }
  4141  
  4142  func (d *Deployment) GoString() string {
  4143  	base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription)
  4144  	for group, state := range d.TaskGroups {
  4145  		base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state)
  4146  	}
  4147  	return base
  4148  }
  4149  
  4150  // DeploymentState tracks the state of a deployment for a given task group.
  4151  type DeploymentState struct {
  4152  	// AutoRevert marks whether the task group has indicated the job should be
  4153  	// reverted on failure
  4154  	AutoRevert bool
  4155  
  4156  	// Promoted marks whether the canaries have been promoted
  4157  	Promoted bool
  4158  
  4159  	// PlacedCanaries is the set of placed canary allocations
  4160  	PlacedCanaries []string
  4161  
  4162  	// DesiredCanaries is the number of canaries that should be created.
  4163  	DesiredCanaries int
  4164  
  4165  	// DesiredTotal is the total number of allocations that should be created as
  4166  	// part of the deployment.
  4167  	DesiredTotal int
  4168  
  4169  	// PlacedAllocs is the number of allocations that have been placed
  4170  	PlacedAllocs int
  4171  
  4172  	// HealthyAllocs is the number of allocations that have been marked healthy.
  4173  	HealthyAllocs int
  4174  
  4175  	// UnhealthyAllocs are allocations that have been marked as unhealthy.
  4176  	UnhealthyAllocs int
  4177  }
  4178  
  4179  func (d *DeploymentState) GoString() string {
  4180  	base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal)
  4181  	base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries)
  4182  	base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries)
  4183  	base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted)
  4184  	base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs)
  4185  	base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs)
  4186  	base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs)
  4187  	base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert)
  4188  	return base
  4189  }
  4190  
  4191  func (d *DeploymentState) Copy() *DeploymentState {
  4192  	c := &DeploymentState{}
  4193  	*c = *d
  4194  	c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries)
  4195  	return c
  4196  }
  4197  
// DeploymentStatusUpdate is used to update the status of a given deployment.
// It names the deployment and carries the new status and its description.
type DeploymentStatusUpdate struct {
	// DeploymentID is the ID of the deployment to update
	DeploymentID string

	// Status is the new status of the deployment.
	Status string

	// StatusDescription is the new status description of the deployment.
	StatusDescription string
}
  4209  
// Desired statuses of an allocation. Allocation.TerminalStatus treats stop
// and evict as terminal.
const (
	AllocDesiredStatusRun   = "run"   // Allocation should run
	AllocDesiredStatusStop  = "stop"  // Allocation should stop
	AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted
)
  4215  
// Client-side statuses of an allocation. Allocation.Terminated treats
// complete, failed and lost as terminal.
const (
	AllocClientStatusPending  = "pending"
	AllocClientStatusRunning  = "running"
	AllocClientStatusComplete = "complete"
	AllocClientStatusFailed   = "failed"
	AllocClientStatusLost     = "lost"
)
  4223  
// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// ID of the allocation (UUID)
	ID string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// Job is the parent job of the task group being allocated.
	// This is copied at allocation time to avoid issues if the job
	// definition is updated.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// Resources is the total set of resources allocated as part
	// of this allocation of the task group.
	Resources *Resources

	// SharedResources are the resources that are shared by all the tasks in an
	// allocation
	SharedResources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources.
	TaskResources map[string]*Resources

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// Desired Status of the allocation on the client
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	// about the desired status.
	DesiredDescription string

	// Status of the allocation on the client
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	// about the client status.
	ClientDescription string

	// TaskStates stores the state of each task, keyed by task name.
	TaskStates map[string]*TaskState

	// PreviousAllocation is the allocation that this allocation is replacing
	PreviousAllocation string

	// DeploymentID identifies an allocation as being created from a
	// particular deployment
	DeploymentID string

	// DeploymentStatus captures the status of the allocation as part of the
	// given deployment
	DeploymentStatus *AllocDeploymentStatus

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64

	// AllocModifyIndex is not updated when the client updates allocations. This
	// lets the client pull only the allocs updated by the server.
	AllocModifyIndex uint64

	// CreateTime is the time the allocation has finished scheduling and been
	// verified by the plan applier.
	CreateTime int64
}
  4300  
  4301  // Index returns the index of the allocation. If the allocation is from a task
  4302  // group with count greater than 1, there will be multiple allocations for it.
  4303  func (a *Allocation) Index() uint {
  4304  	l := len(a.Name)
  4305  	prefix := len(a.JobID) + len(a.TaskGroup) + 2
  4306  	if l <= 3 || l <= prefix {
  4307  		return uint(0)
  4308  	}
  4309  
  4310  	strNum := a.Name[prefix : len(a.Name)-1]
  4311  	num, _ := strconv.Atoi(strNum)
  4312  	return uint(num)
  4313  }
  4314  
// Copy provides a copy of the allocation, including a copy of its Job. See
// CopySkipJob for a variant that keeps the original Job pointer.
func (a *Allocation) Copy() *Allocation {
	return a.copyImpl(true)
}
  4318  
// CopySkipJob provides a copy of the allocation but doesn't deep copy the
// job; the returned allocation points at the same Job as the receiver.
func (a *Allocation) CopySkipJob() *Allocation {
	return a.copyImpl(false)
}
  4323  
  4324  func (a *Allocation) copyImpl(job bool) *Allocation {
  4325  	if a == nil {
  4326  		return nil
  4327  	}
  4328  	na := new(Allocation)
  4329  	*na = *a
  4330  
  4331  	if job {
  4332  		na.Job = na.Job.Copy()
  4333  	}
  4334  
  4335  	na.Resources = na.Resources.Copy()
  4336  	na.SharedResources = na.SharedResources.Copy()
  4337  
  4338  	if a.TaskResources != nil {
  4339  		tr := make(map[string]*Resources, len(na.TaskResources))
  4340  		for task, resource := range na.TaskResources {
  4341  			tr[task] = resource.Copy()
  4342  		}
  4343  		na.TaskResources = tr
  4344  	}
  4345  
  4346  	na.Metrics = na.Metrics.Copy()
  4347  	na.DeploymentStatus = na.DeploymentStatus.Copy()
  4348  
  4349  	if a.TaskStates != nil {
  4350  		ts := make(map[string]*TaskState, len(na.TaskStates))
  4351  		for task, state := range na.TaskStates {
  4352  			ts[task] = state.Copy()
  4353  		}
  4354  		na.TaskStates = ts
  4355  	}
  4356  	return na
  4357  }
  4358  
  4359  // TerminalStatus returns if the desired or actual status is terminal and
  4360  // will no longer transition.
  4361  func (a *Allocation) TerminalStatus() bool {
  4362  	// First check the desired state and if that isn't terminal, check client
  4363  	// state.
  4364  	switch a.DesiredStatus {
  4365  	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
  4366  		return true
  4367  	default:
  4368  	}
  4369  
  4370  	switch a.ClientStatus {
  4371  	case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
  4372  		return true
  4373  	default:
  4374  		return false
  4375  	}
  4376  }
  4377  
  4378  // Terminated returns if the allocation is in a terminal state on a client.
  4379  func (a *Allocation) Terminated() bool {
  4380  	if a.ClientStatus == AllocClientStatusFailed ||
  4381  		a.ClientStatus == AllocClientStatusComplete ||
  4382  		a.ClientStatus == AllocClientStatusLost {
  4383  		return true
  4384  	}
  4385  	return false
  4386  }
  4387  
  4388  // RanSuccessfully returns whether the client has ran the allocation and all
  4389  // tasks finished successfully
  4390  func (a *Allocation) RanSuccessfully() bool {
  4391  	// Handle the case the client hasn't started the allocation.
  4392  	if len(a.TaskStates) == 0 {
  4393  		return false
  4394  	}
  4395  
  4396  	// Check to see if all the tasks finised successfully in the allocation
  4397  	allSuccess := true
  4398  	for _, state := range a.TaskStates {
  4399  		allSuccess = allSuccess && state.Successful()
  4400  	}
  4401  
  4402  	return allSuccess
  4403  }
  4404  
  4405  // ShouldMigrate returns if the allocation needs data migration
  4406  func (a *Allocation) ShouldMigrate() bool {
  4407  	if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict {
  4408  		return false
  4409  	}
  4410  
  4411  	tg := a.Job.LookupTaskGroup(a.TaskGroup)
  4412  
  4413  	// if the task group is nil or the ephemeral disk block isn't present then
  4414  	// we won't migrate
  4415  	if tg == nil || tg.EphemeralDisk == nil {
  4416  		return false
  4417  	}
  4418  
  4419  	// We won't migrate any data is the user hasn't enabled migration or the
  4420  	// disk is not marked as sticky
  4421  	if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky {
  4422  		return false
  4423  	}
  4424  
  4425  	return true
  4426  }
  4427  
  4428  // Stub returns a list stub for the allocation
  4429  func (a *Allocation) Stub() *AllocListStub {
  4430  	return &AllocListStub{
  4431  		ID:                 a.ID,
  4432  		EvalID:             a.EvalID,
  4433  		Name:               a.Name,
  4434  		NodeID:             a.NodeID,
  4435  		JobID:              a.JobID,
  4436  		JobVersion:         a.Job.Version,
  4437  		TaskGroup:          a.TaskGroup,
  4438  		DesiredStatus:      a.DesiredStatus,
  4439  		DesiredDescription: a.DesiredDescription,
  4440  		ClientStatus:       a.ClientStatus,
  4441  		ClientDescription:  a.ClientDescription,
  4442  		TaskStates:         a.TaskStates,
  4443  		DeploymentStatus:   a.DeploymentStatus,
  4444  		CreateIndex:        a.CreateIndex,
  4445  		ModifyIndex:        a.ModifyIndex,
  4446  		CreateTime:         a.CreateTime,
  4447  	}
  4448  }
  4449  
// AllocListStub is used to return a subset of alloc information. It is
// populated by Allocation.Stub; every field mirrors the Allocation field of
// the same name, except JobVersion, which is taken from the allocation's Job.
type AllocListStub struct {
	ID                 string
	EvalID             string
	Name               string
	NodeID             string
	JobID              string
	JobVersion         uint64
	TaskGroup          string
	DesiredStatus      string
	DesiredDescription string
	ClientStatus       string
	ClientDescription  string
	TaskStates         map[string]*TaskState
	DeploymentStatus   *AllocDeploymentStatus
	CreateIndex        uint64
	ModifyIndex        uint64
	CreateTime         int64
}
  4469  
// AllocMetric is used to track various metrics while attempting
// to make an allocation. These are used to debug a job, or to better
// understand the pressure within the system.
type AllocMetric struct {
	// NodesEvaluated is the number of nodes that were evaluated
	NodesEvaluated int

	// NodesFiltered is the number of nodes filtered due to a constraint
	NodesFiltered int

	// NodesAvailable is the number of nodes available for evaluation per DC.
	NodesAvailable map[string]int

	// ClassFiltered is the number of nodes filtered by class
	ClassFiltered map[string]int

	// ConstraintFiltered is the number of failures caused by constraint,
	// keyed by the constraint string passed to FilterNode.
	ConstraintFiltered map[string]int

	// NodesExhausted is the number of nodes skipped due to being
	// exhausted of at least one resource
	NodesExhausted int

	// ClassExhausted is the number of nodes exhausted by class
	ClassExhausted map[string]int

	// DimensionExhausted provides the count by dimension or reason
	DimensionExhausted map[string]int

	// Scores is the scores of the final few nodes remaining
	// for placement. The top score is typically selected.
	Scores map[string]float64

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}
  4513  
  4514  func (a *AllocMetric) Copy() *AllocMetric {
  4515  	if a == nil {
  4516  		return nil
  4517  	}
  4518  	na := new(AllocMetric)
  4519  	*na = *a
  4520  	na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable)
  4521  	na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered)
  4522  	na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered)
  4523  	na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted)
  4524  	na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted)
  4525  	na.Scores = helper.CopyMapStringFloat64(na.Scores)
  4526  	return na
  4527  }
  4528  
  4529  func (a *AllocMetric) EvaluateNode() {
  4530  	a.NodesEvaluated += 1
  4531  }
  4532  
  4533  func (a *AllocMetric) FilterNode(node *Node, constraint string) {
  4534  	a.NodesFiltered += 1
  4535  	if node != nil && node.NodeClass != "" {
  4536  		if a.ClassFiltered == nil {
  4537  			a.ClassFiltered = make(map[string]int)
  4538  		}
  4539  		a.ClassFiltered[node.NodeClass] += 1
  4540  	}
  4541  	if constraint != "" {
  4542  		if a.ConstraintFiltered == nil {
  4543  			a.ConstraintFiltered = make(map[string]int)
  4544  		}
  4545  		a.ConstraintFiltered[constraint] += 1
  4546  	}
  4547  }
  4548  
  4549  func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
  4550  	a.NodesExhausted += 1
  4551  	if node != nil && node.NodeClass != "" {
  4552  		if a.ClassExhausted == nil {
  4553  			a.ClassExhausted = make(map[string]int)
  4554  		}
  4555  		a.ClassExhausted[node.NodeClass] += 1
  4556  	}
  4557  	if dimension != "" {
  4558  		if a.DimensionExhausted == nil {
  4559  			a.DimensionExhausted = make(map[string]int)
  4560  		}
  4561  		a.DimensionExhausted[dimension] += 1
  4562  	}
  4563  }
  4564  
  4565  func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
  4566  	if a.Scores == nil {
  4567  		a.Scores = make(map[string]float64)
  4568  	}
  4569  	key := fmt.Sprintf("%s.%s", node.ID, name)
  4570  	a.Scores[key] = score
  4571  }
  4572  
  4573  // AllocDeploymentStatus captures the status of the allocation as part of the
  4574  // deployment. This can include things like if the allocation has been marked as
  4575  // heatlhy.
  4576  type AllocDeploymentStatus struct {
  4577  	// Healthy marks whether the allocation has been marked healthy or unhealthy
  4578  	// as part of a deployment. It can be unset if it has neither been marked
  4579  	// healthy or unhealthy.
  4580  	Healthy *bool
  4581  
  4582  	// ModifyIndex is the raft index in which the deployment status was last
  4583  	// changed.
  4584  	ModifyIndex uint64
  4585  }
  4586  
  4587  // IsHealthy returns if the allocation is marked as healthy as part of a
  4588  // deployment
  4589  func (a *AllocDeploymentStatus) IsHealthy() bool {
  4590  	if a == nil {
  4591  		return false
  4592  	}
  4593  
  4594  	return a.Healthy != nil && *a.Healthy
  4595  }
  4596  
  4597  // IsUnhealthy returns if the allocation is marked as unhealthy as part of a
  4598  // deployment
  4599  func (a *AllocDeploymentStatus) IsUnhealthy() bool {
  4600  	if a == nil {
  4601  		return false
  4602  	}
  4603  
  4604  	return a.Healthy != nil && !*a.Healthy
  4605  }
  4606  
  4607  func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus {
  4608  	if a == nil {
  4609  		return nil
  4610  	}
  4611  
  4612  	c := new(AllocDeploymentStatus)
  4613  	*c = *a
  4614  
  4615  	if a.Healthy != nil {
  4616  		c.Healthy = helper.BoolToPtr(*a.Healthy)
  4617  	}
  4618  
  4619  	return c
  4620  }
  4621  
// Evaluation statuses, as stored in Evaluation.Status.
//
// Complete, Failed and Cancelled are the terminal statuses (see
// Evaluation.TerminalStatus). Pending is the only status for which
// Evaluation.ShouldEnqueue returns true, and Blocked the only one for which
// Evaluation.ShouldBlock returns true.
//
// Note that the value of EvalStatusCancelled is spelled "canceled" (one "l"),
// unlike the identifier.
const (
	EvalStatusBlocked   = "blocked"
	EvalStatusPending   = "pending"
	EvalStatusComplete  = "complete"
	EvalStatusFailed    = "failed"
	EvalStatusCancelled = "canceled"
)
  4629  
// Evaluation triggers, recorded in Evaluation.TriggeredBy to describe why an
// evaluation was created. For example, NextRollingEval sets
// EvalTriggerRollingUpdate and CreateFailedFollowUpEval sets
// EvalTriggerFailedFollowUp.
const (
	EvalTriggerJobRegister       = "job-register"
	EvalTriggerJobDeregister     = "job-deregister"
	EvalTriggerPeriodicJob       = "periodic-job"
	EvalTriggerNodeUpdate        = "node-update"
	EvalTriggerScheduled         = "scheduled"
	EvalTriggerRollingUpdate     = "rolling-update"
	EvalTriggerDeploymentWatcher = "deployment-watcher"
	EvalTriggerFailedFollowUp    = "failed-follow-up"
	EvalTriggerMaxPlans          = "max-plan-attempts"
)
  4641  
// Identifiers of the internal garbage-collection ("core") jobs.
// NOTE(review): where these strings are consumed is not visible in this part
// of the file; presumably the core scheduler dispatches on them — confirm.
const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
	// periodically scan garbage collectible jobs and check if both their
	// evaluations and allocations are terminal. If so, we delete these out of
	// the system.
	CoreJobJobGC = "job-gc"

	// CoreJobDeploymentGC is used for the garbage collection of eligible
	// deployments. We periodically scan garbage collectible deployments and
	// check if they are terminal. If so, we delete these out of the system.
	CoreJobDeploymentGC = "deployment-gc"

	// CoreJobForceGC is used to force garbage collection of all GCable objects.
	CoreJobForceGC = "force-gc"
)
  4668  
// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	// The follow-up constructors on this type set it to one of the
	// EvalTrigger* constants.
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// DeploymentID is the ID of the deployment that triggered the evaluation.
	DeploymentID string

	// Status of the evaluation. TerminalStatus, ShouldEnqueue and
	// ShouldBlock interpret it as one of the EvalStatus* constants.
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade.
	Wait time.Duration

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	PreviousEval string

	// BlockedEval is the evaluation ID for a created blocked eval. A
	// blocked eval will be created if all allocations could not be placed due
	// to constraints or lacking resources.
	BlockedEval string

	// FailedTGAllocs are task groups which have allocations that could not be
	// made, but the metrics are persisted so that the user can use the feedback
	// to determine the cause.
	FailedTGAllocs map[string]*AllocMetric

	// ClassEligibility tracks computed node classes that have been explicitly
	// marked as eligible or ineligible.
	ClassEligibility map[string]bool

	// EscapedComputedClass marks whether the job has constraints that are not
	// captured by computed node classes.
	EscapedComputedClass bool

	// AnnotatePlan triggers the scheduler to provide additional annotations
	// during the evaluation. This should not be set during normal operations.
	AnnotatePlan bool

	// QueuedAllocations is the number of unplaced allocations at the time the
	// evaluation was processed. The map is keyed by Task Group names.
	QueuedAllocations map[string]int

	// SnapshotIndex is the Raft index of the snapshot used to process the
	// evaluation. As such it will only be set once it has gone through the
	// scheduler.
	SnapshotIndex uint64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  4762  
  4763  // TerminalStatus returns if the current status is terminal and
  4764  // will no longer transition.
  4765  func (e *Evaluation) TerminalStatus() bool {
  4766  	switch e.Status {
  4767  	case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled:
  4768  		return true
  4769  	default:
  4770  		return false
  4771  	}
  4772  }
  4773  
  4774  func (e *Evaluation) GoString() string {
  4775  	return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID)
  4776  }
  4777  
  4778  func (e *Evaluation) Copy() *Evaluation {
  4779  	if e == nil {
  4780  		return nil
  4781  	}
  4782  	ne := new(Evaluation)
  4783  	*ne = *e
  4784  
  4785  	// Copy ClassEligibility
  4786  	if e.ClassEligibility != nil {
  4787  		classes := make(map[string]bool, len(e.ClassEligibility))
  4788  		for class, elig := range e.ClassEligibility {
  4789  			classes[class] = elig
  4790  		}
  4791  		ne.ClassEligibility = classes
  4792  	}
  4793  
  4794  	// Copy FailedTGAllocs
  4795  	if e.FailedTGAllocs != nil {
  4796  		failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs))
  4797  		for tg, metric := range e.FailedTGAllocs {
  4798  			failedTGs[tg] = metric.Copy()
  4799  		}
  4800  		ne.FailedTGAllocs = failedTGs
  4801  	}
  4802  
  4803  	// Copy queued allocations
  4804  	if e.QueuedAllocations != nil {
  4805  		queuedAllocations := make(map[string]int, len(e.QueuedAllocations))
  4806  		for tg, num := range e.QueuedAllocations {
  4807  			queuedAllocations[tg] = num
  4808  		}
  4809  		ne.QueuedAllocations = queuedAllocations
  4810  	}
  4811  
  4812  	return ne
  4813  }
  4814  
  4815  // ShouldEnqueue checks if a given evaluation should be enqueued into the
  4816  // eval_broker
  4817  func (e *Evaluation) ShouldEnqueue() bool {
  4818  	switch e.Status {
  4819  	case EvalStatusPending:
  4820  		return true
  4821  	case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled:
  4822  		return false
  4823  	default:
  4824  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  4825  	}
  4826  }
  4827  
  4828  // ShouldBlock checks if a given evaluation should be entered into the blocked
  4829  // eval tracker.
  4830  func (e *Evaluation) ShouldBlock() bool {
  4831  	switch e.Status {
  4832  	case EvalStatusBlocked:
  4833  		return true
  4834  	case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled:
  4835  		return false
  4836  	default:
  4837  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  4838  	}
  4839  }
  4840  
  4841  // MakePlan is used to make a plan from the given evaluation
  4842  // for a given Job
  4843  func (e *Evaluation) MakePlan(j *Job) *Plan {
  4844  	p := &Plan{
  4845  		EvalID:         e.ID,
  4846  		Priority:       e.Priority,
  4847  		Job:            j,
  4848  		NodeUpdate:     make(map[string][]*Allocation),
  4849  		NodeAllocation: make(map[string][]*Allocation),
  4850  	}
  4851  	if j != nil {
  4852  		p.AllAtOnce = j.AllAtOnce
  4853  	}
  4854  	return p
  4855  }
  4856  
  4857  // NextRollingEval creates an evaluation to followup this eval for rolling updates
  4858  func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
  4859  	return &Evaluation{
  4860  		ID:             GenerateUUID(),
  4861  		Priority:       e.Priority,
  4862  		Type:           e.Type,
  4863  		TriggeredBy:    EvalTriggerRollingUpdate,
  4864  		JobID:          e.JobID,
  4865  		JobModifyIndex: e.JobModifyIndex,
  4866  		Status:         EvalStatusPending,
  4867  		Wait:           wait,
  4868  		PreviousEval:   e.ID,
  4869  	}
  4870  }
  4871  
  4872  // CreateBlockedEval creates a blocked evaluation to followup this eval to place any
  4873  // failed allocations. It takes the classes marked explicitly eligible or
  4874  // ineligible and whether the job has escaped computed node classes.
  4875  func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation {
  4876  	return &Evaluation{
  4877  		ID:                   GenerateUUID(),
  4878  		Priority:             e.Priority,
  4879  		Type:                 e.Type,
  4880  		TriggeredBy:          e.TriggeredBy,
  4881  		JobID:                e.JobID,
  4882  		JobModifyIndex:       e.JobModifyIndex,
  4883  		Status:               EvalStatusBlocked,
  4884  		PreviousEval:         e.ID,
  4885  		ClassEligibility:     classEligibility,
  4886  		EscapedComputedClass: escaped,
  4887  	}
  4888  }
  4889  
  4890  // CreateFailedFollowUpEval creates a follow up evaluation when the current one
  4891  // has been marked as failed becasue it has hit the delivery limit and will not
  4892  // be retried by the eval_broker.
  4893  func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation {
  4894  	return &Evaluation{
  4895  		ID:             GenerateUUID(),
  4896  		Priority:       e.Priority,
  4897  		Type:           e.Type,
  4898  		TriggeredBy:    EvalTriggerFailedFollowUp,
  4899  		JobID:          e.JobID,
  4900  		JobModifyIndex: e.JobModifyIndex,
  4901  		Status:         EvalStatusPending,
  4902  		Wait:           wait,
  4903  		PreviousEval:   e.ID,
  4904  	}
  4905  }
  4906  
// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// Job is the parent job of all the allocations in the Plan.
	// Since a Plan only involves a single Job, we can reduce the size
	// of the plan by only including it once.
	Job *Job

	// NodeUpdate contains all the allocations for each node. For each node,
	// this is a list of the allocations to update to either stop or evict.
	// It is keyed by node ID; AppendUpdate and PopUpdate maintain it.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	// It is keyed by node ID; AppendAlloc appends to it.
	NodeAllocation map[string][]*Allocation

	// Annotations contains annotations by the scheduler to be used by operators
	// to understand the decisions made by the scheduler.
	Annotations *PlanAnnotations

	// Deployment is the deployment created or updated by the scheduler that
	// should be applied by the planner.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate
}
  4956  
  4957  // AppendUpdate marks the allocation for eviction. The clientStatus of the
  4958  // allocation may be optionally set by passing in a non-empty value.
  4959  func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) {
  4960  	newAlloc := new(Allocation)
  4961  	*newAlloc = *alloc
  4962  
  4963  	// If the job is not set in the plan we are deregistering a job so we
  4964  	// extract the job from the allocation.
  4965  	if p.Job == nil && newAlloc.Job != nil {
  4966  		p.Job = newAlloc.Job
  4967  	}
  4968  
  4969  	// Normalize the job
  4970  	newAlloc.Job = nil
  4971  
  4972  	// Strip the resources as it can be rebuilt.
  4973  	newAlloc.Resources = nil
  4974  
  4975  	newAlloc.DesiredStatus = desiredStatus
  4976  	newAlloc.DesiredDescription = desiredDesc
  4977  
  4978  	if clientStatus != "" {
  4979  		newAlloc.ClientStatus = clientStatus
  4980  	}
  4981  
  4982  	node := alloc.NodeID
  4983  	existing := p.NodeUpdate[node]
  4984  	p.NodeUpdate[node] = append(existing, newAlloc)
  4985  }
  4986  
  4987  func (p *Plan) PopUpdate(alloc *Allocation) {
  4988  	existing := p.NodeUpdate[alloc.NodeID]
  4989  	n := len(existing)
  4990  	if n > 0 && existing[n-1].ID == alloc.ID {
  4991  		existing = existing[:n-1]
  4992  		if len(existing) > 0 {
  4993  			p.NodeUpdate[alloc.NodeID] = existing
  4994  		} else {
  4995  			delete(p.NodeUpdate, alloc.NodeID)
  4996  		}
  4997  	}
  4998  }
  4999  
  5000  func (p *Plan) AppendAlloc(alloc *Allocation) {
  5001  	node := alloc.NodeID
  5002  	existing := p.NodeAllocation[node]
  5003  	p.NodeAllocation[node] = append(existing, alloc)
  5004  }
  5005  
  5006  // IsNoOp checks if this plan would do nothing
  5007  func (p *Plan) IsNoOp() bool {
  5008  	return len(p.NodeUpdate) == 0 &&
  5009  		len(p.NodeAllocation) == 0 &&
  5010  		p.Deployment == nil &&
  5011  		len(p.DeploymentUpdates) == 0
  5012  }
  5013  
// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	// FullCommit looks entries up by the same keys as Plan.NodeAllocation.
	NodeAllocation map[string][]*Allocation

	// Deployment is the deployment that was committed.
	Deployment *Deployment

	// DeploymentUpdates is the set of deployment updates that were committed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// over committed) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}
  5038  
  5039  // IsNoOp checks if this plan result would do nothing
  5040  func (p *PlanResult) IsNoOp() bool {
  5041  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 &&
  5042  		len(p.DeploymentUpdates) == 0 && p.Deployment == nil
  5043  }
  5044  
  5045  // FullCommit is used to check if all the allocations in a plan
  5046  // were committed as part of the result. Returns if there was
  5047  // a match, and the number of expected and actual allocations.
  5048  func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
  5049  	expected := 0
  5050  	actual := 0
  5051  	for name, allocList := range plan.NodeAllocation {
  5052  		didAlloc, _ := p.NodeAllocation[name]
  5053  		expected += len(allocList)
  5054  		actual += len(didAlloc)
  5055  	}
  5056  	return actual == expected, expected, actual
  5057  }
  5058  
// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators.
type PlanAnnotations struct {
	// DesiredTGUpdates is the set of desired updates per task group.
	// NOTE(review): presumably keyed by task group name — confirm with the
	// scheduler code that populates it.
	DesiredTGUpdates map[string]*DesiredUpdates
}
  5065  
  5066  // DesiredUpdates is the set of changes the scheduler would like to make given
  5067  // sufficient resources and cluster capacity.
  5068  type DesiredUpdates struct {
  5069  	Ignore            uint64
  5070  	Place             uint64
  5071  	Migrate           uint64
  5072  	Stop              uint64
  5073  	InPlaceUpdate     uint64
  5074  	DestructiveUpdate uint64
  5075  	Canary            uint64
  5076  }
  5077  
  5078  func (d *DesiredUpdates) GoString() string {
  5079  	return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
  5080  		d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
  5081  }
  5082  
// MsgpackHandle is a shared handle for encoding/decoding of structs. Encode
// and Decode in this file both use it. It is built once, at package
// initialization, with RawToString enabled.
var MsgpackHandle = func() *codec.MsgpackHandle {
	h := &codec.MsgpackHandle{RawToString: true}

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
	return h
}()
  5093  
var (
	// JsonHandle and JsonHandlePretty are the codec handles to JSON encode
	// structs. The pretty handle will add indents for easier human consumption.
	// Both set HTMLCharsAsIs; the pretty handle additionally sets Indent to 4.
	JsonHandle = &codec.JsonHandle{
		HTMLCharsAsIs: true,
	}
	JsonHandlePretty = &codec.JsonHandle{
		HTMLCharsAsIs: true,
		Indent:        4,
	}
)
  5105  
// HashiMsgpackHandle is configured the same way as MsgpackHandle (RawToString
// enabled, map[string]interface{} as the default map type) but is a handle
// from the hashicorp/go-msgpack codec package rather than ugorji's.
var HashiMsgpackHandle = func() *hcodec.MsgpackHandle {
	h := &hcodec.MsgpackHandle{RawToString: true}

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
	return h
}()
  5115  
  5116  // Decode is used to decode a MsgPack encoded object
  5117  func Decode(buf []byte, out interface{}) error {
  5118  	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
  5119  }
  5120  
  5121  // Encode is used to encode a MsgPack object with type prefix
  5122  func Encode(t MessageType, msg interface{}) ([]byte, error) {
  5123  	var buf bytes.Buffer
  5124  	buf.WriteByte(uint8(t))
  5125  	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
  5126  	return buf.Bytes(), err
  5127  }
  5128  
// KeyringResponse is a unified key response and can be used for install,
// remove, use, as well as listing key queries.
//
// NOTE(review): the fields are not populated anywhere in this part of the
// file; the descriptions below are inferred from the names and should be
// confirmed against the serf keyring code.
type KeyringResponse struct {
	// Messages presumably holds per-node messages returned by the operation.
	Messages map[string]string
	// Keys presumably maps each key to the number of nodes that have it.
	Keys     map[string]int
	// NumNodes presumably is the number of nodes that took part in the query.
	NumNodes int
}
  5136  
// KeyringRequest is the request object for serf key operations.
type KeyringRequest struct {
	// Key is the key the operation applies to.
	Key string
}
  5141  
  5142  // RecoverableError wraps an error and marks whether it is recoverable and could
  5143  // be retried or it is fatal.
  5144  type RecoverableError struct {
  5145  	Err         string
  5146  	Recoverable bool
  5147  }
  5148  
  5149  // NewRecoverableError is used to wrap an error and mark it as recoverable or
  5150  // not.
  5151  func NewRecoverableError(e error, recoverable bool) error {
  5152  	if e == nil {
  5153  		return nil
  5154  	}
  5155  
  5156  	return &RecoverableError{
  5157  		Err:         e.Error(),
  5158  		Recoverable: recoverable,
  5159  	}
  5160  }
  5161  
  5162  // WrapRecoverable wraps an existing error in a new RecoverableError with a new
  5163  // message. If the error was recoverable before the returned error is as well;
  5164  // otherwise it is unrecoverable.
  5165  func WrapRecoverable(msg string, err error) error {
  5166  	return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)}
  5167  }
  5168  
  5169  func (r *RecoverableError) Error() string {
  5170  	return r.Err
  5171  }
  5172  
  5173  func (r *RecoverableError) IsRecoverable() bool {
  5174  	return r.Recoverable
  5175  }
  5176  
  5177  // Recoverable is an interface for errors to implement to indicate whether or
  5178  // not they are fatal or recoverable.
  5179  type Recoverable interface {
  5180  	error
  5181  	IsRecoverable() bool
  5182  }
  5183  
  5184  // IsRecoverable returns true if error is a RecoverableError with
  5185  // Recoverable=true. Otherwise false is returned.
  5186  func IsRecoverable(e error) bool {
  5187  	if re, ok := e.(Recoverable); ok {
  5188  		return re.IsRecoverable()
  5189  	}
  5190  	return false
  5191  }