github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/nomad/structs/structs.go

github.com/blixtra/nomad@v0.7.2-0.20171221000451-da9a1d7bb050/nomad/structs/structs.go (about)

     1  package structs
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/md5"
     6  	"crypto/sha1"
     7  	"crypto/sha256"
     8  	"crypto/sha512"
     9  	"encoding/base32"
    10  	"encoding/hex"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"net"
    15  	"os"
    16  	"path/filepath"
    17  	"reflect"
    18  	"regexp"
    19  	"sort"
    20  	"strconv"
    21  	"strings"
    22  	"time"
    23  
    24  	"golang.org/x/crypto/blake2b"
    25  
    26  	"github.com/gorhill/cronexpr"
    27  	"github.com/hashicorp/consul/api"
    28  	multierror "github.com/hashicorp/go-multierror"
    29  	"github.com/hashicorp/go-version"
    30  	"github.com/hashicorp/nomad/acl"
    31  	"github.com/hashicorp/nomad/helper"
    32  	"github.com/hashicorp/nomad/helper/args"
    33  	"github.com/hashicorp/nomad/helper/uuid"
    34  	"github.com/mitchellh/copystructure"
    35  	"github.com/ugorji/go/codec"
    36  
    37  	hcodec "github.com/hashicorp/go-msgpack/codec"
    38  )
    39  
    40  var (
    41  	ErrNoLeader         = fmt.Errorf("No cluster leader")
    42  	ErrNoRegionPath     = fmt.Errorf("No path to region")
    43  	ErrTokenNotFound    = errors.New("ACL token not found")
    44  	ErrPermissionDenied = errors.New("Permission denied")
    45  
    46  	// validPolicyName is used to validate a policy name
    47  	validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$")
    48  
    49  	// b32 is a lowercase base32 encoding for use in URL friendly service hashes
    50  	b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567"))
    51  )
    52  
// MessageType is a one-byte tag identifying the kind of request a message
// carries. IgnoreUnknownTypeFlag (128, the top bit of the byte) is set along
// with a MessageType, so the values below need to stay under 128.
type MessageType uint8

// Request message types. Values are assigned sequentially by iota starting at
// zero, so the position of each name in this list determines its value:
// inserting or removing an entry in the middle shifts every later value.
//
// NOTE(review): these values are presumably persisted and/or sent between
// servers — confirm before reordering; new types should go at the end.
//
// NOTE: VaultAccessorDegisterRequestType is a misspelling of "Deregister".
// The identifier is exported, so it is left as is.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	VaultAccessorDegisterRequestType
	ApplyPlanResultsRequestType
	DeploymentStatusUpdateRequestType
	DeploymentPromoteRequestType
	DeploymentAllocHealthRequestType
	DeploymentDeleteRequestType
	JobStabilityRequestType
	ACLPolicyUpsertRequestType
	ACLPolicyDeleteRequestType
	ACLTokenUpsertRequestType
	ACLTokenDeleteRequestType
	ACLTokenBootstrapRequestType
)
    81  
const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	// Its value, 128, is the top bit of the uint8 MessageType.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	// ProtocolVersion, APIMajorVersion and APIMinorVersion are string keys.
	// NOTE(review): presumably the keys used in VersionResponse.Versions —
	// confirm against the Status.Version endpoint.
	//
	// Do not confuse APIMajorVersion / APIMinorVersion (these string keys)
	// with ApiMajorVersion / ApiMinorVersion (the numeric versions above);
	// the identifiers differ only in letter case.
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"

	// GetterModeAny, GetterModeFile and GetterModeDir are mode names.
	// NOTE(review): presumably the accepted values of an artifact getter
	// mode setting — confirm at the point of use.
	GetterModeAny  = "any"
	GetterModeFile = "file"
	GetterModeDir  = "dir"

	// maxPolicyDescriptionLength limits a policy description length
	maxPolicyDescriptionLength = 256

	// maxTokenNameLength limits an ACL token name length
	maxTokenNameLength = 64

	// ACLClientToken and ACLManagementToken are the only types of tokens
	ACLClientToken     = "client"
	ACLManagementToken = "management"

	// DefaultNamespace is the default namespace. It is what
	// QueryOptions.RequestNamespace and WriteRequest.RequestNamespace return
	// when no namespace was set on the request.
	// DefaultNamespaceDescription is its human-readable description.
	DefaultNamespace            = "default"
	DefaultNamespaceDescription = "Default shared namespace"
)
   123  
// Context defines the scope in which a search for a Nomad object operates,
// and is also used to query the matching index value for this context.
type Context string

// Known search contexts. All selects every context rather than naming a
// single object kind.
//
// NOTE: the Deployments value is the singular string "deployment", while
// every other per-object value is plural. The string is runtime data, so it
// is left unchanged here.
const (
	Allocs      Context = "allocs"
	Deployments Context = "deployment"
	Evals       Context = "evals"
	Jobs        Context = "jobs"
	Nodes       Context = "nodes"
	Namespaces  Context = "namespaces"
	Quotas      Context = "quotas"
	All         Context = "all"
)
   138  
// NamespacedID is a tuple of an ID and a namespace. Both fields are plain
// strings, so values can be compared with == and used as map keys.
type NamespacedID struct {
	ID        string
	Namespace string
}
   144  
// RPCInfo is used to describe common information about a query. Both
// QueryOptions and WriteRequest in this file provide these three methods.
type RPCInfo interface {
	// RequestRegion returns the region the request is targeted at.
	RequestRegion() string
	// IsRead reports whether the request is a read rather than a write.
	IsRead() bool
	// AllowStaleRead reports whether the request accepts a stale read.
	AllowStaleRead() bool
}
   151  
   152  // QueryOptions is used to specify various flags for read queries
   153  type QueryOptions struct {
   154  	// The target region for this query
   155  	Region string
   156  
   157  	// Namespace is the target namespace for the query.
   158  	Namespace string
   159  
   160  	// If set, wait until query exceeds given index. Must be provided
   161  	// with MaxQueryTime.
   162  	MinQueryIndex uint64
   163  
   164  	// Provided with MinQueryIndex to wait for change.
   165  	MaxQueryTime time.Duration
   166  
   167  	// If set, any follower can service the request. Results
   168  	// may be arbitrarily stale.
   169  	AllowStale bool
   170  
   171  	// If set, used as prefix for resource list searches
   172  	Prefix string
   173  
   174  	// AuthToken is secret portion of the ACL token used for the request
   175  	AuthToken string
   176  }
   177  
   178  func (q QueryOptions) RequestRegion() string {
   179  	return q.Region
   180  }
   181  
   182  func (q QueryOptions) RequestNamespace() string {
   183  	if q.Namespace == "" {
   184  		return DefaultNamespace
   185  	}
   186  	return q.Namespace
   187  }
   188  
   189  // QueryOption only applies to reads, so always true
   190  func (q QueryOptions) IsRead() bool {
   191  	return true
   192  }
   193  
   194  func (q QueryOptions) AllowStaleRead() bool {
   195  	return q.AllowStale
   196  }
   197  
   198  type WriteRequest struct {
   199  	// The target region for this write
   200  	Region string
   201  
   202  	// Namespace is the target namespace for the write.
   203  	Namespace string
   204  
   205  	// AuthToken is secret portion of the ACL token used for the request
   206  	AuthToken string
   207  }
   208  
   209  func (w WriteRequest) RequestRegion() string {
   210  	// The target region for this request
   211  	return w.Region
   212  }
   213  
   214  func (w WriteRequest) RequestNamespace() string {
   215  	if w.Namespace == "" {
   216  		return DefaultNamespace
   217  	}
   218  	return w.Namespace
   219  }
   220  
   221  // WriteRequest only applies to writes, always false
   222  func (w WriteRequest) IsRead() bool {
   223  	return false
   224  }
   225  
   226  func (w WriteRequest) AllowStaleRead() bool {
   227  	return false
   228  }
   229  
// QueryMeta allows a query response to include potentially
// useful metadata about a query. It is embedded in the read response types
// in this file.
type QueryMeta struct {
	// Index is the index associated with the read.
	Index uint64

	// LastContact, if AllowStale is used, is the time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// KnownLeader indicates whether there is a known leader node.
	KnownLeader bool
}
   244  
// WriteMeta allows a write response to include potentially
// useful metadata about the write. It is embedded in write response types
// such as GenericResponse.
type WriteMeta struct {
	// Index is the index associated with the write.
	Index uint64
}
   251  
// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
// Node is the node to register; the embedded WriteRequest supplies the
// target region, namespace and auth token.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}
   258  
// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
// NodeID names the node to deregister; the embedded WriteRequest supplies
// the target region, namespace and auth token.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}
   265  
// NodeServerInfo is used in NodeUpdateResponse (its Servers field) to return
// Nomad server information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to
	Datacenter string
}
   284  
// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
// NodeID names the node and Status is the status to set; the embedded
// WriteRequest supplies the target region, namespace and auth token.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	WriteRequest
}
   292  
// NodeUpdateDrainRequest is used for updating the drain status of a node.
// NodeID names the node and Drain is the drain flag to apply; the embedded
// WriteRequest supplies the target region, namespace and auth token.
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}
   299  
// NodeEvaluateRequest is used to re-evaluate the node named by NodeID.
// The embedded WriteRequest supplies the target region, namespace and auth
// token.
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}
   305  
// NodeSpecificRequest is used when we just need to specify a target node.
// NodeID names the node; the embedded QueryOptions supplies the standard
// read-query parameters.
//
// NOTE(review): SecretID is presumably the node's secret used to
// authenticate the caller — confirm against the endpoints that read it.
type NodeSpecificRequest struct {
	NodeID   string
	SecretID string
	QueryOptions
}
   312  
// SearchResponse is used to return matches and information about whether
// the match list is truncated specific to each type of context.
type SearchResponse struct {
	// Matches maps each context type to the ids which match the specified
	// prefix.
	Matches map[Context][]string

	// Truncations indicates whether the matches for a particular context have
	// been truncated
	Truncations map[Context]bool

	// QueryMeta carries the index, last-contact and known-leader metadata of
	// the read.
	QueryMeta
}
   325  
// SearchRequest is used to parameterize a search request. The search returns
// a list of matches made up of jobs, allocations, evaluations, and/or nodes,
// along with whether or not the information returned is truncated.
type SearchRequest struct {
	// Prefix is what ids are matched to. I.e, if the given prefix were
	// "a", potential matches might be "abcd" or "aabb"
	Prefix string

	// Context is the type that can be matched against. A context can be a job,
	// node, evaluation, allocation, or empty (indicating every context should
	// be matched)
	Context Context

	// QueryOptions supplies the standard read-query parameters. Note that it
	// has its own Prefix field, which is shadowed by the Prefix field above
	// when accessed through a SearchRequest value.
	QueryOptions
}
   341  
// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	// Job is the job to register.
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current Jobs index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	// PolicyOverride is set when the user is attempting to override any policies
	PolicyOverride bool

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
   358  
// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	// JobID names the job to deregister.
	JobID string

	// Purge controls whether the deregister purges the job from the system or
	// whether the job is just marked as stopped and will be removed by the
	// garbage collector
	Purge bool

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
   371  
// JobEvaluateRequest is used when we just need to re-evaluate a target job,
// named by JobID. The embedded WriteRequest supplies the target region,
// namespace and auth token.
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}
   377  
// JobSpecificRequest is used when we just need to specify a target job,
// named by JobID. The embedded QueryOptions supplies the standard read-query
// parameters.
//
// NOTE(review): AllAllocs presumably widens allocation listings to include
// allocations from earlier instances of the job — confirm at the endpoint.
type JobSpecificRequest struct {
	JobID     string
	AllAllocs bool
	QueryOptions
}
   384  
// JobListRequest is used to parameterize a job list request. It has no
// fields of its own; everything comes from the embedded QueryOptions.
type JobListRequest struct {
	QueryOptions
}
   389  
// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job. Job is the job to plan; the embedded WriteRequest
// supplies the target region, namespace and auth token.
type JobPlanRequest struct {
	Job  *Job
	Diff bool // Toggles an annotated diff
	// PolicyOverride is set when the user is attempting to override any policies
	PolicyOverride bool
	WriteRequest
}
   399  
// JobSummaryRequest is used when we just need to get a specific job summary,
// for the job named by JobID. The embedded QueryOptions supplies the standard
// read-query parameters.
type JobSummaryRequest struct {
	JobID string
	QueryOptions
}
   405  
// JobDispatchRequest is used to dispatch a job based on a parameterized job.
// JobID names the parameterized job. The embedded WriteRequest supplies the
// target region, namespace and auth token.
//
// NOTE(review): Payload and Meta are presumably the opaque payload and the
// metadata key/value pairs handed to the dispatched instance — confirm
// against the dispatch endpoint.
type JobDispatchRequest struct {
	JobID   string
	Payload []byte
	Meta    map[string]string
	WriteRequest
}
   413  
// JobValidateRequest is used to validate a job. Job is the job to validate;
// the embedded WriteRequest supplies the target region, namespace and auth
// token.
type JobValidateRequest struct {
	Job *Job
	WriteRequest
}
   419  
// JobRevertRequest is used to revert a job to a prior version.
type JobRevertRequest struct {
	// JobID is the ID of the job being reverted
	JobID string

	// JobVersion is the version to revert to.
	JobVersion uint64

	// EnforcePriorVersion if set will enforce that the job is at the given
	// version before reverting. It is a pointer so that "not set" (nil) can
	// be told apart from version zero.
	EnforcePriorVersion *uint64

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
   434  
// JobStabilityRequest is used to mark a job version as stable or unstable.
// The embedded WriteRequest supplies the target region, namespace and auth
// token.
type JobStabilityRequest struct {
	// Job and job version to set the stability on
	JobID      string
	JobVersion uint64

	// Stable is the stability value to set
	Stable bool
	WriteRequest
}
   445  
// JobStabilityResponse is the response when marking a job as stable. It
// carries only the embedded WriteMeta (the index of the write).
type JobStabilityResponse struct {
	WriteMeta
}
   450  
// NodeListRequest is used to parameterize a node list request. It has no
// fields of its own; everything comes from the embedded QueryOptions.
type NodeListRequest struct {
	QueryOptions
}
   455  
// EvalUpdateRequest is used for upserting evaluations. Evals holds the
// evaluations to upsert; the embedded WriteRequest supplies the target
// region, namespace and auth token.
//
// NOTE(review): EvalToken is presumably the token proving the caller holds
// the outstanding evaluation — confirm against the Eval endpoints.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}
   462  
// EvalDeleteRequest is used for deleting evaluations. Despite the singular
// wording of the name, it carries two string slices, Evals and Allocs
// (presumably evaluation IDs and allocation IDs to delete together — confirm
// at the call site). The embedded WriteRequest supplies the target region,
// namespace and auth token.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}
   469  
// EvalSpecificRequest is used when we just need to specify a target
// evaluation, named by EvalID. The embedded QueryOptions supplies the
// standard read-query parameters.
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}
   475  
// EvalAckRequest is used to Ack/Nack a specific evaluation, named by EvalID.
// The embedded WriteRequest supplies the target region, namespace and auth
// token.
//
// NOTE(review): Token is presumably the token handed out when the evaluation
// was dequeued (see EvalDequeueResponse.Token) — confirm.
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}
   482  
// EvalDequeueRequest is used when we want to dequeue an evaluation. The
// embedded WriteRequest supplies the target region, namespace and auth token.
//
// NOTE(review): Schedulers is presumably the list of scheduler types the
// caller can handle, Timeout how long the dequeue may block, and
// SchedulerVersion the caller's scheduler version — confirm at the endpoint.
type EvalDequeueRequest struct {
	Schedulers       []string
	Timeout          time.Duration
	SchedulerVersion uint16
	WriteRequest
}
   490  
// EvalListRequest is used to list the evaluations. It has no fields of its
// own; everything comes from the embedded QueryOptions.
type EvalListRequest struct {
	QueryOptions
}
   495  
// PlanRequest is used to submit an allocation plan to the leader. Plan is
// the plan being submitted; the embedded WriteRequest supplies the target
// region, namespace and auth token.
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}
   501  
// ApplyPlanResultsRequest is used by the planner to apply a Raft transaction
// committing the result of a plan.
type ApplyPlanResultsRequest struct {
	// AllocUpdateRequest holds the allocation updates to be made by the
	// scheduler. Because it is embedded, its Alloc, Job and WriteRequest
	// fields are promoted onto this type.
	AllocUpdateRequest

	// Deployment is the deployment created or updated as a result of a
	// scheduling event.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// EvalID is the eval ID of the plan being applied. The modify index of the
	// evaluation is updated as part of applying the plan to ensure that subsequent
	// scheduling events for the same job will wait for the index that last produced
	// state changes. This is necessary for blocked evaluations since they can be
	// processed many times, potentially making state updates, without the state of
	// the evaluation itself being updated.
	EvalID string
}
   526  
// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
   540  
// AllocListRequest is used to request a list of allocations. It has no
// fields of its own; everything comes from the embedded QueryOptions.
type AllocListRequest struct {
	QueryOptions
}
   545  
// AllocSpecificRequest is used to query a specific allocation, named by
// AllocID. The embedded QueryOptions supplies the standard read-query
// parameters.
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}
   551  
// AllocsGetRequest is used to query a set of allocations, named by AllocIDs.
// The embedded QueryOptions supplies the standard read-query parameters.
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}
   557  
// PeriodicForceRequest is used to force a specific periodic job, named by
// JobID. The embedded WriteRequest supplies the target region, namespace and
// auth token.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}
   563  
// ServerMembersResponse has the list of servers in a cluster, in Members.
//
// NOTE(review): ServerName, ServerRegion and ServerDC presumably describe
// the server that answered the request (its name, region and datacenter) —
// confirm where the response is populated.
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
}
   571  
// ServerMember holds information about a Nomad server agent in a cluster:
// its name, network address (Addr and Port), free-form string tags and a
// status string.
//
// NOTE(review): the Protocol{Min,Max,Cur} and Delegate{Min,Max,Cur} fields
// are presumably the minimum, maximum and current protocol versions reported
// by the cluster membership layer — confirm where members are converted into
// this type.
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}
   586  
// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation. AllocID names the allocation and
// Tasks lists the task names. The embedded QueryOptions supplies the
// standard read-query parameters.
//
// NOTE(review): NodeID and SecretID presumably identify and authenticate the
// requesting node — confirm against the endpoint.
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	QueryOptions
}
   596  
// VaultAccessorsRequest is used to operate on a set of Vault accessors.
// Unlike most request types in this file it embeds neither WriteRequest nor
// QueryOptions.
type VaultAccessorsRequest struct {
	Accessors []*VaultAccessor
}
   601  
// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task. AllocID, Task and NodeID name the allocation, task
// and node the token belongs to; Accessor is the reference to the token.
//
// NOTE(review): CreationTTL is a plain int; its unit is not stated here
// (presumably seconds) — confirm where it is set.
type VaultAccessor struct {
	AllocID     string
	Task        string
	NodeID      string
	Accessor    string
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}
   614  
// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token
	Tasks map[string]string

	// Error stores any error that occurred. Errors are stored here so we can
	// communicate whether it is retriable
	Error *RecoverableError

	// QueryMeta carries the index, last-contact and known-leader metadata of
	// the read.
	QueryMeta
}
   626  
// GenericRequest is used for requests where no specific information is
// needed. It carries only the embedded QueryOptions.
type GenericRequest struct {
	QueryOptions
}
   632  
// DeploymentListRequest is used to list the deployments. It has no fields
// of its own; everything comes from the embedded QueryOptions.
type DeploymentListRequest struct {
	QueryOptions
}
   637  
// DeploymentDeleteRequest is used for deleting deployments. Deployments is
// a list of strings (presumably deployment IDs — confirm at the call site).
// The embedded WriteRequest supplies the target region, namespace and auth
// token.
type DeploymentDeleteRequest struct {
	Deployments []string
	WriteRequest
}
   643  
// DeploymentStatusUpdateRequest is used to update the status of a deployment as
// well as optionally creating an evaluation atomically. Unlike most request
// types in this file it does not embed WriteRequest.
type DeploymentStatusUpdateRequest struct {
	// Eval, if set, is used to create an evaluation at the same time as
	// updating the status of a deployment.
	Eval *Evaluation

	// DeploymentUpdate is a status update to apply to the given
	// deployment.
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job
}
   660  
// DeploymentAllocHealthRequest is used to set the health of a set of
// allocations as part of a deployment.
type DeploymentAllocHealthRequest struct {
	// DeploymentID names the deployment the allocations belong to.
	DeploymentID string

	// HealthyAllocationIDs marks these allocations as healthy, allowing
	// further allocations to be rolled.
	HealthyAllocationIDs []string

	// UnhealthyAllocationIDs lists unhealthy allocations; any unhealthy
	// allocation fails the deployment.
	UnhealthyAllocationIDs []string

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
   675  
// ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft
type ApplyDeploymentAllocHealthRequest struct {
	// DeploymentAllocHealthRequest is the original request; its fields are
	// promoted onto this type.
	DeploymentAllocHealthRequest

	// An optional field to update the status of a deployment
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job

	// An optional evaluation to create after promoting the canaries
	// NOTE(review): this wording matches ApplyDeploymentPromoteRequest.Eval;
	// here the evaluation presumably follows the health update rather than
	// a promotion — confirm.
	Eval *Evaluation
}
   691  
// DeploymentPromoteRequest is used to promote task groups in a deployment
type DeploymentPromoteRequest struct {
	// DeploymentID names the deployment to promote.
	DeploymentID string

	// All is to promote all task groups
	All bool

	// Groups is used to set the promotion status per task group
	Groups []string

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
   704  
// ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft
type ApplyDeploymentPromoteRequest struct {
	// DeploymentPromoteRequest is the original request; its fields are
	// promoted onto this type.
	DeploymentPromoteRequest

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}
   712  
// DeploymentPauseRequest is used to pause a deployment
type DeploymentPauseRequest struct {
	// DeploymentID names the deployment whose pause status is being set.
	DeploymentID string

	// Pause sets the pause status
	Pause bool

	// WriteRequest supplies the target region, namespace and auth token.
	WriteRequest
}
   722  
// DeploymentSpecificRequest is used to make a request specific to a particular
// deployment, named by DeploymentID. The embedded QueryOptions supplies the
// standard read-query parameters.
type DeploymentSpecificRequest struct {
	DeploymentID string
	QueryOptions
}
   729  
// DeploymentFailRequest is used to fail a particular deployment, named by
// DeploymentID. The embedded WriteRequest supplies the target region,
// namespace and auth token.
type DeploymentFailRequest struct {
	DeploymentID string
	WriteRequest
}
   735  
// SingleDeploymentResponse is used to respond with a single deployment,
// together with the embedded QueryMeta describing the read.
type SingleDeploymentResponse struct {
	Deployment *Deployment
	QueryMeta
}
   741  
// GenericResponse is used to respond to a request where no
// specific response information is needed. It carries only the embedded
// WriteMeta (the index of the write).
type GenericResponse struct {
	WriteMeta
}
   747  
// VersionResponse is used for the Status.Version response. Build is a build
// string and Versions maps version names to integer version numbers.
//
// NOTE(review): the keys of Versions are presumably the ProtocolVersion,
// APIMajorVersion and APIMinorVersion constants — confirm where the response
// is populated.
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}
   754  
// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	// EvalID and EvalCreateIndex describe an evaluation and JobModifyIndex
	// the job's modify index.
	// NOTE(review): presumably the evaluation created by the registration
	// and the job's index after it — confirm at the endpoint.
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	// QueryMeta is embedded even though registration is a write.
	QueryMeta
}
   767  
// JobDeregisterResponse is used to respond to a job deregistration. It has
// the same EvalID / EvalCreateIndex / JobModifyIndex fields as
// JobRegisterResponse and, like it, embeds QueryMeta.
//
// NOTE(review): the fields presumably describe the evaluation created by the
// deregistration and the job's resulting modify index — confirm.
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}
   775  
// JobValidateResponse is the response from a validate request. It embeds
// neither QueryMeta nor WriteMeta.
type JobValidateResponse struct {
	// DriverConfigValidated indicates whether the agent validated the driver
	// config
	DriverConfigValidated bool

	// ValidationErrors is a list of validation errors
	ValidationErrors []string

	// Error is a string version of any error that may have occurred
	Error string

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string
}
   792  
// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	// NOTE(review): HeartbeatTTL is presumably how long the node may wait
	// before its next heartbeat; EvalIDs / EvalCreateIndex describe
	// evaluations created by the update and NodeModifyIndex the node's
	// resulting modify index — confirm at the endpoint.
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader. If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response. This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	// QueryMeta carries the index, last-contact and known-leader metadata.
	QueryMeta
}
   815  
// NodeDrainUpdateResponse is used to respond to a node drain update. It
// carries the same EvalIDs / EvalCreateIndex / NodeModifyIndex fields as
// NodeUpdateResponse, plus the embedded QueryMeta.
//
// NOTE(review): the fields presumably describe the evaluations created by
// the drain change and the node's resulting modify index — confirm.
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}
   823  
// NodeAllocsResponse is used to return allocs for a single node, together
// with the embedded QueryMeta describing the read.
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}
   829  
// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
	// Allocs maps a string key to a uint64 value.
	// NOTE(review): presumably allocation ID to that allocation's modify
	// index — confirm where the response is populated.
	Allocs map[string]uint64

	// MigrateTokens are used when ACLs are enabled to allow cross node,
	// authenticated access to sticky volumes
	MigrateTokens map[string]string

	// QueryMeta carries the index, last-contact and known-leader metadata of
	// the read.
	QueryMeta
}
   840  
// SingleNodeResponse is used to return a single node, together with the
// embedded QueryMeta describing the read.
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}
   846  
// NodeListResponse is used for a node list request. Nodes holds list stubs
// (NodeListStub) rather than full Node values.
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}
   852  
// SingleJobResponse is used to return a single job, together with the
// embedded QueryMeta describing the read.
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}
   858  
// JobSummaryResponse is used to return a single job summary, together with
// the embedded QueryMeta describing the read.
type JobSummaryResponse struct {
	JobSummary *JobSummary
	QueryMeta
}
   864  
// JobDispatchResponse is the response type paired with JobDispatchRequest.
// It embeds WriteMeta (the index of the write).
//
// NOTE(review): DispatchedJobID is presumably the ID of the job instance
// created by the dispatch, EvalID / EvalCreateIndex the evaluation created
// for it and JobCreateIndex the new job's create index — confirm at the
// dispatch endpoint.
type JobDispatchResponse struct {
	DispatchedJobID string
	EvalID          string
	EvalCreateIndex uint64
	JobCreateIndex  uint64
	WriteMeta
}
   872  
// JobListResponse is used for a job list request. Jobs holds list stubs
// (JobListStub) rather than full Job values.
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}
   878  
// JobVersionsRequest is used to get a job's versions, for the job named by
// JobID. The embedded QueryOptions supplies the standard read-query
// parameters.
//
// NOTE(review): Diffs presumably asks for JobVersionsResponse.Diffs to be
// filled in — confirm at the endpoint.
type JobVersionsRequest struct {
	JobID string
	Diffs bool
	QueryOptions
}
   885  
// JobVersionsResponse is used for a job get versions request. Versions holds
// the job versions and Diffs holds job diffs; the embedded QueryMeta
// describes the read.
type JobVersionsResponse struct {
	Versions []*Job
	Diffs    []*JobDiff
	QueryMeta
}
   892  
// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	// NextPeriodicLaunch is a point in time (a time.Time, not a duration):
	// the time at which the job would next be launched if submitted.
	NextPeriodicLaunch time.Time

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	// WriteMeta carries the index associated with the write.
	WriteMeta
}
   924  
// SingleAllocResponse is used to return a single allocation, together with
// the embedded QueryMeta describing the read.
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}
   930  
// AllocsGetResponse is used to return a set of allocations, together with
// the embedded QueryMeta describing the read.
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}
   936  
// JobAllocationsResponse is used to return the allocations for a job.
type JobAllocationsResponse struct {
	// Allocations lists the job's allocations as list stubs rather than full
	// Allocation objects.
	Allocations []*AllocListStub

	QueryMeta
}
   942  
// JobEvaluationsResponse is used to return the evaluations for a job.
type JobEvaluationsResponse struct {
	// Evaluations is the set of evaluations being returned.
	Evaluations []*Evaluation

	QueryMeta
}
   948  
// SingleEvalResponse is used to return a single evaluation.
type SingleEvalResponse struct {
	// Eval is the evaluation being returned.
	Eval *Evaluation

	QueryMeta
}
   954  
// EvalDequeueResponse is used to return from a dequeue.
type EvalDequeueResponse struct {
	// Eval is the dequeued evaluation. GetWaitIndex treats a nil Eval as a
	// case to handle, so callers should not assume it is always set.
	Eval *Evaluation

	// Token accompanies the dequeued evaluation.
	// NOTE(review): presumably the token used to ack/nack the evaluation —
	// confirm against the eval broker.
	Token string

	// WaitIndex is the Raft index the worker should wait until invoking the
	// scheduler.
	WaitIndex uint64

	QueryMeta
}
   966  
   967  // GetWaitIndex is used to retrieve the Raft index in which state should be at
   968  // or beyond before invoking the scheduler.
   969  func (e *EvalDequeueResponse) GetWaitIndex() uint64 {
   970  	// Prefer the wait index sent. This will be populated on all responses from
   971  	// 0.7.0 and above
   972  	if e.WaitIndex != 0 {
   973  		return e.WaitIndex
   974  	} else if e.Eval != nil {
   975  		return e.Eval.ModifyIndex
   976  	}
   977  
   978  	// This should never happen
   979  	return 1
   980  }
   981  
// PlanResponse is used to return from a PlanRequest.
type PlanResponse struct {
	// Result is the plan result being returned.
	Result *PlanResult

	WriteMeta
}
   987  
// AllocListResponse is used for an allocation list request.
type AllocListResponse struct {
	// Allocations holds the listed allocations as list stubs.
	Allocations []*AllocListStub

	QueryMeta
}
   993  
// DeploymentListResponse is used for a deployment list request.
type DeploymentListResponse struct {
	// Deployments holds the listed deployments.
	Deployments []*Deployment

	QueryMeta
}
   999  
// EvalListResponse is used for an evaluation list request.
type EvalListResponse struct {
	// Evaluations holds the listed evaluations.
	Evaluations []*Evaluation

	QueryMeta
}
  1005  
// EvalAllocationsResponse is used to return the allocations for an evaluation.
type EvalAllocationsResponse struct {
	// Allocations holds the evaluation's allocations as list stubs.
	Allocations []*AllocListStub

	QueryMeta
}
  1011  
// PeriodicForceResponse is used to respond to a periodic job force launch.
type PeriodicForceResponse struct {
	// EvalID and EvalCreateIndex presumably describe the evaluation created
	// by the forced launch — confirm against the periodic endpoint.
	EvalID          string
	EvalCreateIndex uint64

	WriteMeta
}
  1018  
// DeploymentUpdateResponse is used to respond to a deployment change. The
// response will include the modify index of the deployment as well as details
// of any triggered evaluation.
type DeploymentUpdateResponse struct {
	// EvalID and EvalCreateIndex carry the details of the triggered
	// evaluation, if any.
	EvalID          string
	EvalCreateIndex uint64

	// DeploymentModifyIndex is the modify index of the deployment.
	DeploymentModifyIndex uint64

	// RevertedJobVersion is the version the job was reverted to. If unset
	// (nil), the job wasn't reverted.
	RevertedJobVersion *uint64

	WriteMeta
}
  1033  
// Node status values. These are exactly the values ValidNodeStatus accepts.
const (
	// NodeStatusInit and NodeStatusReady do not trigger an evaluation
	// (see ShouldDrainNode).
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"

	// NodeStatusDown triggers an evaluation (see ShouldDrainNode) and is
	// the only status Node.TerminalStatus reports as terminal.
	NodeStatusDown = "down"
)
  1039  
  1040  // ShouldDrainNode checks if a given node status should trigger an
  1041  // evaluation. Some states don't require any further action.
  1042  func ShouldDrainNode(status string) bool {
  1043  	switch status {
  1044  	case NodeStatusInit, NodeStatusReady:
  1045  		return false
  1046  	case NodeStatusDown:
  1047  		return true
  1048  	default:
  1049  		panic(fmt.Sprintf("unhandled node status %s", status))
  1050  	}
  1051  }
  1052  
  1053  // ValidNodeStatus is used to check if a node status is valid
  1054  func ValidNodeStatus(status string) bool {
  1055  	switch status {
  1056  	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
  1057  		return true
  1058  	default:
  1059  		return false
  1060  	}
  1061  }
  1062  
// Node is a representation of a schedulable client node.
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
	TLSEnabled bool

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3". Stub reads the "nomad.version" key from it.
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may be to provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// StatusUpdatedAt is the time stamp at which the state of the node was
	// updated
	StatusUpdatedAt int64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1141  
  1142  // Ready returns if the node is ready for running allocations
  1143  func (n *Node) Ready() bool {
  1144  	return n.Status == NodeStatusReady && !n.Drain
  1145  }
  1146  
  1147  func (n *Node) Copy() *Node {
  1148  	if n == nil {
  1149  		return nil
  1150  	}
  1151  	nn := new(Node)
  1152  	*nn = *n
  1153  	nn.Attributes = helper.CopyMapStringString(nn.Attributes)
  1154  	nn.Resources = nn.Resources.Copy()
  1155  	nn.Reserved = nn.Reserved.Copy()
  1156  	nn.Links = helper.CopyMapStringString(nn.Links)
  1157  	nn.Meta = helper.CopyMapStringString(nn.Meta)
  1158  	return nn
  1159  }
  1160  
  1161  // TerminalStatus returns if the current status is terminal and
  1162  // will no longer transition.
  1163  func (n *Node) TerminalStatus() bool {
  1164  	switch n.Status {
  1165  	case NodeStatusDown:
  1166  		return true
  1167  	default:
  1168  		return false
  1169  	}
  1170  }
  1171  
  1172  // Stub returns a summarized version of the node
  1173  func (n *Node) Stub() *NodeListStub {
  1174  	return &NodeListStub{
  1175  		ID:                n.ID,
  1176  		Datacenter:        n.Datacenter,
  1177  		Name:              n.Name,
  1178  		NodeClass:         n.NodeClass,
  1179  		Version:           n.Attributes["nomad.version"],
  1180  		Drain:             n.Drain,
  1181  		Status:            n.Status,
  1182  		StatusDescription: n.StatusDescription,
  1183  		CreateIndex:       n.CreateIndex,
  1184  		ModifyIndex:       n.ModifyIndex,
  1185  	}
  1186  }
  1187  
// NodeListStub is used to return a subset of node information
// for the node list. It is produced by Node.Stub.
type NodeListStub struct {
	ID         string
	Datacenter string
	Name       string
	NodeClass  string

	// Version is filled from the node's "nomad.version" attribute.
	Version string

	Drain             bool
	Status            string
	StatusDescription string

	// Raft indexes copied from the node.
	CreateIndex uint64
	ModifyIndex uint64
}
  1202  
// Networks is the list of network resources defined for a task on the
// Resources struct.
type Networks []*NetworkResource
  1205  
  1206  // Port assignment and IP for the given label or empty values.
  1207  func (ns Networks) Port(label string) (string, int) {
  1208  	for _, n := range ns {
  1209  		for _, p := range n.ReservedPorts {
  1210  			if p.Label == label {
  1211  				return n.IP, p.Value
  1212  			}
  1213  		}
  1214  		for _, p := range n.DynamicPorts {
  1215  			if p.Label == label {
  1216  				return n.IP, p.Value
  1217  			}
  1218  		}
  1219  	}
  1220  	return "", 0
  1221  }
  1222  
// Resources is used to define the resources available
// on a client
type Resources struct {
	CPU      int
	MemoryMB int

	// DiskMB is measured in units of BytesInMegabyte (see DiskInBytes).
	DiskMB int

	IOPS int

	// Networks is ignored by Superset.
	Networks Networks
}
  1232  
const (
	// BytesInMegabyte is the number of bytes in one megabyte (1024 * 1024).
	BytesInMegabyte = 1024 * 1024
)
  1236  
  1237  // DefaultResources is a small resources object that contains the
  1238  // default resources requests that we will provide to an object.
  1239  // ---  THIS FUNCTION IS REPLICATED IN api/resources.go and should
  1240  // be kept in sync.
  1241  func DefaultResources() *Resources {
  1242  	return &Resources{
  1243  		CPU:      100,
  1244  		MemoryMB: 300,
  1245  		IOPS:     0,
  1246  	}
  1247  }
  1248  
  1249  // MinResources is a small resources object that contains the
  1250  // absolute minimum resources that we will provide to an object.
  1251  // This should not be confused with the defaults which are
  1252  // provided in Canonicalize() ---  THIS FUNCTION IS REPLICATED IN
  1253  // api/resources.go and should be kept in sync.
  1254  func MinResources() *Resources {
  1255  	return &Resources{
  1256  		CPU:      100,
  1257  		MemoryMB: 10,
  1258  		IOPS:     0,
  1259  	}
  1260  }
  1261  
  1262  // DiskInBytes returns the amount of disk resources in bytes.
  1263  func (r *Resources) DiskInBytes() int64 {
  1264  	return int64(r.DiskMB * BytesInMegabyte)
  1265  }
  1266  
  1267  // Merge merges this resource with another resource.
  1268  func (r *Resources) Merge(other *Resources) {
  1269  	if other.CPU != 0 {
  1270  		r.CPU = other.CPU
  1271  	}
  1272  	if other.MemoryMB != 0 {
  1273  		r.MemoryMB = other.MemoryMB
  1274  	}
  1275  	if other.DiskMB != 0 {
  1276  		r.DiskMB = other.DiskMB
  1277  	}
  1278  	if other.IOPS != 0 {
  1279  		r.IOPS = other.IOPS
  1280  	}
  1281  	if len(other.Networks) != 0 {
  1282  		r.Networks = other.Networks
  1283  	}
  1284  }
  1285  
  1286  func (r *Resources) Canonicalize() {
  1287  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
  1288  	// problems since we use reflect DeepEquals.
  1289  	if len(r.Networks) == 0 {
  1290  		r.Networks = nil
  1291  	}
  1292  
  1293  	for _, n := range r.Networks {
  1294  		n.Canonicalize()
  1295  	}
  1296  }
  1297  
  1298  // MeetsMinResources returns an error if the resources specified are less than
  1299  // the minimum allowed.
  1300  // This is based on the minimums defined in the Resources type
  1301  func (r *Resources) MeetsMinResources() error {
  1302  	var mErr multierror.Error
  1303  	minResources := MinResources()
  1304  	if r.CPU < minResources.CPU {
  1305  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU))
  1306  	}
  1307  	if r.MemoryMB < minResources.MemoryMB {
  1308  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB))
  1309  	}
  1310  	if r.IOPS < minResources.IOPS {
  1311  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is %d; got %d", minResources.IOPS, r.IOPS))
  1312  	}
  1313  	for i, n := range r.Networks {
  1314  		if err := n.MeetsMinResources(); err != nil {
  1315  			mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err))
  1316  		}
  1317  	}
  1318  
  1319  	return mErr.ErrorOrNil()
  1320  }
  1321  
  1322  // Copy returns a deep copy of the resources
  1323  func (r *Resources) Copy() *Resources {
  1324  	if r == nil {
  1325  		return nil
  1326  	}
  1327  	newR := new(Resources)
  1328  	*newR = *r
  1329  	if r.Networks != nil {
  1330  		n := len(r.Networks)
  1331  		newR.Networks = make([]*NetworkResource, n)
  1332  		for i := 0; i < n; i++ {
  1333  			newR.Networks[i] = r.Networks[i].Copy()
  1334  		}
  1335  	}
  1336  	return newR
  1337  }
  1338  
  1339  // NetIndex finds the matching net index using device name
  1340  func (r *Resources) NetIndex(n *NetworkResource) int {
  1341  	for idx, net := range r.Networks {
  1342  		if net.Device == n.Device {
  1343  			return idx
  1344  		}
  1345  	}
  1346  	return -1
  1347  }
  1348  
  1349  // Superset checks if one set of resources is a superset
  1350  // of another. This ignores network resources, and the NetworkIndex
  1351  // should be used for that.
  1352  func (r *Resources) Superset(other *Resources) (bool, string) {
  1353  	if r.CPU < other.CPU {
  1354  		return false, "cpu"
  1355  	}
  1356  	if r.MemoryMB < other.MemoryMB {
  1357  		return false, "memory"
  1358  	}
  1359  	if r.DiskMB < other.DiskMB {
  1360  		return false, "disk"
  1361  	}
  1362  	if r.IOPS < other.IOPS {
  1363  		return false, "iops"
  1364  	}
  1365  	return true, ""
  1366  }
  1367  
  1368  // Add adds the resources of the delta to this, potentially
  1369  // returning an error if not possible.
  1370  func (r *Resources) Add(delta *Resources) error {
  1371  	if delta == nil {
  1372  		return nil
  1373  	}
  1374  	r.CPU += delta.CPU
  1375  	r.MemoryMB += delta.MemoryMB
  1376  	r.DiskMB += delta.DiskMB
  1377  	r.IOPS += delta.IOPS
  1378  
  1379  	for _, n := range delta.Networks {
  1380  		// Find the matching interface by IP or CIDR
  1381  		idx := r.NetIndex(n)
  1382  		if idx == -1 {
  1383  			r.Networks = append(r.Networks, n.Copy())
  1384  		} else {
  1385  			r.Networks[idx].Add(n)
  1386  		}
  1387  	}
  1388  	return nil
  1389  }
  1390  
  1391  func (r *Resources) GoString() string {
  1392  	return fmt.Sprintf("*%#v", *r)
  1393  }
  1394  
// Port pairs a label with a port value. It is the element type of
// NetworkResource's ReservedPorts and DynamicPorts.
type Port struct {
	// Label is the name the port is looked up by (see Networks.Port and
	// NetworkResource.PortLabels).
	Label string

	// Value is the port number associated with the label.
	Value int
}
  1399  
// NetworkResource is used to represent available network
// resources
type NetworkResource struct {
	Device        string // Name of the device; Resources.NetIndex matches on it
	CIDR          string // CIDR block of addresses
	IP            string // Host IP address
	MBits         int    // Throughput; MeetsMinResources requires at least 1
	ReservedPorts []Port // Host Reserved ports
	DynamicPorts  []Port // Host Dynamically assigned ports
}
  1410  
  1411  func (n *NetworkResource) Canonicalize() {
  1412  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
  1413  	// problems since we use reflect DeepEquals.
  1414  	if len(n.ReservedPorts) == 0 {
  1415  		n.ReservedPorts = nil
  1416  	}
  1417  	if len(n.DynamicPorts) == 0 {
  1418  		n.DynamicPorts = nil
  1419  	}
  1420  }
  1421  
  1422  // MeetsMinResources returns an error if the resources specified are less than
  1423  // the minimum allowed.
  1424  func (n *NetworkResource) MeetsMinResources() error {
  1425  	var mErr multierror.Error
  1426  	if n.MBits < 1 {
  1427  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits))
  1428  	}
  1429  	return mErr.ErrorOrNil()
  1430  }
  1431  
  1432  // Copy returns a deep copy of the network resource
  1433  func (n *NetworkResource) Copy() *NetworkResource {
  1434  	if n == nil {
  1435  		return nil
  1436  	}
  1437  	newR := new(NetworkResource)
  1438  	*newR = *n
  1439  	if n.ReservedPorts != nil {
  1440  		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
  1441  		copy(newR.ReservedPorts, n.ReservedPorts)
  1442  	}
  1443  	if n.DynamicPorts != nil {
  1444  		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
  1445  		copy(newR.DynamicPorts, n.DynamicPorts)
  1446  	}
  1447  	return newR
  1448  }
  1449  
  1450  // Add adds the resources of the delta to this, potentially
  1451  // returning an error if not possible.
  1452  func (n *NetworkResource) Add(delta *NetworkResource) {
  1453  	if len(delta.ReservedPorts) > 0 {
  1454  		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
  1455  	}
  1456  	n.MBits += delta.MBits
  1457  	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
  1458  }
  1459  
  1460  func (n *NetworkResource) GoString() string {
  1461  	return fmt.Sprintf("*%#v", *n)
  1462  }
  1463  
  1464  // PortLabels returns a map of port labels to their assigned host ports.
  1465  func (n *NetworkResource) PortLabels() map[string]int {
  1466  	num := len(n.ReservedPorts) + len(n.DynamicPorts)
  1467  	labelValues := make(map[string]int, num)
  1468  	for _, port := range n.ReservedPorts {
  1469  		labelValues[port.Label] = port.Value
  1470  	}
  1471  	for _, port := range n.DynamicPorts {
  1472  		labelValues[port.Label] = port.Value
  1473  	}
  1474  	return labelValues
  1475  }
  1476  
// Job types. Job.Validate accepts exactly these values.
const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)
  1485  
// Job status values.
const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluations and allocations are terminal
)
  1491  
const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if
	// not specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// Ensure CoreJobPriority is higher than any user
	// specified job so that it gets priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2

	// JobTrackedVersions is the number of historic job versions that are
	// kept.
	JobTrackedVersions = 6
)
  1512  
// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling
// however.
type Job struct {
	// Stop marks whether the user has stopped the job. A stopped job will
	// have all created allocations stopped and acts as a way to stop a job
	// without purging it from the system. This allows existing allocs to be
	// queried and the job to be inspected as it is being killed.
	Stop bool

	// Region is the Nomad region that handles scheduling this job
	Region string

	// Namespace is the namespace the job is submitted into.
	Namespace string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project
	ID string

	// ParentID is the unique identifier of the job that spawned this job.
	ParentID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// COMPAT: Remove in 0.7.0. Stagger is deprecated in 0.6.0.
	Update UpdateStrategy

	// Periodic is used to define the interval the job is run at.
	Periodic *PeriodicConfig

	// ParameterizedJob is used to specify the job as a parameterized job
	// for dispatching.
	ParameterizedJob *ParameterizedJobConfig

	// Payload is the payload supplied when the job was dispatched.
	Payload []byte

	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// VaultToken is the Vault token that proves the submitter of the job has
	// access to the specified Vault policies. This field is only used to
	// transfer the token and is not stored after Job submission.
	VaultToken string

	// Job status
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Stable marks a job as stable. Stability is only defined on "service" and
	// "system" jobs. The stability of a job will be set automatically as part
	// of a deployment and can be manually set via APIs.
	Stable bool

	// Version is a monotonically increasing version number that is
	// incremented on each job register.
	Version uint64

	// SubmitTime is the time at which the job was submitted as a UnixNano in
	// UTC
	SubmitTime int64

	// Raft Indexes
	CreateIndex    uint64
	ModifyIndex    uint64
	JobModifyIndex uint64
}
  1613  
  1614  // Canonicalize is used to canonicalize fields in the Job. This should be called
  1615  // when registering a Job. A set of warnings are returned if the job was changed
  1616  // in anyway that the user should be made aware of.
  1617  func (j *Job) Canonicalize() (warnings error) {
  1618  	if j == nil {
  1619  		return nil
  1620  	}
  1621  
  1622  	var mErr multierror.Error
  1623  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  1624  	// problems since we use reflect DeepEquals.
  1625  	if len(j.Meta) == 0 {
  1626  		j.Meta = nil
  1627  	}
  1628  
  1629  	// Ensure the job is in a namespace.
  1630  	if j.Namespace == "" {
  1631  		j.Namespace = DefaultNamespace
  1632  	}
  1633  
  1634  	for _, tg := range j.TaskGroups {
  1635  		tg.Canonicalize(j)
  1636  	}
  1637  
  1638  	if j.ParameterizedJob != nil {
  1639  		j.ParameterizedJob.Canonicalize()
  1640  	}
  1641  
  1642  	if j.Periodic != nil {
  1643  		j.Periodic.Canonicalize()
  1644  	}
  1645  
  1646  	// COMPAT: Remove in 0.7.0
  1647  	// Rewrite any job that has an update block with pre 0.6.0 syntax.
  1648  	jobHasOldUpdate := j.Update.Stagger > 0 && j.Update.MaxParallel > 0
  1649  	if jobHasOldUpdate && j.Type != JobTypeBatch {
  1650  		// Build an appropriate update block and copy it down to each task group
  1651  		base := DefaultUpdateStrategy.Copy()
  1652  		base.MaxParallel = j.Update.MaxParallel
  1653  		base.MinHealthyTime = j.Update.Stagger
  1654  
  1655  		// Add to each task group, modifying as needed
  1656  		upgraded := false
  1657  		l := len(j.TaskGroups)
  1658  		for _, tg := range j.TaskGroups {
  1659  			// The task group doesn't need upgrading if it has an update block with the new syntax
  1660  			u := tg.Update
  1661  			if u != nil && u.Stagger > 0 && u.MaxParallel > 0 &&
  1662  				u.HealthCheck != "" && u.MinHealthyTime > 0 && u.HealthyDeadline > 0 {
  1663  				continue
  1664  			}
  1665  
  1666  			upgraded = true
  1667  
  1668  			// The MaxParallel for the job should be 10% of the total count
  1669  			// unless there is just one task group then we can infer the old
  1670  			// max parallel should be the new
  1671  			tgu := base.Copy()
  1672  			if l != 1 {
  1673  				// RoundTo 10%
  1674  				var percent float64 = float64(tg.Count) * 0.1
  1675  				tgu.MaxParallel = int(percent + 0.5)
  1676  			}
  1677  
  1678  			// Safety guards
  1679  			if tgu.MaxParallel == 0 {
  1680  				tgu.MaxParallel = 1
  1681  			} else if tgu.MaxParallel > tg.Count {
  1682  				tgu.MaxParallel = tg.Count
  1683  			}
  1684  
  1685  			tg.Update = tgu
  1686  		}
  1687  
  1688  		if upgraded {
  1689  			w := "A best effort conversion to new update stanza introduced in v0.6.0 applied. " +
  1690  				"Please update upgrade stanza before v0.7.0."
  1691  			multierror.Append(&mErr, fmt.Errorf(w))
  1692  		}
  1693  	}
  1694  
  1695  	// Ensure that the batch job doesn't have new style or old style update
  1696  	// stanza. Unfortunately are scanning here because we have to deprecate over
  1697  	// a release so we can't check in the task group since that may be new style
  1698  	// but wouldn't capture the old style and we don't want to have duplicate
  1699  	// warnings.
  1700  	if j.Type == JobTypeBatch {
  1701  		displayWarning := jobHasOldUpdate
  1702  		j.Update.Stagger = 0
  1703  		j.Update.MaxParallel = 0
  1704  		j.Update.HealthCheck = ""
  1705  		j.Update.MinHealthyTime = 0
  1706  		j.Update.HealthyDeadline = 0
  1707  		j.Update.AutoRevert = false
  1708  		j.Update.Canary = 0
  1709  
  1710  		// Remove any update spec from the task groups
  1711  		for _, tg := range j.TaskGroups {
  1712  			if tg.Update != nil {
  1713  				displayWarning = true
  1714  				tg.Update = nil
  1715  			}
  1716  		}
  1717  
  1718  		if displayWarning {
  1719  			w := "Update stanza is disallowed for batch jobs since v0.6.0. " +
  1720  				"The update block has automatically been removed"
  1721  			multierror.Append(&mErr, fmt.Errorf(w))
  1722  		}
  1723  	}
  1724  
  1725  	return mErr.ErrorOrNil()
  1726  }
  1727  
  1728  // Copy returns a deep copy of the Job. It is expected that callers use recover.
  1729  // This job can panic if the deep copy failed as it uses reflection.
  1730  func (j *Job) Copy() *Job {
  1731  	if j == nil {
  1732  		return nil
  1733  	}
  1734  	nj := new(Job)
  1735  	*nj = *j
  1736  	nj.Datacenters = helper.CopySliceString(nj.Datacenters)
  1737  	nj.Constraints = CopySliceConstraints(nj.Constraints)
  1738  
  1739  	if j.TaskGroups != nil {
  1740  		tgs := make([]*TaskGroup, len(nj.TaskGroups))
  1741  		for i, tg := range nj.TaskGroups {
  1742  			tgs[i] = tg.Copy()
  1743  		}
  1744  		nj.TaskGroups = tgs
  1745  	}
  1746  
  1747  	nj.Periodic = nj.Periodic.Copy()
  1748  	nj.Meta = helper.CopyMapStringString(nj.Meta)
  1749  	nj.ParameterizedJob = nj.ParameterizedJob.Copy()
  1750  	return nj
  1751  }
  1752  
  1753  // Validate is used to sanity check a job input
  1754  func (j *Job) Validate() error {
  1755  	var mErr multierror.Error
  1756  
  1757  	if j.Region == "" {
  1758  		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
  1759  	}
  1760  	if j.ID == "" {
  1761  		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
  1762  	} else if strings.Contains(j.ID, " ") {
  1763  		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
  1764  	}
  1765  	if j.Name == "" {
  1766  		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
  1767  	}
  1768  	if j.Namespace == "" {
  1769  		mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace"))
  1770  	}
  1771  	switch j.Type {
  1772  	case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem:
  1773  	case "":
  1774  		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
  1775  	default:
  1776  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type))
  1777  	}
  1778  	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
  1779  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
  1780  	}
  1781  	if len(j.Datacenters) == 0 {
  1782  		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
  1783  	}
  1784  	if len(j.TaskGroups) == 0 {
  1785  		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
  1786  	}
  1787  	for idx, constr := range j.Constraints {
  1788  		if err := constr.Validate(); err != nil {
  1789  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  1790  			mErr.Errors = append(mErr.Errors, outer)
  1791  		}
  1792  	}
  1793  
  1794  	// Check for duplicate task groups
  1795  	taskGroups := make(map[string]int)
  1796  	for idx, tg := range j.TaskGroups {
  1797  		if tg.Name == "" {
  1798  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
  1799  		} else if existing, ok := taskGroups[tg.Name]; ok {
  1800  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
  1801  		} else {
  1802  			taskGroups[tg.Name] = idx
  1803  		}
  1804  
  1805  		if j.Type == "system" && tg.Count > 1 {
  1806  			mErr.Errors = append(mErr.Errors,
  1807  				fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler",
  1808  					tg.Name, tg.Count))
  1809  		}
  1810  	}
  1811  
  1812  	// Validate the task group
  1813  	for _, tg := range j.TaskGroups {
  1814  		if err := tg.Validate(j); err != nil {
  1815  			outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err)
  1816  			mErr.Errors = append(mErr.Errors, outer)
  1817  		}
  1818  	}
  1819  
  1820  	// Validate periodic is only used with batch jobs.
  1821  	if j.IsPeriodic() && j.Periodic.Enabled {
  1822  		if j.Type != JobTypeBatch {
  1823  			mErr.Errors = append(mErr.Errors,
  1824  				fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch))
  1825  		}
  1826  
  1827  		if err := j.Periodic.Validate(); err != nil {
  1828  			mErr.Errors = append(mErr.Errors, err)
  1829  		}
  1830  	}
  1831  
  1832  	if j.IsParameterized() {
  1833  		if j.Type != JobTypeBatch {
  1834  			mErr.Errors = append(mErr.Errors,
  1835  				fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch))
  1836  		}
  1837  
  1838  		if err := j.ParameterizedJob.Validate(); err != nil {
  1839  			mErr.Errors = append(mErr.Errors, err)
  1840  		}
  1841  	}
  1842  
  1843  	return mErr.ErrorOrNil()
  1844  }
  1845  
  1846  // Warnings returns a list of warnings that may be from dubious settings or
  1847  // deprecation warnings.
  1848  func (j *Job) Warnings() error {
  1849  	var mErr multierror.Error
  1850  
  1851  	// Check the groups
  1852  	for _, tg := range j.TaskGroups {
  1853  		if err := tg.Warnings(j); err != nil {
  1854  			outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err)
  1855  			mErr.Errors = append(mErr.Errors, outer)
  1856  		}
  1857  	}
  1858  
  1859  	return mErr.ErrorOrNil()
  1860  }
  1861  
  1862  // LookupTaskGroup finds a task group by name
  1863  func (j *Job) LookupTaskGroup(name string) *TaskGroup {
  1864  	for _, tg := range j.TaskGroups {
  1865  		if tg.Name == name {
  1866  			return tg
  1867  		}
  1868  	}
  1869  	return nil
  1870  }
  1871  
  1872  // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined
  1873  // meta data for the task. When joining Job, Group and Task Meta, the precedence
  1874  // is by deepest scope (Task > Group > Job).
  1875  func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string {
  1876  	group := j.LookupTaskGroup(groupName)
  1877  	if group == nil {
  1878  		return nil
  1879  	}
  1880  
  1881  	task := group.LookupTask(taskName)
  1882  	if task == nil {
  1883  		return nil
  1884  	}
  1885  
  1886  	meta := helper.CopyMapStringString(task.Meta)
  1887  	if meta == nil {
  1888  		meta = make(map[string]string, len(group.Meta)+len(j.Meta))
  1889  	}
  1890  
  1891  	// Add the group specific meta
  1892  	for k, v := range group.Meta {
  1893  		if _, ok := meta[k]; !ok {
  1894  			meta[k] = v
  1895  		}
  1896  	}
  1897  
  1898  	// Add the job specific meta
  1899  	for k, v := range j.Meta {
  1900  		if _, ok := meta[k]; !ok {
  1901  			meta[k] = v
  1902  		}
  1903  	}
  1904  
  1905  	return meta
  1906  }
  1907  
  1908  // Stopped returns if a job is stopped.
  1909  func (j *Job) Stopped() bool {
  1910  	return j == nil || j.Stop
  1911  }
  1912  
  1913  // HasUpdateStrategy returns if any task group in the job has an update strategy
  1914  func (j *Job) HasUpdateStrategy() bool {
  1915  	for _, tg := range j.TaskGroups {
  1916  		if tg.Update != nil {
  1917  			return true
  1918  		}
  1919  	}
  1920  
  1921  	return false
  1922  }
  1923  
  1924  // Stub is used to return a summary of the job
  1925  func (j *Job) Stub(summary *JobSummary) *JobListStub {
  1926  	return &JobListStub{
  1927  		ID:                j.ID,
  1928  		ParentID:          j.ParentID,
  1929  		Name:              j.Name,
  1930  		Type:              j.Type,
  1931  		Priority:          j.Priority,
  1932  		Periodic:          j.IsPeriodic(),
  1933  		ParameterizedJob:  j.IsParameterized(),
  1934  		Stop:              j.Stop,
  1935  		Status:            j.Status,
  1936  		StatusDescription: j.StatusDescription,
  1937  		CreateIndex:       j.CreateIndex,
  1938  		ModifyIndex:       j.ModifyIndex,
  1939  		JobModifyIndex:    j.JobModifyIndex,
  1940  		SubmitTime:        j.SubmitTime,
  1941  		JobSummary:        summary,
  1942  	}
  1943  }
  1944  
  1945  // IsPeriodic returns whether a job is periodic.
  1946  func (j *Job) IsPeriodic() bool {
  1947  	return j.Periodic != nil
  1948  }
  1949  
  1950  // IsPeriodicActive returns whether the job is an active periodic job that will
  1951  // create child jobs
  1952  func (j *Job) IsPeriodicActive() bool {
  1953  	return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized()
  1954  }
  1955  
  1956  // IsParameterized returns whether a job is parameterized job.
  1957  func (j *Job) IsParameterized() bool {
  1958  	return j.ParameterizedJob != nil
  1959  }
  1960  
  1961  // VaultPolicies returns the set of Vault policies per task group, per task
  1962  func (j *Job) VaultPolicies() map[string]map[string]*Vault {
  1963  	policies := make(map[string]map[string]*Vault, len(j.TaskGroups))
  1964  
  1965  	for _, tg := range j.TaskGroups {
  1966  		tgPolicies := make(map[string]*Vault, len(tg.Tasks))
  1967  
  1968  		for _, task := range tg.Tasks {
  1969  			if task.Vault == nil {
  1970  				continue
  1971  			}
  1972  
  1973  			tgPolicies[task.Name] = task.Vault
  1974  		}
  1975  
  1976  		if len(tgPolicies) != 0 {
  1977  			policies[tg.Name] = tgPolicies
  1978  		}
  1979  	}
  1980  
  1981  	return policies
  1982  }
  1983  
  1984  // RequiredSignals returns a mapping of task groups to tasks to their required
  1985  // set of signals
  1986  func (j *Job) RequiredSignals() map[string]map[string][]string {
  1987  	signals := make(map[string]map[string][]string)
  1988  
  1989  	for _, tg := range j.TaskGroups {
  1990  		for _, task := range tg.Tasks {
  1991  			// Use this local one as a set
  1992  			taskSignals := make(map[string]struct{})
  1993  
  1994  			// Check if the Vault change mode uses signals
  1995  			if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal {
  1996  				taskSignals[task.Vault.ChangeSignal] = struct{}{}
  1997  			}
  1998  
  1999  			// If a user has specified a KillSignal, add it to required signals
  2000  			if task.KillSignal != "" {
  2001  				taskSignals[task.KillSignal] = struct{}{}
  2002  			}
  2003  
  2004  			// Check if any template change mode uses signals
  2005  			for _, t := range task.Templates {
  2006  				if t.ChangeMode != TemplateChangeModeSignal {
  2007  					continue
  2008  				}
  2009  
  2010  				taskSignals[t.ChangeSignal] = struct{}{}
  2011  			}
  2012  
  2013  			// Flatten and sort the signals
  2014  			l := len(taskSignals)
  2015  			if l == 0 {
  2016  				continue
  2017  			}
  2018  
  2019  			flat := make([]string, 0, l)
  2020  			for sig := range taskSignals {
  2021  				flat = append(flat, sig)
  2022  			}
  2023  
  2024  			sort.Strings(flat)
  2025  			tgSignals, ok := signals[tg.Name]
  2026  			if !ok {
  2027  				tgSignals = make(map[string][]string)
  2028  				signals[tg.Name] = tgSignals
  2029  			}
  2030  			tgSignals[task.Name] = flat
  2031  		}
  2032  
  2033  	}
  2034  
  2035  	return signals
  2036  }
  2037  
  2038  // SpecChanged determines if the functional specification has changed between
  2039  // two job versions.
  2040  func (j *Job) SpecChanged(new *Job) bool {
  2041  	if j == nil {
  2042  		return new != nil
  2043  	}
  2044  
  2045  	// Create a copy of the new job
  2046  	c := new.Copy()
  2047  
  2048  	// Update the new job so we can do a reflect
  2049  	c.Status = j.Status
  2050  	c.StatusDescription = j.StatusDescription
  2051  	c.Stable = j.Stable
  2052  	c.Version = j.Version
  2053  	c.CreateIndex = j.CreateIndex
  2054  	c.ModifyIndex = j.ModifyIndex
  2055  	c.JobModifyIndex = j.JobModifyIndex
  2056  	c.SubmitTime = j.SubmitTime
  2057  
  2058  	// Deep equals the jobs
  2059  	return !reflect.DeepEqual(j, c)
  2060  }
  2061  
  2062  func (j *Job) SetSubmitTime() {
  2063  	j.SubmitTime = time.Now().UTC().UnixNano()
  2064  }
  2065  
// JobListStub is used to return a subset of job information
// for the job list. Job.Stub fills it from a Job: Periodic and
// ParameterizedJob are the results of Job.IsPeriodic and Job.IsParameterized,
// JobSummary is the summary handed to Job.Stub, and the remaining fields are
// copied from the Job fields of the same name.
type JobListStub struct {
	ID                string
	ParentID          string
	Name              string
	Type              string
	Priority          int
	Periodic          bool
	ParameterizedJob  bool
	Stop              bool
	Status            string
	StatusDescription string
	JobSummary        *JobSummary
	CreateIndex       uint64
	ModifyIndex       uint64
	JobModifyIndex    uint64
	SubmitTime        int64
}
  2085  
// JobSummary summarizes the state of the allocations of a job
type JobSummary struct {
	// JobID is the ID of the job the summary is for
	JobID string

	// Namespace is the namespace of the job and its summary
	Namespace string

	// Summary contains the summary per task group for the Job, keyed by a
	// string (presumably the task group name)
	Summary map[string]TaskGroupSummary

	// Children contains a summary for the children of this job. It may be nil.
	Children *JobChildrenSummary

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  2104  
  2105  // Copy returns a new copy of JobSummary
  2106  func (js *JobSummary) Copy() *JobSummary {
  2107  	newJobSummary := new(JobSummary)
  2108  	*newJobSummary = *js
  2109  	newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary))
  2110  	for k, v := range js.Summary {
  2111  		newTGSummary[k] = v
  2112  	}
  2113  	newJobSummary.Summary = newTGSummary
  2114  	newJobSummary.Children = newJobSummary.Children.Copy()
  2115  	return newJobSummary
  2116  }
  2117  
// JobChildrenSummary contains the summary of children job statuses. Each
// field is a count; judging by the names, of child jobs in the pending,
// running and dead states respectively.
type JobChildrenSummary struct {
	Pending int64
	Running int64
	Dead    int64
}
  2124  
  2125  // Copy returns a new copy of a JobChildrenSummary
  2126  func (jc *JobChildrenSummary) Copy() *JobChildrenSummary {
  2127  	if jc == nil {
  2128  		return nil
  2129  	}
  2130  
  2131  	njc := new(JobChildrenSummary)
  2132  	*njc = *jc
  2133  	return njc
  2134  }
  2135  
// TaskGroupSummary summarizes the state of all the allocations of a particular
// TaskGroup. Each field is a count for the state named by the field.
type TaskGroupSummary struct {
	Queued   int
	Complete int
	Failed   int
	Running  int
	Starting int
	Lost     int
}
  2146  
// Health check mechanisms accepted for UpdateStrategy.HealthCheck.
const (
	// UpdateStrategyHealthCheck_Checks uses any registered health check state
	// in combination with task states to determine if an allocation is
	// healthy.
	UpdateStrategyHealthCheck_Checks = "checks"

	// UpdateStrategyHealthCheck_TaskStates uses the task states of an
	// allocation to determine if the allocation is healthy.
	UpdateStrategyHealthCheck_TaskStates = "task_states"

	// UpdateStrategyHealthCheck_Manual allows the operator to manually signal
	// to Nomad when an allocation is healthy. This allows more advanced health
	// checking that is outside of the scope of Nomad.
	UpdateStrategyHealthCheck_Manual = "manual"
)
  2161  
var (
	// DefaultUpdateStrategy provides a baseline that can be used to upgrade
	// jobs with the old policy or for populating field defaults. It is a
	// shared pointer; copy it (see UpdateStrategy.Copy) before modifying.
	DefaultUpdateStrategy = &UpdateStrategy{
		Stagger:         30 * time.Second,
		MaxParallel:     1,
		HealthCheck:     UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 5 * time.Minute,
		AutoRevert:      false,
		Canary:          0,
	}
)
  2175  
// UpdateStrategy is used to modify how updates are done
type UpdateStrategy struct {
	// Stagger is used to determine the rate at which allocations are migrated
	// due to down or draining nodes. Validate requires it to be positive.
	Stagger time.Duration

	// MaxParallel is how many updates can be done in parallel. Validate
	// requires it to be at least one.
	MaxParallel int

	// HealthCheck specifies the mechanism in which allocations are marked
	// healthy or unhealthy as part of a deployment. It must be one of the
	// UpdateStrategyHealthCheck_* values.
	HealthCheck string

	// MinHealthyTime is the minimum time an allocation must be in the healthy
	// state before it is marked as healthy, unblocking more allocations to be
	// rolled.
	MinHealthyTime time.Duration

	// HealthyDeadline is the time in which an allocation must be marked as
	// healthy before it is automatically transitioned to unhealthy. This time
	// period doesn't count against the MinHealthyTime.
	HealthyDeadline time.Duration

	// AutoRevert declares that if a deployment fails because of unhealthy
	// allocations, there should be an attempt to auto-revert the job to a
	// stable version.
	AutoRevert bool

	// Canary is the number of canaries to deploy when a change to the task
	// group is detected.
	Canary int
}
  2208  
  2209  func (u *UpdateStrategy) Copy() *UpdateStrategy {
  2210  	if u == nil {
  2211  		return nil
  2212  	}
  2213  
  2214  	copy := new(UpdateStrategy)
  2215  	*copy = *u
  2216  	return copy
  2217  }
  2218  
  2219  func (u *UpdateStrategy) Validate() error {
  2220  	if u == nil {
  2221  		return nil
  2222  	}
  2223  
  2224  	var mErr multierror.Error
  2225  	switch u.HealthCheck {
  2226  	case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual:
  2227  	default:
  2228  		multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck))
  2229  	}
  2230  
  2231  	if u.MaxParallel < 1 {
  2232  		multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than one: %d < 1", u.MaxParallel))
  2233  	}
  2234  	if u.Canary < 0 {
  2235  		multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary))
  2236  	}
  2237  	if u.MinHealthyTime < 0 {
  2238  		multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime))
  2239  	}
  2240  	if u.HealthyDeadline <= 0 {
  2241  		multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline))
  2242  	}
  2243  	if u.MinHealthyTime >= u.HealthyDeadline {
  2244  		multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v > %v", u.MinHealthyTime, u.HealthyDeadline))
  2245  	}
  2246  	if u.Stagger <= 0 {
  2247  		multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger))
  2248  	}
  2249  
  2250  	return mErr.ErrorOrNil()
  2251  }
  2252  
  2253  // TODO(alexdadgar): Remove once no longer used by the scheduler.
  2254  // Rolling returns if a rolling strategy should be used
  2255  func (u *UpdateStrategy) Rolling() bool {
  2256  	return u.Stagger > 0 && u.MaxParallel > 0
  2257  }
  2258  
// Spec types accepted for PeriodicConfig.SpecType.
const (
	// PeriodicSpecCron is used for a cron spec.
	PeriodicSpecCron = "cron"

	// PeriodicSpecTest is only used by unit tests. It is a sorted, comma
	// separated list of unix timestamps at which to launch.
	PeriodicSpecTest = "_internal_test"
)
  2267  
// PeriodicConfig defines the interval a job should be run at.
type PeriodicConfig struct {
	// Enabled determines if the job should be run periodically.
	Enabled bool

	// Spec specifies the interval the job should be run as. It is parsed based
	// on the SpecType.
	Spec string

	// SpecType defines the format of the spec. It must be PeriodicSpecCron or
	// PeriodicSpecTest.
	SpecType string

	// ProhibitOverlap enforces that spawned jobs do not run in parallel.
	ProhibitOverlap bool

	// TimeZone is the user specified string that determines the time zone to
	// launch against. The time zones must be specified from IANA Time Zone
	// database, such as "America/New_York".
	// Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
	// Reference: https://www.iana.org/time-zones
	TimeZone string

	// location is the time zone to evaluate the launch time against. It is
	// populated by Canonicalize and read through GetLocation.
	location *time.Location
}
  2293  
  2294  func (p *PeriodicConfig) Copy() *PeriodicConfig {
  2295  	if p == nil {
  2296  		return nil
  2297  	}
  2298  	np := new(PeriodicConfig)
  2299  	*np = *p
  2300  	return np
  2301  }
  2302  
  2303  func (p *PeriodicConfig) Validate() error {
  2304  	if !p.Enabled {
  2305  		return nil
  2306  	}
  2307  
  2308  	var mErr multierror.Error
  2309  	if p.Spec == "" {
  2310  		multierror.Append(&mErr, fmt.Errorf("Must specify a spec"))
  2311  	}
  2312  
  2313  	// Check if we got a valid time zone
  2314  	if p.TimeZone != "" {
  2315  		if _, err := time.LoadLocation(p.TimeZone); err != nil {
  2316  			multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err))
  2317  		}
  2318  	}
  2319  
  2320  	switch p.SpecType {
  2321  	case PeriodicSpecCron:
  2322  		// Validate the cron spec
  2323  		if _, err := cronexpr.Parse(p.Spec); err != nil {
  2324  			multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err))
  2325  		}
  2326  	case PeriodicSpecTest:
  2327  		// No-op
  2328  	default:
  2329  		multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType))
  2330  	}
  2331  
  2332  	return mErr.ErrorOrNil()
  2333  }
  2334  
  2335  func (p *PeriodicConfig) Canonicalize() {
  2336  	// Load the location
  2337  	l, err := time.LoadLocation(p.TimeZone)
  2338  	if err != nil {
  2339  		p.location = time.UTC
  2340  	}
  2341  
  2342  	p.location = l
  2343  }
  2344  
  2345  // Next returns the closest time instant matching the spec that is after the
  2346  // passed time. If no matching instance exists, the zero value of time.Time is
  2347  // returned. The `time.Location` of the returned value matches that of the
  2348  // passed time.
  2349  func (p *PeriodicConfig) Next(fromTime time.Time) time.Time {
  2350  	switch p.SpecType {
  2351  	case PeriodicSpecCron:
  2352  		if e, err := cronexpr.Parse(p.Spec); err == nil {
  2353  			return e.Next(fromTime)
  2354  		}
  2355  	case PeriodicSpecTest:
  2356  		split := strings.Split(p.Spec, ",")
  2357  		if len(split) == 1 && split[0] == "" {
  2358  			return time.Time{}
  2359  		}
  2360  
  2361  		// Parse the times
  2362  		times := make([]time.Time, len(split))
  2363  		for i, s := range split {
  2364  			unix, err := strconv.Atoi(s)
  2365  			if err != nil {
  2366  				return time.Time{}
  2367  			}
  2368  
  2369  			times[i] = time.Unix(int64(unix), 0)
  2370  		}
  2371  
  2372  		// Find the next match
  2373  		for _, next := range times {
  2374  			if fromTime.Before(next) {
  2375  				return next
  2376  			}
  2377  		}
  2378  	}
  2379  
  2380  	return time.Time{}
  2381  }
  2382  
  2383  // GetLocation returns the location to use for determining the time zone to run
  2384  // the periodic job against.
  2385  func (p *PeriodicConfig) GetLocation() *time.Location {
  2386  	// Jobs pre 0.5.5 will not have this
  2387  	if p.location != nil {
  2388  		return p.location
  2389  	}
  2390  
  2391  	return time.UTC
  2392  }
  2393  
const (
	// PeriodicLaunchSuffix is the string appended to the periodic job's ID
	// when launching derived instances of it.
	PeriodicLaunchSuffix = "/periodic-"
)
  2399  
// PeriodicLaunch tracks the last launch time of a periodic job.
type PeriodicLaunch struct {
	ID        string    // ID of the periodic job.
	Namespace string    // Namespace of the periodic job
	Launch    time.Time // The last launch time.

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  2410  
const (
	// DispatchPayloadForbidden, DispatchPayloadOptional and
	// DispatchPayloadRequired are the values accepted for
	// ParameterizedJobConfig.Payload.
	DispatchPayloadForbidden = "forbidden"
	DispatchPayloadOptional  = "optional"
	DispatchPayloadRequired  = "required"

	// DispatchLaunchSuffix is the string appended to the parameterized job's ID
	// when dispatching instances of it.
	DispatchLaunchSuffix = "/dispatch-"
)
  2420  
// ParameterizedJobConfig is used to configure the parameterized job
type ParameterizedJobConfig struct {
	// Payload configures the payload requirements. It must be one of the
	// DispatchPayload* values; Canonicalize defaults an empty value to
	// DispatchPayloadOptional.
	Payload string

	// MetaRequired is metadata keys that must be specified by the dispatcher
	MetaRequired []string

	// MetaOptional is metadata keys that may be specified by the dispatcher.
	// Validate requires it to be disjoint from MetaRequired.
	MetaOptional []string
}
  2432  
  2433  func (d *ParameterizedJobConfig) Validate() error {
  2434  	var mErr multierror.Error
  2435  	switch d.Payload {
  2436  	case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden:
  2437  	default:
  2438  		multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload))
  2439  	}
  2440  
  2441  	// Check that the meta configurations are disjoint sets
  2442  	disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional)
  2443  	if !disjoint {
  2444  		multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending))
  2445  	}
  2446  
  2447  	return mErr.ErrorOrNil()
  2448  }
  2449  
  2450  func (d *ParameterizedJobConfig) Canonicalize() {
  2451  	if d.Payload == "" {
  2452  		d.Payload = DispatchPayloadOptional
  2453  	}
  2454  }
  2455  
  2456  func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig {
  2457  	if d == nil {
  2458  		return nil
  2459  	}
  2460  	nd := new(ParameterizedJobConfig)
  2461  	*nd = *d
  2462  	nd.MetaOptional = helper.CopySliceString(nd.MetaOptional)
  2463  	nd.MetaRequired = helper.CopySliceString(nd.MetaRequired)
  2464  	return nd
  2465  }
  2466  
  2467  // DispatchedID returns an ID appropriate for a job dispatched against a
  2468  // particular parameterized job
  2469  func DispatchedID(templateID string, t time.Time) string {
  2470  	u := uuid.Generate()[:8]
  2471  	return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u)
  2472  }
  2473  
// DispatchPayloadConfig configures how a task gets its input from a job dispatch
type DispatchPayloadConfig struct {
	// File specifies a relative path to where the input data should be
	// written. Validate checks it against the "task/local/" prefix to ensure
	// it does not escape the allocation directory.
	File string
}
  2479  
  2480  func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig {
  2481  	if d == nil {
  2482  		return nil
  2483  	}
  2484  	nd := new(DispatchPayloadConfig)
  2485  	*nd = *d
  2486  	return nd
  2487  }
  2488  
  2489  func (d *DispatchPayloadConfig) Validate() error {
  2490  	// Verify the destination doesn't escape
  2491  	escaped, err := PathEscapesAllocDir("task/local/", d.File)
  2492  	if err != nil {
  2493  		return fmt.Errorf("invalid destination path: %v", err)
  2494  	} else if escaped {
  2495  		return fmt.Errorf("destination escapes allocation directory")
  2496  	}
  2497  
  2498  	return nil
  2499  }
  2500  
var (
	// defaultServiceJobRestartPolicy is the restart policy NewRestartPolicy
	// hands out (as a copy) for service and system jobs.
	defaultServiceJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 2,
		Interval: 1 * time.Minute,
		Mode:     RestartPolicyModeDelay,
	}
	// defaultBatchJobRestartPolicy is the restart policy NewRestartPolicy
	// hands out (as a copy) for batch jobs.
	defaultBatchJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 15,
		Interval: 7 * 24 * time.Hour,
		Mode:     RestartPolicyModeDelay,
	}
)
  2515  
const (
	// RestartPolicyModeDelay causes an artificial delay until the next
	// interval is reached when the specified attempts have been reached in the
	// interval.
	RestartPolicyModeDelay = "delay"

	// RestartPolicyModeFail causes a job to fail if the specified number of
	// attempts are reached within an interval.
	RestartPolicyModeFail = "fail"

	// RestartPolicyMinInterval is the minimum interval that is accepted for a
	// restart policy.
	RestartPolicyMinInterval = 5 * time.Second

	// ReasonWithinPolicy describes restart events that are within policy
	ReasonWithinPolicy = "Restart within policy"
)
  2532  
// RestartPolicy configures how Tasks are restarted when they crash or fail.
type RestartPolicy struct {
	// Attempts is the number of restarts that will occur in an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of restarts
	// within. Validate rejects values below RestartPolicyMinInterval.
	Interval time.Duration

	// Delay is the time between a failure and a restart.
	Delay time.Duration

	// Mode controls what happens when the task restarts more than Attempts
	// times in an interval. It must be RestartPolicyModeDelay or
	// RestartPolicyModeFail.
	Mode string
}
  2549  
  2550  func (r *RestartPolicy) Copy() *RestartPolicy {
  2551  	if r == nil {
  2552  		return nil
  2553  	}
  2554  	nrp := new(RestartPolicy)
  2555  	*nrp = *r
  2556  	return nrp
  2557  }
  2558  
  2559  func (r *RestartPolicy) Validate() error {
  2560  	var mErr multierror.Error
  2561  	switch r.Mode {
  2562  	case RestartPolicyModeDelay, RestartPolicyModeFail:
  2563  	default:
  2564  		multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode))
  2565  	}
  2566  
  2567  	// Check for ambiguous/confusing settings
  2568  	if r.Attempts == 0 && r.Mode != RestartPolicyModeFail {
  2569  		multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts))
  2570  	}
  2571  
  2572  	if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() {
  2573  		multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval))
  2574  	}
  2575  	if time.Duration(r.Attempts)*r.Delay > r.Interval {
  2576  		multierror.Append(&mErr,
  2577  			fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay))
  2578  	}
  2579  	return mErr.ErrorOrNil()
  2580  }
  2581  
  2582  func NewRestartPolicy(jobType string) *RestartPolicy {
  2583  	switch jobType {
  2584  	case JobTypeService, JobTypeSystem:
  2585  		rp := defaultServiceJobRestartPolicy
  2586  		return &rp
  2587  	case JobTypeBatch:
  2588  		rp := defaultBatchJobRestartPolicy
  2589  		return &rp
  2590  	}
  2591  	return nil
  2592  }
  2593  
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled.
	Count int

	// Update is used to control the update strategy for this task group
	Update *UpdateStrategy

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// RestartPolicy of a TaskGroup. Canonicalize fills in a default based on
	// the job type when it is nil.
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// EphemeralDisk is the disk resources that the task group requests.
	// Canonicalize fills in a default when it is nil.
	EphemeralDisk *EphemeralDisk

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string
}
  2625  
  2626  func (tg *TaskGroup) Copy() *TaskGroup {
  2627  	if tg == nil {
  2628  		return nil
  2629  	}
  2630  	ntg := new(TaskGroup)
  2631  	*ntg = *tg
  2632  	ntg.Update = ntg.Update.Copy()
  2633  	ntg.Constraints = CopySliceConstraints(ntg.Constraints)
  2634  	ntg.RestartPolicy = ntg.RestartPolicy.Copy()
  2635  
  2636  	if tg.Tasks != nil {
  2637  		tasks := make([]*Task, len(ntg.Tasks))
  2638  		for i, t := range ntg.Tasks {
  2639  			tasks[i] = t.Copy()
  2640  		}
  2641  		ntg.Tasks = tasks
  2642  	}
  2643  
  2644  	ntg.Meta = helper.CopyMapStringString(ntg.Meta)
  2645  
  2646  	if tg.EphemeralDisk != nil {
  2647  		ntg.EphemeralDisk = tg.EphemeralDisk.Copy()
  2648  	}
  2649  	return ntg
  2650  }
  2651  
  2652  // Canonicalize is used to canonicalize fields in the TaskGroup.
  2653  func (tg *TaskGroup) Canonicalize(job *Job) {
  2654  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  2655  	// problems since we use reflect DeepEquals.
  2656  	if len(tg.Meta) == 0 {
  2657  		tg.Meta = nil
  2658  	}
  2659  
  2660  	// Set the default restart policy.
  2661  	if tg.RestartPolicy == nil {
  2662  		tg.RestartPolicy = NewRestartPolicy(job.Type)
  2663  	}
  2664  
  2665  	// Set a default ephemeral disk object if the user has not requested for one
  2666  	if tg.EphemeralDisk == nil {
  2667  		tg.EphemeralDisk = DefaultEphemeralDisk()
  2668  	}
  2669  
  2670  	for _, task := range tg.Tasks {
  2671  		task.Canonicalize(job, tg)
  2672  	}
  2673  
  2674  	// Add up the disk resources to EphemeralDisk. This is done so that users
  2675  	// are not required to move their disk attribute from resources to
  2676  	// EphemeralDisk section of the job spec in Nomad 0.5
  2677  	// COMPAT 0.4.1 -> 0.5
  2678  	// Remove in 0.6
  2679  	var diskMB int
  2680  	for _, task := range tg.Tasks {
  2681  		diskMB += task.Resources.DiskMB
  2682  	}
  2683  	if diskMB > 0 {
  2684  		tg.EphemeralDisk.SizeMB = diskMB
  2685  	}
  2686  }
  2687  
  2688  // Validate is used to sanity check a task group
  2689  func (tg *TaskGroup) Validate(j *Job) error {
  2690  	var mErr multierror.Error
  2691  	if tg.Name == "" {
  2692  		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
  2693  	}
  2694  	if tg.Count < 0 {
  2695  		mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative"))
  2696  	}
  2697  	if len(tg.Tasks) == 0 {
  2698  		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
  2699  	}
  2700  	for idx, constr := range tg.Constraints {
  2701  		if err := constr.Validate(); err != nil {
  2702  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  2703  			mErr.Errors = append(mErr.Errors, outer)
  2704  		}
  2705  	}
  2706  
  2707  	if tg.RestartPolicy != nil {
  2708  		if err := tg.RestartPolicy.Validate(); err != nil {
  2709  			mErr.Errors = append(mErr.Errors, err)
  2710  		}
  2711  	} else {
  2712  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
  2713  	}
  2714  
  2715  	if tg.EphemeralDisk != nil {
  2716  		if err := tg.EphemeralDisk.Validate(); err != nil {
  2717  			mErr.Errors = append(mErr.Errors, err)
  2718  		}
  2719  	} else {
  2720  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name))
  2721  	}
  2722  
  2723  	// Validate the update strategy
  2724  	if u := tg.Update; u != nil {
  2725  		switch j.Type {
  2726  		case JobTypeService, JobTypeSystem:
  2727  		default:
  2728  			// COMPAT: Enable in 0.7.0
  2729  			//mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type))
  2730  		}
  2731  		if err := u.Validate(); err != nil {
  2732  			mErr.Errors = append(mErr.Errors, err)
  2733  		}
  2734  	}
  2735  
  2736  	// Check for duplicate tasks, that there is only leader task if any,
  2737  	// and no duplicated static ports
  2738  	tasks := make(map[string]int)
  2739  	staticPorts := make(map[int]string)
  2740  	leaderTasks := 0
  2741  	for idx, task := range tg.Tasks {
  2742  		if task.Name == "" {
  2743  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
  2744  		} else if existing, ok := tasks[task.Name]; ok {
  2745  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
  2746  		} else {
  2747  			tasks[task.Name] = idx
  2748  		}
  2749  
  2750  		if task.Leader {
  2751  			leaderTasks++
  2752  		}
  2753  
  2754  		if task.Resources == nil {
  2755  			continue
  2756  		}
  2757  
  2758  		for _, net := range task.Resources.Networks {
  2759  			for _, port := range net.ReservedPorts {
  2760  				if other, ok := staticPorts[port.Value]; ok {
  2761  					err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
  2762  					mErr.Errors = append(mErr.Errors, err)
  2763  				} else {
  2764  					staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label)
  2765  				}
  2766  			}
  2767  		}
  2768  	}
  2769  
  2770  	if leaderTasks > 1 {
  2771  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader"))
  2772  	}
  2773  
  2774  	// Validate the tasks
  2775  	for _, task := range tg.Tasks {
  2776  		if err := task.Validate(tg.EphemeralDisk); err != nil {
  2777  			outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err)
  2778  			mErr.Errors = append(mErr.Errors, outer)
  2779  		}
  2780  	}
  2781  	return mErr.ErrorOrNil()
  2782  }
  2783  
  2784  // Warnings returns a list of warnings that may be from dubious settings or
  2785  // deprecation warnings.
  2786  func (tg *TaskGroup) Warnings(j *Job) error {
  2787  	var mErr multierror.Error
  2788  
  2789  	// Validate the update strategy
  2790  	if u := tg.Update; u != nil {
  2791  		// Check the counts are appropriate
  2792  		if u.MaxParallel > tg.Count {
  2793  			mErr.Errors = append(mErr.Errors,
  2794  				fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+
  2795  					"A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count))
  2796  		}
  2797  	}
  2798  
  2799  	return mErr.ErrorOrNil()
  2800  }
  2801  
  2802  // LookupTask finds a task by name
  2803  func (tg *TaskGroup) LookupTask(name string) *Task {
  2804  	for _, t := range tg.Tasks {
  2805  		if t.Name == name {
  2806  			return t
  2807  		}
  2808  	}
  2809  	return nil
  2810  }
  2811  
  2812  func (tg *TaskGroup) GoString() string {
  2813  	return fmt.Sprintf("*%#v", *tg)
  2814  }
  2815  
  2816  // CombinedResources returns the combined resources for the task group
  2817  func (tg *TaskGroup) CombinedResources() *Resources {
  2818  	r := &Resources{
  2819  		DiskMB: tg.EphemeralDisk.SizeMB,
  2820  	}
  2821  	for _, task := range tg.Tasks {
  2822  		r.Add(task.Resources)
  2823  	}
  2824  	return r
  2825  }
  2826  
// CheckRestart describes if and when a task should be restarted based on
// failing health checks.
type CheckRestart struct {
	// Limit is the number of unhealthy intervals after which the task is
	// restarted. Validate rejects negative values, and
	// ServiceCheck.TriggersRestarts only reports true when Limit > 0.
	Limit int // Restart task after this many unhealthy intervals

	// Grace is the time given to a task after it starts to become healthy.
	// Validate rejects negative values.
	Grace time.Duration // Grace time to give tasks after starting to get healthy

	// IgnoreWarnings, when true, treats checks in the `warning` state as
	// passing.
	IgnoreWarnings bool // If true treat checks in `warning` as passing
}
  2834  
  2835  func (c *CheckRestart) Copy() *CheckRestart {
  2836  	if c == nil {
  2837  		return nil
  2838  	}
  2839  
  2840  	nc := new(CheckRestart)
  2841  	*nc = *c
  2842  	return nc
  2843  }
  2844  
  2845  func (c *CheckRestart) Validate() error {
  2846  	if c == nil {
  2847  		return nil
  2848  	}
  2849  
  2850  	var mErr multierror.Error
  2851  	if c.Limit < 0 {
  2852  		mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit))
  2853  	}
  2854  
  2855  	if c.Grace < 0 {
  2856  		mErr.Errors = append(mErr.Errors, fmt.Errorf("grace period must be greater than or equal to 0 but found %d", c.Grace))
  2857  	}
  2858  
  2859  	return mErr.ErrorOrNil()
  2860  }
  2861  
const (
	// ServiceCheckHTTP, ServiceCheckTCP and ServiceCheckScript are the check
	// types accepted by ServiceCheck.validate. HTTP and TCP checks require a
	// port (see ServiceCheck.RequiresPort); script checks require a command.
	ServiceCheckHTTP   = "http"
	ServiceCheckTCP    = "tcp"
	ServiceCheckScript = "script"

	// minCheckInterval is the minimum check interval permitted.  Consul
	// currently has its MinInterval set to 1s.  Mirror that here for
	// consistency.
	minCheckInterval = 1 * time.Second

	// minCheckTimeout is the minimum check timeout permitted for Consul
	// script TTL checks.
	minCheckTimeout = 1 * time.Second
)
  2876  
// The ServiceCheck data model represents the consul health check that
// Nomad registers for a Task
type ServiceCheck struct {
	Name          string              // Name of the check; Canonicalize derives a default from the service name when empty
	Type          string              // Type of the check - tcp, http or script (validated case-insensitively)
	Command       string              // Command is the command to run for script checks
	Args          []string            // Args is a list of arguments for script checks
	Path          string              // path of the health check url for http type check
	Protocol      string              // Protocol to use if check is http, defaults to http
	PortLabel     string              // The port to use for tcp/http checks; falls back to the service's PortLabel when empty
	AddressMode   string              // 'host' to use host ip:port or 'driver' to use driver's
	Interval      time.Duration       // Interval of the check; must be at least minCheckInterval
	Timeout       time.Duration       // Timeout of the response from the check before consul fails the check; must be at least minCheckTimeout
	InitialStatus string              // Initial status of the check
	TLSSkipVerify bool                // Skip TLS verification when Protocol=https
	Method        string              // HTTP Method to use (GET by default)
	Header        map[string][]string // HTTP Headers for Consul to set when making HTTP checks
	CheckRestart  *CheckRestart       // If and when a task should be restarted based on checks
}
  2896  
  2897  func (sc *ServiceCheck) Copy() *ServiceCheck {
  2898  	if sc == nil {
  2899  		return nil
  2900  	}
  2901  	nsc := new(ServiceCheck)
  2902  	*nsc = *sc
  2903  	nsc.Args = helper.CopySliceString(sc.Args)
  2904  	nsc.Header = helper.CopyMapStringSliceString(sc.Header)
  2905  	nsc.CheckRestart = sc.CheckRestart.Copy()
  2906  	return nsc
  2907  }
  2908  
  2909  func (sc *ServiceCheck) Canonicalize(serviceName string) {
  2910  	// Ensure empty maps/slices are treated as null to avoid scheduling
  2911  	// issues when using DeepEquals.
  2912  	if len(sc.Args) == 0 {
  2913  		sc.Args = nil
  2914  	}
  2915  
  2916  	if len(sc.Header) == 0 {
  2917  		sc.Header = nil
  2918  	} else {
  2919  		for k, v := range sc.Header {
  2920  			if len(v) == 0 {
  2921  				sc.Header[k] = nil
  2922  			}
  2923  		}
  2924  	}
  2925  
  2926  	if sc.Name == "" {
  2927  		sc.Name = fmt.Sprintf("service: %q check", serviceName)
  2928  	}
  2929  }
  2930  
  2931  // validate a Service's ServiceCheck
  2932  func (sc *ServiceCheck) validate() error {
  2933  	// Validate Type
  2934  	switch strings.ToLower(sc.Type) {
  2935  	case ServiceCheckTCP:
  2936  	case ServiceCheckHTTP:
  2937  		if sc.Path == "" {
  2938  			return fmt.Errorf("http type must have a valid http path")
  2939  		}
  2940  
  2941  	case ServiceCheckScript:
  2942  		if sc.Command == "" {
  2943  			return fmt.Errorf("script type must have a valid script path")
  2944  		}
  2945  	default:
  2946  		return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type)
  2947  	}
  2948  
  2949  	// Validate interval and timeout
  2950  	if sc.Interval == 0 {
  2951  		return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval)
  2952  	} else if sc.Interval < minCheckInterval {
  2953  		return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval)
  2954  	}
  2955  
  2956  	if sc.Timeout == 0 {
  2957  		return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval)
  2958  	} else if sc.Timeout < minCheckTimeout {
  2959  		return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
  2960  	}
  2961  
  2962  	// Validate InitialStatus
  2963  	switch sc.InitialStatus {
  2964  	case "":
  2965  	case api.HealthPassing:
  2966  	case api.HealthWarning:
  2967  	case api.HealthCritical:
  2968  	default:
  2969  		return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical)
  2970  
  2971  	}
  2972  
  2973  	// Validate AddressMode
  2974  	switch sc.AddressMode {
  2975  	case "", AddressModeHost, AddressModeDriver:
  2976  		// Ok
  2977  	case AddressModeAuto:
  2978  		return fmt.Errorf("invalid address_mode %q - %s only valid for services", sc.AddressMode, AddressModeAuto)
  2979  	default:
  2980  		return fmt.Errorf("invalid address_mode %q", sc.AddressMode)
  2981  	}
  2982  
  2983  	return sc.CheckRestart.Validate()
  2984  }
  2985  
  2986  // RequiresPort returns whether the service check requires the task has a port.
  2987  func (sc *ServiceCheck) RequiresPort() bool {
  2988  	switch sc.Type {
  2989  	case ServiceCheckHTTP, ServiceCheckTCP:
  2990  		return true
  2991  	default:
  2992  		return false
  2993  	}
  2994  }
  2995  
  2996  // TriggersRestarts returns true if this check should be watched and trigger a restart
  2997  // on failure.
  2998  func (sc *ServiceCheck) TriggersRestarts() bool {
  2999  	return sc.CheckRestart != nil && sc.CheckRestart.Limit > 0
  3000  }
  3001  
  3002  // Hash all ServiceCheck fields and the check's corresponding service ID to
  3003  // create an identifier. The identifier is not guaranteed to be unique as if
  3004  // the PortLabel is blank, the Service's PortLabel will be used after Hash is
  3005  // called.
  3006  func (sc *ServiceCheck) Hash(serviceID string) string {
  3007  	h := sha1.New()
  3008  	io.WriteString(h, serviceID)
  3009  	io.WriteString(h, sc.Name)
  3010  	io.WriteString(h, sc.Type)
  3011  	io.WriteString(h, sc.Command)
  3012  	io.WriteString(h, strings.Join(sc.Args, ""))
  3013  	io.WriteString(h, sc.Path)
  3014  	io.WriteString(h, sc.Protocol)
  3015  	io.WriteString(h, sc.PortLabel)
  3016  	io.WriteString(h, sc.Interval.String())
  3017  	io.WriteString(h, sc.Timeout.String())
  3018  	io.WriteString(h, sc.Method)
  3019  	// Only include TLSSkipVerify if set to maintain ID stability with Nomad <0.6
  3020  	if sc.TLSSkipVerify {
  3021  		io.WriteString(h, "true")
  3022  	}
  3023  
  3024  	// Since map iteration order isn't stable we need to write k/v pairs to
  3025  	// a slice and sort it before hashing.
  3026  	if len(sc.Header) > 0 {
  3027  		headers := make([]string, 0, len(sc.Header))
  3028  		for k, v := range sc.Header {
  3029  			headers = append(headers, k+strings.Join(v, ""))
  3030  		}
  3031  		sort.Strings(headers)
  3032  		io.WriteString(h, strings.Join(headers, ""))
  3033  	}
  3034  
  3035  	// Only include AddressMode if set to maintain ID stability with Nomad <0.7.1
  3036  	if len(sc.AddressMode) > 0 {
  3037  		io.WriteString(h, sc.AddressMode)
  3038  	}
  3039  
  3040  	return fmt.Sprintf("%x", h.Sum(nil))
  3041  }
  3042  
// Address modes select which address is advertised for a service or used for
// a check.
const (
	// AddressModeAuto is accepted for services only; ServiceCheck.validate
	// rejects it for checks.
	AddressModeAuto = "auto"

	// AddressModeHost uses the host ip:port.
	AddressModeHost = "host"

	// AddressModeDriver uses the driver's address. With this mode
	// validateServices permits numeric port labels.
	AddressModeDriver = "driver"
)
  3048  
// Service represents a Consul service definition in Nomad
type Service struct {
	// Name of the service registered with Consul. Consul defaults the
	// Name to ServiceID if not specified.  The Name if specified is used
	// as one of the seed values when generating a Consul ServiceID.
	// Canonicalize interpolates the JOB, TASKGROUP, TASK and BASE
	// variables in it.
	Name string

	// PortLabel is either the numeric port number or the `host:port`.
	// To specify the port number using the host's Consul Advertise
	// address, specify an empty host in the PortLabel (e.g. `:port`).
	PortLabel string

	// AddressMode specifies whether or not to use the host ip:port for
	// this service. Validate accepts the empty string or one of the
	// AddressMode* constants.
	AddressMode string

	Tags   []string        // List of tags for the service
	Checks []*ServiceCheck // List of checks associated with the service
}
  3068  
  3069  func (s *Service) Copy() *Service {
  3070  	if s == nil {
  3071  		return nil
  3072  	}
  3073  	ns := new(Service)
  3074  	*ns = *s
  3075  	ns.Tags = helper.CopySliceString(ns.Tags)
  3076  
  3077  	if s.Checks != nil {
  3078  		checks := make([]*ServiceCheck, len(ns.Checks))
  3079  		for i, c := range ns.Checks {
  3080  			checks[i] = c.Copy()
  3081  		}
  3082  		ns.Checks = checks
  3083  	}
  3084  
  3085  	return ns
  3086  }
  3087  
  3088  // Canonicalize interpolates values of Job, Task Group and Task in the Service
  3089  // Name. This also generates check names, service id and check ids.
  3090  func (s *Service) Canonicalize(job string, taskGroup string, task string) {
  3091  	// Ensure empty lists are treated as null to avoid scheduler issues when
  3092  	// using DeepEquals
  3093  	if len(s.Tags) == 0 {
  3094  		s.Tags = nil
  3095  	}
  3096  	if len(s.Checks) == 0 {
  3097  		s.Checks = nil
  3098  	}
  3099  
  3100  	s.Name = args.ReplaceEnv(s.Name, map[string]string{
  3101  		"JOB":       job,
  3102  		"TASKGROUP": taskGroup,
  3103  		"TASK":      task,
  3104  		"BASE":      fmt.Sprintf("%s-%s-%s", job, taskGroup, task),
  3105  	},
  3106  	)
  3107  
  3108  	for _, check := range s.Checks {
  3109  		check.Canonicalize(s.Name)
  3110  	}
  3111  }
  3112  
  3113  // Validate checks if the Check definition is valid
  3114  func (s *Service) Validate() error {
  3115  	var mErr multierror.Error
  3116  
  3117  	// Ensure the service name is valid per the below RFCs but make an exception
  3118  	// for our interpolation syntax by first stripping any environment variables from the name
  3119  
  3120  	serviceNameStripped := args.ReplaceEnvWithPlaceHolder(s.Name, "ENV-VAR")
  3121  
  3122  	if err := s.ValidateName(serviceNameStripped); err != nil {
  3123  		mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q", s.Name))
  3124  	}
  3125  
  3126  	switch s.AddressMode {
  3127  	case "", AddressModeAuto, AddressModeHost, AddressModeDriver:
  3128  		// OK
  3129  	default:
  3130  		mErr.Errors = append(mErr.Errors, fmt.Errorf("service address_mode must be %q, %q, or %q; not %q", AddressModeAuto, AddressModeHost, AddressModeDriver, s.AddressMode))
  3131  	}
  3132  
  3133  	for _, c := range s.Checks {
  3134  		if s.PortLabel == "" && c.PortLabel == "" && c.RequiresPort() {
  3135  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but neither check nor service %+q have a port", c.Name, s.Name))
  3136  			continue
  3137  		}
  3138  
  3139  		if err := c.validate(); err != nil {
  3140  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err))
  3141  		}
  3142  	}
  3143  
  3144  	return mErr.ErrorOrNil()
  3145  }
  3146  
  3147  // ValidateName checks if the services Name is valid and should be called after
  3148  // the name has been interpolated
  3149  func (s *Service) ValidateName(name string) error {
  3150  	// Ensure the service name is valid per RFC-952 §1
  3151  	// (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1
  3152  	// (https://tools.ietf.org/html/rfc1123), and RFC-2782
  3153  	// (https://tools.ietf.org/html/rfc2782).
  3154  	re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`)
  3155  	if !re.MatchString(name) {
  3156  		return fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be no longer than 63 characters: %q", name)
  3157  	}
  3158  	return nil
  3159  }
  3160  
  3161  // Hash returns a base32 encoded hash of a Service's contents excluding checks
  3162  // as they're hashed independently.
  3163  func (s *Service) Hash(allocID, taskName string) string {
  3164  	h := sha1.New()
  3165  	io.WriteString(h, allocID)
  3166  	io.WriteString(h, taskName)
  3167  	io.WriteString(h, s.Name)
  3168  	io.WriteString(h, s.PortLabel)
  3169  	io.WriteString(h, s.AddressMode)
  3170  	for _, tag := range s.Tags {
  3171  		io.WriteString(h, tag)
  3172  	}
  3173  
  3174  	// Base32 is used for encoding the hash as sha1 hashes can always be
  3175  	// encoded without padding, only 4 bytes larger than base64, and saves
  3176  	// 8 bytes vs hex. Since these hashes are used in Consul URLs it's nice
  3177  	// to have a reasonably compact URL-safe representation.
  3178  	return b32.EncodeToString(h.Sum(nil))
  3179  }
  3180  
const (
	// DefaultKillTimeout is the default timeout between signaling a task it
	// will be killed and killing it. Task.Canonicalize applies it when a
	// task's KillTimeout is zero.
	DefaultKillTimeout = 5 * time.Second
)
  3186  
// LogConfig provides configuration for log rotation
type LogConfig struct {
	// MaxFiles is the number of log files; Validate requires at least 1.
	MaxFiles int

	// MaxFileSizeMB is the size of a single log file in megabytes; Validate
	// requires at least 1. Task.Validate treats MaxFiles * MaxFileSizeMB as
	// the task's log storage usage.
	MaxFileSizeMB int
}
  3192  
  3193  // DefaultLogConfig returns the default LogConfig values.
  3194  func DefaultLogConfig() *LogConfig {
  3195  	return &LogConfig{
  3196  		MaxFiles:      10,
  3197  		MaxFileSizeMB: 10,
  3198  	}
  3199  }
  3200  
  3201  // Validate returns an error if the log config specified are less than
  3202  // the minimum allowed.
  3203  func (l *LogConfig) Validate() error {
  3204  	var mErr multierror.Error
  3205  	if l.MaxFiles < 1 {
  3206  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
  3207  	}
  3208  	if l.MaxFileSizeMB < 1 {
  3209  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
  3210  	}
  3211  	return mErr.ErrorOrNil()
  3212  }
  3213  
// Task is a single process typically that is executed as part of a task group.
type Task struct {
	// Name of the task. Validate requires it to be non-empty and free of
	// slashes, since it is used as a directory name on disk.
	Name string

	// Driver is used to control which driver is used
	Driver string

	// User is used to determine which user will run the task. It defaults to
	// the same user the Nomad client is being run as.
	User string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// List of service definitions exposed by the Task
	Services []*Service

	// Vault is used to define the set of Vault policies that this task should
	// have access to.
	Vault *Vault

	// Templates are the set of templates to be rendered for the task.
	Templates []*Template

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Resources is the resources needed by this task
	Resources *Resources

	// DispatchPayload configures how the task retrieves its input from a dispatch
	DispatchPayload *DispatchPayloadConfig

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string

	// KillTimeout is the time between signaling a task that it will be
	// killed and killing it.
	KillTimeout time.Duration

	// LogConfig provides configuration for log rotation
	LogConfig *LogConfig

	// Artifacts is a list of artifacts to download and extract before running
	// the task.
	Artifacts []*TaskArtifact

	// Leader marks the task as the leader within the group. When the leader
	// task exits, other tasks will be gracefully terminated.
	Leader bool

	// ShutdownDelay is the duration of the delay between deregistering a
	// task from Consul and sending it a signal to shutdown. See #2441
	ShutdownDelay time.Duration

	// KillSignal is the kill signal to use for the task. This is an optional
	// specification and defaults to SIGINT
	KillSignal string
}
  3281  
  3282  func (t *Task) Copy() *Task {
  3283  	if t == nil {
  3284  		return nil
  3285  	}
  3286  	nt := new(Task)
  3287  	*nt = *t
  3288  	nt.Env = helper.CopyMapStringString(nt.Env)
  3289  
  3290  	if t.Services != nil {
  3291  		services := make([]*Service, len(nt.Services))
  3292  		for i, s := range nt.Services {
  3293  			services[i] = s.Copy()
  3294  		}
  3295  		nt.Services = services
  3296  	}
  3297  
  3298  	nt.Constraints = CopySliceConstraints(nt.Constraints)
  3299  
  3300  	nt.Vault = nt.Vault.Copy()
  3301  	nt.Resources = nt.Resources.Copy()
  3302  	nt.Meta = helper.CopyMapStringString(nt.Meta)
  3303  	nt.DispatchPayload = nt.DispatchPayload.Copy()
  3304  
  3305  	if t.Artifacts != nil {
  3306  		artifacts := make([]*TaskArtifact, 0, len(t.Artifacts))
  3307  		for _, a := range nt.Artifacts {
  3308  			artifacts = append(artifacts, a.Copy())
  3309  		}
  3310  		nt.Artifacts = artifacts
  3311  	}
  3312  
  3313  	if i, err := copystructure.Copy(nt.Config); err != nil {
  3314  		panic(err.Error())
  3315  	} else {
  3316  		nt.Config = i.(map[string]interface{})
  3317  	}
  3318  
  3319  	if t.Templates != nil {
  3320  		templates := make([]*Template, len(t.Templates))
  3321  		for i, tmpl := range nt.Templates {
  3322  			templates[i] = tmpl.Copy()
  3323  		}
  3324  		nt.Templates = templates
  3325  	}
  3326  
  3327  	return nt
  3328  }
  3329  
  3330  // Canonicalize canonicalizes fields in the task.
  3331  func (t *Task) Canonicalize(job *Job, tg *TaskGroup) {
  3332  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  3333  	// problems since we use reflect DeepEquals.
  3334  	if len(t.Meta) == 0 {
  3335  		t.Meta = nil
  3336  	}
  3337  	if len(t.Config) == 0 {
  3338  		t.Config = nil
  3339  	}
  3340  	if len(t.Env) == 0 {
  3341  		t.Env = nil
  3342  	}
  3343  
  3344  	for _, service := range t.Services {
  3345  		service.Canonicalize(job.Name, tg.Name, t.Name)
  3346  	}
  3347  
  3348  	// If Resources are nil initialize them to defaults, otherwise canonicalize
  3349  	if t.Resources == nil {
  3350  		t.Resources = DefaultResources()
  3351  	} else {
  3352  		t.Resources.Canonicalize()
  3353  	}
  3354  
  3355  	// Set the default timeout if it is not specified.
  3356  	if t.KillTimeout == 0 {
  3357  		t.KillTimeout = DefaultKillTimeout
  3358  	}
  3359  
  3360  	if t.Vault != nil {
  3361  		t.Vault.Canonicalize()
  3362  	}
  3363  
  3364  	for _, template := range t.Templates {
  3365  		template.Canonicalize()
  3366  	}
  3367  }
  3368  
  3369  func (t *Task) GoString() string {
  3370  	return fmt.Sprintf("*%#v", *t)
  3371  }
  3372  
  3373  // Validate is used to sanity check a task
  3374  func (t *Task) Validate(ephemeralDisk *EphemeralDisk) error {
  3375  	var mErr multierror.Error
  3376  	if t.Name == "" {
  3377  		mErr.Errors = append(mErr.Errors, errors.New("Missing task name"))
  3378  	}
  3379  	if strings.ContainsAny(t.Name, `/\`) {
  3380  		// We enforce this so that when creating the directory on disk it will
  3381  		// not have any slashes.
  3382  		mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes"))
  3383  	}
  3384  	if t.Driver == "" {
  3385  		mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
  3386  	}
  3387  	if t.KillTimeout < 0 {
  3388  		mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value"))
  3389  	}
  3390  	if t.ShutdownDelay < 0 {
  3391  		mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value"))
  3392  	}
  3393  
  3394  	// Validate the resources.
  3395  	if t.Resources == nil {
  3396  		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
  3397  	} else {
  3398  		if err := t.Resources.MeetsMinResources(); err != nil {
  3399  			mErr.Errors = append(mErr.Errors, err)
  3400  		}
  3401  
  3402  		// Ensure the task isn't asking for disk resources
  3403  		if t.Resources.DiskMB > 0 {
  3404  			mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level."))
  3405  		}
  3406  	}
  3407  
  3408  	// Validate the log config
  3409  	if t.LogConfig == nil {
  3410  		mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config"))
  3411  	} else if err := t.LogConfig.Validate(); err != nil {
  3412  		mErr.Errors = append(mErr.Errors, err)
  3413  	}
  3414  
  3415  	for idx, constr := range t.Constraints {
  3416  		if err := constr.Validate(); err != nil {
  3417  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  3418  			mErr.Errors = append(mErr.Errors, outer)
  3419  		}
  3420  
  3421  		switch constr.Operand {
  3422  		case ConstraintDistinctHosts, ConstraintDistinctProperty:
  3423  			outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand)
  3424  			mErr.Errors = append(mErr.Errors, outer)
  3425  		}
  3426  	}
  3427  
  3428  	// Validate Services
  3429  	if err := validateServices(t); err != nil {
  3430  		mErr.Errors = append(mErr.Errors, err)
  3431  	}
  3432  
  3433  	if t.LogConfig != nil && ephemeralDisk != nil {
  3434  		logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB)
  3435  		if ephemeralDisk.SizeMB <= logUsage {
  3436  			mErr.Errors = append(mErr.Errors,
  3437  				fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)",
  3438  					logUsage, ephemeralDisk.SizeMB))
  3439  		}
  3440  	}
  3441  
  3442  	for idx, artifact := range t.Artifacts {
  3443  		if err := artifact.Validate(); err != nil {
  3444  			outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err)
  3445  			mErr.Errors = append(mErr.Errors, outer)
  3446  		}
  3447  	}
  3448  
  3449  	if t.Vault != nil {
  3450  		if err := t.Vault.Validate(); err != nil {
  3451  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err))
  3452  		}
  3453  	}
  3454  
  3455  	destinations := make(map[string]int, len(t.Templates))
  3456  	for idx, tmpl := range t.Templates {
  3457  		if err := tmpl.Validate(); err != nil {
  3458  			outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err)
  3459  			mErr.Errors = append(mErr.Errors, outer)
  3460  		}
  3461  
  3462  		if other, ok := destinations[tmpl.DestPath]; ok {
  3463  			outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other)
  3464  			mErr.Errors = append(mErr.Errors, outer)
  3465  		} else {
  3466  			destinations[tmpl.DestPath] = idx + 1
  3467  		}
  3468  	}
  3469  
  3470  	// Validate the dispatch payload block if there
  3471  	if t.DispatchPayload != nil {
  3472  		if err := t.DispatchPayload.Validate(); err != nil {
  3473  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err))
  3474  		}
  3475  	}
  3476  
  3477  	return mErr.ErrorOrNil()
  3478  }
  3479  
  3480  // validateServices takes a task and validates the services within it are valid
  3481  // and reference ports that exist.
  3482  func validateServices(t *Task) error {
  3483  	var mErr multierror.Error
  3484  
  3485  	// Ensure that services don't ask for non-existent ports and their names are
  3486  	// unique.
  3487  	servicePorts := make(map[string]map[string]struct{})
  3488  	addServicePort := func(label, service string) {
  3489  		if _, ok := servicePorts[label]; !ok {
  3490  			servicePorts[label] = map[string]struct{}{}
  3491  		}
  3492  		servicePorts[label][service] = struct{}{}
  3493  	}
  3494  	knownServices := make(map[string]struct{})
  3495  	for i, service := range t.Services {
  3496  		if err := service.Validate(); err != nil {
  3497  			outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err)
  3498  			mErr.Errors = append(mErr.Errors, outer)
  3499  		}
  3500  
  3501  		// Ensure that services with the same name are not being registered for
  3502  		// the same port
  3503  		if _, ok := knownServices[service.Name+service.PortLabel]; ok {
  3504  			mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name))
  3505  		}
  3506  		knownServices[service.Name+service.PortLabel] = struct{}{}
  3507  
  3508  		if service.PortLabel != "" {
  3509  			if service.AddressMode == "driver" {
  3510  				// Numeric port labels are valid for address_mode=driver
  3511  				_, err := strconv.Atoi(service.PortLabel)
  3512  				if err != nil {
  3513  					// Not a numeric port label, add it to list to check
  3514  					addServicePort(service.PortLabel, service.Name)
  3515  				}
  3516  			} else {
  3517  				addServicePort(service.PortLabel, service.Name)
  3518  			}
  3519  		}
  3520  
  3521  		// Ensure that check names are unique and have valid ports
  3522  		knownChecks := make(map[string]struct{})
  3523  		for _, check := range service.Checks {
  3524  			if _, ok := knownChecks[check.Name]; ok {
  3525  				mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name))
  3526  			}
  3527  			knownChecks[check.Name] = struct{}{}
  3528  
  3529  			if !check.RequiresPort() {
  3530  				// No need to continue validating check if it doesn't need a port
  3531  				continue
  3532  			}
  3533  
  3534  			effectivePort := check.PortLabel
  3535  			if effectivePort == "" {
  3536  				// Inherits from service
  3537  				effectivePort = service.PortLabel
  3538  			}
  3539  
  3540  			if effectivePort == "" {
  3541  				mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is missing a port", check.Name))
  3542  				continue
  3543  			}
  3544  
  3545  			isNumeric := false
  3546  			portNumber, err := strconv.Atoi(effectivePort)
  3547  			if err == nil {
  3548  				isNumeric = true
  3549  			}
  3550  
  3551  			// Numeric ports are fine for address_mode = "driver"
  3552  			if check.AddressMode == "driver" && isNumeric {
  3553  				if portNumber <= 0 {
  3554  					mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber))
  3555  				}
  3556  				continue
  3557  			}
  3558  
  3559  			if isNumeric {
  3560  				mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, check.Name, portNumber))
  3561  				continue
  3562  			}
  3563  
  3564  			// PortLabel must exist, report errors by its parent service
  3565  			addServicePort(effectivePort, service.Name)
  3566  		}
  3567  	}
  3568  
  3569  	// Get the set of port labels.
  3570  	portLabels := make(map[string]struct{})
  3571  	if t.Resources != nil {
  3572  		for _, network := range t.Resources.Networks {
  3573  			ports := network.PortLabels()
  3574  			for portLabel := range ports {
  3575  				portLabels[portLabel] = struct{}{}
  3576  			}
  3577  		}
  3578  	}
  3579  
  3580  	// Iterate over a sorted list of keys to make error listings stable
  3581  	keys := make([]string, 0, len(servicePorts))
  3582  	for p := range servicePorts {
  3583  		keys = append(keys, p)
  3584  	}
  3585  	sort.Strings(keys)
  3586  
  3587  	// Ensure all ports referenced in services exist.
  3588  	for _, servicePort := range keys {
  3589  		services := servicePorts[servicePort]
  3590  		_, ok := portLabels[servicePort]
  3591  		if !ok {
  3592  			names := make([]string, 0, len(services))
  3593  			for name := range services {
  3594  				names = append(names, name)
  3595  			}
  3596  
  3597  			// Keep order deterministic
  3598  			sort.Strings(names)
  3599  			joined := strings.Join(names, ", ")
  3600  			err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined)
  3601  			mErr.Errors = append(mErr.Errors, err)
  3602  		}
  3603  	}
  3604  
  3605  	// Ensure address mode is valid
  3606  	return mErr.ErrorOrNil()
  3607  }
  3608  
// Change modes a Template may request. Each value names the action to take on
// the task when the template is re-rendered.
const (
	// TemplateChangeModeNoop marks that no action should be taken if the
	// template is re-rendered.
	TemplateChangeModeNoop = "noop"

	// TemplateChangeModeSignal marks that the task should be signaled if the
	// template is re-rendered.
	TemplateChangeModeSignal = "signal"

	// TemplateChangeModeRestart marks that the task should be restarted if the
	// template is re-rendered.
	TemplateChangeModeRestart = "restart"
)
  3622  
var (
	// TemplateChangeModeInvalidError is the error reported by
	// Template.Validate when ChangeMode is not one of the supported change
	// modes (noop, signal, restart).
	TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart")
)
  3628  
// Template represents a template configuration to be rendered for a given
// task.
type Template struct {
	// SourcePath is the path to the template to be rendered.
	SourcePath string

	// DestPath is the path to where the template should be rendered.
	// Validate requires it to be set and to stay inside the allocation
	// directory.
	DestPath string

	// EmbeddedTmpl stores the raw template. This is useful for smaller
	// templates that are embedded in the job file rather than sent as an
	// artifact.
	EmbeddedTmpl string

	// ChangeMode indicates what should be done if the template is re-rendered.
	// It must be one of the TemplateChangeMode* values.
	ChangeMode string

	// ChangeSignal is the signal that should be sent if the change mode
	// requires it. Canonicalize upper-cases the value.
	ChangeSignal string

	// Splay is used to avoid coordinated restarts of processes by applying a
	// random wait between 0 and the given splay value before signalling the
	// application of a change. Validate rejects negative values.
	Splay time.Duration

	// Perms is the permission the file should be written out with, given as
	// an octal string such as "0644".
	Perms string

	// LeftDelim and RightDelim are optional configurations to control what
	// delimiter is utilized when parsing the template.
	LeftDelim  string
	RightDelim string

	// Envvars enables exposing the template as environment variables
	// instead of as a file. The template must be of the form:
	//
	//	VAR_NAME_1={{ key service/my-key }}
	//	VAR_NAME_2=raw string and {{ env "attr.kernel.name" }}
	//
	// Lines will be split on the initial "=" with the first part being the
	// key name and the second part the value.
	// Empty lines and lines starting with # will be ignored, but to avoid
	// escaping issues #s within lines will not be treated as comments.
	Envvars bool

	// VaultGrace is the grace duration between lease renewal and reacquiring a
	// secret. If the lease of a secret is less than the grace, a new secret is
	// acquired. Validate rejects negative values.
	VaultGrace time.Duration
}
  3678  
  3679  // DefaultTemplate returns a default template.
  3680  func DefaultTemplate() *Template {
  3681  	return &Template{
  3682  		ChangeMode: TemplateChangeModeRestart,
  3683  		Splay:      5 * time.Second,
  3684  		Perms:      "0644",
  3685  	}
  3686  }
  3687  
  3688  func (t *Template) Copy() *Template {
  3689  	if t == nil {
  3690  		return nil
  3691  	}
  3692  	copy := new(Template)
  3693  	*copy = *t
  3694  	return copy
  3695  }
  3696  
  3697  func (t *Template) Canonicalize() {
  3698  	if t.ChangeSignal != "" {
  3699  		t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
  3700  	}
  3701  }
  3702  
  3703  func (t *Template) Validate() error {
  3704  	var mErr multierror.Error
  3705  
  3706  	// Verify we have something to render
  3707  	if t.SourcePath == "" && t.EmbeddedTmpl == "" {
  3708  		multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template"))
  3709  	}
  3710  
  3711  	// Verify we can render somewhere
  3712  	if t.DestPath == "" {
  3713  		multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template"))
  3714  	}
  3715  
  3716  	// Verify the destination doesn't escape
  3717  	escaped, err := PathEscapesAllocDir("task", t.DestPath)
  3718  	if err != nil {
  3719  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
  3720  	} else if escaped {
  3721  		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
  3722  	}
  3723  
  3724  	// Verify a proper change mode
  3725  	switch t.ChangeMode {
  3726  	case TemplateChangeModeNoop, TemplateChangeModeRestart:
  3727  	case TemplateChangeModeSignal:
  3728  		if t.ChangeSignal == "" {
  3729  			multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal"))
  3730  		}
  3731  		if t.Envvars {
  3732  			multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates"))
  3733  		}
  3734  	default:
  3735  		multierror.Append(&mErr, TemplateChangeModeInvalidError)
  3736  	}
  3737  
  3738  	// Verify the splay is positive
  3739  	if t.Splay < 0 {
  3740  		multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value"))
  3741  	}
  3742  
  3743  	// Verify the permissions
  3744  	if t.Perms != "" {
  3745  		if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil {
  3746  			multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err))
  3747  		}
  3748  	}
  3749  
  3750  	if t.VaultGrace.Nanoseconds() < 0 {
  3751  		multierror.Append(&mErr, fmt.Errorf("Vault grace must be greater than zero: %v < 0", t.VaultGrace))
  3752  	}
  3753  
  3754  	return mErr.ErrorOrNil()
  3755  }
  3756  
// Set of possible states for a task, as stored in TaskState.State.
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)
  3763  
// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// State is the current state of the task; one of the TaskState* values.
	State string

	// Failed marks a task as having failed.
	Failed bool

	// Restarts is the number of times the task has restarted.
	Restarts uint64

	// LastRestart is the time the task last restarted. It is updated each time
	// the task restarts.
	LastRestart time.Time

	// StartedAt is the time the task is started. It is updated each time the
	// task starts.
	StartedAt time.Time

	// FinishedAt is the time at which the task transitioned to dead and will
	// not be started again.
	FinishedAt time.Time

	// Events is the series of task events that transition the state of the
	// task. Successful inspects the last entry.
	Events []*TaskEvent
}
  3791  
  3792  func (ts *TaskState) Copy() *TaskState {
  3793  	if ts == nil {
  3794  		return nil
  3795  	}
  3796  	copy := new(TaskState)
  3797  	*copy = *ts
  3798  
  3799  	if ts.Events != nil {
  3800  		copy.Events = make([]*TaskEvent, len(ts.Events))
  3801  		for i, e := range ts.Events {
  3802  			copy.Events[i] = e.Copy()
  3803  		}
  3804  	}
  3805  	return copy
  3806  }
  3807  
  3808  // Successful returns whether a task finished successfully.
  3809  func (ts *TaskState) Successful() bool {
  3810  	l := len(ts.Events)
  3811  	if ts.State != TaskStateDead || l == 0 {
  3812  		return false
  3813  	}
  3814  
  3815  	e := ts.Events[l-1]
  3816  	if e.Type != TaskTerminated {
  3817  		return false
  3818  	}
  3819  
  3820  	return e.ExitCode == 0
  3821  }
  3822  
// Task event types, stored in TaskEvent.Type.
const (
	// TaskSetupFailure indicates that the task could not be started due to a
	// setup failure.
	TaskSetupFailure = "Setup Failure"

	// TaskDriverFailure indicates that the task could not be started due to a
	// failure in the driver.
	TaskDriverFailure = "Driver Failure"

	// TaskReceived signals that the task has been pulled by the client at the
	// given timestamp.
	TaskReceived = "Received"

	// TaskFailedValidation indicates the task was invalid and as such was not
	// run.
	TaskFailedValidation = "Failed Validation"

	// TaskStarted signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// TaskTerminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// TaskKilling indicates a kill signal has been sent to the task.
	TaskKilling = "Killing"

	// TaskKilled indicates a user has killed the task.
	TaskKilled = "Killed"

	// TaskRestarting indicates that task terminated and is being restarted.
	TaskRestarting = "Restarting"

	// TaskNotRestarting indicates that the task has failed and is not being
	// restarted because it has exceeded its restart policy.
	TaskNotRestarting = "Not Restarting"

	// TaskRestartSignal indicates that the task has been signalled to be
	// restarted.
	TaskRestartSignal = "Restart Signaled"

	// TaskSignaling indicates that the task is being signalled.
	TaskSignaling = "Signaling"

	// TaskDownloadingArtifacts means the task is downloading the artifacts
	// specified in the task.
	TaskDownloadingArtifacts = "Downloading Artifacts"

	// TaskArtifactDownloadFailed indicates that downloading the artifacts
	// failed.
	TaskArtifactDownloadFailed = "Failed Artifact Download"

	// TaskBuildingTaskDir indicates that the task directory/chroot is being
	// built.
	TaskBuildingTaskDir = "Building Task Directory"

	// TaskSetup indicates the task runner is setting up the task environment.
	TaskSetup = "Task Setup"

	// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
	// exceeded the requested disk resources.
	TaskDiskExceeded = "Disk Resources Exceeded"

	// TaskSiblingFailed indicates that a sibling task in the task group has
	// failed.
	TaskSiblingFailed = "Sibling Task Failed"

	// TaskDriverMessage is an informational event message emitted by
	// drivers such as when they're performing a long running action like
	// downloading an image.
	TaskDriverMessage = "Driver"

	// TaskLeaderDead indicates that the leader task within the group has
	// finished.
	TaskLeaderDead = "Leader Task Dead"
)
  3898  
// TaskEvent is an event that affects the state of a task and contains
// meta-data appropriate to the event's type. The Set* methods keep the
// deprecated per-type fields and the Details map in step.
type TaskEvent struct {
	Type string // One of the task event type constants (TaskStarted, ...).
	Time int64  // Unix Nanosecond timestamp

	Message string // A possible message explaining the termination of the task.

	// DisplayMessage is a human friendly message about the event. It is
	// filled in by PopulateEventDisplayMessage when empty.
	DisplayMessage string

	// Details is a map with annotated info about the event. NewTaskEvent
	// allocates it.
	Details map[string]string

	// DEPRECATION NOTICE: The following fields are deprecated and will be removed
	// in a future release. Field values are available in the Details map.

	// FailsTask marks whether this event fails the task.
	// Deprecated, use Details["fails_task"] to access this.
	FailsTask bool

	// Restart fields.
	// Deprecated, use Details["restart_reason"] to access this.
	RestartReason string

	// Setup Failure fields.
	// Deprecated, use Details["setup_error"] to access this.
	SetupError string

	// Driver Failure fields.
	// Deprecated, use Details["driver_error"] to access this.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.

	// Deprecated, use Details["exit_code"] to access this.
	ExitCode int // The exit code of the task.

	// Deprecated, use Details["signal"] to access this.
	Signal int // The signal that terminated the task.

	// Killing fields
	// Deprecated, use Details["kill_timeout"] to access this.
	KillTimeout time.Duration

	// Task Killed Fields.
	// Deprecated, use Details["kill_error"] to access this.
	KillError string // Error killing the task.

	// KillReason is the reason the task was killed
	// Deprecated, use Details["kill_reason"] to access this.
	KillReason string

	// TaskRestarting fields.
	// Deprecated, use Details["start_delay"] to access this.
	StartDelay int64 // The sleep period before restarting the task, in nanoseconds.

	// Artifact Download fields
	// Deprecated, use Details["download_error"] to access this.
	DownloadError string // Error downloading artifacts

	// Validation fields
	// Deprecated, use Details["validation_error"] to access this.
	ValidationError string // Validation error

	// The maximum allowed task disk size.
	// Deprecated, use Details["disk_limit"] to access this.
	DiskLimit int64

	// Name of the sibling task that caused termination of the task that
	// the TaskEvent refers to.
	// Deprecated, use Details["failed_sibling"] to access this.
	FailedSibling string

	// VaultError is the error from token renewal
	// Deprecated, use Details["vault_renewal_error"] to access this.
	VaultError string

	// TaskSignalReason indicates the reason the task is being signalled.
	// Deprecated, use Details["task_signal_reason"] to access this.
	TaskSignalReason string

	// TaskSignal is the signal that was sent to the task
	// Deprecated, use Details["task_signal"] to access this.
	TaskSignal string

	// DriverMessage indicates a driver action being taken.
	// Deprecated, use Details["driver_message"] to access this.
	DriverMessage string

	// GenericSource is the source of a message.
	// Deprecated, is redundant with event type.
	GenericSource string
}
  3993  
  3994  func (event *TaskEvent) PopulateEventDisplayMessage() {
  3995  	// Build up the description based on the event type.
  3996  	if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why?
  3997  		return
  3998  	}
  3999  
  4000  	if event.DisplayMessage != "" {
  4001  		return
  4002  	}
  4003  
  4004  	var desc string
  4005  	switch event.Type {
  4006  	case TaskSetup:
  4007  		desc = event.Message
  4008  	case TaskStarted:
  4009  		desc = "Task started by client"
  4010  	case TaskReceived:
  4011  		desc = "Task received by client"
  4012  	case TaskFailedValidation:
  4013  		if event.ValidationError != "" {
  4014  			desc = event.ValidationError
  4015  		} else {
  4016  			desc = "Validation of task failed"
  4017  		}
  4018  	case TaskSetupFailure:
  4019  		if event.SetupError != "" {
  4020  			desc = event.SetupError
  4021  		} else {
  4022  			desc = "Task setup failed"
  4023  		}
  4024  	case TaskDriverFailure:
  4025  		if event.DriverError != "" {
  4026  			desc = event.DriverError
  4027  		} else {
  4028  			desc = "Failed to start task"
  4029  		}
  4030  	case TaskDownloadingArtifacts:
  4031  		desc = "Client is downloading artifacts"
  4032  	case TaskArtifactDownloadFailed:
  4033  		if event.DownloadError != "" {
  4034  			desc = event.DownloadError
  4035  		} else {
  4036  			desc = "Failed to download artifacts"
  4037  		}
  4038  	case TaskKilling:
  4039  		if event.KillReason != "" {
  4040  			desc = fmt.Sprintf("Killing task: %v", event.KillReason)
  4041  		} else if event.KillTimeout != 0 {
  4042  			desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout)
  4043  		} else {
  4044  			desc = "Sent interrupt"
  4045  		}
  4046  	case TaskKilled:
  4047  		if event.KillError != "" {
  4048  			desc = event.KillError
  4049  		} else {
  4050  			desc = "Task successfully killed"
  4051  		}
  4052  	case TaskTerminated:
  4053  		var parts []string
  4054  		parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode))
  4055  
  4056  		if event.Signal != 0 {
  4057  			parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal))
  4058  		}
  4059  
  4060  		if event.Message != "" {
  4061  			parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message))
  4062  		}
  4063  		desc = strings.Join(parts, ", ")
  4064  	case TaskRestarting:
  4065  		in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay))
  4066  		if event.RestartReason != "" && event.RestartReason != ReasonWithinPolicy {
  4067  			desc = fmt.Sprintf("%s - %s", event.RestartReason, in)
  4068  		} else {
  4069  			desc = in
  4070  		}
  4071  	case TaskNotRestarting:
  4072  		if event.RestartReason != "" {
  4073  			desc = event.RestartReason
  4074  		} else {
  4075  			desc = "Task exceeded restart policy"
  4076  		}
  4077  	case TaskSiblingFailed:
  4078  		if event.FailedSibling != "" {
  4079  			desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling)
  4080  		} else {
  4081  			desc = "Task's sibling failed"
  4082  		}
  4083  	case TaskSignaling:
  4084  		sig := event.TaskSignal
  4085  		reason := event.TaskSignalReason
  4086  
  4087  		if sig == "" && reason == "" {
  4088  			desc = "Task being sent a signal"
  4089  		} else if sig == "" {
  4090  			desc = reason
  4091  		} else if reason == "" {
  4092  			desc = fmt.Sprintf("Task being sent signal %v", sig)
  4093  		} else {
  4094  			desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason)
  4095  		}
  4096  	case TaskRestartSignal:
  4097  		if event.RestartReason != "" {
  4098  			desc = event.RestartReason
  4099  		} else {
  4100  			desc = "Task signaled to restart"
  4101  		}
  4102  	case TaskDriverMessage:
  4103  		desc = event.DriverMessage
  4104  	case TaskLeaderDead:
  4105  		desc = "Leader Task in Group dead"
  4106  	default:
  4107  		desc = event.Message
  4108  	}
  4109  
  4110  	event.DisplayMessage = desc
  4111  }
  4112  
  4113  func (te *TaskEvent) GoString() string {
  4114  	return fmt.Sprintf("%v - %v", te.Time, te.Type)
  4115  }
  4116  
  4117  // SetMessage sets the message of TaskEvent
  4118  func (te *TaskEvent) SetMessage(msg string) *TaskEvent {
  4119  	te.Message = msg
  4120  	te.Details["message"] = msg
  4121  	return te
  4122  }
  4123  
  4124  func (te *TaskEvent) Copy() *TaskEvent {
  4125  	if te == nil {
  4126  		return nil
  4127  	}
  4128  	copy := new(TaskEvent)
  4129  	*copy = *te
  4130  	return copy
  4131  }
  4132  
  4133  func NewTaskEvent(event string) *TaskEvent {
  4134  	return &TaskEvent{
  4135  		Type:    event,
  4136  		Time:    time.Now().UnixNano(),
  4137  		Details: make(map[string]string),
  4138  	}
  4139  }
  4140  
  4141  // SetSetupError is used to store an error that occurred while setting up the
  4142  // task
  4143  func (e *TaskEvent) SetSetupError(err error) *TaskEvent {
  4144  	if err != nil {
  4145  		e.SetupError = err.Error()
  4146  		e.Details["setup_error"] = err.Error()
  4147  	}
  4148  	return e
  4149  }
  4150  
  4151  func (e *TaskEvent) SetFailsTask() *TaskEvent {
  4152  	e.FailsTask = true
  4153  	e.Details["fails_task"] = "true"
  4154  	return e
  4155  }
  4156  
  4157  func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
  4158  	if err != nil {
  4159  		e.DriverError = err.Error()
  4160  		e.Details["driver_error"] = err.Error()
  4161  	}
  4162  	return e
  4163  }
  4164  
  4165  func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
  4166  	e.ExitCode = c
  4167  	e.Details["exit_code"] = fmt.Sprintf("%d", c)
  4168  	return e
  4169  }
  4170  
  4171  func (e *TaskEvent) SetSignal(s int) *TaskEvent {
  4172  	e.Signal = s
  4173  	e.Details["signal"] = fmt.Sprintf("%d", s)
  4174  	return e
  4175  }
  4176  
  4177  func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
  4178  	if err != nil {
  4179  		e.Message = err.Error()
  4180  		e.Details["exit_message"] = err.Error()
  4181  	}
  4182  	return e
  4183  }
  4184  
  4185  func (e *TaskEvent) SetKillError(err error) *TaskEvent {
  4186  	if err != nil {
  4187  		e.KillError = err.Error()
  4188  		e.Details["kill_error"] = err.Error()
  4189  	}
  4190  	return e
  4191  }
  4192  
  4193  func (e *TaskEvent) SetKillReason(r string) *TaskEvent {
  4194  	e.KillReason = r
  4195  	e.Details["kill_reason"] = r
  4196  	return e
  4197  }
  4198  
  4199  func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent {
  4200  	e.StartDelay = int64(delay)
  4201  	e.Details["start_delay"] = fmt.Sprintf("%d", delay)
  4202  	return e
  4203  }
  4204  
  4205  func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent {
  4206  	e.RestartReason = reason
  4207  	e.Details["restart_reason"] = reason
  4208  	return e
  4209  }
  4210  
  4211  func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent {
  4212  	e.TaskSignalReason = r
  4213  	e.Details["task_signal_reason"] = r
  4214  	return e
  4215  }
  4216  
  4217  func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent {
  4218  	e.TaskSignal = s.String()
  4219  	e.Details["task_signal"] = s.String()
  4220  	return e
  4221  }
  4222  
  4223  func (e *TaskEvent) SetDownloadError(err error) *TaskEvent {
  4224  	if err != nil {
  4225  		e.DownloadError = err.Error()
  4226  		e.Details["download_error"] = err.Error()
  4227  	}
  4228  	return e
  4229  }
  4230  
  4231  func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
  4232  	if err != nil {
  4233  		e.ValidationError = err.Error()
  4234  		e.Details["validation_error"] = err.Error()
  4235  	}
  4236  	return e
  4237  }
  4238  
  4239  func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent {
  4240  	e.KillTimeout = timeout
  4241  	e.Details["kill_timeout"] = timeout.String()
  4242  	return e
  4243  }
  4244  
  4245  func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent {
  4246  	e.DiskLimit = limit
  4247  	e.Details["disk_limit"] = fmt.Sprintf("%d", limit)
  4248  	return e
  4249  }
  4250  
  4251  func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent {
  4252  	e.FailedSibling = sibling
  4253  	e.Details["failed_sibling"] = sibling
  4254  	return e
  4255  }
  4256  
  4257  func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent {
  4258  	if err != nil {
  4259  		e.VaultError = err.Error()
  4260  		e.Details["vault_renewal_error"] = err.Error()
  4261  	}
  4262  	return e
  4263  }
  4264  
  4265  func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent {
  4266  	e.DriverMessage = m
  4267  	e.Details["driver_message"] = m
  4268  	return e
  4269  }
  4270  
// TaskArtifact is an artifact to download before running the task.
type TaskArtifact struct {
	// GetterSource is the source to download an artifact using go-getter.
	// Validate requires it to be non-empty.
	GetterSource string

	// GetterOptions are options to use when downloading the artifact using
	// go-getter. Validate inspects the "checksum" option, which must have the
	// form "type:value".
	GetterOptions map[string]string

	// GetterMode is the go-getter.ClientMode for fetching resources.
	// Defaults to "any" but can be set to "file" or "dir".
	GetterMode string

	// RelativeDest is the download destination given relative to the task's
	// directory.
	RelativeDest string
}
  4288  
  4289  func (ta *TaskArtifact) Copy() *TaskArtifact {
  4290  	if ta == nil {
  4291  		return nil
  4292  	}
  4293  	nta := new(TaskArtifact)
  4294  	*nta = *ta
  4295  	nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions)
  4296  	return nta
  4297  }
  4298  
  4299  func (ta *TaskArtifact) GoString() string {
  4300  	return fmt.Sprintf("%+v", ta)
  4301  }
  4302  
  4303  // PathEscapesAllocDir returns if the given path escapes the allocation
  4304  // directory. The prefix allows adding a prefix if the path will be joined, for
  4305  // example a "task/local" prefix may be provided if the path will be joined
  4306  // against that prefix.
  4307  func PathEscapesAllocDir(prefix, path string) (bool, error) {
  4308  	// Verify the destination doesn't escape the tasks directory
  4309  	alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/"))
  4310  	if err != nil {
  4311  		return false, err
  4312  	}
  4313  	abs, err := filepath.Abs(filepath.Join(alloc, prefix, path))
  4314  	if err != nil {
  4315  		return false, err
  4316  	}
  4317  	rel, err := filepath.Rel(alloc, abs)
  4318  	if err != nil {
  4319  		return false, err
  4320  	}
  4321  
  4322  	return strings.HasPrefix(rel, ".."), nil
  4323  }
  4324  
  4325  func (ta *TaskArtifact) Validate() error {
  4326  	// Verify the source
  4327  	var mErr multierror.Error
  4328  	if ta.GetterSource == "" {
  4329  		mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified"))
  4330  	}
  4331  
  4332  	switch ta.GetterMode {
  4333  	case "":
  4334  		// Default to any
  4335  		ta.GetterMode = GetterModeAny
  4336  	case GetterModeAny, GetterModeFile, GetterModeDir:
  4337  		// Ok
  4338  	default:
  4339  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s",
  4340  			ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir))
  4341  	}
  4342  
  4343  	escaped, err := PathEscapesAllocDir("task", ta.RelativeDest)
  4344  	if err != nil {
  4345  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
  4346  	} else if escaped {
  4347  		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
  4348  	}
  4349  
  4350  	// Verify the checksum
  4351  	if check, ok := ta.GetterOptions["checksum"]; ok {
  4352  		check = strings.TrimSpace(check)
  4353  		if check == "" {
  4354  			mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value cannot be empty"))
  4355  			return mErr.ErrorOrNil()
  4356  		}
  4357  
  4358  		parts := strings.Split(check, ":")
  4359  		if l := len(parts); l != 2 {
  4360  			mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check))
  4361  			return mErr.ErrorOrNil()
  4362  		}
  4363  
  4364  		checksumVal := parts[1]
  4365  		checksumBytes, err := hex.DecodeString(checksumVal)
  4366  		if err != nil {
  4367  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err))
  4368  			return mErr.ErrorOrNil()
  4369  		}
  4370  
  4371  		checksumType := parts[0]
  4372  		expectedLength := 0
  4373  		switch checksumType {
  4374  		case "md5":
  4375  			expectedLength = md5.Size
  4376  		case "sha1":
  4377  			expectedLength = sha1.Size
  4378  		case "sha256":
  4379  			expectedLength = sha256.Size
  4380  		case "sha512":
  4381  			expectedLength = sha512.Size
  4382  		default:
  4383  			mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType))
  4384  			return mErr.ErrorOrNil()
  4385  		}
  4386  
  4387  		if len(checksumBytes) != expectedLength {
  4388  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal))
  4389  			return mErr.ErrorOrNil()
  4390  		}
  4391  	}
  4392  
  4393  	return mErr.ErrorOrNil()
  4394  }
  4395  
// Named constraint operands. Constraint.Validate applies operand-specific
// checks to each of these: distinct_hosts needs no LTarget, set_contains needs
// an RTarget, regexp and version need an RTarget that compiles/parses, and
// distinct_property accepts an optional RTarget holding a count of at least 1.
const (
	ConstraintDistinctProperty = "distinct_property"
	ConstraintDistinctHosts    = "distinct_hosts"
	ConstraintRegex            = "regexp"
	ConstraintVersion          = "version"
	ConstraintSetContains      = "set_contains"
)
  4403  
// Constraint is used to restrict placement options. It relates a left-hand
// target to a right-hand target through an operand.
type Constraint struct {
	LTarget string // Left-hand target
	RTarget string // Right-hand target
	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
	str     string // Memoized string, filled in by String on first use
}
  4411  
  4412  // Equal checks if two constraints are equal
  4413  func (c *Constraint) Equal(o *Constraint) bool {
  4414  	return c.LTarget == o.LTarget &&
  4415  		c.RTarget == o.RTarget &&
  4416  		c.Operand == o.Operand
  4417  }
  4418  
  4419  func (c *Constraint) Copy() *Constraint {
  4420  	if c == nil {
  4421  		return nil
  4422  	}
  4423  	nc := new(Constraint)
  4424  	*nc = *c
  4425  	return nc
  4426  }
  4427  
  4428  func (c *Constraint) String() string {
  4429  	if c.str != "" {
  4430  		return c.str
  4431  	}
  4432  	c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
  4433  	return c.str
  4434  }
  4435  
  4436  func (c *Constraint) Validate() error {
  4437  	var mErr multierror.Error
  4438  	if c.Operand == "" {
  4439  		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
  4440  	}
  4441  
  4442  	// requireLtarget specifies whether the constraint requires an LTarget to be
  4443  	// provided.
  4444  	requireLtarget := true
  4445  
  4446  	// Perform additional validation based on operand
  4447  	switch c.Operand {
  4448  	case ConstraintDistinctHosts:
  4449  		requireLtarget = false
  4450  	case ConstraintSetContains:
  4451  		if c.RTarget == "" {
  4452  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget"))
  4453  		}
  4454  	case ConstraintRegex:
  4455  		if _, err := regexp.Compile(c.RTarget); err != nil {
  4456  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
  4457  		}
  4458  	case ConstraintVersion:
  4459  		if _, err := version.NewConstraint(c.RTarget); err != nil {
  4460  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
  4461  		}
  4462  	case ConstraintDistinctProperty:
  4463  		// If a count is set, make sure it is convertible to a uint64
  4464  		if c.RTarget != "" {
  4465  			count, err := strconv.ParseUint(c.RTarget, 10, 64)
  4466  			if err != nil {
  4467  				mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err))
  4468  			} else if count < 1 {
  4469  				mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count))
  4470  			}
  4471  		}
  4472  	case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=":
  4473  		if c.RTarget == "" {
  4474  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand))
  4475  		}
  4476  	default:
  4477  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand))
  4478  	}
  4479  
  4480  	// Ensure we have an LTarget for the constraints that need one
  4481  	if requireLtarget && c.LTarget == "" {
  4482  		mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint"))
  4483  	}
  4484  
  4485  	return mErr.ErrorOrNil()
  4486  }
  4487  
// EphemeralDisk is an ephemeral disk object. It describes the local disk
// requested for an allocation and how that disk is treated when the
// allocation is replaced.
type EphemeralDisk struct {
	// Sticky indicates whether the allocation is sticky to a node
	Sticky bool

	// SizeMB is the size of the local disk. Validate rejects values below 10.
	SizeMB int

	// Migrate determines if Nomad client should migrate the allocation dir for
	// sticky allocations
	Migrate bool
}
  4500  
  4501  // DefaultEphemeralDisk returns a EphemeralDisk with default configurations
  4502  func DefaultEphemeralDisk() *EphemeralDisk {
  4503  	return &EphemeralDisk{
  4504  		SizeMB: 300,
  4505  	}
  4506  }
  4507  
  4508  // Validate validates EphemeralDisk
  4509  func (d *EphemeralDisk) Validate() error {
  4510  	if d.SizeMB < 10 {
  4511  		return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB)
  4512  	}
  4513  	return nil
  4514  }
  4515  
  4516  // Copy copies the EphemeralDisk struct and returns a new one
  4517  func (d *EphemeralDisk) Copy() *EphemeralDisk {
  4518  	ld := new(EphemeralDisk)
  4519  	*ld = *d
  4520  	return ld
  4521  }
  4522  
// Change modes accepted in Vault.ChangeMode. Vault.Validate reports any other
// value as an unknown change mode.
const (
	// VaultChangeModeNoop takes no action when a new token is retrieved.
	VaultChangeModeNoop = "noop"

	// VaultChangeModeSignal signals the task when a new token is retrieved.
	// Vault.Validate requires ChangeSignal to be set in this mode.
	VaultChangeModeSignal = "signal"

	// VaultChangeModeRestart restarts the task when a new token is retrieved.
	// It is the mode used by DefaultVaultBlock.
	VaultChangeModeRestart = "restart"
)
  4533  
// Vault stores the set of permissions a task needs access to from Vault.
type Vault struct {
	// Policies is the set of policies that the task needs access to.
	// Validate requires at least one entry and rejects the "root" policy.
	Policies []string

	// Env marks whether the Vault Token should be exposed as an environment
	// variable
	Env bool

	// ChangeMode is used to configure the task's behavior when the Vault
	// token changes because the original token could not be renewed in time.
	// It must be one of the VaultChangeMode* constants.
	ChangeMode string

	// ChangeSignal is the signal sent to the task when a new token is
	// retrieved. This is only valid when using the signal change mode.
	// Canonicalize upper-cases the value.
	ChangeSignal string
}
  4551  
  4552  func DefaultVaultBlock() *Vault {
  4553  	return &Vault{
  4554  		Env:        true,
  4555  		ChangeMode: VaultChangeModeRestart,
  4556  	}
  4557  }
  4558  
  4559  // Copy returns a copy of this Vault block.
  4560  func (v *Vault) Copy() *Vault {
  4561  	if v == nil {
  4562  		return nil
  4563  	}
  4564  
  4565  	nv := new(Vault)
  4566  	*nv = *v
  4567  	return nv
  4568  }
  4569  
  4570  func (v *Vault) Canonicalize() {
  4571  	if v.ChangeSignal != "" {
  4572  		v.ChangeSignal = strings.ToUpper(v.ChangeSignal)
  4573  	}
  4574  }
  4575  
  4576  // Validate returns if the Vault block is valid.
  4577  func (v *Vault) Validate() error {
  4578  	if v == nil {
  4579  		return nil
  4580  	}
  4581  
  4582  	var mErr multierror.Error
  4583  	if len(v.Policies) == 0 {
  4584  		multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty"))
  4585  	}
  4586  
  4587  	for _, p := range v.Policies {
  4588  		if p == "root" {
  4589  			multierror.Append(&mErr, fmt.Errorf("Can not specify \"root\" policy"))
  4590  		}
  4591  	}
  4592  
  4593  	switch v.ChangeMode {
  4594  	case VaultChangeModeSignal:
  4595  		if v.ChangeSignal == "" {
  4596  			multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal))
  4597  		}
  4598  	case VaultChangeModeNoop, VaultChangeModeRestart:
  4599  	default:
  4600  		multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode))
  4601  	}
  4602  
  4603  	return mErr.ErrorOrNil()
  4604  }
  4605  
const (
	// DeploymentStatuses are the various states a deployment can be in.
	// Deployment.Active treats running and paused as active; every other
	// status is terminal.
	DeploymentStatusRunning    = "running"
	DeploymentStatusPaused     = "paused"
	DeploymentStatusFailed     = "failed"
	DeploymentStatusSuccessful = "successful"
	DeploymentStatusCancelled  = "cancelled"

	// DeploymentStatusDescriptions are the various descriptions of the states a
	// deployment can be in.
	DeploymentStatusDescriptionRunning               = "Deployment is running"
	DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires promotion"
	DeploymentStatusDescriptionPaused                = "Deployment is paused"
	DeploymentStatusDescriptionSuccessful            = "Deployment completed successfully"
	DeploymentStatusDescriptionStoppedJob            = "Cancelled because job is stopped"
	DeploymentStatusDescriptionNewerJob              = "Cancelled due to newer version of job"
	DeploymentStatusDescriptionFailedAllocations     = "Failed due to unhealthy allocations"
	DeploymentStatusDescriptionFailedByUser          = "Deployment marked as failed"
)
  4625  
  4626  // DeploymentStatusDescriptionRollback is used to get the status description of
  4627  // a deployment when rolling back to an older job.
  4628  func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string {
  4629  	return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion)
  4630  }
  4631  
  4632  // DeploymentStatusDescriptionRollbackNoop is used to get the status description of
  4633  // a deployment when rolling back is not possible because it has the same specification
  4634  func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string {
  4635  	return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion)
  4636  }
  4637  
  4638  // DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of
  4639  // a deployment when there is no target to rollback to but autorevet is desired.
  4640  func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string {
  4641  	return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription)
  4642  }
  4643  
// Deployment is the object that represents a job deployment which is used to
// transition a job between versions.
type Deployment struct {
	// ID is a generated UUID for the deployment
	ID string

	// Namespace is the namespace the deployment is created in
	Namespace string

	// JobID is the job the deployment is created for
	JobID string

	// JobVersion is the version of the job at which the deployment is tracking
	JobVersion uint64

	// JobModifyIndex is the modify index of the job at which the deployment is tracking
	JobModifyIndex uint64

	// JobCreateIndex is the create index of the job which the deployment is
	// tracking. It is needed so that if the job gets stopped and reran we can
	// present the correct list of deployments for the job and not old ones.
	JobCreateIndex uint64

	// TaskGroups is the set of task groups affected by the deployment and their
	// current deployment status, keyed by task group name.
	TaskGroups map[string]*DeploymentState

	// Status is the status of the deployment; one of the DeploymentStatus*
	// constants.
	Status string

	// StatusDescription allows a human readable description of the deployment
	// status.
	StatusDescription string

	// NOTE(review): presumably the Raft indexes at which the deployment was
	// created and last modified, as on Allocation — confirm.
	CreateIndex uint64
	ModifyIndex uint64
}
  4681  
  4682  // NewDeployment creates a new deployment given the job.
  4683  func NewDeployment(job *Job) *Deployment {
  4684  	return &Deployment{
  4685  		ID:                uuid.Generate(),
  4686  		Namespace:         job.Namespace,
  4687  		JobID:             job.ID,
  4688  		JobVersion:        job.Version,
  4689  		JobModifyIndex:    job.ModifyIndex,
  4690  		JobCreateIndex:    job.CreateIndex,
  4691  		Status:            DeploymentStatusRunning,
  4692  		StatusDescription: DeploymentStatusDescriptionRunning,
  4693  		TaskGroups:        make(map[string]*DeploymentState, len(job.TaskGroups)),
  4694  	}
  4695  }
  4696  
  4697  func (d *Deployment) Copy() *Deployment {
  4698  	if d == nil {
  4699  		return nil
  4700  	}
  4701  
  4702  	c := &Deployment{}
  4703  	*c = *d
  4704  
  4705  	c.TaskGroups = nil
  4706  	if l := len(d.TaskGroups); d.TaskGroups != nil {
  4707  		c.TaskGroups = make(map[string]*DeploymentState, l)
  4708  		for tg, s := range d.TaskGroups {
  4709  			c.TaskGroups[tg] = s.Copy()
  4710  		}
  4711  	}
  4712  
  4713  	return c
  4714  }
  4715  
  4716  // Active returns whether the deployment is active or terminal.
  4717  func (d *Deployment) Active() bool {
  4718  	switch d.Status {
  4719  	case DeploymentStatusRunning, DeploymentStatusPaused:
  4720  		return true
  4721  	default:
  4722  		return false
  4723  	}
  4724  }
  4725  
  4726  // GetID is a helper for getting the ID when the object may be nil
  4727  func (d *Deployment) GetID() string {
  4728  	if d == nil {
  4729  		return ""
  4730  	}
  4731  	return d.ID
  4732  }
  4733  
  4734  // HasPlacedCanaries returns whether the deployment has placed canaries
  4735  func (d *Deployment) HasPlacedCanaries() bool {
  4736  	if d == nil || len(d.TaskGroups) == 0 {
  4737  		return false
  4738  	}
  4739  	for _, group := range d.TaskGroups {
  4740  		if len(group.PlacedCanaries) != 0 {
  4741  			return true
  4742  		}
  4743  	}
  4744  	return false
  4745  }
  4746  
  4747  // RequiresPromotion returns whether the deployment requires promotion to
  4748  // continue
  4749  func (d *Deployment) RequiresPromotion() bool {
  4750  	if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning {
  4751  		return false
  4752  	}
  4753  	for _, group := range d.TaskGroups {
  4754  		if group.DesiredCanaries > 0 && !group.Promoted {
  4755  			return true
  4756  		}
  4757  	}
  4758  	return false
  4759  }
  4760  
  4761  func (d *Deployment) GoString() string {
  4762  	base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription)
  4763  	for group, state := range d.TaskGroups {
  4764  		base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state)
  4765  	}
  4766  	return base
  4767  }
  4768  
// DeploymentState tracks the state of a deployment for a given task group.
type DeploymentState struct {
	// AutoRevert marks whether the task group has indicated the job should be
	// reverted on failure
	AutoRevert bool

	// Promoted marks whether the canaries have been promoted
	Promoted bool

	// PlacedCanaries is the set of placed canary allocations.
	// NOTE(review): entries are presumably allocation IDs — confirm.
	PlacedCanaries []string

	// DesiredCanaries is the number of canaries that should be created.
	DesiredCanaries int

	// DesiredTotal is the total number of allocations that should be created as
	// part of the deployment.
	DesiredTotal int

	// PlacedAllocs is the number of allocations that have been placed
	PlacedAllocs int

	// HealthyAllocs is the number of allocations that have been marked healthy.
	HealthyAllocs int

	// UnhealthyAllocs is the number of allocations that have been marked as
	// unhealthy.
	UnhealthyAllocs int
}
  4797  
  4798  func (d *DeploymentState) GoString() string {
  4799  	base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal)
  4800  	base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries)
  4801  	base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries)
  4802  	base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted)
  4803  	base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs)
  4804  	base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs)
  4805  	base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs)
  4806  	base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert)
  4807  	return base
  4808  }
  4809  
  4810  func (d *DeploymentState) Copy() *DeploymentState {
  4811  	c := &DeploymentState{}
  4812  	*c = *d
  4813  	c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries)
  4814  	return c
  4815  }
  4816  
// DeploymentStatusUpdate is used to update the status of a given deployment.
// It names the deployment and carries the new status together with its
// human-readable description.
type DeploymentStatusUpdate struct {
	// DeploymentID is the ID of the deployment to update
	DeploymentID string

	// Status is the new status of the deployment.
	Status string

	// StatusDescription is the new status description of the deployment.
	StatusDescription string
}
  4828  
// Values for Allocation.DesiredStatus. Allocation.TerminalStatus treats stop
// and evict as terminal.
const (
	AllocDesiredStatusRun   = "run"   // Allocation should run
	AllocDesiredStatusStop  = "stop"  // Allocation should stop
	AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted
)
  4834  
// Values for Allocation.ClientStatus. Allocation.Terminated treats complete,
// failed and lost as terminal on the client.
const (
	AllocClientStatusPending  = "pending"
	AllocClientStatusRunning  = "running"
	AllocClientStatusComplete = "complete"
	AllocClientStatusFailed   = "failed"
	AllocClientStatusLost     = "lost"
)
  4842  
// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// ID of the allocation (UUID)
	ID string

	// Namespace is the namespace the allocation is created in
	Namespace string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation. Index parses the allocation
	// index out of this name.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// JobID is the ID of the parent job and Job is the parent job of the task
	// group being allocated. Job is copied at allocation time to avoid issues
	// if the job definition is updated.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// Resources is the total set of resources allocated as part
	// of this allocation of the task group.
	Resources *Resources

	// SharedResources are the resources that are shared by all the tasks in an
	// allocation
	SharedResources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources.
	TaskResources map[string]*Resources

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// DesiredStatus is the desired status of the allocation on the client; one
	// of the AllocDesiredStatus* constants.
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	// about the desired status.
	DesiredDescription string

	// ClientStatus is the status of the allocation on the client; one of the
	// AllocClientStatus* constants.
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	// about the client status.
	ClientDescription string

	// TaskStates stores the state of each task, keyed by task name.
	TaskStates map[string]*TaskState

	// PreviousAllocation is the allocation that this allocation is replacing.
	// ShouldMigrate never migrates when it is empty.
	PreviousAllocation string

	// DeploymentID identifies an allocation as being created from a
	// particular deployment
	DeploymentID string

	// DeploymentStatus captures the status of the allocation as part of the
	// given deployment
	DeploymentStatus *AllocDeploymentStatus

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64

	// AllocModifyIndex is not updated when the client updates allocations. This
	// lets the client pull only the allocs updated by the server.
	AllocModifyIndex uint64

	// CreateTime is the time the allocation has finished scheduling and been
	// verified by the plan applier.
	CreateTime int64

	// ModifyTime is the time the allocation was last updated.
	ModifyTime int64
}
  4925  
  4926  // Index returns the index of the allocation. If the allocation is from a task
  4927  // group with count greater than 1, there will be multiple allocations for it.
  4928  func (a *Allocation) Index() uint {
  4929  	l := len(a.Name)
  4930  	prefix := len(a.JobID) + len(a.TaskGroup) + 2
  4931  	if l <= 3 || l <= prefix {
  4932  		return uint(0)
  4933  	}
  4934  
  4935  	strNum := a.Name[prefix : len(a.Name)-1]
  4936  	num, _ := strconv.Atoi(strNum)
  4937  	return uint(num)
  4938  }
  4939  
  4940  func (a *Allocation) Copy() *Allocation {
  4941  	return a.copyImpl(true)
  4942  }
  4943  
  4944  // Copy provides a copy of the allocation but doesn't deep copy the job
  4945  func (a *Allocation) CopySkipJob() *Allocation {
  4946  	return a.copyImpl(false)
  4947  }
  4948  
  4949  func (a *Allocation) copyImpl(job bool) *Allocation {
  4950  	if a == nil {
  4951  		return nil
  4952  	}
  4953  	na := new(Allocation)
  4954  	*na = *a
  4955  
  4956  	if job {
  4957  		na.Job = na.Job.Copy()
  4958  	}
  4959  
  4960  	na.Resources = na.Resources.Copy()
  4961  	na.SharedResources = na.SharedResources.Copy()
  4962  
  4963  	if a.TaskResources != nil {
  4964  		tr := make(map[string]*Resources, len(na.TaskResources))
  4965  		for task, resource := range na.TaskResources {
  4966  			tr[task] = resource.Copy()
  4967  		}
  4968  		na.TaskResources = tr
  4969  	}
  4970  
  4971  	na.Metrics = na.Metrics.Copy()
  4972  	na.DeploymentStatus = na.DeploymentStatus.Copy()
  4973  
  4974  	if a.TaskStates != nil {
  4975  		ts := make(map[string]*TaskState, len(na.TaskStates))
  4976  		for task, state := range na.TaskStates {
  4977  			ts[task] = state.Copy()
  4978  		}
  4979  		na.TaskStates = ts
  4980  	}
  4981  	return na
  4982  }
  4983  
  4984  // TerminalStatus returns if the desired or actual status is terminal and
  4985  // will no longer transition.
  4986  func (a *Allocation) TerminalStatus() bool {
  4987  	// First check the desired state and if that isn't terminal, check client
  4988  	// state.
  4989  	switch a.DesiredStatus {
  4990  	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
  4991  		return true
  4992  	default:
  4993  	}
  4994  
  4995  	switch a.ClientStatus {
  4996  	case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
  4997  		return true
  4998  	default:
  4999  		return false
  5000  	}
  5001  }
  5002  
  5003  // Terminated returns if the allocation is in a terminal state on a client.
  5004  func (a *Allocation) Terminated() bool {
  5005  	if a.ClientStatus == AllocClientStatusFailed ||
  5006  		a.ClientStatus == AllocClientStatusComplete ||
  5007  		a.ClientStatus == AllocClientStatusLost {
  5008  		return true
  5009  	}
  5010  	return false
  5011  }
  5012  
  5013  // RanSuccessfully returns whether the client has ran the allocation and all
  5014  // tasks finished successfully
  5015  func (a *Allocation) RanSuccessfully() bool {
  5016  	return a.ClientStatus == AllocClientStatusComplete
  5017  }
  5018  
  5019  // ShouldMigrate returns if the allocation needs data migration
  5020  func (a *Allocation) ShouldMigrate() bool {
  5021  	if a.PreviousAllocation == "" {
  5022  		return false
  5023  	}
  5024  
  5025  	if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict {
  5026  		return false
  5027  	}
  5028  
  5029  	tg := a.Job.LookupTaskGroup(a.TaskGroup)
  5030  
  5031  	// if the task group is nil or the ephemeral disk block isn't present then
  5032  	// we won't migrate
  5033  	if tg == nil || tg.EphemeralDisk == nil {
  5034  		return false
  5035  	}
  5036  
  5037  	// We won't migrate any data is the user hasn't enabled migration or the
  5038  	// disk is not marked as sticky
  5039  	if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky {
  5040  		return false
  5041  	}
  5042  
  5043  	return true
  5044  }
  5045  
  5046  // SetEventDisplayMessage populates the display message if its not already set,
  5047  // a temporary fix to handle old allocations that don't have it.
  5048  // This method will be removed in a future release.
  5049  func (a *Allocation) SetEventDisplayMessages() {
  5050  	setDisplayMsg(a.TaskStates)
  5051  }
  5052  
  5053  // Stub returns a list stub for the allocation
  5054  func (a *Allocation) Stub() *AllocListStub {
  5055  	return &AllocListStub{
  5056  		ID:                 a.ID,
  5057  		EvalID:             a.EvalID,
  5058  		Name:               a.Name,
  5059  		NodeID:             a.NodeID,
  5060  		JobID:              a.JobID,
  5061  		JobVersion:         a.Job.Version,
  5062  		TaskGroup:          a.TaskGroup,
  5063  		DesiredStatus:      a.DesiredStatus,
  5064  		DesiredDescription: a.DesiredDescription,
  5065  		ClientStatus:       a.ClientStatus,
  5066  		ClientDescription:  a.ClientDescription,
  5067  		TaskStates:         a.TaskStates,
  5068  		DeploymentStatus:   a.DeploymentStatus,
  5069  		CreateIndex:        a.CreateIndex,
  5070  		ModifyIndex:        a.ModifyIndex,
  5071  		CreateTime:         a.CreateTime,
  5072  		ModifyTime:         a.ModifyTime,
  5073  	}
  5074  }
  5075  
// AllocListStub is used to return a subset of alloc information. It is built
// by Allocation.Stub, which fills each field from the allocation field of the
// same name, except JobVersion, which comes from the allocation's
// Job.Version.
type AllocListStub struct {
	ID                 string
	EvalID             string
	Name               string
	NodeID             string
	JobID              string
	JobVersion         uint64
	TaskGroup          string
	DesiredStatus      string
	DesiredDescription string
	ClientStatus       string
	ClientDescription  string
	TaskStates         map[string]*TaskState
	DeploymentStatus   *AllocDeploymentStatus
	CreateIndex        uint64
	ModifyIndex        uint64
	CreateTime         int64
	ModifyTime         int64
}
  5096  
  5097  // SetEventDisplayMessage populates the display message if its not already set,
  5098  // a temporary fix to handle old allocations that don't have it.
  5099  // This method will be removed in a future release.
  5100  func (a *AllocListStub) SetEventDisplayMessages() {
  5101  	setDisplayMsg(a.TaskStates)
  5102  }
  5103  
  5104  func setDisplayMsg(taskStates map[string]*TaskState) {
  5105  	if taskStates != nil {
  5106  		for _, taskState := range taskStates {
  5107  			for _, event := range taskState.Events {
  5108  				event.PopulateEventDisplayMessage()
  5109  			}
  5110  		}
  5111  	}
  5112  }
  5113  
// AllocMetric is used to track various metrics while attempting
// to make an allocation. These are used to debug a job, or to better
// understand the pressure within the system.
type AllocMetric struct {
	// NodesEvaluated is the number of nodes that were evaluated.
	// EvaluateNode increments it.
	NodesEvaluated int

	// NodesFiltered is the number of nodes filtered due to a constraint.
	// FilterNode increments it.
	NodesFiltered int

	// NodesAvailable is the number of nodes available for evaluation per DC.
	NodesAvailable map[string]int

	// ClassFiltered is the number of nodes filtered by class, keyed by the
	// node's NodeClass.
	ClassFiltered map[string]int

	// ConstraintFiltered is the number of failures caused by constraint,
	// keyed by the constraint string passed to FilterNode.
	ConstraintFiltered map[string]int

	// NodesExhausted is the number of nodes skipped due to being
	// exhausted of at least one resource
	NodesExhausted int

	// ClassExhausted is the number of nodes exhausted by class, keyed by the
	// node's NodeClass.
	ClassExhausted map[string]int

	// DimensionExhausted provides the count by dimension or reason
	DimensionExhausted map[string]int

	// QuotaExhausted provides the exhausted dimensions. ExhaustQuota appends
	// to it.
	QuotaExhausted []string

	// Scores is the scores of the final few nodes remaining
	// for placement. The top score is typically selected. ScoreNode stores
	// entries under the key "<node ID>.<name>".
	Scores map[string]float64

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}
  5160  
  5161  func (a *AllocMetric) Copy() *AllocMetric {
  5162  	if a == nil {
  5163  		return nil
  5164  	}
  5165  	na := new(AllocMetric)
  5166  	*na = *a
  5167  	na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable)
  5168  	na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered)
  5169  	na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered)
  5170  	na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted)
  5171  	na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted)
  5172  	na.QuotaExhausted = helper.CopySliceString(na.QuotaExhausted)
  5173  	na.Scores = helper.CopyMapStringFloat64(na.Scores)
  5174  	return na
  5175  }
  5176  
  5177  func (a *AllocMetric) EvaluateNode() {
  5178  	a.NodesEvaluated += 1
  5179  }
  5180  
  5181  func (a *AllocMetric) FilterNode(node *Node, constraint string) {
  5182  	a.NodesFiltered += 1
  5183  	if node != nil && node.NodeClass != "" {
  5184  		if a.ClassFiltered == nil {
  5185  			a.ClassFiltered = make(map[string]int)
  5186  		}
  5187  		a.ClassFiltered[node.NodeClass] += 1
  5188  	}
  5189  	if constraint != "" {
  5190  		if a.ConstraintFiltered == nil {
  5191  			a.ConstraintFiltered = make(map[string]int)
  5192  		}
  5193  		a.ConstraintFiltered[constraint] += 1
  5194  	}
  5195  }
  5196  
  5197  func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
  5198  	a.NodesExhausted += 1
  5199  	if node != nil && node.NodeClass != "" {
  5200  		if a.ClassExhausted == nil {
  5201  			a.ClassExhausted = make(map[string]int)
  5202  		}
  5203  		a.ClassExhausted[node.NodeClass] += 1
  5204  	}
  5205  	if dimension != "" {
  5206  		if a.DimensionExhausted == nil {
  5207  			a.DimensionExhausted = make(map[string]int)
  5208  		}
  5209  		a.DimensionExhausted[dimension] += 1
  5210  	}
  5211  }
  5212  
  5213  func (a *AllocMetric) ExhaustQuota(dimensions []string) {
  5214  	if a.QuotaExhausted == nil {
  5215  		a.QuotaExhausted = make([]string, 0, len(dimensions))
  5216  	}
  5217  
  5218  	a.QuotaExhausted = append(a.QuotaExhausted, dimensions...)
  5219  }
  5220  
  5221  func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
  5222  	if a.Scores == nil {
  5223  		a.Scores = make(map[string]float64)
  5224  	}
  5225  	key := fmt.Sprintf("%s.%s", node.ID, name)
  5226  	a.Scores[key] = score
  5227  }
  5228  
// AllocDeploymentStatus captures the status of the allocation as part of the
// deployment. This can include things like if the allocation has been marked as
// healthy.
type AllocDeploymentStatus struct {
	// Healthy marks whether the allocation has been marked healthy or unhealthy
	// as part of a deployment. It can be unset (nil) if it has neither been
	// marked healthy nor unhealthy.
	Healthy *bool

	// ModifyIndex is the raft index in which the deployment status was last
	// changed.
	ModifyIndex uint64
}
  5242  
  5243  // IsHealthy returns if the allocation is marked as healthy as part of a
  5244  // deployment
  5245  func (a *AllocDeploymentStatus) IsHealthy() bool {
  5246  	if a == nil {
  5247  		return false
  5248  	}
  5249  
  5250  	return a.Healthy != nil && *a.Healthy
  5251  }
  5252  
  5253  // IsUnhealthy returns if the allocation is marked as unhealthy as part of a
  5254  // deployment
  5255  func (a *AllocDeploymentStatus) IsUnhealthy() bool {
  5256  	if a == nil {
  5257  		return false
  5258  	}
  5259  
  5260  	return a.Healthy != nil && !*a.Healthy
  5261  }
  5262  
  5263  func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus {
  5264  	if a == nil {
  5265  		return nil
  5266  	}
  5267  
  5268  	c := new(AllocDeploymentStatus)
  5269  	*c = *a
  5270  
  5271  	if a.Healthy != nil {
  5272  		c.Healthy = helper.BoolToPtr(*a.Healthy)
  5273  	}
  5274  
  5275  	return c
  5276  }
  5277  
// Evaluation statuses. Note that EvalStatusCancelled has the value
// "canceled" (single "l"), unlike DeploymentStatusCancelled, which is
// "cancelled".
const (
	EvalStatusBlocked   = "blocked"
	EvalStatusPending   = "pending"
	EvalStatusComplete  = "complete"
	EvalStatusFailed    = "failed"
	EvalStatusCancelled = "canceled"
)
  5285  
// Evaluation trigger reasons. NOTE(review): presumably these are the values
// stored in Evaluation.TriggeredBy — confirm.
const (
	EvalTriggerJobRegister       = "job-register"
	EvalTriggerJobDeregister     = "job-deregister"
	EvalTriggerPeriodicJob       = "periodic-job"
	EvalTriggerNodeUpdate        = "node-update"
	EvalTriggerScheduled         = "scheduled"
	EvalTriggerRollingUpdate     = "rolling-update"
	EvalTriggerDeploymentWatcher = "deployment-watcher"
	EvalTriggerFailedFollowUp    = "failed-follow-up"
	EvalTriggerMaxPlans          = "max-plan-attempts"
)
  5297  
// Names of the core jobs, each of which garbage collects one kind of object
// (or, for force-gc, all of them).
const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
	// periodically scan garbage collectible jobs and check if both their
	// evaluations and allocations are terminal. If so, we delete these out of
	// the system.
	CoreJobJobGC = "job-gc"

	// CoreJobDeploymentGC is used for the garbage collection of eligible
	// deployments. We periodically scan garbage collectible deployments and
	// check if they are terminal. If so, we delete these out of the system.
	CoreJobDeploymentGC = "deployment-gc"

	// CoreJobForceGC is used to force garbage collection of all GCable objects.
	CoreJobForceGC = "force-gc"
)
  5324  
// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Namespace is the namespace the evaluation is created in
	Namespace string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	// The values used in this file are the EvalTrigger* constants.
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// DeploymentID is the ID of the deployment that triggered the evaluation.
	DeploymentID string

	// Status of the evaluation. The methods on Evaluation compare it
	// against the EvalStatus* constants.
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade.
	Wait time.Duration

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	PreviousEval string

	// BlockedEval is the evaluation ID for a created blocked eval. A
	// blocked eval will be created if all allocations could not be placed due
	// to constraints or lacking resources.
	BlockedEval string

	// FailedTGAllocs are task groups which have allocations that could not be
	// made, but the metrics are persisted so that the user can use the feedback
	// to determine the cause.
	FailedTGAllocs map[string]*AllocMetric

	// ClassEligibility tracks computed node classes that have been explicitly
	// marked as eligible or ineligible.
	ClassEligibility map[string]bool

	// QuotaLimitReached marks whether a quota limit was reached for the
	// evaluation. NOTE(review): the field is a string rather than a bool;
	// presumably it identifies the limit that was hit and is empty
	// otherwise — confirm against the code that sets it.
	QuotaLimitReached string

	// EscapedComputedClass marks whether the job has constraints that are not
	// captured by computed node classes.
	EscapedComputedClass bool

	// AnnotatePlan triggers the scheduler to provide additional annotations
	// during the evaluation. This should not be set during normal operations.
	AnnotatePlan bool

	// QueuedAllocations is the number of unplaced allocations at the time the
	// evaluation was processed. The map is keyed by Task Group names.
	QueuedAllocations map[string]int

	// LeaderACL provides the ACL token to use when issuing RPCs back to the
	// leader. This will be a valid management token as long as the leader is
	// active. This should not ever be exposed via the API.
	LeaderACL string

	// SnapshotIndex is the Raft index of the snapshot used to process the
	// evaluation. As such it will only be set once it has gone through the
	// scheduler.
	SnapshotIndex uint64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  5430  
  5431  // TerminalStatus returns if the current status is terminal and
  5432  // will no longer transition.
  5433  func (e *Evaluation) TerminalStatus() bool {
  5434  	switch e.Status {
  5435  	case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled:
  5436  		return true
  5437  	default:
  5438  		return false
  5439  	}
  5440  }
  5441  
  5442  func (e *Evaluation) GoString() string {
  5443  	return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace)
  5444  }
  5445  
  5446  func (e *Evaluation) Copy() *Evaluation {
  5447  	if e == nil {
  5448  		return nil
  5449  	}
  5450  	ne := new(Evaluation)
  5451  	*ne = *e
  5452  
  5453  	// Copy ClassEligibility
  5454  	if e.ClassEligibility != nil {
  5455  		classes := make(map[string]bool, len(e.ClassEligibility))
  5456  		for class, elig := range e.ClassEligibility {
  5457  			classes[class] = elig
  5458  		}
  5459  		ne.ClassEligibility = classes
  5460  	}
  5461  
  5462  	// Copy FailedTGAllocs
  5463  	if e.FailedTGAllocs != nil {
  5464  		failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs))
  5465  		for tg, metric := range e.FailedTGAllocs {
  5466  			failedTGs[tg] = metric.Copy()
  5467  		}
  5468  		ne.FailedTGAllocs = failedTGs
  5469  	}
  5470  
  5471  	// Copy queued allocations
  5472  	if e.QueuedAllocations != nil {
  5473  		queuedAllocations := make(map[string]int, len(e.QueuedAllocations))
  5474  		for tg, num := range e.QueuedAllocations {
  5475  			queuedAllocations[tg] = num
  5476  		}
  5477  		ne.QueuedAllocations = queuedAllocations
  5478  	}
  5479  
  5480  	return ne
  5481  }
  5482  
  5483  // ShouldEnqueue checks if a given evaluation should be enqueued into the
  5484  // eval_broker
  5485  func (e *Evaluation) ShouldEnqueue() bool {
  5486  	switch e.Status {
  5487  	case EvalStatusPending:
  5488  		return true
  5489  	case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled:
  5490  		return false
  5491  	default:
  5492  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  5493  	}
  5494  }
  5495  
  5496  // ShouldBlock checks if a given evaluation should be entered into the blocked
  5497  // eval tracker.
  5498  func (e *Evaluation) ShouldBlock() bool {
  5499  	switch e.Status {
  5500  	case EvalStatusBlocked:
  5501  		return true
  5502  	case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled:
  5503  		return false
  5504  	default:
  5505  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  5506  	}
  5507  }
  5508  
  5509  // MakePlan is used to make a plan from the given evaluation
  5510  // for a given Job
  5511  func (e *Evaluation) MakePlan(j *Job) *Plan {
  5512  	p := &Plan{
  5513  		EvalID:         e.ID,
  5514  		Priority:       e.Priority,
  5515  		Job:            j,
  5516  		NodeUpdate:     make(map[string][]*Allocation),
  5517  		NodeAllocation: make(map[string][]*Allocation),
  5518  	}
  5519  	if j != nil {
  5520  		p.AllAtOnce = j.AllAtOnce
  5521  	}
  5522  	return p
  5523  }
  5524  
  5525  // NextRollingEval creates an evaluation to followup this eval for rolling updates
  5526  func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
  5527  	return &Evaluation{
  5528  		ID:             uuid.Generate(),
  5529  		Namespace:      e.Namespace,
  5530  		Priority:       e.Priority,
  5531  		Type:           e.Type,
  5532  		TriggeredBy:    EvalTriggerRollingUpdate,
  5533  		JobID:          e.JobID,
  5534  		JobModifyIndex: e.JobModifyIndex,
  5535  		Status:         EvalStatusPending,
  5536  		Wait:           wait,
  5537  		PreviousEval:   e.ID,
  5538  	}
  5539  }
  5540  
  5541  // CreateBlockedEval creates a blocked evaluation to followup this eval to place any
  5542  // failed allocations. It takes the classes marked explicitly eligible or
  5543  // ineligible, whether the job has escaped computed node classes and whether the
  5544  // quota limit was reached.
  5545  func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool,
  5546  	escaped bool, quotaReached string) *Evaluation {
  5547  
  5548  	return &Evaluation{
  5549  		ID:                   uuid.Generate(),
  5550  		Namespace:            e.Namespace,
  5551  		Priority:             e.Priority,
  5552  		Type:                 e.Type,
  5553  		TriggeredBy:          e.TriggeredBy,
  5554  		JobID:                e.JobID,
  5555  		JobModifyIndex:       e.JobModifyIndex,
  5556  		Status:               EvalStatusBlocked,
  5557  		PreviousEval:         e.ID,
  5558  		ClassEligibility:     classEligibility,
  5559  		EscapedComputedClass: escaped,
  5560  		QuotaLimitReached:    quotaReached,
  5561  	}
  5562  }
  5563  
  5564  // CreateFailedFollowUpEval creates a follow up evaluation when the current one
  5565  // has been marked as failed because it has hit the delivery limit and will not
  5566  // be retried by the eval_broker.
  5567  func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation {
  5568  	return &Evaluation{
  5569  		ID:             uuid.Generate(),
  5570  		Namespace:      e.Namespace,
  5571  		Priority:       e.Priority,
  5572  		Type:           e.Type,
  5573  		TriggeredBy:    EvalTriggerFailedFollowUp,
  5574  		JobID:          e.JobID,
  5575  		JobModifyIndex: e.JobModifyIndex,
  5576  		Status:         EvalStatusPending,
  5577  		Wait:           wait,
  5578  		PreviousEval:   e.ID,
  5579  	}
  5580  }
  5581  
// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// Job is the parent job of all the allocations in the Plan.
	// Since a Plan only involves a single Job, we can reduce the size
	// of the plan by only including it once.
	Job *Job

	// NodeUpdate contains all the allocations for each node. For each node,
	// this is a list of the allocations to update to either stop or evict.
	// The map is keyed by node ID (AppendUpdate indexes it by alloc.NodeID).
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	// The map is keyed by node ID (AppendAlloc indexes it by alloc.NodeID).
	NodeAllocation map[string][]*Allocation

	// Annotations contains annotations by the scheduler to be used by operators
	// to understand the decisions made by the scheduler.
	Annotations *PlanAnnotations

	// Deployment is the deployment created or updated by the scheduler that
	// should be applied by the planner.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate
}
  5631  
  5632  // AppendUpdate marks the allocation for eviction. The clientStatus of the
  5633  // allocation may be optionally set by passing in a non-empty value.
  5634  func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) {
  5635  	newAlloc := new(Allocation)
  5636  	*newAlloc = *alloc
  5637  
  5638  	// If the job is not set in the plan we are deregistering a job so we
  5639  	// extract the job from the allocation.
  5640  	if p.Job == nil && newAlloc.Job != nil {
  5641  		p.Job = newAlloc.Job
  5642  	}
  5643  
  5644  	// Normalize the job
  5645  	newAlloc.Job = nil
  5646  
  5647  	// Strip the resources as it can be rebuilt.
  5648  	newAlloc.Resources = nil
  5649  
  5650  	newAlloc.DesiredStatus = desiredStatus
  5651  	newAlloc.DesiredDescription = desiredDesc
  5652  
  5653  	if clientStatus != "" {
  5654  		newAlloc.ClientStatus = clientStatus
  5655  	}
  5656  
  5657  	node := alloc.NodeID
  5658  	existing := p.NodeUpdate[node]
  5659  	p.NodeUpdate[node] = append(existing, newAlloc)
  5660  }
  5661  
  5662  func (p *Plan) PopUpdate(alloc *Allocation) {
  5663  	existing := p.NodeUpdate[alloc.NodeID]
  5664  	n := len(existing)
  5665  	if n > 0 && existing[n-1].ID == alloc.ID {
  5666  		existing = existing[:n-1]
  5667  		if len(existing) > 0 {
  5668  			p.NodeUpdate[alloc.NodeID] = existing
  5669  		} else {
  5670  			delete(p.NodeUpdate, alloc.NodeID)
  5671  		}
  5672  	}
  5673  }
  5674  
  5675  func (p *Plan) AppendAlloc(alloc *Allocation) {
  5676  	node := alloc.NodeID
  5677  	existing := p.NodeAllocation[node]
  5678  	p.NodeAllocation[node] = append(existing, alloc)
  5679  }
  5680  
  5681  // IsNoOp checks if this plan would do nothing
  5682  func (p *Plan) IsNoOp() bool {
  5683  	return len(p.NodeUpdate) == 0 &&
  5684  		len(p.NodeAllocation) == 0 &&
  5685  		p.Deployment == nil &&
  5686  		len(p.DeploymentUpdates) == 0
  5687  }
  5688  
// PlanResult is the result of a plan submitted to the leader. Its NodeUpdate,
// NodeAllocation, Deployment and DeploymentUpdates fields have the same types
// as the like-named fields of Plan and hold what was actually committed.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	// FullCommit compares it per key against Plan.NodeAllocation.
	NodeAllocation map[string][]*Allocation

	// Deployment is the deployment that was committed.
	Deployment *Deployment

	// DeploymentUpdates is the set of deployment updates that were committed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// over committed) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}
  5713  
  5714  // IsNoOp checks if this plan result would do nothing
  5715  func (p *PlanResult) IsNoOp() bool {
  5716  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 &&
  5717  		len(p.DeploymentUpdates) == 0 && p.Deployment == nil
  5718  }
  5719  
  5720  // FullCommit is used to check if all the allocations in a plan
  5721  // were committed as part of the result. Returns if there was
  5722  // a match, and the number of expected and actual allocations.
  5723  func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
  5724  	expected := 0
  5725  	actual := 0
  5726  	for name, allocList := range plan.NodeAllocation {
  5727  		didAlloc, _ := p.NodeAllocation[name]
  5728  		expected += len(allocList)
  5729  		actual += len(didAlloc)
  5730  	}
  5731  	return actual == expected, expected, actual
  5732  }
  5733  
// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators. It is attached to a plan through Plan.Annotations.
type PlanAnnotations struct {
	// DesiredTGUpdates is the set of desired updates per task group.
	// NOTE(review): presumably keyed by task group name — confirm against
	// the scheduler code that fills it.
	DesiredTGUpdates map[string]*DesiredUpdates
}
  5740  
  5741  // DesiredUpdates is the set of changes the scheduler would like to make given
  5742  // sufficient resources and cluster capacity.
  5743  type DesiredUpdates struct {
  5744  	Ignore            uint64
  5745  	Place             uint64
  5746  	Migrate           uint64
  5747  	Stop              uint64
  5748  	InPlaceUpdate     uint64
  5749  	DestructiveUpdate uint64
  5750  	Canary            uint64
  5751  }
  5752  
  5753  func (d *DesiredUpdates) GoString() string {
  5754  	return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
  5755  		d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
  5756  }
  5757  
// MsgpackHandle is a shared handle for encoding/decoding of structs. It is
// the handle used by Decode and Encode in this file.
var MsgpackHandle = func() *codec.MsgpackHandle {
	h := &codec.MsgpackHandle{RawToString: true}

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
	return h
}()
  5768  
var (
	// JsonHandle and JsonHandlePretty are the codec handles to JSON encode
	// structs. The pretty handle will add indents for easier human consumption.
	// Both set HTMLCharsAsIs; they differ only in that the pretty handle
	// sets Indent to 4.
	JsonHandle = &codec.JsonHandle{
		HTMLCharsAsIs: true,
	}
	JsonHandlePretty = &codec.JsonHandle{
		HTMLCharsAsIs: true,
		Indent:        4,
	}
)
  5780  
// HashiMsgpackHandle is a msgpack handle configured the same way as
// MsgpackHandle, but built from the github.com/hashicorp/go-msgpack/codec
// package (imported as hcodec) instead of github.com/ugorji/go/codec.
var HashiMsgpackHandle = func() *hcodec.MsgpackHandle {
	h := &hcodec.MsgpackHandle{RawToString: true}

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
	return h
}()
  5790  
  5791  // Decode is used to decode a MsgPack encoded object
  5792  func Decode(buf []byte, out interface{}) error {
  5793  	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
  5794  }
  5795  
  5796  // Encode is used to encode a MsgPack object with type prefix
  5797  func Encode(t MessageType, msg interface{}) ([]byte, error) {
  5798  	var buf bytes.Buffer
  5799  	buf.WriteByte(uint8(t))
  5800  	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
  5801  	return buf.Bytes(), err
  5802  }
  5803  
// KeyringResponse is a unified key response and can be used for install,
// remove, use, as well as listing key queries.
// NOTE(review): the meaning of the map keys and of the int values in Keys is
// not visible in this file section — confirm against the serf keyring code.
type KeyringResponse struct {
	Messages map[string]string
	Keys     map[string]int
	NumNodes int
}
  5811  
// KeyringRequest is the request object for serf key operations.
type KeyringRequest struct {
	// Key is the key the operation applies to.
	Key string
}
  5816  
  5817  // RecoverableError wraps an error and marks whether it is recoverable and could
  5818  // be retried or it is fatal.
  5819  type RecoverableError struct {
  5820  	Err         string
  5821  	Recoverable bool
  5822  }
  5823  
  5824  // NewRecoverableError is used to wrap an error and mark it as recoverable or
  5825  // not.
  5826  func NewRecoverableError(e error, recoverable bool) error {
  5827  	if e == nil {
  5828  		return nil
  5829  	}
  5830  
  5831  	return &RecoverableError{
  5832  		Err:         e.Error(),
  5833  		Recoverable: recoverable,
  5834  	}
  5835  }
  5836  
  5837  // WrapRecoverable wraps an existing error in a new RecoverableError with a new
  5838  // message. If the error was recoverable before the returned error is as well;
  5839  // otherwise it is unrecoverable.
  5840  func WrapRecoverable(msg string, err error) error {
  5841  	return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)}
  5842  }
  5843  
  5844  func (r *RecoverableError) Error() string {
  5845  	return r.Err
  5846  }
  5847  
  5848  func (r *RecoverableError) IsRecoverable() bool {
  5849  	return r.Recoverable
  5850  }
  5851  
  5852  // Recoverable is an interface for errors to implement to indicate whether or
  5853  // not they are fatal or recoverable.
  5854  type Recoverable interface {
  5855  	error
  5856  	IsRecoverable() bool
  5857  }
  5858  
  5859  // IsRecoverable returns true if error is a RecoverableError with
  5860  // Recoverable=true. Otherwise false is returned.
  5861  func IsRecoverable(e error) bool {
  5862  	if re, ok := e.(Recoverable); ok {
  5863  		return re.IsRecoverable()
  5864  	}
  5865  	return false
  5866  }
  5867  
// ACLPolicy is used to represent an ACL policy
type ACLPolicy struct {
	Name        string // Unique name
	Description string // Human readable
	Rules       string // HCL or JSON format
	// Hash is computed by SetHash over Name, Description and Rules.
	Hash        []byte
	CreateIndex uint64
	ModifyIndex uint64
}
  5877  
  5878  // SetHash is used to compute and set the hash of the ACL policy
  5879  func (c *ACLPolicy) SetHash() []byte {
  5880  	// Initialize a 256bit Blake2 hash (32 bytes)
  5881  	hash, err := blake2b.New256(nil)
  5882  	if err != nil {
  5883  		panic(err)
  5884  	}
  5885  
  5886  	// Write all the user set fields
  5887  	hash.Write([]byte(c.Name))
  5888  	hash.Write([]byte(c.Description))
  5889  	hash.Write([]byte(c.Rules))
  5890  
  5891  	// Finalize the hash
  5892  	hashVal := hash.Sum(nil)
  5893  
  5894  	// Set and return the hash
  5895  	c.Hash = hashVal
  5896  	return hashVal
  5897  }
  5898  
  5899  func (a *ACLPolicy) Stub() *ACLPolicyListStub {
  5900  	return &ACLPolicyListStub{
  5901  		Name:        a.Name,
  5902  		Description: a.Description,
  5903  		Hash:        a.Hash,
  5904  		CreateIndex: a.CreateIndex,
  5905  		ModifyIndex: a.ModifyIndex,
  5906  	}
  5907  }
  5908  
  5909  func (a *ACLPolicy) Validate() error {
  5910  	var mErr multierror.Error
  5911  	if !validPolicyName.MatchString(a.Name) {
  5912  		err := fmt.Errorf("invalid name '%s'", a.Name)
  5913  		mErr.Errors = append(mErr.Errors, err)
  5914  	}
  5915  	if _, err := acl.Parse(a.Rules); err != nil {
  5916  		err = fmt.Errorf("failed to parse rules: %v", err)
  5917  		mErr.Errors = append(mErr.Errors, err)
  5918  	}
  5919  	if len(a.Description) > maxPolicyDescriptionLength {
  5920  		err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength)
  5921  		mErr.Errors = append(mErr.Errors, err)
  5922  	}
  5923  	return mErr.ErrorOrNil()
  5924  }
  5925  
// ACLPolicyListStub is used for listing ACL policies. It carries the fields
// of ACLPolicy except Rules and is produced by ACLPolicy.Stub.
type ACLPolicyListStub struct {
	Name        string
	Description string
	Hash        []byte
	CreateIndex uint64
	ModifyIndex uint64
}
  5934  
// ACLPolicyListRequest is used to request a list of policies. It has no
// fields of its own beyond the embedded QueryOptions.
type ACLPolicyListRequest struct {
	QueryOptions
}
  5939  
// ACLPolicySpecificRequest is used to query a specific policy
type ACLPolicySpecificRequest struct {
	// Name is the name of the policy to look up.
	Name string
	QueryOptions
}
  5945  
// ACLPolicySetRequest is used to query a set of policies
type ACLPolicySetRequest struct {
	// Names lists the names of the policies to look up.
	Names []string
	QueryOptions
}
  5951  
// ACLPolicyListResponse is used for a list request
type ACLPolicyListResponse struct {
	// Policies holds the stub (list) form of each policy.
	Policies []*ACLPolicyListStub
	QueryMeta
}
  5957  
// SingleACLPolicyResponse is used to return a single policy
type SingleACLPolicyResponse struct {
	// Policy is the full policy.
	Policy *ACLPolicy
	QueryMeta
}
  5963  
// ACLPolicySetResponse is used to return a set of policies
type ACLPolicySetResponse struct {
	// NOTE(review): presumably keyed by policy name — confirm against the
	// endpoint that fills this map.
	Policies map[string]*ACLPolicy
	QueryMeta
}
  5969  
// ACLPolicyDeleteRequest is used to delete a set of policies
type ACLPolicyDeleteRequest struct {
	// Names lists the names of the policies to delete.
	Names []string
	WriteRequest
}
  5975  
// ACLPolicyUpsertRequest is used to upsert a set of policies
type ACLPolicyUpsertRequest struct {
	// Policies holds the policies to create or update.
	Policies []*ACLPolicy
	WriteRequest
}
  5981  
// ACLToken represents a client token which is used to Authenticate
type ACLToken struct {
	AccessorID  string   // Public Accessor ID (UUID)
	SecretID    string   // Secret ID, private (UUID)
	Name        string   // Human friendly name
	Type        string   // Client or Management
	Policies    []string // Policies this token ties to
	Global      bool     // Global or Region local
	// Hash is computed by SetHash over Name, Type, Policies and Global.
	Hash        []byte
	CreateTime  time.Time // Time of creation
	CreateIndex uint64
	ModifyIndex uint64
}
  5995  
var (
	// AnonymousACLToken is used when no SecretID is provided, and the
	// request is made anonymously. It is a non-global client token tied to
	// the single policy named "anonymous".
	AnonymousACLToken = &ACLToken{
		AccessorID: "anonymous",
		Name:       "Anonymous Token",
		Type:       ACLClientToken,
		Policies:   []string{"anonymous"},
		Global:     false,
	}
)
  6007  
// ACLTokenListStub is the list form of an ACLToken, produced by
// ACLToken.Stub. It carries the same fields as ACLToken except SecretID.
type ACLTokenListStub struct {
	AccessorID  string
	Name        string
	Type        string
	Policies    []string
	Global      bool
	Hash        []byte
	CreateTime  time.Time
	CreateIndex uint64
	ModifyIndex uint64
}
  6019  
  6020  // SetHash is used to compute and set the hash of the ACL token
  6021  func (a *ACLToken) SetHash() []byte {
  6022  	// Initialize a 256bit Blake2 hash (32 bytes)
  6023  	hash, err := blake2b.New256(nil)
  6024  	if err != nil {
  6025  		panic(err)
  6026  	}
  6027  
  6028  	// Write all the user set fields
  6029  	hash.Write([]byte(a.Name))
  6030  	hash.Write([]byte(a.Type))
  6031  	for _, policyName := range a.Policies {
  6032  		hash.Write([]byte(policyName))
  6033  	}
  6034  	if a.Global {
  6035  		hash.Write([]byte("global"))
  6036  	} else {
  6037  		hash.Write([]byte("local"))
  6038  	}
  6039  
  6040  	// Finalize the hash
  6041  	hashVal := hash.Sum(nil)
  6042  
  6043  	// Set and return the hash
  6044  	a.Hash = hashVal
  6045  	return hashVal
  6046  }
  6047  
  6048  func (a *ACLToken) Stub() *ACLTokenListStub {
  6049  	return &ACLTokenListStub{
  6050  		AccessorID:  a.AccessorID,
  6051  		Name:        a.Name,
  6052  		Type:        a.Type,
  6053  		Policies:    a.Policies,
  6054  		Global:      a.Global,
  6055  		Hash:        a.Hash,
  6056  		CreateTime:  a.CreateTime,
  6057  		CreateIndex: a.CreateIndex,
  6058  		ModifyIndex: a.ModifyIndex,
  6059  	}
  6060  }
  6061  
  6062  // Validate is used to sanity check a token
  6063  func (a *ACLToken) Validate() error {
  6064  	var mErr multierror.Error
  6065  	if len(a.Name) > maxTokenNameLength {
  6066  		mErr.Errors = append(mErr.Errors, fmt.Errorf("token name too long"))
  6067  	}
  6068  	switch a.Type {
  6069  	case ACLClientToken:
  6070  		if len(a.Policies) == 0 {
  6071  			mErr.Errors = append(mErr.Errors, fmt.Errorf("client token missing policies"))
  6072  		}
  6073  	case ACLManagementToken:
  6074  		if len(a.Policies) != 0 {
  6075  			mErr.Errors = append(mErr.Errors, fmt.Errorf("management token cannot be associated with policies"))
  6076  		}
  6077  	default:
  6078  		mErr.Errors = append(mErr.Errors, fmt.Errorf("token type must be client or management"))
  6079  	}
  6080  	return mErr.ErrorOrNil()
  6081  }
  6082  
  6083  // PolicySubset checks if a given set of policies is a subset of the token
  6084  func (a *ACLToken) PolicySubset(policies []string) bool {
  6085  	// Hot-path the management tokens, superset of all policies.
  6086  	if a.Type == ACLManagementToken {
  6087  		return true
  6088  	}
  6089  	associatedPolicies := make(map[string]struct{}, len(a.Policies))
  6090  	for _, policy := range a.Policies {
  6091  		associatedPolicies[policy] = struct{}{}
  6092  	}
  6093  	for _, policy := range policies {
  6094  		if _, ok := associatedPolicies[policy]; !ok {
  6095  			return false
  6096  		}
  6097  	}
  6098  	return true
  6099  }
  6100  
// ACLTokenListRequest is used to request a list of tokens
type ACLTokenListRequest struct {
	// NOTE(review): presumably restricts the listing to tokens whose Global
	// flag is set — confirm against the endpoint that reads it.
	GlobalOnly bool
	QueryOptions
}
  6106  
// ACLTokenSpecificRequest is used to query a specific token
type ACLTokenSpecificRequest struct {
	// AccessorID is the accessor ID of the token to look up.
	AccessorID string
	QueryOptions
}
  6112  
// ACLTokenSetRequest is used to query a set of tokens
type ACLTokenSetRequest struct {
	// AccessorIDS lists the accessor IDs of the tokens to look up. Note the
	// capital "S": ACLTokenDeleteRequest spells its field AccessorIDs.
	AccessorIDS []string
	QueryOptions
}
  6118  
// ACLTokenListResponse is used for a list request
type ACLTokenListResponse struct {
	// Tokens holds the stub (list) form of each token.
	Tokens []*ACLTokenListStub
	QueryMeta
}
  6124  
// SingleACLTokenResponse is used to return a single token
type SingleACLTokenResponse struct {
	// Token is the full token.
	Token *ACLToken
	QueryMeta
}
  6130  
// ACLTokenSetResponse is used to return a set of tokens
type ACLTokenSetResponse struct {
	Tokens map[string]*ACLToken // Keyed by Accessor ID
	QueryMeta
}
  6136  
// ResolveACLTokenRequest is used to resolve a specific token
type ResolveACLTokenRequest struct {
	// SecretID is the secret ID of the token to resolve.
	SecretID string
	QueryOptions
}
  6142  
// ResolveACLTokenResponse is used to return the result of resolving a single
// token
type ResolveACLTokenResponse struct {
	// Token is the resolved token.
	Token *ACLToken
	QueryMeta
}
  6148  
// ACLTokenDeleteRequest is used to delete a set of tokens
type ACLTokenDeleteRequest struct {
	// AccessorIDs lists the accessor IDs of the tokens to delete.
	AccessorIDs []string
	WriteRequest
}
  6154  
// ACLTokenBootstrapRequest is used to bootstrap ACLs
type ACLTokenBootstrapRequest struct {
	Token      *ACLToken // Not client specifiable
	ResetIndex uint64    // Reset index is used to clear the bootstrap token
	WriteRequest
}
  6161  
// ACLTokenUpsertRequest is used to upsert a set of tokens
type ACLTokenUpsertRequest struct {
	// Tokens holds the tokens to create or update.
	Tokens []*ACLToken
	WriteRequest
}
  6167  
// ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest
type ACLTokenUpsertResponse struct {
	// Tokens holds the tokens returned for the upsert.
	Tokens []*ACLToken
	WriteMeta
}