github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/structs/structs.go

github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/structs/structs.go (about)

     1  package structs
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/md5"
     6  	"crypto/sha1"
     7  	"crypto/sha256"
     8  	"crypto/sha512"
     9  	"encoding/hex"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"net"
    14  	"os"
    15  	"path/filepath"
    16  	"reflect"
    17  	"regexp"
    18  	"sort"
    19  	"strconv"
    20  	"strings"
    21  	"time"
    22  
    23  	"github.com/gorhill/cronexpr"
    24  	"github.com/hashicorp/consul/api"
    25  	"github.com/hashicorp/go-multierror"
    26  	"github.com/hashicorp/go-version"
    27  	"github.com/hashicorp/nomad/helper/args"
    28  	"github.com/mitchellh/copystructure"
    29  	"github.com/ugorji/go/codec"
    30  
    31  	hcodec "github.com/hashicorp/go-msgpack/codec"
    32  )
    33  
    34  var (
    35  	ErrNoLeader     = fmt.Errorf("No cluster leader")
    36  	ErrNoRegionPath = fmt.Errorf("No path to region")
    37  )
    38  
// MessageType identifies the kind of request a message carries. It is a
// uint8; see IgnoreUnknownTypeFlag for the flag value that can be set along
// with it.
type MessageType uint8

// Known message types. Values are assigned sequentially by iota, starting at
// zero with NodeRegisterRequestType.
// NOTE(review): presumably these values are persisted or exchanged between
// servers, so new types should only be appended — confirm before reordering.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	VaultAccessorDegisterRequestType
)
    56  
const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	// ProtocolVersion, APIMajorVersion and APIMinorVersion are string keys
	// naming the individual version numbers.
	// NOTE(review): presumably these index VersionResponse.Versions — confirm
	// against the Status.Version endpoint.
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)
    80  
// RPCInfo is used to describe common information about a request: which
// region it targets, whether it is a read, and whether a stale read is
// acceptable. QueryOptions and WriteRequest both provide these methods.
type RPCInfo interface {
	// RequestRegion returns the target region of the request.
	RequestRegion() string
	// IsRead reports whether the request is a read (as opposed to a write).
	IsRead() bool
	// AllowStaleRead reports whether the request may be served with stale
	// data.
	AllowStaleRead() bool
}
    87  
// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// Region is the target region for this query.
	Region string

	// MinQueryIndex, if set, makes the query wait until it exceeds the
	// given index. Must be provided with MaxQueryTime.
	MinQueryIndex uint64

	// MaxQueryTime is provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// AllowStale, if set, lets any follower service the request. Results
	// may be arbitrarily stale.
	AllowStale bool

	// Prefix, if set, is used as the prefix for resource list searches.
	Prefix string
}
   107  
   108  func (q QueryOptions) RequestRegion() string {
   109  	return q.Region
   110  }
   111  
   112  // QueryOption only applies to reads, so always true
   113  func (q QueryOptions) IsRead() bool {
   114  	return true
   115  }
   116  
   117  func (q QueryOptions) AllowStaleRead() bool {
   118  	return q.AllowStale
   119  }
   120  
// WriteRequest carries the options common to write requests. It is embedded
// in the request types that perform writes.
type WriteRequest struct {
	// The target region for this write
	Region string
}
   125  
   126  func (w WriteRequest) RequestRegion() string {
   127  	// The target region for this request
   128  	return w.Region
   129  }
   130  
   131  // WriteRequest only applies to writes, always false
   132  func (w WriteRequest) IsRead() bool {
   133  	return false
   134  }
   135  
   136  func (w WriteRequest) AllowStaleRead() bool {
   137  	return false
   138  }
   139  
// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// Index is the index associated with the read.
	Index uint64

	// LastContact is, if AllowStale is used, the time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// KnownLeader indicates if there is a known leader node.
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
	// Index is the index associated with the write.
	Index uint64
}
   161  
// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}

// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to
	Datacenter string
}

// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain status
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	NodeID   string
	SecretID string
	QueryOptions
}
   222  
// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current job's index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	WriteRequest
}

// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string
	WriteRequest
}

// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}

// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
	JobID string
	QueryOptions
}

// JobListRequest is used to parameterize a job list request
type JobListRequest struct {
	QueryOptions
}

// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
	Job  *Job
	Diff bool // Toggles an annotated diff
	WriteRequest
}

// JobSummaryRequest is used when we just need to get a specific job summary
type JobSummaryRequest struct {
	JobID string
	QueryOptions
}

// NodeListRequest is used to parameterize a node list request
type NodeListRequest struct {
	QueryOptions
}
   279  
// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalDeleteRequest is used for deleting evaluations and allocations by ID.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers       []string
	Timeout          time.Duration
	SchedulerVersion uint16
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	WriteRequest
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}
   362  
// ServerMembersResponse has the list of servers in a cluster
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
	QueryMeta
}

// ServerMember holds information about a Nomad server agent in a cluster
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}

// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	QueryOptions
}

// VaultAccessorsRequest is used to operate on a set of Vault accessors
type VaultAccessorsRequest struct {
	Accessors []*VaultAccessor
}

// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
	AllocID     string
	Task        string
	NodeID      string
	Accessor    string
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}

// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token
	Tasks map[string]string

	// Error stores any error that occurred. Errors are stored here so we can
	// communicate whether it is retriable
	Error *RecoverableError

	QueryMeta
}
   426  
// GenericRequest is used to request where no
// specific information is needed.
type GenericRequest struct {
	QueryOptions
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}
   461  
// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader.  If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response.  This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
	// Allocs maps a string key to a uint64 value.
	// NOTE(review): presumably allocation ID -> modify index — confirm
	// against the endpoint that fills it in.
	Allocs map[string]uint64
	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a node list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}
   516  
// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
	JobSummary *JobSummary
	QueryMeta
}

// JobListResponse is used for a job list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	// NextPeriodicLaunch is the time duration till the job would be launched if
	// submitted.
	// NOTE(review): the field is a time.Time, so this is presumably the
	// instant of the next launch rather than a duration — confirm.
	NextPeriodicLaunch time.Time

	WriteMeta
}
   562  
// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string
	QueryMeta
}

// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}

// AllocListResponse is used for an allocation list request
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// EvalListResponse is used for an evaluation list request
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}
   630  
// Node status values, as stored in Node.Status. ValidNodeStatus accepts
// exactly these three.
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)
   636  
   637  // ShouldDrainNode checks if a given node status should trigger an
   638  // evaluation. Some states don't require any further action.
   639  func ShouldDrainNode(status string) bool {
   640  	switch status {
   641  	case NodeStatusInit, NodeStatusReady:
   642  		return false
   643  	case NodeStatusDown:
   644  		return true
   645  	default:
   646  		panic(fmt.Sprintf("unhandled node status %s", status))
   647  	}
   648  }
   649  
   650  // ValidNodeStatus is used to check if a node status is valid
   651  func ValidNodeStatus(status string) bool {
   652  	switch status {
   653  	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
   654  		return true
   655  	default:
   656  		return false
   657  	}
   658  }
   659  
// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
	TLSEnabled bool

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may be to provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// StatusUpdatedAt is the time stamp at which the state of the node was
	// updated
	StatusUpdatedAt int64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
   738  
   739  // Ready returns if the node is ready for running allocations
   740  func (n *Node) Ready() bool {
   741  	return n.Status == NodeStatusReady && !n.Drain
   742  }
   743  
   744  func (n *Node) Copy() *Node {
   745  	if n == nil {
   746  		return nil
   747  	}
   748  	nn := new(Node)
   749  	*nn = *n
   750  	nn.Attributes = CopyMapStringString(nn.Attributes)
   751  	nn.Resources = nn.Resources.Copy()
   752  	nn.Reserved = nn.Reserved.Copy()
   753  	nn.Links = CopyMapStringString(nn.Links)
   754  	nn.Meta = CopyMapStringString(nn.Meta)
   755  	return nn
   756  }
   757  
   758  // TerminalStatus returns if the current status is terminal and
   759  // will no longer transition.
   760  func (n *Node) TerminalStatus() bool {
   761  	switch n.Status {
   762  	case NodeStatusDown:
   763  		return true
   764  	default:
   765  		return false
   766  	}
   767  }
   768  
   769  // Stub returns a summarized version of the node
   770  func (n *Node) Stub() *NodeListStub {
   771  	return &NodeListStub{
   772  		ID:                n.ID,
   773  		Datacenter:        n.Datacenter,
   774  		Name:              n.Name,
   775  		NodeClass:         n.NodeClass,
   776  		Drain:             n.Drain,
   777  		Status:            n.Status,
   778  		StatusDescription: n.StatusDescription,
   779  		CreateIndex:       n.CreateIndex,
   780  		ModifyIndex:       n.ModifyIndex,
   781  	}
   782  }
   783  
// NodeListStub is used to return a subset of node information
// for the node list. It is produced by Node.Stub.
type NodeListStub struct {
	ID                string
	Datacenter        string
	Name              string
	NodeClass         string
	Drain             bool
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}
   797  
// Resources is used to define the resources available
// on a client
type Resources struct {
	CPU int
	// MemoryMB is decoded from the "memory" key by mapstructure.
	MemoryMB int `mapstructure:"memory"`
	// DiskMB is decoded from the "disk" key by mapstructure. DiskInBytes
	// converts it to bytes.
	DiskMB int `mapstructure:"disk"`
	IOPS   int
	// Networks holds the network resources, one entry per device
	// (NetIndex looks entries up by device name).
	Networks []*NetworkResource
}
   807  
const (
	// BytesInMegabyte is the number of bytes in one megabyte (1024 * 1024).
	BytesInMegabyte = 1024 * 1024
)
   811  
   812  // DefaultResources returns the default resources for a task.
   813  func DefaultResources() *Resources {
   814  	return &Resources{
   815  		CPU:      100,
   816  		MemoryMB: 10,
   817  		IOPS:     0,
   818  	}
   819  }
   820  
   821  // DiskInBytes returns the amount of disk resources in bytes.
   822  func (r *Resources) DiskInBytes() int64 {
   823  	return int64(r.DiskMB * BytesInMegabyte)
   824  }
   825  
   826  // Merge merges this resource with another resource.
   827  func (r *Resources) Merge(other *Resources) {
   828  	if other.CPU != 0 {
   829  		r.CPU = other.CPU
   830  	}
   831  	if other.MemoryMB != 0 {
   832  		r.MemoryMB = other.MemoryMB
   833  	}
   834  	if other.DiskMB != 0 {
   835  		r.DiskMB = other.DiskMB
   836  	}
   837  	if other.IOPS != 0 {
   838  		r.IOPS = other.IOPS
   839  	}
   840  	if len(other.Networks) != 0 {
   841  		r.Networks = other.Networks
   842  	}
   843  }
   844  
   845  func (r *Resources) Canonicalize() {
   846  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
   847  	// problems since we use reflect DeepEquals.
   848  	if len(r.Networks) == 0 {
   849  		r.Networks = nil
   850  	}
   851  
   852  	for _, n := range r.Networks {
   853  		n.Canonicalize()
   854  	}
   855  }
   856  
   857  // MeetsMinResources returns an error if the resources specified are less than
   858  // the minimum allowed.
   859  func (r *Resources) MeetsMinResources() error {
   860  	var mErr multierror.Error
   861  	if r.CPU < 20 {
   862  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is 20; got %d", r.CPU))
   863  	}
   864  	if r.MemoryMB < 10 {
   865  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is 10; got %d", r.MemoryMB))
   866  	}
   867  	if r.IOPS < 0 {
   868  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is 0; got %d", r.IOPS))
   869  	}
   870  	for i, n := range r.Networks {
   871  		if err := n.MeetsMinResources(); err != nil {
   872  			mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err))
   873  		}
   874  	}
   875  
   876  	return mErr.ErrorOrNil()
   877  }
   878  
   879  // Copy returns a deep copy of the resources
   880  func (r *Resources) Copy() *Resources {
   881  	if r == nil {
   882  		return nil
   883  	}
   884  	newR := new(Resources)
   885  	*newR = *r
   886  	if r.Networks != nil {
   887  		n := len(r.Networks)
   888  		newR.Networks = make([]*NetworkResource, n)
   889  		for i := 0; i < n; i++ {
   890  			newR.Networks[i] = r.Networks[i].Copy()
   891  		}
   892  	}
   893  	return newR
   894  }
   895  
   896  // NetIndex finds the matching net index using device name
   897  func (r *Resources) NetIndex(n *NetworkResource) int {
   898  	for idx, net := range r.Networks {
   899  		if net.Device == n.Device {
   900  			return idx
   901  		}
   902  	}
   903  	return -1
   904  }
   905  
   906  // Superset checks if one set of resources is a superset
   907  // of another. This ignores network resources, and the NetworkIndex
   908  // should be used for that.
   909  func (r *Resources) Superset(other *Resources) (bool, string) {
   910  	if r.CPU < other.CPU {
   911  		return false, "cpu exhausted"
   912  	}
   913  	if r.MemoryMB < other.MemoryMB {
   914  		return false, "memory exhausted"
   915  	}
   916  	if r.DiskMB < other.DiskMB {
   917  		return false, "disk exhausted"
   918  	}
   919  	if r.IOPS < other.IOPS {
   920  		return false, "iops exhausted"
   921  	}
   922  	return true, ""
   923  }
   924  
   925  // Add adds the resources of the delta to this, potentially
   926  // returning an error if not possible.
   927  func (r *Resources) Add(delta *Resources) error {
   928  	if delta == nil {
   929  		return nil
   930  	}
   931  	r.CPU += delta.CPU
   932  	r.MemoryMB += delta.MemoryMB
   933  	r.DiskMB += delta.DiskMB
   934  	r.IOPS += delta.IOPS
   935  
   936  	for _, n := range delta.Networks {
   937  		// Find the matching interface by IP or CIDR
   938  		idx := r.NetIndex(n)
   939  		if idx == -1 {
   940  			r.Networks = append(r.Networks, n.Copy())
   941  		} else {
   942  			r.Networks[idx].Add(n)
   943  		}
   944  	}
   945  	return nil
   946  }
   947  
   948  func (r *Resources) GoString() string {
   949  	return fmt.Sprintf("*%#v", *r)
   950  }
   951  
// Port pairs a label with a port number. It is the element type of
// NetworkResource.ReservedPorts and NetworkResource.DynamicPorts.
type Port struct {
	// Label is the name the port is referred to by.
	Label string
	// Value is the port number; it is decoded from the "static" key by
	// mapstructure.
	Value int `mapstructure:"static"`
}
   956  
// NetworkResource is used to represent available network
// resources
type NetworkResource struct {
	Device        string // Name of the device
	CIDR          string // CIDR block of addresses
	IP            string // IP address
	MBits         int    // Throughput; MeetsMinResources requires at least 1
	ReservedPorts []Port // Reserved ports
	DynamicPorts  []Port // Dynamically assigned ports
}
   967  
   968  func (n *NetworkResource) Canonicalize() {
   969  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
   970  	// problems since we use reflect DeepEquals.
   971  	if len(n.ReservedPorts) == 0 {
   972  		n.ReservedPorts = nil
   973  	}
   974  	if len(n.DynamicPorts) == 0 {
   975  		n.DynamicPorts = nil
   976  	}
   977  }
   978  
   979  // MeetsMinResources returns an error if the resources specified are less than
   980  // the minimum allowed.
   981  func (n *NetworkResource) MeetsMinResources() error {
   982  	var mErr multierror.Error
   983  	if n.MBits < 1 {
   984  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits))
   985  	}
   986  	return mErr.ErrorOrNil()
   987  }
   988  
   989  // Copy returns a deep copy of the network resource
   990  func (n *NetworkResource) Copy() *NetworkResource {
   991  	if n == nil {
   992  		return nil
   993  	}
   994  	newR := new(NetworkResource)
   995  	*newR = *n
   996  	if n.ReservedPorts != nil {
   997  		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
   998  		copy(newR.ReservedPorts, n.ReservedPorts)
   999  	}
  1000  	if n.DynamicPorts != nil {
  1001  		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
  1002  		copy(newR.DynamicPorts, n.DynamicPorts)
  1003  	}
  1004  	return newR
  1005  }
  1006  
  1007  // Add adds the resources of the delta to this, potentially
  1008  // returning an error if not possible.
  1009  func (n *NetworkResource) Add(delta *NetworkResource) {
  1010  	if len(delta.ReservedPorts) > 0 {
  1011  		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
  1012  	}
  1013  	n.MBits += delta.MBits
  1014  	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
  1015  }
  1016  
  1017  func (n *NetworkResource) GoString() string {
  1018  	return fmt.Sprintf("*%#v", *n)
  1019  }
  1020  
  1021  func (n *NetworkResource) MapLabelToValues(port_map map[string]int) map[string]int {
  1022  	labelValues := make(map[string]int)
  1023  	ports := append(n.ReservedPorts, n.DynamicPorts...)
  1024  	for _, port := range ports {
  1025  		if mapping, ok := port_map[port.Label]; ok {
  1026  			labelValues[port.Label] = mapping
  1027  		} else {
  1028  			labelValues[port.Label] = port.Value
  1029  		}
  1030  	}
  1031  	return labelValues
  1032  }
  1033  
const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)
  1042  
const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluations and allocations are terminal
)
  1048  
const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if one is
	// not specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// CoreJobPriority is set higher than any user-specified
	// job priority so that core jobs get priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2
)
  1065  
// JobSummary summarizes the state of the allocations of a job
type JobSummary struct {
	// JobID is the ID of the job being summarized.
	JobID   string
	// Summary holds one TaskGroupSummary per key.
	// NOTE(review): keys are presumably task group names; confirm against callers.
	Summary map[string]TaskGroupSummary

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1075  
  1076  // Copy returns a new copy of JobSummary
  1077  func (js *JobSummary) Copy() *JobSummary {
  1078  	newJobSummary := new(JobSummary)
  1079  	*newJobSummary = *js
  1080  	newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary))
  1081  	for k, v := range js.Summary {
  1082  		newTGSummary[k] = v
  1083  	}
  1084  	newJobSummary.Summary = newTGSummary
  1085  	return newJobSummary
  1086  }
  1087  
// TaskGroupSummary summarizes the state of all the allocations of a
// particular TaskGroup. Each field is a count for the state it is named
// after.
type TaskGroupSummary struct {
	Queued   int
	Complete int
	Failed   int
	Running  int
	Starting int
	Lost     int
}
  1098  
// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling,
// however.
type Job struct {
	// Region is the Nomad region that handles scheduling this job
	Region string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project
	ID string

	// ParentID is the unique identifier of the job that spawned this job.
	ParentID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs. Validate requires it to lie within
	// [JobMinPriority, JobMaxPriority].
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool `mapstructure:"all_at_once"`

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// Update is used to control the update strategy
	Update UpdateStrategy

	// Periodic is used to define the interval the job is run at. A nil
	// value means the job is not periodic (see IsPeriodic).
	Periodic *PeriodicConfig

	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// VaultToken is the Vault token that proves the submitter of the job has
	// access to the specified Vault policies. This field is only used to
	// transfer the token and is not stored after Job submission.
	VaultToken string `mapstructure:"vault_token"`

	// Status is the current status of the job
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Raft Indexes
	CreateIndex    uint64
	ModifyIndex    uint64
	JobModifyIndex uint64
}
  1170  
  1171  // Canonicalize is used to canonicalize fields in the Job. This should be called
  1172  // when registering a Job.
  1173  func (j *Job) Canonicalize() {
  1174  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  1175  	// problems since we use reflect DeepEquals.
  1176  	if len(j.Meta) == 0 {
  1177  		j.Meta = nil
  1178  	}
  1179  
  1180  	for _, tg := range j.TaskGroups {
  1181  		tg.Canonicalize(j)
  1182  	}
  1183  }
  1184  
  1185  // Copy returns a deep copy of the Job. It is expected that callers use recover.
  1186  // This job can panic if the deep copy failed as it uses reflection.
  1187  func (j *Job) Copy() *Job {
  1188  	if j == nil {
  1189  		return nil
  1190  	}
  1191  	nj := new(Job)
  1192  	*nj = *j
  1193  	nj.Datacenters = CopySliceString(nj.Datacenters)
  1194  	nj.Constraints = CopySliceConstraints(nj.Constraints)
  1195  
  1196  	if j.TaskGroups != nil {
  1197  		tgs := make([]*TaskGroup, len(nj.TaskGroups))
  1198  		for i, tg := range nj.TaskGroups {
  1199  			tgs[i] = tg.Copy()
  1200  		}
  1201  		nj.TaskGroups = tgs
  1202  	}
  1203  
  1204  	nj.Periodic = nj.Periodic.Copy()
  1205  	nj.Meta = CopyMapStringString(nj.Meta)
  1206  	return nj
  1207  }
  1208  
  1209  // Validate is used to sanity check a job input
  1210  func (j *Job) Validate() error {
  1211  	var mErr multierror.Error
  1212  	if j.Region == "" {
  1213  		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
  1214  	}
  1215  	if j.ID == "" {
  1216  		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
  1217  	} else if strings.Contains(j.ID, " ") {
  1218  		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
  1219  	}
  1220  	if j.Name == "" {
  1221  		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
  1222  	}
  1223  	if j.Type == "" {
  1224  		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
  1225  	}
  1226  	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
  1227  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
  1228  	}
  1229  	if len(j.Datacenters) == 0 {
  1230  		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
  1231  	}
  1232  	if len(j.TaskGroups) == 0 {
  1233  		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
  1234  	}
  1235  	for idx, constr := range j.Constraints {
  1236  		if err := constr.Validate(); err != nil {
  1237  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  1238  			mErr.Errors = append(mErr.Errors, outer)
  1239  		}
  1240  	}
  1241  
  1242  	// Check for duplicate task groups
  1243  	taskGroups := make(map[string]int)
  1244  	for idx, tg := range j.TaskGroups {
  1245  		if tg.Name == "" {
  1246  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
  1247  		} else if existing, ok := taskGroups[tg.Name]; ok {
  1248  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
  1249  		} else {
  1250  			taskGroups[tg.Name] = idx
  1251  		}
  1252  
  1253  		if j.Type == "system" && tg.Count > 1 {
  1254  			mErr.Errors = append(mErr.Errors,
  1255  				fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler",
  1256  					tg.Name, tg.Count))
  1257  		}
  1258  	}
  1259  
  1260  	// Validate the task group
  1261  	for _, tg := range j.TaskGroups {
  1262  		if err := tg.Validate(); err != nil {
  1263  			outer := fmt.Errorf("Task group %s validation failed: %s", tg.Name, err)
  1264  			mErr.Errors = append(mErr.Errors, outer)
  1265  		}
  1266  	}
  1267  
  1268  	// Validate periodic is only used with batch jobs.
  1269  	if j.IsPeriodic() && j.Periodic.Enabled {
  1270  		if j.Type != JobTypeBatch {
  1271  			mErr.Errors = append(mErr.Errors,
  1272  				fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch))
  1273  		}
  1274  
  1275  		if err := j.Periodic.Validate(); err != nil {
  1276  			mErr.Errors = append(mErr.Errors, err)
  1277  		}
  1278  	}
  1279  
  1280  	return mErr.ErrorOrNil()
  1281  }
  1282  
  1283  // LookupTaskGroup finds a task group by name
  1284  func (j *Job) LookupTaskGroup(name string) *TaskGroup {
  1285  	for _, tg := range j.TaskGroups {
  1286  		if tg.Name == name {
  1287  			return tg
  1288  		}
  1289  	}
  1290  	return nil
  1291  }
  1292  
  1293  // Stub is used to return a summary of the job
  1294  func (j *Job) Stub(summary *JobSummary) *JobListStub {
  1295  	return &JobListStub{
  1296  		ID:                j.ID,
  1297  		ParentID:          j.ParentID,
  1298  		Name:              j.Name,
  1299  		Type:              j.Type,
  1300  		Priority:          j.Priority,
  1301  		Status:            j.Status,
  1302  		StatusDescription: j.StatusDescription,
  1303  		CreateIndex:       j.CreateIndex,
  1304  		ModifyIndex:       j.ModifyIndex,
  1305  		JobModifyIndex:    j.JobModifyIndex,
  1306  		JobSummary:        summary,
  1307  	}
  1308  }
  1309  
  1310  // IsPeriodic returns whether a job is periodic.
  1311  func (j *Job) IsPeriodic() bool {
  1312  	return j.Periodic != nil
  1313  }
  1314  
  1315  // VaultPolicies returns the set of Vault policies per task group, per task
  1316  func (j *Job) VaultPolicies() map[string]map[string]*Vault {
  1317  	policies := make(map[string]map[string]*Vault, len(j.TaskGroups))
  1318  
  1319  	for _, tg := range j.TaskGroups {
  1320  		tgPolicies := make(map[string]*Vault, len(tg.Tasks))
  1321  
  1322  		for _, task := range tg.Tasks {
  1323  			if task.Vault == nil {
  1324  				continue
  1325  			}
  1326  
  1327  			tgPolicies[task.Name] = task.Vault
  1328  		}
  1329  
  1330  		if len(tgPolicies) != 0 {
  1331  			policies[tg.Name] = tgPolicies
  1332  		}
  1333  	}
  1334  
  1335  	return policies
  1336  }
  1337  
  1338  // RequiredSignals returns a mapping of task groups to tasks to their required
  1339  // set of signals
  1340  func (j *Job) RequiredSignals() map[string]map[string][]string {
  1341  	signals := make(map[string]map[string][]string)
  1342  
  1343  	for _, tg := range j.TaskGroups {
  1344  		for _, task := range tg.Tasks {
  1345  			// Use this local one as a set
  1346  			taskSignals := make(map[string]struct{})
  1347  
  1348  			// Check if the Vault change mode uses signals
  1349  			if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal {
  1350  				taskSignals[task.Vault.ChangeSignal] = struct{}{}
  1351  			}
  1352  
  1353  			// Check if any template change mode uses signals
  1354  			for _, t := range task.Templates {
  1355  				if t.ChangeMode != TemplateChangeModeSignal {
  1356  					continue
  1357  				}
  1358  
  1359  				taskSignals[t.ChangeSignal] = struct{}{}
  1360  			}
  1361  
  1362  			// Flatten and sort the signals
  1363  			l := len(taskSignals)
  1364  			if l == 0 {
  1365  				continue
  1366  			}
  1367  
  1368  			flat := make([]string, 0, l)
  1369  			for sig := range taskSignals {
  1370  				flat = append(flat, sig)
  1371  			}
  1372  
  1373  			sort.Strings(flat)
  1374  			tgSignals, ok := signals[tg.Name]
  1375  			if !ok {
  1376  				tgSignals = make(map[string][]string)
  1377  				signals[tg.Name] = tgSignals
  1378  			}
  1379  			tgSignals[task.Name] = flat
  1380  		}
  1381  
  1382  	}
  1383  
  1384  	return signals
  1385  }
  1386  
// JobListStub is used to return a subset of job information
// for the job list. Job.Stub fills every field from the corresponding Job
// field, except JobSummary, which is passed in by the caller.
type JobListStub struct {
	ID                string
	ParentID          string
	Name              string
	Type              string
	Priority          int
	Status            string
	StatusDescription string
	JobSummary        *JobSummary
	CreateIndex       uint64
	ModifyIndex       uint64
	JobModifyIndex    uint64
}
  1402  
// UpdateStrategy is used to modify how updates are done. A rolling update
// is used only when both fields are positive (see Rolling).
type UpdateStrategy struct {
	// Stagger is the amount of time between the updates
	Stagger time.Duration

	// MaxParallel is how many updates can be done in parallel
	MaxParallel int `mapstructure:"max_parallel"`
}
  1411  
  1412  // Rolling returns if a rolling strategy should be used
  1413  func (u *UpdateStrategy) Rolling() bool {
  1414  	return u.Stagger > 0 && u.MaxParallel > 0
  1415  }
  1416  
const (
	// PeriodicSpecCron is used for a cron spec; such specs are parsed with
	// cronexpr.
	PeriodicSpecCron = "cron"

	// PeriodicSpecTest is only used by unit tests. It is a sorted, comma
	// separated list of unix timestamps at which to launch.
	PeriodicSpecTest = "_internal_test"
)
  1425  
// PeriodicConfig defines the interval a job should be run at.
type PeriodicConfig struct {
	// Enabled determines if the job should be run periodically.
	Enabled bool

	// Spec specifies the interval the job should be run at. It is parsed based
	// on the SpecType.
	Spec string

	// SpecType defines the format of the spec. Validate accepts
	// PeriodicSpecCron and PeriodicSpecTest.
	SpecType string

	// ProhibitOverlap enforces that spawned jobs do not run in parallel.
	ProhibitOverlap bool `mapstructure:"prohibit_overlap"`
}
  1441  
  1442  func (p *PeriodicConfig) Copy() *PeriodicConfig {
  1443  	if p == nil {
  1444  		return nil
  1445  	}
  1446  	np := new(PeriodicConfig)
  1447  	*np = *p
  1448  	return np
  1449  }
  1450  
  1451  func (p *PeriodicConfig) Validate() error {
  1452  	if !p.Enabled {
  1453  		return nil
  1454  	}
  1455  
  1456  	if p.Spec == "" {
  1457  		return fmt.Errorf("Must specify a spec")
  1458  	}
  1459  
  1460  	switch p.SpecType {
  1461  	case PeriodicSpecCron:
  1462  		// Validate the cron spec
  1463  		if _, err := cronexpr.Parse(p.Spec); err != nil {
  1464  			return fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err)
  1465  		}
  1466  	case PeriodicSpecTest:
  1467  		// No-op
  1468  	default:
  1469  		return fmt.Errorf("Unknown periodic specification type %q", p.SpecType)
  1470  	}
  1471  
  1472  	return nil
  1473  }
  1474  
  1475  // Next returns the closest time instant matching the spec that is after the
  1476  // passed time. If no matching instance exists, the zero value of time.Time is
  1477  // returned. The `time.Location` of the returned value matches that of the
  1478  // passed time.
  1479  func (p *PeriodicConfig) Next(fromTime time.Time) time.Time {
  1480  	switch p.SpecType {
  1481  	case PeriodicSpecCron:
  1482  		if e, err := cronexpr.Parse(p.Spec); err == nil {
  1483  			return e.Next(fromTime)
  1484  		}
  1485  	case PeriodicSpecTest:
  1486  		split := strings.Split(p.Spec, ",")
  1487  		if len(split) == 1 && split[0] == "" {
  1488  			return time.Time{}
  1489  		}
  1490  
  1491  		// Parse the times
  1492  		times := make([]time.Time, len(split))
  1493  		for i, s := range split {
  1494  			unix, err := strconv.Atoi(s)
  1495  			if err != nil {
  1496  				return time.Time{}
  1497  			}
  1498  
  1499  			times[i] = time.Unix(int64(unix), 0)
  1500  		}
  1501  
  1502  		// Find the next match
  1503  		for _, next := range times {
  1504  			if fromTime.Before(next) {
  1505  				return next
  1506  			}
  1507  		}
  1508  	}
  1509  
  1510  	return time.Time{}
  1511  }
  1512  
const (
	// PeriodicLaunchSuffix is the string appended to the periodic job's ID
	// when launching derived instances of it.
	PeriodicLaunchSuffix = "/periodic-"
)
  1518  
// PeriodicLaunch tracks the last launch time of a periodic job.
type PeriodicLaunch struct {
	ID     string    // ID of the periodic job.
	Launch time.Time // The last launch time.

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1528  
var (
	// defaultServiceJobRestartPolicy is the policy NewRestartPolicy returns
	// (as a copy) for service and system jobs.
	defaultServiceJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 2,
		Interval: 1 * time.Minute,
		Mode:     RestartPolicyModeDelay,
	}
	// defaultBatchJobRestartPolicy is the policy NewRestartPolicy returns
	// (as a copy) for batch jobs.
	defaultBatchJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 15,
		Interval: 7 * 24 * time.Hour,
		Mode:     RestartPolicyModeDelay,
	}
)
  1543  
const (
	// RestartPolicyModeDelay causes an artificial delay until the next interval
	// is reached when the specified attempts have been reached in the interval.
	RestartPolicyModeDelay = "delay"

	// RestartPolicyModeFail causes a job to fail if the specified number of
	// attempts are reached within an interval.
	RestartPolicyModeFail = "fail"
)
  1553  
// RestartPolicy configures how Tasks are restarted when they crash or fail.
type RestartPolicy struct {
	// Attempts is the number of restarts that will occur in an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of restarts
	// within.
	Interval time.Duration

	// Delay is the time between a failure and a restart.
	Delay time.Duration

	// Mode controls what happens when the task restarts more than Attempts
	// times in an interval. Validate accepts RestartPolicyModeDelay and
	// RestartPolicyModeFail.
	Mode string
}
  1570  
  1571  func (r *RestartPolicy) Copy() *RestartPolicy {
  1572  	if r == nil {
  1573  		return nil
  1574  	}
  1575  	nrp := new(RestartPolicy)
  1576  	*nrp = *r
  1577  	return nrp
  1578  }
  1579  
  1580  func (r *RestartPolicy) Validate() error {
  1581  	switch r.Mode {
  1582  	case RestartPolicyModeDelay, RestartPolicyModeFail:
  1583  	default:
  1584  		return fmt.Errorf("Unsupported restart mode: %q", r.Mode)
  1585  	}
  1586  
  1587  	// Check for ambiguous/confusing settings
  1588  	if r.Attempts == 0 && r.Mode != RestartPolicyModeFail {
  1589  		return fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts)
  1590  	}
  1591  
  1592  	if r.Interval == 0 {
  1593  		return nil
  1594  	}
  1595  	if time.Duration(r.Attempts)*r.Delay > r.Interval {
  1596  		return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)
  1597  	}
  1598  	return nil
  1599  }
  1600  
  1601  func NewRestartPolicy(jobType string) *RestartPolicy {
  1602  	switch jobType {
  1603  	case JobTypeService, JobTypeSystem:
  1604  		rp := defaultServiceJobRestartPolicy
  1605  		return &rp
  1606  	case JobTypeBatch:
  1607  		rp := defaultBatchJobRestartPolicy
  1608  		return &rp
  1609  	}
  1610  	return nil
  1611  }
  1612  
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled.
	Count int

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// RestartPolicy of a TaskGroup. Canonicalize fills in a default based on
	// the job type when it is nil.
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// EphemeralDisk is the disk resources that the task group requests
	EphemeralDisk *EphemeralDisk

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string
}
  1641  
  1642  func (tg *TaskGroup) Copy() *TaskGroup {
  1643  	if tg == nil {
  1644  		return nil
  1645  	}
  1646  	ntg := new(TaskGroup)
  1647  	*ntg = *tg
  1648  	ntg.Constraints = CopySliceConstraints(ntg.Constraints)
  1649  
  1650  	ntg.RestartPolicy = ntg.RestartPolicy.Copy()
  1651  
  1652  	if tg.Tasks != nil {
  1653  		tasks := make([]*Task, len(ntg.Tasks))
  1654  		for i, t := range ntg.Tasks {
  1655  			tasks[i] = t.Copy()
  1656  		}
  1657  		ntg.Tasks = tasks
  1658  	}
  1659  
  1660  	ntg.Meta = CopyMapStringString(ntg.Meta)
  1661  
  1662  	if tg.EphemeralDisk != nil {
  1663  		ntg.EphemeralDisk = tg.EphemeralDisk.Copy()
  1664  	}
  1665  	return ntg
  1666  }
  1667  
  1668  // Canonicalize is used to canonicalize fields in the TaskGroup.
  1669  func (tg *TaskGroup) Canonicalize(job *Job) {
  1670  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  1671  	// problems since we use reflect DeepEquals.
  1672  	if len(tg.Meta) == 0 {
  1673  		tg.Meta = nil
  1674  	}
  1675  
  1676  	// Set the default restart policy.
  1677  	if tg.RestartPolicy == nil {
  1678  		tg.RestartPolicy = NewRestartPolicy(job.Type)
  1679  	}
  1680  
  1681  	// Set a default ephemeral disk object if the user has not requested for one
  1682  	if tg.EphemeralDisk == nil {
  1683  		tg.EphemeralDisk = DefaultEphemeralDisk()
  1684  	}
  1685  
  1686  	for _, task := range tg.Tasks {
  1687  		task.Canonicalize(job, tg)
  1688  	}
  1689  
  1690  	// Add up the disk resources to EphemeralDisk. This is done so that users
  1691  	// are not required to move their disk attribute from resources to
  1692  	// EphemeralDisk section of the job spec in Nomad 0.5
  1693  	// COMPAT 0.4.1 -> 0.5
  1694  	// Remove in 0.6
  1695  	var diskMB int
  1696  	for _, task := range tg.Tasks {
  1697  		diskMB += task.Resources.DiskMB
  1698  	}
  1699  	if diskMB > 0 {
  1700  		tg.EphemeralDisk.SizeMB = diskMB
  1701  	}
  1702  }
  1703  
  1704  // Validate is used to sanity check a task group
  1705  func (tg *TaskGroup) Validate() error {
  1706  	var mErr multierror.Error
  1707  	if tg.Name == "" {
  1708  		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
  1709  	}
  1710  	if tg.Count < 0 {
  1711  		mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative"))
  1712  	}
  1713  	if len(tg.Tasks) == 0 {
  1714  		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
  1715  	}
  1716  	for idx, constr := range tg.Constraints {
  1717  		if err := constr.Validate(); err != nil {
  1718  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  1719  			mErr.Errors = append(mErr.Errors, outer)
  1720  		}
  1721  	}
  1722  
  1723  	if tg.RestartPolicy != nil {
  1724  		if err := tg.RestartPolicy.Validate(); err != nil {
  1725  			mErr.Errors = append(mErr.Errors, err)
  1726  		}
  1727  	} else {
  1728  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
  1729  	}
  1730  
  1731  	if tg.EphemeralDisk != nil {
  1732  		if err := tg.EphemeralDisk.Validate(); err != nil {
  1733  			mErr.Errors = append(mErr.Errors, err)
  1734  		}
  1735  	} else {
  1736  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name))
  1737  	}
  1738  
  1739  	// Check for duplicate tasks
  1740  	tasks := make(map[string]int)
  1741  	for idx, task := range tg.Tasks {
  1742  		if task.Name == "" {
  1743  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
  1744  		} else if existing, ok := tasks[task.Name]; ok {
  1745  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
  1746  		} else {
  1747  			tasks[task.Name] = idx
  1748  		}
  1749  	}
  1750  
  1751  	// Validate the tasks
  1752  	for _, task := range tg.Tasks {
  1753  		if err := task.Validate(tg.EphemeralDisk); err != nil {
  1754  			outer := fmt.Errorf("Task %s validation failed: %s", task.Name, err)
  1755  			mErr.Errors = append(mErr.Errors, outer)
  1756  		}
  1757  	}
  1758  	return mErr.ErrorOrNil()
  1759  }
  1760  
  1761  // LookupTask finds a task by name
  1762  func (tg *TaskGroup) LookupTask(name string) *Task {
  1763  	for _, t := range tg.Tasks {
  1764  		if t.Name == name {
  1765  			return t
  1766  		}
  1767  	}
  1768  	return nil
  1769  }
  1770  
  1771  func (tg *TaskGroup) GoString() string {
  1772  	return fmt.Sprintf("*%#v", *tg)
  1773  }
  1774  
const (
	// TODO add Consul TTL check
	ServiceCheckHTTP   = "http"
	ServiceCheckTCP    = "tcp"
	ServiceCheckScript = "script"

	// minCheckInterval is the minimum check interval permitted.  Consul
	// currently has its MinInterval set to 1s.  Mirror that here for
	// consistency.
	minCheckInterval = 1 * time.Second

	// minCheckTimeout is the minimum check timeout permitted.
	// ServiceCheck.validate currently enforces it for tcp and http checks;
	// timeout validation for script checks is disabled there.
	minCheckTimeout = 1 * time.Second
)
  1790  
// The ServiceCheck data model represents the consul health check that
// Nomad registers for a Task
type ServiceCheck struct {
	Name          string        // Name of the check, defaults to id
	Type          string        // Type of the check - tcp, http and script
	Command       string        // Command is the command to run for script checks
	Args          []string      // Args is a list of arguments for script checks
	Path          string        // path of the health check url for http type check
	Protocol      string        // Protocol to use if check is http, defaults to http
	PortLabel     string        `mapstructure:"port"` // The port to use for tcp/http checks
	Interval      time.Duration // Interval of the check
	Timeout       time.Duration // Timeout of the response from the check before consul fails the check
	InitialStatus string        `mapstructure:"initial_status"` // Initial status of the check
}
  1805  
  1806  func (sc *ServiceCheck) Copy() *ServiceCheck {
  1807  	if sc == nil {
  1808  		return nil
  1809  	}
  1810  	nsc := new(ServiceCheck)
  1811  	*nsc = *sc
  1812  	return nsc
  1813  }
  1814  
  1815  func (sc *ServiceCheck) Canonicalize(serviceName string) {
  1816  	// Ensure empty slices are treated as null to avoid scheduling issues when
  1817  	// using DeepEquals.
  1818  	if len(sc.Args) == 0 {
  1819  		sc.Args = nil
  1820  	}
  1821  
  1822  	if sc.Name == "" {
  1823  		sc.Name = fmt.Sprintf("service: %q check", serviceName)
  1824  	}
  1825  }
  1826  
  1827  // validate a Service's ServiceCheck
  1828  func (sc *ServiceCheck) validate() error {
  1829  	switch strings.ToLower(sc.Type) {
  1830  	case ServiceCheckTCP:
  1831  		if sc.Timeout == 0 {
  1832  			return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval)
  1833  		} else if sc.Timeout < minCheckTimeout {
  1834  			return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
  1835  		}
  1836  	case ServiceCheckHTTP:
  1837  		if sc.Path == "" {
  1838  			return fmt.Errorf("http type must have a valid http path")
  1839  		}
  1840  
  1841  		if sc.Timeout == 0 {
  1842  			return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval)
  1843  		} else if sc.Timeout < minCheckTimeout {
  1844  			return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
  1845  		}
  1846  	case ServiceCheckScript:
  1847  		if sc.Command == "" {
  1848  			return fmt.Errorf("script type must have a valid script path")
  1849  		}
  1850  
  1851  		// TODO: enforce timeout on the Client side and reenable
  1852  		// validation.
  1853  	default:
  1854  		return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type)
  1855  	}
  1856  
  1857  	if sc.Interval == 0 {
  1858  		return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval)
  1859  	} else if sc.Interval < minCheckInterval {
  1860  		return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval)
  1861  	}
  1862  
  1863  	switch sc.InitialStatus {
  1864  	case "":
  1865  		// case api.HealthUnknown: TODO: Add when Consul releases 0.7.1
  1866  	case api.HealthPassing:
  1867  	case api.HealthWarning:
  1868  	case api.HealthCritical:
  1869  	default:
  1870  		return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical)
  1871  
  1872  	}
  1873  
  1874  	return nil
  1875  }
  1876  
  1877  // RequiresPort returns whether the service check requires the task has a port.
  1878  func (sc *ServiceCheck) RequiresPort() bool {
  1879  	switch sc.Type {
  1880  	case ServiceCheckHTTP, ServiceCheckTCP:
  1881  		return true
  1882  	default:
  1883  		return false
  1884  	}
  1885  }
  1886  
  1887  func (sc *ServiceCheck) Hash(serviceID string) string {
  1888  	h := sha1.New()
  1889  	io.WriteString(h, serviceID)
  1890  	io.WriteString(h, sc.Name)
  1891  	io.WriteString(h, sc.Type)
  1892  	io.WriteString(h, sc.Command)
  1893  	io.WriteString(h, strings.Join(sc.Args, ""))
  1894  	io.WriteString(h, sc.Path)
  1895  	io.WriteString(h, sc.Protocol)
  1896  	io.WriteString(h, sc.PortLabel)
  1897  	io.WriteString(h, sc.Interval.String())
  1898  	io.WriteString(h, sc.Timeout.String())
  1899  	return fmt.Sprintf("%x", h.Sum(nil))
  1900  }
  1901  
// Service represents a Consul service definition in Nomad. A Service belongs
// to a Task and may carry any number of health checks.
type Service struct {
	// Name of the service registered with Consul. Consul defaults the
	// Name to ServiceID if not specified.  The Name if specified is used
	// as one of the seed values when generating a Consul ServiceID.
	// Canonicalize interpolates job, task group and task values into it.
	Name string

	// PortLabel is either the numeric port number or the `host:port`.
	// To specify the port number using the host's Consul Advertise
	// address, specify an empty host in the PortLabel (e.g. `:port`).
	//
	// NOTE(review): validateServices requires a non-empty PortLabel to match
	// a port label declared in the task's network resources, which suggests
	// the value is a label rather than a literal port — confirm and update
	// the description above accordingly.
	PortLabel string          `mapstructure:"port"`
	Tags      []string        // List of tags for the service
	Checks    []*ServiceCheck // List of checks associated with the service
}
  1916  
  1917  func (s *Service) Copy() *Service {
  1918  	if s == nil {
  1919  		return nil
  1920  	}
  1921  	ns := new(Service)
  1922  	*ns = *s
  1923  	ns.Tags = CopySliceString(ns.Tags)
  1924  
  1925  	if s.Checks != nil {
  1926  		checks := make([]*ServiceCheck, len(ns.Checks))
  1927  		for i, c := range ns.Checks {
  1928  			checks[i] = c.Copy()
  1929  		}
  1930  		ns.Checks = checks
  1931  	}
  1932  
  1933  	return ns
  1934  }
  1935  
  1936  // Canonicalize interpolates values of Job, Task Group and Task in the Service
  1937  // Name. This also generates check names, service id and check ids.
  1938  func (s *Service) Canonicalize(job string, taskGroup string, task string) {
  1939  	// Ensure empty lists are treated as null to avoid scheduler issues when
  1940  	// using DeepEquals
  1941  	if len(s.Tags) == 0 {
  1942  		s.Tags = nil
  1943  	}
  1944  	if len(s.Checks) == 0 {
  1945  		s.Checks = nil
  1946  	}
  1947  
  1948  	s.Name = args.ReplaceEnv(s.Name, map[string]string{
  1949  		"JOB":       job,
  1950  		"TASKGROUP": taskGroup,
  1951  		"TASK":      task,
  1952  		"BASE":      fmt.Sprintf("%s-%s-%s", job, taskGroup, task),
  1953  	},
  1954  	)
  1955  
  1956  	for _, check := range s.Checks {
  1957  		check.Canonicalize(s.Name)
  1958  	}
  1959  }
  1960  
  1961  // Validate checks if the Check definition is valid
  1962  func (s *Service) Validate() error {
  1963  	var mErr multierror.Error
  1964  
  1965  	// Ensure the service name is valid per the below RFCs but make an exception
  1966  	// for our interpolation syntax
  1967  	// RFC-952 §1 (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1
  1968  	// (https://tools.ietf.org/html/rfc1123), and RFC-2782
  1969  	// (https://tools.ietf.org/html/rfc2782).
  1970  	re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9\$][a-zA-Z0-9\-\$\{\}\_\.]*[a-z0-9\}])$`)
  1971  	if !re.MatchString(s.Name) {
  1972  		mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q", s.Name))
  1973  	}
  1974  
  1975  	for _, c := range s.Checks {
  1976  		if s.PortLabel == "" && c.RequiresPort() {
  1977  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but the service %+q has no port", c.Name, s.Name))
  1978  			continue
  1979  		}
  1980  
  1981  		if err := c.validate(); err != nil {
  1982  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err))
  1983  		}
  1984  	}
  1985  	return mErr.ErrorOrNil()
  1986  }
  1987  
  1988  // ValidateName checks if the services Name is valid and should be called after
  1989  // the name has been interpolated
  1990  func (s *Service) ValidateName(name string) error {
  1991  	// Ensure the service name is valid per RFC-952 §1
  1992  	// (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1
  1993  	// (https://tools.ietf.org/html/rfc1123), and RFC-2782
  1994  	// (https://tools.ietf.org/html/rfc2782).
  1995  	re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`)
  1996  	if !re.MatchString(name) {
  1997  		return fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be less than 63 characters long: %q", name)
  1998  	}
  1999  	return nil
  2000  }
  2001  
  2002  // Hash calculates the hash of the check based on it's content and the service
  2003  // which owns it
  2004  func (s *Service) Hash() string {
  2005  	h := sha1.New()
  2006  	io.WriteString(h, s.Name)
  2007  	io.WriteString(h, strings.Join(s.Tags, ""))
  2008  	io.WriteString(h, s.PortLabel)
  2009  	return fmt.Sprintf("%x", h.Sum(nil))
  2010  }
  2011  
const (
	// DefaultKillTimeout is the default timeout between signaling a task it
	// will be killed and killing it. Task.Canonicalize applies it to tasks
	// whose KillTimeout is zero.
	DefaultKillTimeout = 5 * time.Second
)
  2017  
  2018  // LogConfig provides configuration for log rotation
  2019  type LogConfig struct {
  2020  	MaxFiles      int `mapstructure:"max_files"`
  2021  	MaxFileSizeMB int `mapstructure:"max_file_size"`
  2022  }
  2023  
  2024  // DefaultLogConfig returns the default LogConfig values.
  2025  func DefaultLogConfig() *LogConfig {
  2026  	return &LogConfig{
  2027  		MaxFiles:      10,
  2028  		MaxFileSizeMB: 10,
  2029  	}
  2030  }
  2031  
  2032  // Validate returns an error if the log config specified are less than
  2033  // the minimum allowed.
  2034  func (l *LogConfig) Validate() error {
  2035  	var mErr multierror.Error
  2036  	if l.MaxFiles < 1 {
  2037  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
  2038  	}
  2039  	if l.MaxFileSizeMB < 1 {
  2040  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
  2041  	}
  2042  	return mErr.ErrorOrNil()
  2043  }
  2044  
// Task is a single process, typically, that is executed as part of a task
// group.
type Task struct {
	// Name of the task. Validate requires it to be non-empty and free of
	// slashes because it is used as a directory name on disk.
	Name string

	// Driver is used to control which driver is used
	Driver string

	// User is used to determine which user will run the task. It defaults to
	// the same user the Nomad client is being run as.
	User string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// List of service definitions exposed by the Task
	Services []*Service

	// Vault is used to define the set of Vault policies that this task should
	// have access to.
	Vault *Vault

	// Templates are the set of templates to be rendered for the task.
	Templates []*Template

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Resources is the resources needed by this task. Canonicalize fills in
	// DefaultResources when it is nil.
	Resources *Resources

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string

	// KillTimeout is the time between signaling a task that it will be
	// killed and killing it. Canonicalize replaces a zero value with
	// DefaultKillTimeout.
	KillTimeout time.Duration `mapstructure:"kill_timeout"`

	// LogConfig provides configuration for log rotation
	LogConfig *LogConfig `mapstructure:"logs"`

	// Artifacts is a list of artifacts to download and extract before running
	// the task.
	Artifacts []*TaskArtifact
}
  2095  
  2096  func (t *Task) Copy() *Task {
  2097  	if t == nil {
  2098  		return nil
  2099  	}
  2100  	nt := new(Task)
  2101  	*nt = *t
  2102  	nt.Env = CopyMapStringString(nt.Env)
  2103  
  2104  	if t.Services != nil {
  2105  		services := make([]*Service, len(nt.Services))
  2106  		for i, s := range nt.Services {
  2107  			services[i] = s.Copy()
  2108  		}
  2109  		nt.Services = services
  2110  	}
  2111  
  2112  	nt.Constraints = CopySliceConstraints(nt.Constraints)
  2113  
  2114  	nt.Vault = nt.Vault.Copy()
  2115  	nt.Resources = nt.Resources.Copy()
  2116  	nt.Meta = CopyMapStringString(nt.Meta)
  2117  
  2118  	if t.Artifacts != nil {
  2119  		artifacts := make([]*TaskArtifact, 0, len(t.Artifacts))
  2120  		for _, a := range nt.Artifacts {
  2121  			artifacts = append(artifacts, a.Copy())
  2122  		}
  2123  		nt.Artifacts = artifacts
  2124  	}
  2125  
  2126  	if i, err := copystructure.Copy(nt.Config); err != nil {
  2127  		nt.Config = i.(map[string]interface{})
  2128  	}
  2129  
  2130  	if t.Templates != nil {
  2131  		templates := make([]*Template, len(t.Templates))
  2132  		for i, tmpl := range nt.Templates {
  2133  			templates[i] = tmpl.Copy()
  2134  		}
  2135  		nt.Templates = templates
  2136  	}
  2137  
  2138  	return nt
  2139  }
  2140  
  2141  // Canonicalize canonicalizes fields in the task.
  2142  func (t *Task) Canonicalize(job *Job, tg *TaskGroup) {
  2143  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  2144  	// problems since we use reflect DeepEquals.
  2145  	if len(t.Meta) == 0 {
  2146  		t.Meta = nil
  2147  	}
  2148  	if len(t.Config) == 0 {
  2149  		t.Config = nil
  2150  	}
  2151  	if len(t.Env) == 0 {
  2152  		t.Env = nil
  2153  	}
  2154  
  2155  	for _, service := range t.Services {
  2156  		service.Canonicalize(job.Name, tg.Name, t.Name)
  2157  	}
  2158  
  2159  	// If Resources are nil initialize them to defaults, otherwise canonicalize
  2160  	if t.Resources == nil {
  2161  		t.Resources = DefaultResources()
  2162  	} else {
  2163  		t.Resources.Canonicalize()
  2164  	}
  2165  
  2166  	// Set the default timeout if it is not specified.
  2167  	if t.KillTimeout == 0 {
  2168  		t.KillTimeout = DefaultKillTimeout
  2169  	}
  2170  
  2171  	if t.Vault != nil {
  2172  		t.Vault.Canonicalize()
  2173  	}
  2174  
  2175  	for _, template := range t.Templates {
  2176  		template.Canonicalize()
  2177  	}
  2178  }
  2179  
  2180  func (t *Task) GoString() string {
  2181  	return fmt.Sprintf("*%#v", *t)
  2182  }
  2183  
  2184  func (t *Task) FindHostAndPortFor(portLabel string) (string, int) {
  2185  	for _, network := range t.Resources.Networks {
  2186  		if p, ok := network.MapLabelToValues(nil)[portLabel]; ok {
  2187  			return network.IP, p
  2188  		}
  2189  	}
  2190  	return "", 0
  2191  }
  2192  
  2193  // Validate is used to sanity check a task
  2194  func (t *Task) Validate(ephemeralDisk *EphemeralDisk) error {
  2195  	var mErr multierror.Error
  2196  	if t.Name == "" {
  2197  		mErr.Errors = append(mErr.Errors, errors.New("Missing task name"))
  2198  	}
  2199  	if strings.ContainsAny(t.Name, `/\`) {
  2200  		// We enforce this so that when creating the directory on disk it will
  2201  		// not have any slashes.
  2202  		mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes"))
  2203  	}
  2204  	if t.Driver == "" {
  2205  		mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
  2206  	}
  2207  	if t.KillTimeout.Nanoseconds() < 0 {
  2208  		mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value"))
  2209  	}
  2210  
  2211  	// Validate the resources.
  2212  	if t.Resources == nil {
  2213  		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
  2214  	} else {
  2215  		if err := t.Resources.MeetsMinResources(); err != nil {
  2216  			mErr.Errors = append(mErr.Errors, err)
  2217  		}
  2218  
  2219  		// Ensure the task isn't asking for disk resources
  2220  		if t.Resources.DiskMB > 0 {
  2221  			mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level."))
  2222  		}
  2223  	}
  2224  
  2225  	// Validate the log config
  2226  	if t.LogConfig == nil {
  2227  		mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config"))
  2228  	} else if err := t.LogConfig.Validate(); err != nil {
  2229  		mErr.Errors = append(mErr.Errors, err)
  2230  	}
  2231  
  2232  	for idx, constr := range t.Constraints {
  2233  		if err := constr.Validate(); err != nil {
  2234  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  2235  			mErr.Errors = append(mErr.Errors, outer)
  2236  		}
  2237  	}
  2238  
  2239  	// Validate Services
  2240  	if err := validateServices(t); err != nil {
  2241  		mErr.Errors = append(mErr.Errors, err)
  2242  	}
  2243  
  2244  	if t.LogConfig != nil && ephemeralDisk != nil {
  2245  		logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB)
  2246  		if ephemeralDisk.SizeMB <= logUsage {
  2247  			mErr.Errors = append(mErr.Errors,
  2248  				fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)",
  2249  					logUsage, ephemeralDisk.SizeMB))
  2250  		}
  2251  	}
  2252  
  2253  	for idx, artifact := range t.Artifacts {
  2254  		if err := artifact.Validate(); err != nil {
  2255  			outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err)
  2256  			mErr.Errors = append(mErr.Errors, outer)
  2257  		}
  2258  	}
  2259  
  2260  	if t.Vault != nil {
  2261  		if err := t.Vault.Validate(); err != nil {
  2262  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err))
  2263  		}
  2264  	}
  2265  
  2266  	destinations := make(map[string]int, len(t.Templates))
  2267  	for idx, tmpl := range t.Templates {
  2268  		if err := tmpl.Validate(); err != nil {
  2269  			outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err)
  2270  			mErr.Errors = append(mErr.Errors, outer)
  2271  		}
  2272  
  2273  		if other, ok := destinations[tmpl.DestPath]; ok {
  2274  			outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other)
  2275  			mErr.Errors = append(mErr.Errors, outer)
  2276  		} else {
  2277  			destinations[tmpl.DestPath] = idx + 1
  2278  		}
  2279  	}
  2280  
  2281  	return mErr.ErrorOrNil()
  2282  }
  2283  
  2284  // validateServices takes a task and validates the services within it are valid
  2285  // and reference ports that exist.
  2286  func validateServices(t *Task) error {
  2287  	var mErr multierror.Error
  2288  
  2289  	// Ensure that services don't ask for non-existent ports and their names are
  2290  	// unique.
  2291  	servicePorts := make(map[string][]string)
  2292  	knownServices := make(map[string]struct{})
  2293  	for i, service := range t.Services {
  2294  		if err := service.Validate(); err != nil {
  2295  			outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err)
  2296  			mErr.Errors = append(mErr.Errors, outer)
  2297  		}
  2298  		if _, ok := knownServices[service.Name]; ok {
  2299  			mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name))
  2300  		}
  2301  		knownServices[service.Name] = struct{}{}
  2302  
  2303  		if service.PortLabel != "" {
  2304  			servicePorts[service.PortLabel] = append(servicePorts[service.PortLabel], service.Name)
  2305  		}
  2306  
  2307  		// Ensure that check names are unique.
  2308  		knownChecks := make(map[string]struct{})
  2309  		for _, check := range service.Checks {
  2310  			if _, ok := knownChecks[check.Name]; ok {
  2311  				mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name))
  2312  			}
  2313  			knownChecks[check.Name] = struct{}{}
  2314  		}
  2315  	}
  2316  
  2317  	// Get the set of port labels.
  2318  	portLabels := make(map[string]struct{})
  2319  	if t.Resources != nil {
  2320  		for _, network := range t.Resources.Networks {
  2321  			ports := network.MapLabelToValues(nil)
  2322  			for portLabel, _ := range ports {
  2323  				portLabels[portLabel] = struct{}{}
  2324  			}
  2325  		}
  2326  	}
  2327  
  2328  	// Ensure all ports referenced in services exist.
  2329  	for servicePort, services := range servicePorts {
  2330  		_, ok := portLabels[servicePort]
  2331  		if !ok {
  2332  			joined := strings.Join(services, ", ")
  2333  			err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined)
  2334  			mErr.Errors = append(mErr.Errors, err)
  2335  		}
  2336  	}
  2337  	return mErr.ErrorOrNil()
  2338  }
  2339  
// Change modes accepted by Template.ChangeMode; Template.Validate rejects any
// other value.
const (
	// TemplateChangeModeNoop marks that no action should be taken if the
	// template is re-rendered
	TemplateChangeModeNoop = "noop"

	// TemplateChangeModeSignal marks that the task should be signaled if the
	// template is re-rendered. Template.Validate requires ChangeSignal to be
	// set when this mode is used.
	TemplateChangeModeSignal = "signal"

	// TemplateChangeModeRestart marks that the task should be restarted if the
	// template is re-rendered
	TemplateChangeModeRestart = "restart"
)
  2353  
var (
	// TemplateChangeModeInvalidError is the error for when an invalid change
	// mode is given. Template.Validate reports it when ChangeMode is not one
	// of the TemplateChangeMode* constants.
	TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart")
)
  2359  
// Template represents a template configuration to be rendered for a given task
type Template struct {
	// SourcePath is the path to the template to be rendered. Validate
	// requires either SourcePath or EmbeddedTmpl to be set.
	SourcePath string `mapstructure:"source"`

	// DestPath is the path to where the template should be rendered. It is
	// required and must not escape the allocation directory.
	DestPath string `mapstructure:"destination"`

	// EmbeddedTmpl stores the raw template. This is useful for smaller
	// templates where they are embedded in the job file rather than sent as
	// an artifact
	EmbeddedTmpl string `mapstructure:"data"`

	// ChangeMode indicates what should be done if the template is re-rendered.
	// It must be one of the TemplateChangeMode* constants.
	ChangeMode string `mapstructure:"change_mode"`

	// ChangeSignal is the signal that should be sent if the change mode
	// requires it. Canonicalize upper-cases the value.
	ChangeSignal string `mapstructure:"change_signal"`

	// Splay is used to avoid coordinated restarts of processes by applying a
	// random wait between 0 and the given splay value before signalling the
	// application of a change. Validate rejects negative values.
	Splay time.Duration `mapstructure:"splay"`
}
  2384  
  2385  // DefaultTemplate returns a default template.
  2386  func DefaultTemplate() *Template {
  2387  	return &Template{
  2388  		ChangeMode: TemplateChangeModeRestart,
  2389  		Splay:      5 * time.Second,
  2390  	}
  2391  }
  2392  
  2393  func (t *Template) Copy() *Template {
  2394  	if t == nil {
  2395  		return nil
  2396  	}
  2397  	copy := new(Template)
  2398  	*copy = *t
  2399  	return copy
  2400  }
  2401  
  2402  func (t *Template) Canonicalize() {
  2403  	if t.ChangeSignal != "" {
  2404  		t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
  2405  	}
  2406  }
  2407  
  2408  func (t *Template) Validate() error {
  2409  	var mErr multierror.Error
  2410  
  2411  	// Verify we have something to render
  2412  	if t.SourcePath == "" && t.EmbeddedTmpl == "" {
  2413  		multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template"))
  2414  	}
  2415  
  2416  	// Verify we can render somewhere
  2417  	if t.DestPath == "" {
  2418  		multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template"))
  2419  	}
  2420  
  2421  	// Verify the destination doesn't escape
  2422  	escaped, err := PathEscapesAllocDir(t.DestPath)
  2423  	if err != nil {
  2424  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
  2425  	} else if escaped {
  2426  		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
  2427  	}
  2428  
  2429  	// Verify a proper change mode
  2430  	switch t.ChangeMode {
  2431  	case TemplateChangeModeNoop, TemplateChangeModeRestart:
  2432  	case TemplateChangeModeSignal:
  2433  		if t.ChangeSignal == "" {
  2434  			multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal"))
  2435  		}
  2436  	default:
  2437  		multierror.Append(&mErr, TemplateChangeModeInvalidError)
  2438  	}
  2439  
  2440  	// Verify the splay is positive
  2441  	if t.Splay < 0 {
  2442  		multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value"))
  2443  	}
  2444  
  2445  	return mErr.ErrorOrNil()
  2446  }
  2447  
// Set of possible states for a task, as stored in TaskState.State.
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)
  2454  
// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// The current state of the task; one of the TaskState* constants.
	State string

	// Failed marks a task as having failed
	Failed bool

	// Series of task events that transition the state of the task.
	// Successful inspects the last element, so the most recent event is
	// expected to be at the end.
	Events []*TaskEvent
}
  2467  
  2468  func (ts *TaskState) Copy() *TaskState {
  2469  	if ts == nil {
  2470  		return nil
  2471  	}
  2472  	copy := new(TaskState)
  2473  	copy.State = ts.State
  2474  	copy.Failed = ts.Failed
  2475  
  2476  	if ts.Events != nil {
  2477  		copy.Events = make([]*TaskEvent, len(ts.Events))
  2478  		for i, e := range ts.Events {
  2479  			copy.Events[i] = e.Copy()
  2480  		}
  2481  	}
  2482  	return copy
  2483  }
  2484  
  2485  // Successful returns whether a task finished successfully.
  2486  func (ts *TaskState) Successful() bool {
  2487  	l := len(ts.Events)
  2488  	if ts.State != TaskStateDead || l == 0 {
  2489  		return false
  2490  	}
  2491  
  2492  	e := ts.Events[l-1]
  2493  	if e.Type != TaskTerminated {
  2494  		return false
  2495  	}
  2496  
  2497  	return e.ExitCode == 0
  2498  }
  2499  
// Set of task event types, as stored in TaskEvent.Type.
const (
	// TaskSetupFailure indicates that the task could not be started due to
	// a setup failure.
	TaskSetupFailure = "Setup Failure"

	// TaskDriverFailure indicates that the task could not be started due to a
	// failure in the driver.
	TaskDriverFailure = "Driver Failure"

	// TaskReceived signals that the task has been pulled by the client at the
	// given timestamp.
	TaskReceived = "Received"

	// TaskFailedValidation indicates the task was invalid and as such was not
	// run.
	TaskFailedValidation = "Failed Validation"

	// TaskStarted signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// TaskTerminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// TaskKilling indicates a kill signal has been sent to the task.
	TaskKilling = "Killing"

	// TaskKilled indicates a user has killed the task.
	TaskKilled = "Killed"

	// TaskRestarting indicates that task terminated and is being restarted.
	TaskRestarting = "Restarting"

	// TaskNotRestarting indicates that the task has failed and is not being
	// restarted because it has exceeded its restart policy.
	TaskNotRestarting = "Not Restarting"

	// TaskRestartSignal indicates that the task has been signalled to be
	// restarted
	TaskRestartSignal = "Restart Signaled"

	// TaskSignaling indicates that the task is being signalled.
	TaskSignaling = "Signaling"

	// TaskDownloadingArtifacts means the task is downloading the artifacts
	// specified in the task.
	TaskDownloadingArtifacts = "Downloading Artifacts"

	// TaskArtifactDownloadFailed indicates that downloading the artifacts
	// failed.
	TaskArtifactDownloadFailed = "Failed Artifact Download"

	// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
	// exceeded the requested disk resources.
	TaskDiskExceeded = "Disk Resources Exceeded"

	// TaskSiblingFailed indicates that a sibling task in the task group has
	// failed.
	TaskSiblingFailed = "Sibling task failed"
)
  2560  
  2561  // TaskEvent is an event that effects the state of a task and contains meta-data
  2562  // appropriate to the events type.
  2563  type TaskEvent struct {
  2564  	Type string
  2565  	Time int64 // Unix Nanosecond timestamp
  2566  
  2567  	// FailsTask marks whether this event fails the task
  2568  	FailsTask bool
  2569  
  2570  	// Restart fields.
  2571  	RestartReason string
  2572  
  2573  	// Setup Failure fields.
  2574  	SetupError string
  2575  
  2576  	// Driver Failure fields.
  2577  	DriverError string // A driver error occurred while starting the task.
  2578  
  2579  	// Task Terminated Fields.
  2580  	ExitCode int    // The exit code of the task.
  2581  	Signal   int    // The signal that terminated the task.
  2582  	Message  string // A possible message explaining the termination of the task.
  2583  
  2584  	// Killing fields
  2585  	KillTimeout time.Duration
  2586  
  2587  	// Task Killed Fields.
  2588  	KillError string // Error killing the task.
  2589  
  2590  	// KillReason is the reason the task was killed
  2591  	KillReason string
  2592  
  2593  	// TaskRestarting fields.
  2594  	StartDelay int64 // The sleep period before restarting the task in unix nanoseconds.
  2595  
  2596  	// Artifact Download fields
  2597  	DownloadError string // Error downloading artifacts
  2598  
  2599  	// Validation fields
  2600  	ValidationError string // Validation error
  2601  
  2602  	// The maximum allowed task disk size.
  2603  	DiskLimit int64
  2604  
  2605  	// Name of the sibling task that caused termination of the task that
  2606  	// the TaskEvent refers to.
  2607  	FailedSibling string
  2608  
  2609  	// VaultError is the error from token renewal
  2610  	VaultError string
  2611  
  2612  	// TaskSignalReason indicates the reason the task is being signalled.
  2613  	TaskSignalReason string
  2614  
  2615  	// TaskSignal is the signal that was sent to the task
  2616  	TaskSignal string
  2617  }
  2618  
  2619  func (te *TaskEvent) GoString() string {
  2620  	return fmt.Sprintf("%v at %v", te.Type, te.Time)
  2621  }
  2622  
  2623  func (te *TaskEvent) Copy() *TaskEvent {
  2624  	if te == nil {
  2625  		return nil
  2626  	}
  2627  	copy := new(TaskEvent)
  2628  	*copy = *te
  2629  	return copy
  2630  }
  2631  
  2632  func NewTaskEvent(event string) *TaskEvent {
  2633  	return &TaskEvent{
  2634  		Type: event,
  2635  		Time: time.Now().UnixNano(),
  2636  	}
  2637  }
  2638  
  2639  // SetSetupError is used to store an error that occured while setting up the
  2640  // task
  2641  func (e *TaskEvent) SetSetupError(err error) *TaskEvent {
  2642  	if err != nil {
  2643  		e.SetupError = err.Error()
  2644  	}
  2645  	return e
  2646  }
  2647  
  2648  func (e *TaskEvent) SetFailsTask() *TaskEvent {
  2649  	e.FailsTask = true
  2650  	return e
  2651  }
  2652  
  2653  func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
  2654  	if err != nil {
  2655  		e.DriverError = err.Error()
  2656  	}
  2657  	return e
  2658  }
  2659  
  2660  func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
  2661  	e.ExitCode = c
  2662  	return e
  2663  }
  2664  
  2665  func (e *TaskEvent) SetSignal(s int) *TaskEvent {
  2666  	e.Signal = s
  2667  	return e
  2668  }
  2669  
  2670  func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
  2671  	if err != nil {
  2672  		e.Message = err.Error()
  2673  	}
  2674  	return e
  2675  }
  2676  
  2677  func (e *TaskEvent) SetKillError(err error) *TaskEvent {
  2678  	if err != nil {
  2679  		e.KillError = err.Error()
  2680  	}
  2681  	return e
  2682  }
  2683  
  2684  func (e *TaskEvent) SetKillReason(r string) *TaskEvent {
  2685  	e.KillReason = r
  2686  	return e
  2687  }
  2688  
  2689  func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent {
  2690  	e.StartDelay = int64(delay)
  2691  	return e
  2692  }
  2693  
  2694  func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent {
  2695  	e.RestartReason = reason
  2696  	return e
  2697  }
  2698  
  2699  func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent {
  2700  	e.TaskSignalReason = r
  2701  	return e
  2702  }
  2703  
  2704  func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent {
  2705  	e.TaskSignal = s.String()
  2706  	return e
  2707  }
  2708  
  2709  func (e *TaskEvent) SetDownloadError(err error) *TaskEvent {
  2710  	if err != nil {
  2711  		e.DownloadError = err.Error()
  2712  	}
  2713  	return e
  2714  }
  2715  
  2716  func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
  2717  	if err != nil {
  2718  		e.ValidationError = err.Error()
  2719  	}
  2720  	return e
  2721  }
  2722  
  2723  func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent {
  2724  	e.KillTimeout = timeout
  2725  	return e
  2726  }
  2727  
  2728  func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent {
  2729  	e.DiskLimit = limit
  2730  	return e
  2731  }
  2732  
  2733  func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent {
  2734  	e.FailedSibling = sibling
  2735  	return e
  2736  }
  2737  
  2738  func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent {
  2739  	if err != nil {
  2740  		e.VaultError = err.Error()
  2741  	}
  2742  	return e
  2743  }
  2744  
// TaskArtifact is an artifact to download before running the task.
type TaskArtifact struct {
	// GetterSource is the source to download an artifact using go-getter.
	// Validate requires it to be non-empty.
	GetterSource string `mapstructure:"source"`

	// GetterOptions are options to use when downloading the artifact using
	// go-getter. Validate inspects the "checksum" option when it is present.
	GetterOptions map[string]string `mapstructure:"options"`

	// RelativeDest is the download destination given relative to the task's
	// directory. Validate rejects destinations that escape that directory.
	RelativeDest string `mapstructure:"destination"`
}
  2758  
  2759  func (ta *TaskArtifact) Copy() *TaskArtifact {
  2760  	if ta == nil {
  2761  		return nil
  2762  	}
  2763  	nta := new(TaskArtifact)
  2764  	*nta = *ta
  2765  	nta.GetterOptions = CopyMapStringString(ta.GetterOptions)
  2766  	return nta
  2767  }
  2768  
  2769  func (ta *TaskArtifact) GoString() string {
  2770  	return fmt.Sprintf("%+v", ta)
  2771  }
  2772  
  2773  // PathEscapesAllocDir returns if the given path escapes the allocation
  2774  // directory
  2775  func PathEscapesAllocDir(path string) (bool, error) {
  2776  	// Verify the destination doesn't escape the tasks directory
  2777  	alloc, err := filepath.Abs(filepath.Join("/", "foo/", "bar/"))
  2778  	if err != nil {
  2779  		return false, err
  2780  	}
  2781  	abs, err := filepath.Abs(filepath.Join(alloc, path))
  2782  	if err != nil {
  2783  		return false, err
  2784  	}
  2785  	rel, err := filepath.Rel(alloc, abs)
  2786  	if err != nil {
  2787  		return false, err
  2788  	}
  2789  
  2790  	return strings.HasPrefix(rel, ".."), nil
  2791  }
  2792  
  2793  func (ta *TaskArtifact) Validate() error {
  2794  	// Verify the source
  2795  	var mErr multierror.Error
  2796  	if ta.GetterSource == "" {
  2797  		mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified"))
  2798  	}
  2799  
  2800  	escaped, err := PathEscapesAllocDir(ta.RelativeDest)
  2801  	if err != nil {
  2802  		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
  2803  	} else if escaped {
  2804  		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes task's directory"))
  2805  	}
  2806  
  2807  	// Verify the checksum
  2808  	if check, ok := ta.GetterOptions["checksum"]; ok {
  2809  		check = strings.TrimSpace(check)
  2810  		if check == "" {
  2811  			mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value cannot be empty"))
  2812  			return mErr.ErrorOrNil()
  2813  		}
  2814  
  2815  		parts := strings.Split(check, ":")
  2816  		if l := len(parts); l != 2 {
  2817  			mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check))
  2818  			return mErr.ErrorOrNil()
  2819  		}
  2820  
  2821  		checksumVal := parts[1]
  2822  		checksumBytes, err := hex.DecodeString(checksumVal)
  2823  		if err != nil {
  2824  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err))
  2825  			return mErr.ErrorOrNil()
  2826  		}
  2827  
  2828  		checksumType := parts[0]
  2829  		expectedLength := 0
  2830  		switch checksumType {
  2831  		case "md5":
  2832  			expectedLength = md5.Size
  2833  		case "sha1":
  2834  			expectedLength = sha1.Size
  2835  		case "sha256":
  2836  			expectedLength = sha256.Size
  2837  		case "sha512":
  2838  			expectedLength = sha512.Size
  2839  		default:
  2840  			mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType))
  2841  			return mErr.ErrorOrNil()
  2842  		}
  2843  
  2844  		if len(checksumBytes) != expectedLength {
  2845  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal))
  2846  			return mErr.ErrorOrNil()
  2847  		}
  2848  	}
  2849  
  2850  	return mErr.ErrorOrNil()
  2851  }
  2852  
// Named constraint operands. Constraint.Validate performs extra validation of
// the right-hand target for ConstraintRegex and ConstraintVersion.
const (
	ConstraintDistinctHosts = "distinct_hosts"
	ConstraintRegex         = "regexp"
	ConstraintVersion       = "version"
	ConstraintSetContains   = "set_contains"
)
  2859  
// Constraint is used to restrict placement options.
type Constraint struct {
	LTarget string // Left-hand target
	RTarget string // Right-hand target
	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
	str     string // Memoized string, filled in lazily by String
}
  2867  
  2868  // Equal checks if two constraints are equal
  2869  func (c *Constraint) Equal(o *Constraint) bool {
  2870  	return c.LTarget == o.LTarget &&
  2871  		c.RTarget == o.RTarget &&
  2872  		c.Operand == o.Operand
  2873  }
  2874  
  2875  func (c *Constraint) Copy() *Constraint {
  2876  	if c == nil {
  2877  		return nil
  2878  	}
  2879  	nc := new(Constraint)
  2880  	*nc = *c
  2881  	return nc
  2882  }
  2883  
  2884  func (c *Constraint) String() string {
  2885  	if c.str != "" {
  2886  		return c.str
  2887  	}
  2888  	c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
  2889  	return c.str
  2890  }
  2891  
  2892  func (c *Constraint) Validate() error {
  2893  	var mErr multierror.Error
  2894  	if c.Operand == "" {
  2895  		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
  2896  	}
  2897  
  2898  	// Perform additional validation based on operand
  2899  	switch c.Operand {
  2900  	case ConstraintRegex:
  2901  		if _, err := regexp.Compile(c.RTarget); err != nil {
  2902  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
  2903  		}
  2904  	case ConstraintVersion:
  2905  		if _, err := version.NewConstraint(c.RTarget); err != nil {
  2906  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
  2907  		}
  2908  	}
  2909  	return mErr.ErrorOrNil()
  2910  }
  2911  
// EphemeralDisk is an ephemeral disk object
type EphemeralDisk struct {
	// Sticky indicates whether the allocation is sticky to a node
	Sticky bool

	// SizeMB is the size of the local disk. Validate requires at least 10.
	SizeMB int `mapstructure:"size"`

	// Migrate determines if Nomad client should migrate the allocation dir for
	// sticky allocations
	Migrate bool
}
  2924  
  2925  // DefaultEphemeralDisk returns a EphemeralDisk with default configurations
  2926  func DefaultEphemeralDisk() *EphemeralDisk {
  2927  	return &EphemeralDisk{
  2928  		SizeMB: 300,
  2929  	}
  2930  }
  2931  
  2932  // Validate validates EphemeralDisk
  2933  func (d *EphemeralDisk) Validate() error {
  2934  	if d.SizeMB < 10 {
  2935  		return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB)
  2936  	}
  2937  	return nil
  2938  }
  2939  
  2940  // Copy copies the EphemeralDisk struct and returns a new one
  2941  func (d *EphemeralDisk) Copy() *EphemeralDisk {
  2942  	ld := new(EphemeralDisk)
  2943  	*ld = *d
  2944  	return ld
  2945  }
  2946  
// Valid values for Vault.ChangeMode; Vault.Validate rejects anything else.
const (
	// VaultChangeModeNoop takes no action when a new token is retrieved.
	VaultChangeModeNoop = "noop"

	// VaultChangeModeSignal signals the task when a new token is retrieved.
	VaultChangeModeSignal = "signal"

	// VaultChangeModeRestart restarts the task when a new token is retrieved.
	VaultChangeModeRestart = "restart"
)
  2957  
// Vault stores the set of permissions a task needs access to from Vault.
type Vault struct {
	// Policies is the set of policies that the task needs access to.
	// Validate rejects an empty list.
	Policies []string

	// Env marks whether the Vault Token should be exposed as an environment
	// variable
	Env bool

	// ChangeMode is used to configure the task's behavior when the Vault
	// token changes because the original token could not be renewed in time.
	// It must be one of the VaultChangeMode* constants.
	ChangeMode string `mapstructure:"change_mode"`

	// ChangeSignal is the signal sent to the task when a new token is
	// retrieved. This is only valid when using the signal change mode.
	// Canonicalize upper-cases the value.
	ChangeSignal string `mapstructure:"change_signal"`
}
  2975  
  2976  func DefaultVaultBlock() *Vault {
  2977  	return &Vault{
  2978  		Env:        true,
  2979  		ChangeMode: VaultChangeModeRestart,
  2980  	}
  2981  }
  2982  
  2983  // Copy returns a copy of this Vault block.
  2984  func (v *Vault) Copy() *Vault {
  2985  	if v == nil {
  2986  		return nil
  2987  	}
  2988  
  2989  	nv := new(Vault)
  2990  	*nv = *v
  2991  	return nv
  2992  }
  2993  
  2994  func (v *Vault) Canonicalize() {
  2995  	if v.ChangeSignal != "" {
  2996  		v.ChangeSignal = strings.ToUpper(v.ChangeSignal)
  2997  	}
  2998  }
  2999  
  3000  // Validate returns if the Vault block is valid.
  3001  func (v *Vault) Validate() error {
  3002  	if v == nil {
  3003  		return nil
  3004  	}
  3005  
  3006  	if len(v.Policies) == 0 {
  3007  		return fmt.Errorf("Policy list cannot be empty")
  3008  	}
  3009  
  3010  	switch v.ChangeMode {
  3011  	case VaultChangeModeSignal:
  3012  		if v.ChangeSignal == "" {
  3013  			return fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal)
  3014  		}
  3015  	case VaultChangeModeNoop, VaultChangeModeRestart:
  3016  	default:
  3017  		return fmt.Errorf("Unknown change mode %q", v.ChangeMode)
  3018  	}
  3019  
  3020  	return nil
  3021  }
  3022  
// Values for Allocation.DesiredStatus. Allocation.TerminalStatus treats stop
// and evict as terminal.
const (
	AllocDesiredStatusRun   = "run"   // Allocation should run
	AllocDesiredStatusStop  = "stop"  // Allocation should stop
	AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted
)
  3028  
// Values for Allocation.ClientStatus. Allocation.Terminated treats complete,
// failed and lost as terminal.
const (
	AllocClientStatusPending  = "pending"
	AllocClientStatusRunning  = "running"
	AllocClientStatusComplete = "complete"
	AllocClientStatusFailed   = "failed"
	AllocClientStatusLost     = "lost"
)
  3036  
// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// ID of the allocation (UUID)
	ID string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation. Index extracts the
	// allocation index from a trailing "[N]" in this name.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// Job is the parent job of the task group being allocated.
	// This is copied at allocation time to avoid issues if the job
	// definition is updated.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// Resources is the total set of resources allocated as part
	// of this allocation of the task group.
	Resources *Resources

	// SharedResources are the resources that are shared by all the tasks in an
	// allocation
	SharedResources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources.
	TaskResources map[string]*Resources

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// Desired Status of the allocation on the client (one of the
	// AllocDesiredStatus* constants)
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	DesiredDescription string

	// Status of the allocation on the client (one of the AllocClientStatus*
	// constants)
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	ClientDescription string

	// TaskStates stores the state of each task,
	TaskStates map[string]*TaskState

	// PreviousAllocation is the allocation that this allocation is replacing
	PreviousAllocation string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64

	// AllocModifyIndex is not updated when the client updates allocations. This
	// lets the client pull only the allocs updated by the server.
	AllocModifyIndex uint64

	// CreateTime is the time the allocation has finished scheduling and been
	// verified by the plan applier.
	CreateTime int64
}
  3105  
  3106  func (a *Allocation) Copy() *Allocation {
  3107  	if a == nil {
  3108  		return nil
  3109  	}
  3110  	na := new(Allocation)
  3111  	*na = *a
  3112  
  3113  	na.Job = na.Job.Copy()
  3114  	na.Resources = na.Resources.Copy()
  3115  	na.SharedResources = na.SharedResources.Copy()
  3116  
  3117  	if a.TaskResources != nil {
  3118  		tr := make(map[string]*Resources, len(na.TaskResources))
  3119  		for task, resource := range na.TaskResources {
  3120  			tr[task] = resource.Copy()
  3121  		}
  3122  		na.TaskResources = tr
  3123  	}
  3124  
  3125  	na.Metrics = na.Metrics.Copy()
  3126  
  3127  	if a.TaskStates != nil {
  3128  		ts := make(map[string]*TaskState, len(na.TaskStates))
  3129  		for task, state := range na.TaskStates {
  3130  			ts[task] = state.Copy()
  3131  		}
  3132  		na.TaskStates = ts
  3133  	}
  3134  	return na
  3135  }
  3136  
  3137  // TerminalStatus returns if the desired or actual status is terminal and
  3138  // will no longer transition.
  3139  func (a *Allocation) TerminalStatus() bool {
  3140  	// First check the desired state and if that isn't terminal, check client
  3141  	// state.
  3142  	switch a.DesiredStatus {
  3143  	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
  3144  		return true
  3145  	default:
  3146  	}
  3147  
  3148  	switch a.ClientStatus {
  3149  	case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
  3150  		return true
  3151  	default:
  3152  		return false
  3153  	}
  3154  }
  3155  
  3156  // Terminated returns if the allocation is in a terminal state on a client.
  3157  func (a *Allocation) Terminated() bool {
  3158  	if a.ClientStatus == AllocClientStatusFailed ||
  3159  		a.ClientStatus == AllocClientStatusComplete ||
  3160  		a.ClientStatus == AllocClientStatusLost {
  3161  		return true
  3162  	}
  3163  	return false
  3164  }
  3165  
  3166  // RanSuccessfully returns whether the client has ran the allocation and all
  3167  // tasks finished successfully
  3168  func (a *Allocation) RanSuccessfully() bool {
  3169  	// Handle the case the client hasn't started the allocation.
  3170  	if len(a.TaskStates) == 0 {
  3171  		return false
  3172  	}
  3173  
  3174  	// Check to see if all the tasks finised successfully in the allocation
  3175  	allSuccess := true
  3176  	for _, state := range a.TaskStates {
  3177  		allSuccess = allSuccess && state.Successful()
  3178  	}
  3179  
  3180  	return allSuccess
  3181  }
  3182  
  3183  // Stub returns a list stub for the allocation
  3184  func (a *Allocation) Stub() *AllocListStub {
  3185  	return &AllocListStub{
  3186  		ID:                 a.ID,
  3187  		EvalID:             a.EvalID,
  3188  		Name:               a.Name,
  3189  		NodeID:             a.NodeID,
  3190  		JobID:              a.JobID,
  3191  		TaskGroup:          a.TaskGroup,
  3192  		DesiredStatus:      a.DesiredStatus,
  3193  		DesiredDescription: a.DesiredDescription,
  3194  		ClientStatus:       a.ClientStatus,
  3195  		ClientDescription:  a.ClientDescription,
  3196  		TaskStates:         a.TaskStates,
  3197  		CreateIndex:        a.CreateIndex,
  3198  		ModifyIndex:        a.ModifyIndex,
  3199  		CreateTime:         a.CreateTime,
  3200  	}
  3201  }
  3202  
  3203  // ShouldMigrate returns if the allocation needs data migration
  3204  func (a *Allocation) ShouldMigrate() bool {
  3205  	if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict {
  3206  		return false
  3207  	}
  3208  
  3209  	tg := a.Job.LookupTaskGroup(a.TaskGroup)
  3210  
  3211  	// if the task group is nil or the ephemeral disk block isn't present then
  3212  	// we won't migrate
  3213  	if tg == nil || tg.EphemeralDisk == nil {
  3214  		return false
  3215  	}
  3216  
  3217  	// We won't migrate any data is the user hasn't enabled migration or the
  3218  	// disk is not marked as sticky
  3219  	if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky {
  3220  		return false
  3221  	}
  3222  
  3223  	return true
  3224  }
  3225  
var (
	// AllocationIndexRegex is a regular expression to find the allocation index.
	// It matches a name that ends in "[<digits>]" preceded by at least one
	// character, capturing the digits as the only submatch.
	AllocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
)
  3230  
  3231  // Index returns the index of the allocation. If the allocation is from a task
  3232  // group with count greater than 1, there will be multiple allocations for it.
  3233  func (a *Allocation) Index() int {
  3234  	matches := AllocationIndexRegex.FindStringSubmatch(a.Name)
  3235  	if len(matches) != 2 {
  3236  		return -1
  3237  	}
  3238  
  3239  	index, err := strconv.Atoi(matches[1])
  3240  	if err != nil {
  3241  		return -1
  3242  	}
  3243  
  3244  	return index
  3245  }
  3246  
// AllocListStub is used to return a subset of alloc information. It is
// produced by Allocation.Stub, which copies each field from the allocation's
// field of the same name.
type AllocListStub struct {
	ID                 string
	EvalID             string
	Name               string
	NodeID             string
	JobID              string
	TaskGroup          string
	DesiredStatus      string
	DesiredDescription string
	ClientStatus       string
	ClientDescription  string
	TaskStates         map[string]*TaskState
	CreateIndex        uint64
	ModifyIndex        uint64
	CreateTime         int64
}
  3264  
// AllocMetric is used to track various metrics while attempting
// to make an allocation. These are used to debug a job, or to better
// understand the pressure within the system.
type AllocMetric struct {
	// NodesEvaluated is the number of nodes that were evaluated
	// (incremented by EvaluateNode)
	NodesEvaluated int

	// NodesFiltered is the number of nodes filtered due to a constraint
	// (incremented by FilterNode)
	NodesFiltered int

	// NodesAvailable is the number of nodes available for evaluation per DC.
	NodesAvailable map[string]int

	// ClassFiltered is the number of nodes filtered by class
	ClassFiltered map[string]int

	// ConstraintFiltered is the number of failures caused by constraint
	ConstraintFiltered map[string]int

	// NodesExhausted is the number of nodes skipped due to being
	// exhausted of at least one resource (incremented by ExhaustedNode)
	NodesExhausted int

	// ClassExhausted is the number of nodes exhausted by class
	ClassExhausted map[string]int

	// DimensionExhausted provides the count by dimension or reason
	DimensionExhausted map[string]int

	// Scores is the scores of the final few nodes remaining
	// for placement. The top score is typically selected.
	// ScoreNode keys entries as "<node ID>.<name>".
	Scores map[string]float64

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}
  3308  
  3309  func (a *AllocMetric) Copy() *AllocMetric {
  3310  	if a == nil {
  3311  		return nil
  3312  	}
  3313  	na := new(AllocMetric)
  3314  	*na = *a
  3315  	na.NodesAvailable = CopyMapStringInt(na.NodesAvailable)
  3316  	na.ClassFiltered = CopyMapStringInt(na.ClassFiltered)
  3317  	na.ConstraintFiltered = CopyMapStringInt(na.ConstraintFiltered)
  3318  	na.ClassExhausted = CopyMapStringInt(na.ClassExhausted)
  3319  	na.DimensionExhausted = CopyMapStringInt(na.DimensionExhausted)
  3320  	na.Scores = CopyMapStringFloat64(na.Scores)
  3321  	return na
  3322  }
  3323  
  3324  func (a *AllocMetric) EvaluateNode() {
  3325  	a.NodesEvaluated += 1
  3326  }
  3327  
  3328  func (a *AllocMetric) FilterNode(node *Node, constraint string) {
  3329  	a.NodesFiltered += 1
  3330  	if node != nil && node.NodeClass != "" {
  3331  		if a.ClassFiltered == nil {
  3332  			a.ClassFiltered = make(map[string]int)
  3333  		}
  3334  		a.ClassFiltered[node.NodeClass] += 1
  3335  	}
  3336  	if constraint != "" {
  3337  		if a.ConstraintFiltered == nil {
  3338  			a.ConstraintFiltered = make(map[string]int)
  3339  		}
  3340  		a.ConstraintFiltered[constraint] += 1
  3341  	}
  3342  }
  3343  
  3344  func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
  3345  	a.NodesExhausted += 1
  3346  	if node != nil && node.NodeClass != "" {
  3347  		if a.ClassExhausted == nil {
  3348  			a.ClassExhausted = make(map[string]int)
  3349  		}
  3350  		a.ClassExhausted[node.NodeClass] += 1
  3351  	}
  3352  	if dimension != "" {
  3353  		if a.DimensionExhausted == nil {
  3354  			a.DimensionExhausted = make(map[string]int)
  3355  		}
  3356  		a.DimensionExhausted[dimension] += 1
  3357  	}
  3358  }
  3359  
  3360  func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
  3361  	if a.Scores == nil {
  3362  		a.Scores = make(map[string]float64)
  3363  	}
  3364  	key := fmt.Sprintf("%s.%s", node.ID, name)
  3365  	a.Scores[key] = score
  3366  }
  3367  
// Values for Evaluation.Status. Complete, failed and canceled are terminal
// (see Evaluation.TerminalStatus). Note that EvalStatusCancelled carries the
// single-l spelling "canceled" as its value.
const (
	EvalStatusBlocked   = "blocked"
	EvalStatusPending   = "pending"
	EvalStatusComplete  = "complete"
	EvalStatusFailed    = "failed"
	EvalStatusCancelled = "canceled"
)
  3375  
// Values for Evaluation.TriggeredBy, describing why an evaluation was
// created.
const (
	EvalTriggerJobRegister   = "job-register"
	EvalTriggerJobDeregister = "job-deregister"
	EvalTriggerPeriodicJob   = "periodic-job"
	EvalTriggerNodeUpdate    = "node-update"
	EvalTriggerScheduled     = "scheduled"
	EvalTriggerRollingUpdate = "rolling-update"
	EvalTriggerMaxPlans      = "max-plan-attempts"
)
  3385  
// Identifiers of the core garbage-collection jobs.
const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
	// periodically scan garbage collectible jobs and check if both their
	// evaluations and allocations are terminal. If so, we delete these out of
	// the system.
	CoreJobJobGC = "job-gc"

	// CoreJobForceGC is used to force garbage collection of all GCable objects.
	CoreJobForceGC = "force-gc"
)
  3407  
// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// Status of the evaluation (one of the EvalStatus* constants)
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade.
	Wait time.Duration

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	PreviousEval string

	// BlockedEval is the evaluation ID for a created blocked eval. A
	// blocked eval will be created if all allocations could not be placed due
	// to constraints or lacking resources.
	BlockedEval string

	// FailedTGAllocs are task groups which have allocations that could not be
	// made, but the metrics are persisted so that the user can use the feedback
	// to determine the cause.
	FailedTGAllocs map[string]*AllocMetric

	// ClassEligibility tracks computed node classes that have been explicitly
	// marked as eligible or ineligible.
	ClassEligibility map[string]bool

	// EscapedComputedClass marks whether the job has constraints that are not
	// captured by computed node classes.
	EscapedComputedClass bool

	// AnnotatePlan triggers the scheduler to provide additional annotations
	// during the evaluation. This should not be set during normal operations.
	AnnotatePlan bool

	// SnapshotIndex is the Raft index of the snapshot used to process the
	// evaluation. As such it will only be set once it has gone through the
	// scheduler.
	SnapshotIndex uint64

	// QueuedAllocations is the number of unplaced allocations at the time the
	// evaluation was processed. The map is keyed by Task Group names.
	QueuedAllocations map[string]int

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  3498  
  3499  // TerminalStatus returns if the current status is terminal and
  3500  // will no longer transition.
  3501  func (e *Evaluation) TerminalStatus() bool {
  3502  	switch e.Status {
  3503  	case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled:
  3504  		return true
  3505  	default:
  3506  		return false
  3507  	}
  3508  }
  3509  
  3510  func (e *Evaluation) GoString() string {
  3511  	return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID)
  3512  }
  3513  
  3514  func (e *Evaluation) Copy() *Evaluation {
  3515  	if e == nil {
  3516  		return nil
  3517  	}
  3518  	ne := new(Evaluation)
  3519  	*ne = *e
  3520  
  3521  	// Copy ClassEligibility
  3522  	if e.ClassEligibility != nil {
  3523  		classes := make(map[string]bool, len(e.ClassEligibility))
  3524  		for class, elig := range e.ClassEligibility {
  3525  			classes[class] = elig
  3526  		}
  3527  		ne.ClassEligibility = classes
  3528  	}
  3529  
  3530  	// Copy FailedTGAllocs
  3531  	if e.FailedTGAllocs != nil {
  3532  		failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs))
  3533  		for tg, metric := range e.FailedTGAllocs {
  3534  			failedTGs[tg] = metric.Copy()
  3535  		}
  3536  		ne.FailedTGAllocs = failedTGs
  3537  	}
  3538  
  3539  	// Copy queued allocations
  3540  	if e.QueuedAllocations != nil {
  3541  		queuedAllocations := make(map[string]int, len(e.QueuedAllocations))
  3542  		for tg, num := range e.QueuedAllocations {
  3543  			queuedAllocations[tg] = num
  3544  		}
  3545  		ne.QueuedAllocations = queuedAllocations
  3546  	}
  3547  
  3548  	return ne
  3549  }
  3550  
  3551  // ShouldEnqueue checks if a given evaluation should be enqueued into the
  3552  // eval_broker
  3553  func (e *Evaluation) ShouldEnqueue() bool {
  3554  	switch e.Status {
  3555  	case EvalStatusPending:
  3556  		return true
  3557  	case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled:
  3558  		return false
  3559  	default:
  3560  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  3561  	}
  3562  }
  3563  
  3564  // ShouldBlock checks if a given evaluation should be entered into the blocked
  3565  // eval tracker.
  3566  func (e *Evaluation) ShouldBlock() bool {
  3567  	switch e.Status {
  3568  	case EvalStatusBlocked:
  3569  		return true
  3570  	case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled:
  3571  		return false
  3572  	default:
  3573  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  3574  	}
  3575  }
  3576  
  3577  // MakePlan is used to make a plan from the given evaluation
  3578  // for a given Job
  3579  func (e *Evaluation) MakePlan(j *Job) *Plan {
  3580  	p := &Plan{
  3581  		EvalID:         e.ID,
  3582  		Priority:       e.Priority,
  3583  		Job:            j,
  3584  		NodeUpdate:     make(map[string][]*Allocation),
  3585  		NodeAllocation: make(map[string][]*Allocation),
  3586  	}
  3587  	if j != nil {
  3588  		p.AllAtOnce = j.AllAtOnce
  3589  	}
  3590  	return p
  3591  }
  3592  
  3593  // NextRollingEval creates an evaluation to followup this eval for rolling updates
  3594  func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
  3595  	return &Evaluation{
  3596  		ID:             GenerateUUID(),
  3597  		Priority:       e.Priority,
  3598  		Type:           e.Type,
  3599  		TriggeredBy:    EvalTriggerRollingUpdate,
  3600  		JobID:          e.JobID,
  3601  		JobModifyIndex: e.JobModifyIndex,
  3602  		Status:         EvalStatusPending,
  3603  		Wait:           wait,
  3604  		PreviousEval:   e.ID,
  3605  	}
  3606  }
  3607  
  3608  // CreateBlockedEval creates a blocked evaluation to followup this eval to place any
  3609  // failed allocations. It takes the classes marked explicitly eligible or
  3610  // ineligible and whether the job has escaped computed node classes.
  3611  func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation {
  3612  	return &Evaluation{
  3613  		ID:                   GenerateUUID(),
  3614  		Priority:             e.Priority,
  3615  		Type:                 e.Type,
  3616  		TriggeredBy:          e.TriggeredBy,
  3617  		JobID:                e.JobID,
  3618  		JobModifyIndex:       e.JobModifyIndex,
  3619  		Status:               EvalStatusBlocked,
  3620  		PreviousEval:         e.ID,
  3621  		ClassEligibility:     classEligibility,
  3622  		EscapedComputedClass: escaped,
  3623  	}
  3624  }
  3625  
// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// Job is the parent job of all the allocations in the Plan.
	// Since a Plan only involves a single Job, we can reduce the size
	// of the plan by only including it once.
	Job *Job

	// NodeUpdate contains all the allocations for each node. For each node,
	// this is a list of the allocations to update to either stop or evict.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	NodeAllocation map[string][]*Allocation

	// Annotations contains annotations by the scheduler to be used by operators
	// to understand the decisions made by the scheduler.
	Annotations *PlanAnnotations
}
  3666  
  3667  // AppendUpdate marks the allocation for eviction. The clientStatus of the
  3668  // allocation may be optionally set by passing in a non-empty value.
  3669  func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) {
  3670  	newAlloc := new(Allocation)
  3671  	*newAlloc = *alloc
  3672  
  3673  	// If the job is not set in the plan we are deregistering a job so we
  3674  	// extract the job from the allocation.
  3675  	if p.Job == nil && newAlloc.Job != nil {
  3676  		p.Job = newAlloc.Job
  3677  	}
  3678  
  3679  	// Normalize the job
  3680  	newAlloc.Job = nil
  3681  
  3682  	// Strip the resources as it can be rebuilt.
  3683  	newAlloc.Resources = nil
  3684  
  3685  	newAlloc.DesiredStatus = desiredStatus
  3686  	newAlloc.DesiredDescription = desiredDesc
  3687  
  3688  	if clientStatus != "" {
  3689  		newAlloc.ClientStatus = clientStatus
  3690  	}
  3691  
  3692  	node := alloc.NodeID
  3693  	existing := p.NodeUpdate[node]
  3694  	p.NodeUpdate[node] = append(existing, newAlloc)
  3695  }
  3696  
  3697  func (p *Plan) PopUpdate(alloc *Allocation) {
  3698  	existing := p.NodeUpdate[alloc.NodeID]
  3699  	n := len(existing)
  3700  	if n > 0 && existing[n-1].ID == alloc.ID {
  3701  		existing = existing[:n-1]
  3702  		if len(existing) > 0 {
  3703  			p.NodeUpdate[alloc.NodeID] = existing
  3704  		} else {
  3705  			delete(p.NodeUpdate, alloc.NodeID)
  3706  		}
  3707  	}
  3708  }
  3709  
  3710  func (p *Plan) AppendAlloc(alloc *Allocation) {
  3711  	node := alloc.NodeID
  3712  	existing := p.NodeAllocation[node]
  3713  	p.NodeAllocation[node] = append(existing, alloc)
  3714  }
  3715  
  3716  // IsNoOp checks if this plan would do nothing
  3717  func (p *Plan) IsNoOp() bool {
  3718  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0
  3719  }
  3720  
// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	// FullCommit looks entries up using the keys of Plan.NodeAllocation, so
	// both maps are expected to share the same keys.
	NodeAllocation map[string][]*Allocation

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// over committed) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}
  3739  
  3740  // IsNoOp checks if this plan result would do nothing
  3741  func (p *PlanResult) IsNoOp() bool {
  3742  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0
  3743  }
  3744  
  3745  // FullCommit is used to check if all the allocations in a plan
  3746  // were committed as part of the result. Returns if there was
  3747  // a match, and the number of expected and actual allocations.
  3748  func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
  3749  	expected := 0
  3750  	actual := 0
  3751  	for name, allocList := range plan.NodeAllocation {
  3752  		didAlloc, _ := p.NodeAllocation[name]
  3753  		expected += len(allocList)
  3754  		actual += len(didAlloc)
  3755  	}
  3756  	return actual == expected, expected, actual
  3757  }
  3758  
// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators.
type PlanAnnotations struct {
	// DesiredTGUpdates is the set of desired updates per task group.
	// NOTE(review): the map key is presumably the task group name — confirm
	// against the scheduler code that populates it.
	DesiredTGUpdates map[string]*DesiredUpdates
}
  3765  
// DesiredUpdates is the set of changes the scheduler would like to make given
// sufficient resources and cluster capacity.
//
// NOTE(review): each field is presumably a count of allocations falling into
// the category its name describes (ignored, placed, migrated, stopped, updated
// in place, or updated destructively) — confirm against the scheduler code
// that fills these in.
type DesiredUpdates struct {
	Ignore            uint64
	Place             uint64
	Migrate           uint64
	Stop              uint64
	InPlaceUpdate     uint64
	DestructiveUpdate uint64
}
  3776  
  3777  // msgpackHandle is a shared handle for encoding/decoding of structs
  3778  var MsgpackHandle = func() *codec.MsgpackHandle {
  3779  	h := &codec.MsgpackHandle{RawToString: true}
  3780  
  3781  	// Sets the default type for decoding a map into a nil interface{}.
  3782  	// This is necessary in particular because we store the driver configs as a
  3783  	// nil interface{}.
  3784  	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
  3785  	return h
  3786  }()
  3787  
  3788  var HashiMsgpackHandle = func() *hcodec.MsgpackHandle {
  3789  	h := &hcodec.MsgpackHandle{RawToString: true}
  3790  
  3791  	// Sets the default type for decoding a map into a nil interface{}.
  3792  	// This is necessary in particular because we store the driver configs as a
  3793  	// nil interface{}.
  3794  	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
  3795  	return h
  3796  }()
  3797  
  3798  // Decode is used to decode a MsgPack encoded object
  3799  func Decode(buf []byte, out interface{}) error {
  3800  	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
  3801  }
  3802  
  3803  // Encode is used to encode a MsgPack object with type prefix
  3804  func Encode(t MessageType, msg interface{}) ([]byte, error) {
  3805  	var buf bytes.Buffer
  3806  	buf.WriteByte(uint8(t))
  3807  	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
  3808  	return buf.Bytes(), err
  3809  }
  3810  
// KeyringResponse is a unified key response and can be used for install,
// remove, use, as well as listing key queries.
//
// NOTE(review): the field semantics are not visible in this file. Presumably
// Messages holds per-node messages, Keys maps each key to the number of nodes
// holding it, and NumNodes is the total number of nodes queried — confirm
// against the serf keyring code that fills this in.
type KeyringResponse struct {
	Messages map[string]string
	Keys     map[string]int
	NumNodes int
}
  3818  
// KeyringRequest is the request object for serf key operations.
type KeyringRequest struct {
	// Key is the key the operation applies to.
	// NOTE(review): the expected encoding of the key is not visible here.
	Key string
}
  3823  
// RecoverableError wraps an error and marks whether it is recoverable and could
// be retried or it is fatal. Only the message text of the wrapped error is
// kept; the original error value itself is not stored.
type RecoverableError struct {
	Err         string // message text of the wrapped error, returned by Error
	Recoverable bool   // true when the error is recoverable and could be retried
}
  3830  
  3831  // NewRecoverableError is used to wrap an error and mark it as recoverable or
  3832  // not.
  3833  func NewRecoverableError(e error, recoverable bool) *RecoverableError {
  3834  	if e == nil {
  3835  		return nil
  3836  	}
  3837  
  3838  	return &RecoverableError{
  3839  		Err:         e.Error(),
  3840  		Recoverable: recoverable,
  3841  	}
  3842  }
  3843  
// Error returns the stored error message, which makes *RecoverableError
// satisfy the error interface.
func (r *RecoverableError) Error() string {
	return r.Err
}