github.com/mattyr/nomad@v0.3.3-0.20160919021406-3485a065154a/nomad/structs/structs.go (about)

     1  package structs
     2  
     3  import (
     4  	"bytes"
     5  	"crypto/md5"
     6  	"crypto/sha1"
     7  	"crypto/sha256"
     8  	"crypto/sha512"
     9  	"encoding/hex"
    10  	"errors"
    11  	"fmt"
    12  	"io"
    13  	"path/filepath"
    14  	"reflect"
    15  	"regexp"
    16  	"strconv"
    17  	"strings"
    18  	"time"
    19  
    20  	"github.com/gorhill/cronexpr"
    21  	"github.com/hashicorp/consul/api"
    22  	"github.com/hashicorp/go-multierror"
    23  	"github.com/hashicorp/go-version"
    24  	"github.com/hashicorp/nomad/helper/args"
    25  	"github.com/mitchellh/copystructure"
    26  	"github.com/ugorji/go/codec"
    27  
    28  	hcodec "github.com/hashicorp/go-msgpack/codec"
    29  )
    30  
// Sentinel errors exported by this package. The message text is part of the
// runtime-visible value and is therefore left untouched.
var (
	// ErrNoLeader carries the message "No cluster leader".
	ErrNoLeader     = fmt.Errorf("No cluster leader")
	// ErrNoRegionPath carries the message "No path to region".
	ErrNoRegionPath = fmt.Errorf("No path to region")
)
    35  
// MessageType is a one-byte tag identifying the kind of request a message
// carries.
type MessageType uint8

// The message types below are numbered sequentially from zero via iota, so
// inserting or reordering entries changes the numeric value of every entry
// that follows.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	// NOTE(review): "Degister" looks like a misspelling of "Deregister"; the
	// identifier is exported, so renaming it would break callers.
	VaultAccessorDegisterRequestType
)
    53  
const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	// ProtocolVersion, APIMajorVersion and APIMinorVersion are string keys.
	// NOTE(review): presumably they index VersionResponse.Versions; confirm
	// against the Status.Version endpoint. Note that APIMajorVersion (a key)
	// and ApiMajorVersion (a number) differ only in letter case.
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)
    77  
// RPCInfo is used to describe common information about a query. Both
// QueryOptions and WriteRequest provide these three methods.
type RPCInfo interface {
	// RequestRegion returns the region the request is addressed to.
	RequestRegion() string
	// IsRead reports whether the request is a read (as opposed to a write).
	IsRead() bool
	// AllowStaleRead reports whether a stale result is acceptable.
	AllowStaleRead() bool
}
    84  
    85  // QueryOptions is used to specify various flags for read queries
    86  type QueryOptions struct {
    87  	// The target region for this query
    88  	Region string
    89  
    90  	// If set, wait until query exceeds given index. Must be provided
    91  	// with MaxQueryTime.
    92  	MinQueryIndex uint64
    93  
    94  	// Provided with MinQueryIndex to wait for change.
    95  	MaxQueryTime time.Duration
    96  
    97  	// If set, any follower can service the request. Results
    98  	// may be arbitrarily stale.
    99  	AllowStale bool
   100  
   101  	// If set, used as prefix for resource list searches
   102  	Prefix string
   103  }
   104  
   105  func (q QueryOptions) RequestRegion() string {
   106  	return q.Region
   107  }
   108  
   109  // QueryOption only applies to reads, so always true
   110  func (q QueryOptions) IsRead() bool {
   111  	return true
   112  }
   113  
   114  func (q QueryOptions) AllowStaleRead() bool {
   115  	return q.AllowStale
   116  }
   117  
   118  type WriteRequest struct {
   119  	// The target region for this write
   120  	Region string
   121  }
   122  
   123  func (w WriteRequest) RequestRegion() string {
   124  	// The target region for this request
   125  	return w.Region
   126  }
   127  
   128  // WriteRequest only applies to writes, always false
   129  func (w WriteRequest) IsRead() bool {
   130  	return false
   131  }
   132  
   133  func (w WriteRequest) AllowStaleRead() bool {
   134  	return false
   135  }
   136  
// QueryMeta allows a query response to include potentially
// useful metadata about a query. It is embedded in the read-style
// response structs of this package.
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}
   151  
// WriteMeta allows a write response to include potentially
// useful metadata about the write. It is embedded in the write-style
// response structs of this package.
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}
   158  
// NodeRegisterRequest is used for the Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	// Node is the node to register.
	Node *Node
	WriteRequest
}
   165  
// NodeDeregisterRequest is used for the Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	// NodeID identifies the node to deregister.
	NodeID string
	WriteRequest
}
   172  
// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to
	Datacenter string
}
   191  
// NodeUpdateStatusRequest is used for the Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	// NodeID identifies the node whose status is updated.
	NodeID string
	// Status is the new status for the node.
	Status string
	WriteRequest
}
   199  
// NodeUpdateDrainRequest is used for updating the drain status
// of a node.
type NodeUpdateDrainRequest struct {
	// NodeID identifies the node whose drain status is updated.
	NodeID string
	// Drain is the drain status to apply.
	Drain  bool
	WriteRequest
}
   206  
// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	// NodeID identifies the node to re-evaluate.
	NodeID string
	WriteRequest
}
   212  
// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	// NodeID identifies the target node.
	NodeID   string
	// NOTE(review): presumably the node's SecretID (see Node.SecretID), used
	// to authenticate the request; confirm against the endpoint.
	SecretID string
	QueryOptions
}
   219  
// JobRegisterRequest is used for the Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	// Job is the job to register.
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current Jobs index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	WriteRequest
}
   233  
// JobDeregisterRequest is used for the Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	// JobID identifies the job to deregister.
	JobID string
	WriteRequest
}
   240  
// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	// JobID identifies the job to re-evaluate.
	JobID string
	WriteRequest
}
   246  
// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
	// JobID identifies the target job.
	JobID string
	QueryOptions
}
   252  
// JobListRequest is used to parameterize a job list request. It carries
// no fields beyond the embedded QueryOptions.
type JobListRequest struct {
	QueryOptions
}
   257  
// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
	// Job is the job to plan.
	Job  *Job
	Diff bool // Toggles an annotated diff
	WriteRequest
}
   265  
// JobSummaryRequest is used when we just need to get a specific job summary
type JobSummaryRequest struct {
	// JobID identifies the job whose summary is requested.
	JobID string
	QueryOptions
}
   271  
// NodeListRequest is used to parameterize a node list request. It carries
// no fields beyond the embedded QueryOptions.
type NodeListRequest struct {
	QueryOptions
}
   276  
// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	// Evals is the set of evaluations to upsert.
	Evals     []*Evaluation
	// NOTE(review): purpose of EvalToken is not visible in this file;
	// presumably it relates to the Token used by EvalAckRequest. Confirm.
	EvalToken string
	WriteRequest
}
   283  
// EvalDeleteRequest is used for deleting evaluations. Both fields are
// lists of strings; presumably evaluation IDs and allocation IDs
// respectively (TODO confirm against the handler).
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}
   290  
// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	// EvalID identifies the target evaluation.
	EvalID string
	QueryOptions
}
   296  
// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	// EvalID identifies the evaluation being acknowledged.
	EvalID string
	// NOTE(review): presumably the Token handed out in EvalDequeueResponse;
	// confirm against the broker.
	Token  string
	WriteRequest
}
   303  
// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	// NOTE(review): presumably the scheduler types the caller can handle;
	// confirm against the broker.
	Schedulers []string
	// Timeout is a duration supplied with the dequeue; presumably how long
	// the call may wait for work (TODO confirm).
	Timeout    time.Duration
	WriteRequest
}
   310  
// EvalListRequest is used to list the evaluations. It carries no fields
// beyond the embedded QueryOptions.
type EvalListRequest struct {
	QueryOptions
}
   315  
// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	// Plan is the plan being submitted.
	Plan *Plan
	WriteRequest
}
   321  
// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	WriteRequest
}
   335  
// AllocListRequest is used to request a list of allocations. It carries
// no fields beyond the embedded QueryOptions.
type AllocListRequest struct {
	QueryOptions
}
   340  
// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	// AllocID identifies the allocation to query.
	AllocID string
	QueryOptions
}
   346  
// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	// AllocIDs lists the allocations to query.
	AllocIDs []string
	QueryOptions
}
   352  
// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	// JobID identifies the periodic job to force.
	JobID string
	WriteRequest
}
   358  
// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation
type DeriveVaultTokenRequest struct {
	// NodeID and SecretID identify the requesting node.
	// NOTE(review): presumably SecretID is checked against Node.SecretID;
	// confirm against the endpoint.
	NodeID   string
	SecretID string
	// AllocID is the allocation the tasks belong to.
	AllocID  string
	// Tasks lists the tasks for which tokens are requested.
	Tasks    []string
	QueryOptions
}
   368  
// VaultAccessorsRequest is used to operate on a set of Vault accessors.
// Unlike most requests in this file it embeds neither QueryOptions nor
// WriteRequest.
type VaultAccessorsRequest struct {
	// Accessors is the set of accessors to operate on.
	Accessors []*VaultAccessor
}
   373  
// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
	// AllocID and Task name the allocation and task the token was created for.
	AllocID     string
	Task        string
	// NodeID is a node identifier; presumably the node running the
	// allocation (TODO confirm).
	NodeID      string
	// Accessor is the accessor string referencing the Vault token.
	Accessor    string
	// CreationTTL is an integer TTL; NOTE(review): unit is not visible
	// here (presumably seconds); confirm.
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}
   386  
// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token
	Tasks map[string]string
	QueryMeta
}
   393  
// GenericRequest is used for requests where no
// specific information is needed. It carries only the embedded
// QueryOptions.
type GenericRequest struct {
	QueryOptions
}
   399  
// GenericResponse is used to respond to a request where no
// specific response information is needed. It carries only the embedded
// WriteMeta.
type GenericResponse struct {
	WriteMeta
}
   405  
// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	// Build is a build identifier string.
	Build    string
	// Versions maps a version name to its integer value.
	// NOTE(review): presumably keyed by ProtocolVersion, APIMajorVersion and
	// APIMinorVersion; confirm against the endpoint.
	Versions map[string]int
	QueryMeta
}
   412  
// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	// EvalID and EvalCreateIndex describe an evaluation; presumably the one
	// created by the registration (TODO confirm).
	EvalID          string
	EvalCreateIndex uint64
	// JobModifyIndex is the modify index of the job.
	JobModifyIndex  uint64
	QueryMeta
}
   420  
// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	// EvalID and EvalCreateIndex describe an evaluation; presumably the one
	// created by the deregistration (TODO confirm).
	EvalID          string
	EvalCreateIndex uint64
	// JobModifyIndex is the modify index of the job.
	JobModifyIndex  uint64
	QueryMeta
}
   428  
// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	// HeartbeatTTL is a duration returned to the node.
	// NOTE(review): presumably the deadline for its next heartbeat; confirm.
	HeartbeatTTL    time.Duration
	// EvalIDs and EvalCreateIndex describe evaluations; presumably those
	// created as a result of the update (TODO confirm).
	EvalIDs         []string
	EvalCreateIndex uint64
	// NodeModifyIndex is the modify index of the node.
	NodeModifyIndex uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader.  If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response.  This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	QueryMeta
}
   451  
// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	// EvalIDs and EvalCreateIndex describe evaluations; presumably those
	// created as a result of the drain update (TODO confirm).
	EvalIDs         []string
	EvalCreateIndex uint64
	// NodeModifyIndex is the modify index of the node.
	NodeModifyIndex uint64
	QueryMeta
}
   459  
// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	// Allocs is the list of allocations for the node.
	Allocs []*Allocation
	QueryMeta
}
   465  
// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
	// Allocs maps a string key to a uint64 value.
	// NOTE(review): presumably allocation ID to a modify index; confirm
	// against the endpoint that fills it.
	Allocs map[string]uint64
	QueryMeta
}
   471  
// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	// Node is the node being returned.
	Node *Node
	QueryMeta
}
   477  
// NodeListResponse is used for a node list request
type NodeListResponse struct {
	// Nodes holds one summary stub per listed node.
	Nodes []*NodeListStub
	QueryMeta
}
   483  
// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	// Job is the job being returned.
	Job *Job
	QueryMeta
}
   489  
// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
	// JobSummary is the summary being returned.
	JobSummary *JobSummary
	QueryMeta
}
   495  
// JobListResponse is used for a job list request
type JobListResponse struct {
	// Jobs holds one summary stub per listed job.
	Jobs []*JobListStub
	QueryMeta
}
   501  
// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	// NextPeriodicLaunch is the time at which the job would next be launched
	// if submitted.
	NextPeriodicLaunch time.Time

	WriteMeta
}
   529  
// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	// Alloc is the allocation being returned.
	Alloc *Allocation
	QueryMeta
}
   535  
// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
	// Allocs is the set of allocations being returned.
	Allocs []*Allocation
	QueryMeta
}
   541  
// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	// Allocations holds one summary stub per allocation of the job.
	Allocations []*AllocListStub
	QueryMeta
}
   547  
// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	// Evaluations is the list of evaluations for the job.
	Evaluations []*Evaluation
	QueryMeta
}
   553  
// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	// Eval is the evaluation being returned.
	Eval *Evaluation
	QueryMeta
}
   559  
// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	// Eval is the dequeued evaluation.
	Eval  *Evaluation
	// NOTE(review): presumably the token to present in EvalAckRequest when
	// acking or nacking Eval; confirm against the broker.
	Token string
	QueryMeta
}
   566  
// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	// Result is the outcome of the submitted plan.
	Result *PlanResult
	WriteMeta
}
   572  
// AllocListResponse is used for an allocation list request
type AllocListResponse struct {
	// Allocations holds one summary stub per listed allocation.
	Allocations []*AllocListStub
	QueryMeta
}
   578  
// EvalListResponse is used for an evaluation list request
type EvalListResponse struct {
	// Evaluations is the list of evaluations being returned.
	Evaluations []*Evaluation
	QueryMeta
}
   584  
// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	// Allocations holds one summary stub per allocation of the evaluation.
	Allocations []*AllocListStub
	QueryMeta
}
   590  
// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	// EvalID and EvalCreateIndex describe an evaluation; presumably the one
	// created by the forced launch (TODO confirm).
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}
   597  
// Node status values. These are exactly the values ValidNodeStatus accepts;
// only NodeStatusDown is treated as terminal by Node.TerminalStatus.
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)
   603  
   604  // ShouldDrainNode checks if a given node status should trigger an
   605  // evaluation. Some states don't require any further action.
   606  func ShouldDrainNode(status string) bool {
   607  	switch status {
   608  	case NodeStatusInit, NodeStatusReady:
   609  		return false
   610  	case NodeStatusDown:
   611  		return true
   612  	default:
   613  		panic(fmt.Sprintf("unhandled node status %s", status))
   614  	}
   615  }
   616  
   617  // ValidNodeStatus is used to check if a node status is valid
   618  func ValidNodeStatus(status string) bool {
   619  	switch status {
   620  	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
   621  		return true
   622  	default:
   623  		return false
   624  	}
   625  }
   626  
// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may be to provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// StatusUpdatedAt is the time stamp at which the state of the node was
	// updated
	StatusUpdatedAt int64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
   702  
   703  func (n *Node) Copy() *Node {
   704  	if n == nil {
   705  		return nil
   706  	}
   707  	nn := new(Node)
   708  	*nn = *n
   709  	nn.Attributes = CopyMapStringString(nn.Attributes)
   710  	nn.Resources = nn.Resources.Copy()
   711  	nn.Reserved = nn.Reserved.Copy()
   712  	nn.Links = CopyMapStringString(nn.Links)
   713  	nn.Meta = CopyMapStringString(nn.Meta)
   714  	return nn
   715  }
   716  
   717  // TerminalStatus returns if the current status is terminal and
   718  // will no longer transition.
   719  func (n *Node) TerminalStatus() bool {
   720  	switch n.Status {
   721  	case NodeStatusDown:
   722  		return true
   723  	default:
   724  		return false
   725  	}
   726  }
   727  
   728  // Stub returns a summarized version of the node
   729  func (n *Node) Stub() *NodeListStub {
   730  	return &NodeListStub{
   731  		ID:                n.ID,
   732  		Datacenter:        n.Datacenter,
   733  		Name:              n.Name,
   734  		NodeClass:         n.NodeClass,
   735  		Drain:             n.Drain,
   736  		Status:            n.Status,
   737  		StatusDescription: n.StatusDescription,
   738  		CreateIndex:       n.CreateIndex,
   739  		ModifyIndex:       n.ModifyIndex,
   740  	}
   741  }
   742  
// NodeListStub is used to return a subset of node information
// for the node list. Every field is copied from the Node field of the
// same name by Node.Stub.
type NodeListStub struct {
	ID                string
	Datacenter        string
	Name              string
	NodeClass         string
	Drain             bool
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}
   756  
// Resources is used to define the resources available
// on a client
type Resources struct {
	// CPU is an integer CPU amount. NOTE(review): unit not visible here
	// (presumably MHz); confirm.
	CPU      int
	// MemoryMB is the memory amount in megabytes.
	MemoryMB int `mapstructure:"memory"`
	// DiskMB is the disk amount in megabytes.
	DiskMB   int `mapstructure:"disk"`
	// IOPS is an integer I/O operations figure.
	IOPS     int
	// Networks lists the network resources.
	Networks []*NetworkResource
}
   766  
const (
	// BytesInMegabyte is the number of bytes in one megabyte (1024 * 1024),
	// used to convert the *MB resource fields to bytes.
	BytesInMegabyte = 1024 * 1024
)
   770  
   771  // DefaultResources returns the default resources for a task.
   772  func DefaultResources() *Resources {
   773  	return &Resources{
   774  		CPU:      100,
   775  		MemoryMB: 10,
   776  		IOPS:     0,
   777  	}
   778  }
   779  
   780  // DiskInBytes returns the amount of disk resources in bytes.
   781  func (r *Resources) DiskInBytes() int64 {
   782  	return int64(r.DiskMB * BytesInMegabyte)
   783  }
   784  
   785  // Merge merges this resource with another resource.
   786  func (r *Resources) Merge(other *Resources) {
   787  	if other.CPU != 0 {
   788  		r.CPU = other.CPU
   789  	}
   790  	if other.MemoryMB != 0 {
   791  		r.MemoryMB = other.MemoryMB
   792  	}
   793  	if other.DiskMB != 0 {
   794  		r.DiskMB = other.DiskMB
   795  	}
   796  	if other.IOPS != 0 {
   797  		r.IOPS = other.IOPS
   798  	}
   799  	if len(other.Networks) != 0 {
   800  		r.Networks = other.Networks
   801  	}
   802  }
   803  
   804  func (r *Resources) Canonicalize() {
   805  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
   806  	// problems since we use reflect DeepEquals.
   807  	if len(r.Networks) == 0 {
   808  		r.Networks = nil
   809  	}
   810  
   811  	for _, n := range r.Networks {
   812  		n.Canonicalize()
   813  	}
   814  }
   815  
   816  // MeetsMinResources returns an error if the resources specified are less than
   817  // the minimum allowed.
   818  func (r *Resources) MeetsMinResources() error {
   819  	var mErr multierror.Error
   820  	if r.CPU < 20 {
   821  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is 20; got %d", r.CPU))
   822  	}
   823  	if r.MemoryMB < 10 {
   824  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is 10; got %d", r.MemoryMB))
   825  	}
   826  	if r.IOPS < 0 {
   827  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is 0; got %d", r.IOPS))
   828  	}
   829  	for i, n := range r.Networks {
   830  		if err := n.MeetsMinResources(); err != nil {
   831  			mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err))
   832  		}
   833  	}
   834  
   835  	return mErr.ErrorOrNil()
   836  }
   837  
   838  // Copy returns a deep copy of the resources
   839  func (r *Resources) Copy() *Resources {
   840  	if r == nil {
   841  		return nil
   842  	}
   843  	newR := new(Resources)
   844  	*newR = *r
   845  	if r.Networks != nil {
   846  		n := len(r.Networks)
   847  		newR.Networks = make([]*NetworkResource, n)
   848  		for i := 0; i < n; i++ {
   849  			newR.Networks[i] = r.Networks[i].Copy()
   850  		}
   851  	}
   852  	return newR
   853  }
   854  
   855  // NetIndex finds the matching net index using device name
   856  func (r *Resources) NetIndex(n *NetworkResource) int {
   857  	for idx, net := range r.Networks {
   858  		if net.Device == n.Device {
   859  			return idx
   860  		}
   861  	}
   862  	return -1
   863  }
   864  
   865  // Superset checks if one set of resources is a superset
   866  // of another. This ignores network resources, and the NetworkIndex
   867  // should be used for that.
   868  func (r *Resources) Superset(other *Resources) (bool, string) {
   869  	if r.CPU < other.CPU {
   870  		return false, "cpu exhausted"
   871  	}
   872  	if r.MemoryMB < other.MemoryMB {
   873  		return false, "memory exhausted"
   874  	}
   875  	if r.DiskMB < other.DiskMB {
   876  		return false, "disk exhausted"
   877  	}
   878  	if r.IOPS < other.IOPS {
   879  		return false, "iops exhausted"
   880  	}
   881  	return true, ""
   882  }
   883  
   884  // Add adds the resources of the delta to this, potentially
   885  // returning an error if not possible.
   886  func (r *Resources) Add(delta *Resources) error {
   887  	if delta == nil {
   888  		return nil
   889  	}
   890  	r.CPU += delta.CPU
   891  	r.MemoryMB += delta.MemoryMB
   892  	r.DiskMB += delta.DiskMB
   893  	r.IOPS += delta.IOPS
   894  
   895  	for _, n := range delta.Networks {
   896  		// Find the matching interface by IP or CIDR
   897  		idx := r.NetIndex(n)
   898  		if idx == -1 {
   899  			r.Networks = append(r.Networks, n.Copy())
   900  		} else {
   901  			r.Networks[idx].Add(n)
   902  		}
   903  	}
   904  	return nil
   905  }
   906  
   907  func (r *Resources) GoString() string {
   908  	return fmt.Sprintf("*%#v", *r)
   909  }
   910  
// Port pairs a label with a port number. It is the element type of
// NetworkResource.ReservedPorts and NetworkResource.DynamicPorts.
type Port struct {
	// Label is the name the port is looked up by (see MapLabelToValues).
	Label string
	// Value is the port number; it is decoded from the "static" key.
	Value int `mapstructure:"static"`
}
   915  
// NetworkResource is used to represent available network
// resources. Resources.NetIndex identifies a network by its Device.
type NetworkResource struct {
	Device        string // Name of the device
	CIDR          string // CIDR block of addresses
	IP            string // IP address
	MBits         int    // Throughput
	ReservedPorts []Port // Reserved ports
	DynamicPorts  []Port // Dynamically assigned ports
}
   926  
   927  func (n *NetworkResource) Canonicalize() {
   928  	// Ensure that an empty and nil slices are treated the same to avoid scheduling
   929  	// problems since we use reflect DeepEquals.
   930  	if len(n.ReservedPorts) == 0 {
   931  		n.ReservedPorts = nil
   932  	}
   933  	if len(n.DynamicPorts) == 0 {
   934  		n.DynamicPorts = nil
   935  	}
   936  }
   937  
   938  // MeetsMinResources returns an error if the resources specified are less than
   939  // the minimum allowed.
   940  func (n *NetworkResource) MeetsMinResources() error {
   941  	var mErr multierror.Error
   942  	if n.MBits < 1 {
   943  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits))
   944  	}
   945  	return mErr.ErrorOrNil()
   946  }
   947  
   948  // Copy returns a deep copy of the network resource
   949  func (n *NetworkResource) Copy() *NetworkResource {
   950  	if n == nil {
   951  		return nil
   952  	}
   953  	newR := new(NetworkResource)
   954  	*newR = *n
   955  	if n.ReservedPorts != nil {
   956  		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
   957  		copy(newR.ReservedPorts, n.ReservedPorts)
   958  	}
   959  	if n.DynamicPorts != nil {
   960  		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
   961  		copy(newR.DynamicPorts, n.DynamicPorts)
   962  	}
   963  	return newR
   964  }
   965  
   966  // Add adds the resources of the delta to this, potentially
   967  // returning an error if not possible.
   968  func (n *NetworkResource) Add(delta *NetworkResource) {
   969  	if len(delta.ReservedPorts) > 0 {
   970  		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
   971  	}
   972  	n.MBits += delta.MBits
   973  	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
   974  }
   975  
   976  func (n *NetworkResource) GoString() string {
   977  	return fmt.Sprintf("*%#v", *n)
   978  }
   979  
   980  func (n *NetworkResource) MapLabelToValues(port_map map[string]int) map[string]int {
   981  	labelValues := make(map[string]int)
   982  	ports := append(n.ReservedPorts, n.DynamicPorts...)
   983  	for _, port := range ports {
   984  		if mapping, ok := port_map[port.Label]; ok {
   985  			labelValues[port.Label] = mapping
   986  		} else {
   987  			labelValues[port.Label] = port.Value
   988  		}
   989  	}
   990  	return labelValues
   991  }
   992  
const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)
  1001  
// Possible values of a job's Status field.
const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluations and allocations are terminal
)
  1007  
const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if one is
	// not specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// CoreJobPriority is set higher than any user specified
	// job priority so that core jobs get priority. This is
	// important for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2
)
  1024  
// JobSummary summarizes the state of the allocations of a job
type JobSummary struct {
	// JobID identifies the job being summarized.
	JobID string

	// Summary holds one TaskGroupSummary per key; presumably the key is the
	// task group name (confirm against the code that populates it).
	Summary map[string]TaskGroupSummary

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1034  
  1035  // Copy returns a new copy of JobSummary
  1036  func (js *JobSummary) Copy() *JobSummary {
  1037  	newJobSummary := new(JobSummary)
  1038  	*newJobSummary = *js
  1039  	newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary))
  1040  	for k, v := range js.Summary {
  1041  		newTGSummary[k] = v
  1042  	}
  1043  	newJobSummary.Summary = newTGSummary
  1044  	return newJobSummary
  1045  }
  1046  
// TaskGroupSummary summarizes the state of all the allocations of a
// particular TaskGroup. Each field is presumably the number of allocations in
// the state named by the field (confirm against the code that updates it).
type TaskGroupSummary struct {
	Queued   int
	Complete int
	Failed   int
	Running  int
	Starting int
	Lost     int
}
  1057  
// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling,
// however.
type Job struct {
	// Region is the Nomad region that handles scheduling this job
	Region string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project.
	// Validate rejects IDs that are empty or contain a space.
	ID string

	// ParentID is the unique identifier of the job that spawned this job.
	ParentID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs. Validate requires it to lie within
	// [JobMinPriority, JobMaxPriority].
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool `mapstructure:"all_at_once"`

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// Update is used to control the update strategy
	Update UpdateStrategy

	// Periodic is used to define the interval the job is run at. A nil
	// value means the job is not periodic.
	Periodic *PeriodicConfig

	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// VaultToken is the Vault token that proves the submitter of the job has
	// access to the specified Vault policies. This field is only used to
	// transfer the token and is not stored after Job submission.
	VaultToken string `mapstructure:"vault_token"`

	// Status is the job status
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Raft Indexes
	CreateIndex    uint64
	ModifyIndex    uint64
	JobModifyIndex uint64
}
  1129  
  1130  // Canonicalize is used to canonicalize fields in the Job. This should be called
  1131  // when registering a Job.
  1132  func (j *Job) Canonicalize() {
  1133  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  1134  	// problems since we use reflect DeepEquals.
  1135  	if len(j.Meta) == 0 {
  1136  		j.Meta = nil
  1137  	}
  1138  
  1139  	for _, tg := range j.TaskGroups {
  1140  		tg.Canonicalize(j)
  1141  	}
  1142  }
  1143  
  1144  // Copy returns a deep copy of the Job. It is expected that callers use recover.
  1145  // This job can panic if the deep copy failed as it uses reflection.
  1146  func (j *Job) Copy() *Job {
  1147  	if j == nil {
  1148  		return nil
  1149  	}
  1150  	nj := new(Job)
  1151  	*nj = *j
  1152  	nj.Datacenters = CopySliceString(nj.Datacenters)
  1153  	nj.Constraints = CopySliceConstraints(nj.Constraints)
  1154  
  1155  	if j.TaskGroups != nil {
  1156  		tgs := make([]*TaskGroup, len(nj.TaskGroups))
  1157  		for i, tg := range nj.TaskGroups {
  1158  			tgs[i] = tg.Copy()
  1159  		}
  1160  		nj.TaskGroups = tgs
  1161  	}
  1162  
  1163  	nj.Periodic = nj.Periodic.Copy()
  1164  	nj.Meta = CopyMapStringString(nj.Meta)
  1165  	return nj
  1166  }
  1167  
  1168  // Validate is used to sanity check a job input
  1169  func (j *Job) Validate() error {
  1170  	var mErr multierror.Error
  1171  	if j.Region == "" {
  1172  		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
  1173  	}
  1174  	if j.ID == "" {
  1175  		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
  1176  	} else if strings.Contains(j.ID, " ") {
  1177  		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
  1178  	}
  1179  	if j.Name == "" {
  1180  		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
  1181  	}
  1182  	if j.Type == "" {
  1183  		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
  1184  	}
  1185  	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
  1186  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
  1187  	}
  1188  	if len(j.Datacenters) == 0 {
  1189  		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
  1190  	}
  1191  	if len(j.TaskGroups) == 0 {
  1192  		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
  1193  	}
  1194  	for idx, constr := range j.Constraints {
  1195  		if err := constr.Validate(); err != nil {
  1196  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  1197  			mErr.Errors = append(mErr.Errors, outer)
  1198  		}
  1199  	}
  1200  
  1201  	// Check for duplicate task groups
  1202  	taskGroups := make(map[string]int)
  1203  	for idx, tg := range j.TaskGroups {
  1204  		if tg.Name == "" {
  1205  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
  1206  		} else if existing, ok := taskGroups[tg.Name]; ok {
  1207  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
  1208  		} else {
  1209  			taskGroups[tg.Name] = idx
  1210  		}
  1211  
  1212  		if j.Type == "system" && tg.Count > 1 {
  1213  			mErr.Errors = append(mErr.Errors,
  1214  				fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler",
  1215  					tg.Name, tg.Count))
  1216  		}
  1217  	}
  1218  
  1219  	// Validate the task group
  1220  	for _, tg := range j.TaskGroups {
  1221  		if err := tg.Validate(); err != nil {
  1222  			outer := fmt.Errorf("Task group %s validation failed: %s", tg.Name, err)
  1223  			mErr.Errors = append(mErr.Errors, outer)
  1224  		}
  1225  	}
  1226  
  1227  	// Validate periodic is only used with batch jobs.
  1228  	if j.IsPeriodic() && j.Periodic.Enabled {
  1229  		if j.Type != JobTypeBatch {
  1230  			mErr.Errors = append(mErr.Errors,
  1231  				fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch))
  1232  		}
  1233  
  1234  		if err := j.Periodic.Validate(); err != nil {
  1235  			mErr.Errors = append(mErr.Errors, err)
  1236  		}
  1237  	}
  1238  
  1239  	return mErr.ErrorOrNil()
  1240  }
  1241  
  1242  // LookupTaskGroup finds a task group by name
  1243  func (j *Job) LookupTaskGroup(name string) *TaskGroup {
  1244  	for _, tg := range j.TaskGroups {
  1245  		if tg.Name == name {
  1246  			return tg
  1247  		}
  1248  	}
  1249  	return nil
  1250  }
  1251  
  1252  // Stub is used to return a summary of the job
  1253  func (j *Job) Stub(summary *JobSummary) *JobListStub {
  1254  	return &JobListStub{
  1255  		ID:                j.ID,
  1256  		ParentID:          j.ParentID,
  1257  		Name:              j.Name,
  1258  		Type:              j.Type,
  1259  		Priority:          j.Priority,
  1260  		Status:            j.Status,
  1261  		StatusDescription: j.StatusDescription,
  1262  		CreateIndex:       j.CreateIndex,
  1263  		ModifyIndex:       j.ModifyIndex,
  1264  		JobModifyIndex:    j.JobModifyIndex,
  1265  		JobSummary:        summary,
  1266  	}
  1267  }
  1268  
  1269  // IsPeriodic returns whether a job is periodic.
  1270  func (j *Job) IsPeriodic() bool {
  1271  	return j.Periodic != nil
  1272  }
  1273  
  1274  // VaultPolicies returns the set of Vault policies per task group, per task
  1275  func (j *Job) VaultPolicies() map[string]map[string]*Vault {
  1276  	policies := make(map[string]map[string]*Vault, len(j.TaskGroups))
  1277  
  1278  	for _, tg := range j.TaskGroups {
  1279  		tgPolicies := make(map[string]*Vault, len(tg.Tasks))
  1280  		policies[tg.Name] = tgPolicies
  1281  
  1282  		for _, task := range tg.Tasks {
  1283  			if task.Vault == nil {
  1284  				continue
  1285  			}
  1286  
  1287  			tgPolicies[task.Name] = task.Vault
  1288  		}
  1289  	}
  1290  
  1291  	return policies
  1292  }
  1293  
// JobListStub is used to return a subset of job information
// for the job list. Job.Stub fills every field from the corresponding Job
// field, except JobSummary, which is supplied by the caller.
type JobListStub struct {
	ID                string
	ParentID          string
	Name              string
	Type              string
	Priority          int
	Status            string
	StatusDescription string
	JobSummary        *JobSummary
	CreateIndex       uint64
	ModifyIndex       uint64
	JobModifyIndex    uint64
}
  1309  
// UpdateStrategy is used to modify how updates are done. A rolling update is
// only used when both Stagger and MaxParallel are positive (see Rolling).
type UpdateStrategy struct {
	// Stagger is the amount of time between the updates
	Stagger time.Duration

	// MaxParallel is how many updates can be done in parallel
	MaxParallel int `mapstructure:"max_parallel"`
}
  1318  
  1319  // Rolling returns if a rolling strategy should be used
  1320  func (u *UpdateStrategy) Rolling() bool {
  1321  	return u.Stagger > 0 && u.MaxParallel > 0
  1322  }
  1323  
// Supported values of PeriodicConfig.SpecType.
const (
	// PeriodicSpecCron is used for a cron spec. The spec is parsed with
	// cronexpr.
	PeriodicSpecCron = "cron"

	// PeriodicSpecTest is only used by unit tests. It is a sorted, comma
	// separated list of unix timestamps at which to launch.
	PeriodicSpecTest = "_internal_test"
)
  1332  
// PeriodicConfig defines the interval a job should be run at.
type PeriodicConfig struct {
	// Enabled determines if the job should be run periodically. Validate
	// skips all checks when this is false.
	Enabled bool

	// Spec specifies the interval the job should be run at. It is parsed based
	// on the SpecType.
	Spec string

	// SpecType defines the format of the spec. Validate accepts
	// PeriodicSpecCron and PeriodicSpecTest.
	SpecType string

	// ProhibitOverlap enforces that spawned jobs do not run in parallel.
	ProhibitOverlap bool `mapstructure:"prohibit_overlap"`
}
  1348  
  1349  func (p *PeriodicConfig) Copy() *PeriodicConfig {
  1350  	if p == nil {
  1351  		return nil
  1352  	}
  1353  	np := new(PeriodicConfig)
  1354  	*np = *p
  1355  	return np
  1356  }
  1357  
  1358  func (p *PeriodicConfig) Validate() error {
  1359  	if !p.Enabled {
  1360  		return nil
  1361  	}
  1362  
  1363  	if p.Spec == "" {
  1364  		return fmt.Errorf("Must specify a spec")
  1365  	}
  1366  
  1367  	switch p.SpecType {
  1368  	case PeriodicSpecCron:
  1369  		// Validate the cron spec
  1370  		if _, err := cronexpr.Parse(p.Spec); err != nil {
  1371  			return fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err)
  1372  		}
  1373  	case PeriodicSpecTest:
  1374  		// No-op
  1375  	default:
  1376  		return fmt.Errorf("Unknown periodic specification type %q", p.SpecType)
  1377  	}
  1378  
  1379  	return nil
  1380  }
  1381  
  1382  // Next returns the closest time instant matching the spec that is after the
  1383  // passed time. If no matching instance exists, the zero value of time.Time is
  1384  // returned. The `time.Location` of the returned value matches that of the
  1385  // passed time.
  1386  func (p *PeriodicConfig) Next(fromTime time.Time) time.Time {
  1387  	switch p.SpecType {
  1388  	case PeriodicSpecCron:
  1389  		if e, err := cronexpr.Parse(p.Spec); err == nil {
  1390  			return e.Next(fromTime)
  1391  		}
  1392  	case PeriodicSpecTest:
  1393  		split := strings.Split(p.Spec, ",")
  1394  		if len(split) == 1 && split[0] == "" {
  1395  			return time.Time{}
  1396  		}
  1397  
  1398  		// Parse the times
  1399  		times := make([]time.Time, len(split))
  1400  		for i, s := range split {
  1401  			unix, err := strconv.Atoi(s)
  1402  			if err != nil {
  1403  				return time.Time{}
  1404  			}
  1405  
  1406  			times[i] = time.Unix(int64(unix), 0)
  1407  		}
  1408  
  1409  		// Find the next match
  1410  		for _, next := range times {
  1411  			if fromTime.Before(next) {
  1412  				return next
  1413  			}
  1414  		}
  1415  	}
  1416  
  1417  	return time.Time{}
  1418  }
  1419  
const (
	// PeriodicLaunchSuffix is the string appended to the periodic job's ID
	// when launching derived instances of it.
	PeriodicLaunchSuffix = "/periodic-"
)
  1425  
// PeriodicLaunch tracks the last launch time of a periodic job.
type PeriodicLaunch struct {
	ID     string    // ID of the periodic job.
	Launch time.Time // The last launch time.

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1435  
var (
	// defaultServiceJobRestartPolicy is the restart policy NewRestartPolicy
	// hands out (as a copy) for service and system jobs.
	defaultServiceJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 2,
		Interval: 1 * time.Minute,
		Mode:     RestartPolicyModeDelay,
	}
	// defaultBatchJobRestartPolicy is the restart policy NewRestartPolicy
	// hands out (as a copy) for batch jobs.
	defaultBatchJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 15,
		Interval: 7 * 24 * time.Hour,
		Mode:     RestartPolicyModeDelay,
	}
)
  1450  
// Supported values of RestartPolicy.Mode.
const (
	// RestartPolicyModeDelay causes an artificial delay until the next
	// interval is reached when the specified attempts have been reached in
	// the interval.
	RestartPolicyModeDelay = "delay"

	// RestartPolicyModeFail causes a job to fail if the specified number of
	// attempts are reached within an interval.
	RestartPolicyModeFail = "fail"
)
  1460  
// RestartPolicy configures how Tasks are restarted when they crash or fail.
type RestartPolicy struct {
	// Attempts is the number of restarts that will occur in an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of restarts
	// within.
	Interval time.Duration

	// Delay is the time between a failure and a restart.
	Delay time.Duration

	// Mode controls what happens when the task restarts more than Attempts
	// times in an interval. Validate accepts RestartPolicyModeDelay and
	// RestartPolicyModeFail.
	Mode string
}
  1477  
  1478  func (r *RestartPolicy) Copy() *RestartPolicy {
  1479  	if r == nil {
  1480  		return nil
  1481  	}
  1482  	nrp := new(RestartPolicy)
  1483  	*nrp = *r
  1484  	return nrp
  1485  }
  1486  
  1487  func (r *RestartPolicy) Validate() error {
  1488  	switch r.Mode {
  1489  	case RestartPolicyModeDelay, RestartPolicyModeFail:
  1490  	default:
  1491  		return fmt.Errorf("Unsupported restart mode: %q", r.Mode)
  1492  	}
  1493  
  1494  	// Check for ambiguous/confusing settings
  1495  	if r.Attempts == 0 && r.Mode != RestartPolicyModeFail {
  1496  		return fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts)
  1497  	}
  1498  
  1499  	if r.Interval == 0 {
  1500  		return nil
  1501  	}
  1502  	if time.Duration(r.Attempts)*r.Delay > r.Interval {
  1503  		return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)
  1504  	}
  1505  	return nil
  1506  }
  1507  
  1508  func NewRestartPolicy(jobType string) *RestartPolicy {
  1509  	switch jobType {
  1510  	case JobTypeService, JobTypeSystem:
  1511  		rp := defaultServiceJobRestartPolicy
  1512  		return &rp
  1513  	case JobTypeBatch:
  1514  		rp := defaultBatchJobRestartPolicy
  1515  		return &rp
  1516  	}
  1517  	return nil
  1518  }
  1519  
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled.
	Count int

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// RestartPolicy of a TaskGroup. Canonicalize fills in the default for
	// the job type when this is nil.
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// EphemeralDisk is the disk resources that the task group requests
	EphemeralDisk *EphemeralDisk

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string
}
  1548  
  1549  func (tg *TaskGroup) Copy() *TaskGroup {
  1550  	if tg == nil {
  1551  		return nil
  1552  	}
  1553  	ntg := new(TaskGroup)
  1554  	*ntg = *tg
  1555  	ntg.Constraints = CopySliceConstraints(ntg.Constraints)
  1556  
  1557  	ntg.RestartPolicy = ntg.RestartPolicy.Copy()
  1558  
  1559  	if tg.Tasks != nil {
  1560  		tasks := make([]*Task, len(ntg.Tasks))
  1561  		for i, t := range ntg.Tasks {
  1562  			tasks[i] = t.Copy()
  1563  		}
  1564  		ntg.Tasks = tasks
  1565  	}
  1566  
  1567  	ntg.Meta = CopyMapStringString(ntg.Meta)
  1568  
  1569  	if tg.EphemeralDisk != nil {
  1570  		ntg.EphemeralDisk = tg.EphemeralDisk.Copy()
  1571  	}
  1572  	return ntg
  1573  }
  1574  
  1575  // Canonicalize is used to canonicalize fields in the TaskGroup.
  1576  func (tg *TaskGroup) Canonicalize(job *Job) {
  1577  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  1578  	// problems since we use reflect DeepEquals.
  1579  	if len(tg.Meta) == 0 {
  1580  		tg.Meta = nil
  1581  	}
  1582  
  1583  	// Set the default restart policy.
  1584  	if tg.RestartPolicy == nil {
  1585  		tg.RestartPolicy = NewRestartPolicy(job.Type)
  1586  	}
  1587  
  1588  	for _, task := range tg.Tasks {
  1589  		task.Canonicalize(job, tg)
  1590  	}
  1591  }
  1592  
  1593  // Validate is used to sanity check a task group
  1594  func (tg *TaskGroup) Validate() error {
  1595  	var mErr multierror.Error
  1596  	if tg.Name == "" {
  1597  		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
  1598  	}
  1599  	if tg.Count < 0 {
  1600  		mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative"))
  1601  	}
  1602  	if len(tg.Tasks) == 0 {
  1603  		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
  1604  	}
  1605  	for idx, constr := range tg.Constraints {
  1606  		if err := constr.Validate(); err != nil {
  1607  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  1608  			mErr.Errors = append(mErr.Errors, outer)
  1609  		}
  1610  	}
  1611  
  1612  	if tg.RestartPolicy != nil {
  1613  		if err := tg.RestartPolicy.Validate(); err != nil {
  1614  			mErr.Errors = append(mErr.Errors, err)
  1615  		}
  1616  	} else {
  1617  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
  1618  	}
  1619  
  1620  	if tg.EphemeralDisk != nil {
  1621  		if err := tg.EphemeralDisk.Validate(); err != nil {
  1622  			mErr.Errors = append(mErr.Errors, err)
  1623  		}
  1624  	} else {
  1625  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a local disk object", tg.Name))
  1626  	}
  1627  
  1628  	// Check for duplicate tasks
  1629  	tasks := make(map[string]int)
  1630  	for idx, task := range tg.Tasks {
  1631  		if task.Name == "" {
  1632  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
  1633  		} else if existing, ok := tasks[task.Name]; ok {
  1634  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
  1635  		} else {
  1636  			tasks[task.Name] = idx
  1637  		}
  1638  	}
  1639  
  1640  	// Validate the tasks
  1641  	for _, task := range tg.Tasks {
  1642  		if err := task.Validate(tg.EphemeralDisk); err != nil {
  1643  			outer := fmt.Errorf("Task %s validation failed: %s", task.Name, err)
  1644  			mErr.Errors = append(mErr.Errors, outer)
  1645  		}
  1646  	}
  1647  	return mErr.ErrorOrNil()
  1648  }
  1649  
  1650  // LookupTask finds a task by name
  1651  func (tg *TaskGroup) LookupTask(name string) *Task {
  1652  	for _, t := range tg.Tasks {
  1653  		if t.Name == name {
  1654  			return t
  1655  		}
  1656  	}
  1657  	return nil
  1658  }
  1659  
  1660  func (tg *TaskGroup) GoString() string {
  1661  	return fmt.Sprintf("*%#v", *tg)
  1662  }
  1663  
const (
	// Supported values of ServiceCheck.Type.
	// TODO add Consul TTL check
	ServiceCheckHTTP   = "http"
	ServiceCheckTCP    = "tcp"
	ServiceCheckScript = "script"

	// minCheckInterval is the minimum check interval permitted.  Consul
	// currently has its MinInterval set to 1s.  Mirror that here for
	// consistency.
	minCheckInterval = 1 * time.Second

	// minCheckTimeout is the minimum check timeout permitted. It is
	// enforced for tcp and http checks; it is not currently enforced for
	// script checks.
	minCheckTimeout = 1 * time.Second
)
  1679  
// The ServiceCheck data model represents the consul health check that
// Nomad registers for a Task
type ServiceCheck struct {
	Name          string        // Name of the check; Canonicalize derives a default from the service name when empty
	Type          string        // Type of the check - http, tcp or script
	Command       string        // Command is the command to run for script checks
	Args          []string      // Args is a list of arguments for script checks
	Path          string        // Path of the health check url for http type checks
	Protocol      string        // Protocol to use if check is http, defaults to http
	PortLabel     string        `mapstructure:"port"` // The port to use for tcp/http checks
	Interval      time.Duration // Interval of the check
	Timeout       time.Duration // Timeout of the response from the check before consul fails the check
	InitialStatus string        `mapstructure:"initial_status"` // Initial status of the check
}
  1694  
  1695  func (sc *ServiceCheck) Copy() *ServiceCheck {
  1696  	if sc == nil {
  1697  		return nil
  1698  	}
  1699  	nsc := new(ServiceCheck)
  1700  	*nsc = *sc
  1701  	return nsc
  1702  }
  1703  
  1704  func (sc *ServiceCheck) Canonicalize(serviceName string) {
  1705  	// Ensure empty slices are treated as null to avoid scheduling issues when
  1706  	// using DeepEquals.
  1707  	if len(sc.Args) == 0 {
  1708  		sc.Args = nil
  1709  	}
  1710  
  1711  	if sc.Name == "" {
  1712  		sc.Name = fmt.Sprintf("service: %q check", serviceName)
  1713  	}
  1714  }
  1715  
  1716  // validate a Service's ServiceCheck
  1717  func (sc *ServiceCheck) validate() error {
  1718  	switch strings.ToLower(sc.Type) {
  1719  	case ServiceCheckTCP:
  1720  		if sc.Timeout < minCheckTimeout {
  1721  			return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
  1722  		}
  1723  	case ServiceCheckHTTP:
  1724  		if sc.Path == "" {
  1725  			return fmt.Errorf("http type must have a valid http path")
  1726  		}
  1727  
  1728  		if sc.Timeout < minCheckTimeout {
  1729  			return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval)
  1730  		}
  1731  	case ServiceCheckScript:
  1732  		if sc.Command == "" {
  1733  			return fmt.Errorf("script type must have a valid script path")
  1734  		}
  1735  
  1736  		// TODO: enforce timeout on the Client side and reenable
  1737  		// validation.
  1738  	default:
  1739  		return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type)
  1740  	}
  1741  
  1742  	if sc.Interval < minCheckInterval {
  1743  		return fmt.Errorf("interval (%v) can not be lower than %v", sc.Interval, minCheckInterval)
  1744  	}
  1745  
  1746  	switch sc.InitialStatus {
  1747  	case "":
  1748  	case api.HealthUnknown:
  1749  	case api.HealthPassing:
  1750  	case api.HealthWarning:
  1751  	case api.HealthCritical:
  1752  	default:
  1753  		return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q, %q or empty`, sc.InitialStatus, api.HealthUnknown, api.HealthPassing, api.HealthWarning, api.HealthCritical)
  1754  
  1755  	}
  1756  
  1757  	return nil
  1758  }
  1759  
  1760  // RequiresPort returns whether the service check requires the task has a port.
  1761  func (sc *ServiceCheck) RequiresPort() bool {
  1762  	switch sc.Type {
  1763  	case ServiceCheckHTTP, ServiceCheckTCP:
  1764  		return true
  1765  	default:
  1766  		return false
  1767  	}
  1768  }
  1769  
  1770  func (sc *ServiceCheck) Hash(serviceID string) string {
  1771  	h := sha1.New()
  1772  	io.WriteString(h, serviceID)
  1773  	io.WriteString(h, sc.Name)
  1774  	io.WriteString(h, sc.Type)
  1775  	io.WriteString(h, sc.Command)
  1776  	io.WriteString(h, strings.Join(sc.Args, ""))
  1777  	io.WriteString(h, sc.Path)
  1778  	io.WriteString(h, sc.Protocol)
  1779  	io.WriteString(h, sc.PortLabel)
  1780  	io.WriteString(h, sc.Interval.String())
  1781  	io.WriteString(h, sc.Timeout.String())
  1782  	return fmt.Sprintf("%x", h.Sum(nil))
  1783  }
  1784  
// Service represents a Consul service definition in Nomad
type Service struct {
	// Name of the service registered with Consul. Consul defaults the
	// Name to ServiceID if not specified.  The Name if specified is used
	// as one of the seed values when generating a Consul ServiceID.
	// Canonicalize interpolates job, task group and task names into it.
	Name string

	// PortLabel is either the numeric port number or the `host:port`.
	// To specify the port number using the host's Consul Advertise
	// address, specify an empty host in the PortLabel (e.g. `:port`).
	// Validate requires it to be set when any check requires a port.
	PortLabel string          `mapstructure:"port"`
	Tags      []string        // List of tags for the service
	Checks    []*ServiceCheck // List of checks associated with the service
}
  1799  
  1800  func (s *Service) Copy() *Service {
  1801  	if s == nil {
  1802  		return nil
  1803  	}
  1804  	ns := new(Service)
  1805  	*ns = *s
  1806  	ns.Tags = CopySliceString(ns.Tags)
  1807  
  1808  	if s.Checks != nil {
  1809  		checks := make([]*ServiceCheck, len(ns.Checks))
  1810  		for i, c := range ns.Checks {
  1811  			checks[i] = c.Copy()
  1812  		}
  1813  		ns.Checks = checks
  1814  	}
  1815  
  1816  	return ns
  1817  }
  1818  
  1819  // Canonicalize interpolates values of Job, Task Group and Task in the Service
  1820  // Name. This also generates check names, service id and check ids.
  1821  func (s *Service) Canonicalize(job string, taskGroup string, task string) {
  1822  	// Ensure empty lists are treated as null to avoid scheduler issues when
  1823  	// using DeepEquals
  1824  	if len(s.Tags) == 0 {
  1825  		s.Tags = nil
  1826  	}
  1827  	if len(s.Checks) == 0 {
  1828  		s.Checks = nil
  1829  	}
  1830  
  1831  	s.Name = args.ReplaceEnv(s.Name, map[string]string{
  1832  		"JOB":       job,
  1833  		"TASKGROUP": taskGroup,
  1834  		"TASK":      task,
  1835  		"BASE":      fmt.Sprintf("%s-%s-%s", job, taskGroup, task),
  1836  	},
  1837  	)
  1838  
  1839  	for _, check := range s.Checks {
  1840  		check.Canonicalize(s.Name)
  1841  	}
  1842  }
  1843  
  1844  // Validate checks if the Check definition is valid
  1845  func (s *Service) Validate() error {
  1846  	var mErr multierror.Error
  1847  
  1848  	// Ensure the service name is valid per RFC-952 §1
  1849  	// (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1
  1850  	// (https://tools.ietf.org/html/rfc1123), and RFC-2782
  1851  	// (https://tools.ietf.org/html/rfc2782).
  1852  	re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`)
  1853  	if !re.MatchString(s.Name) {
  1854  		mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be less than 63 characters long: %q", s.Name))
  1855  	}
  1856  
  1857  	for _, c := range s.Checks {
  1858  		if s.PortLabel == "" && c.RequiresPort() {
  1859  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but the service %+q has no port", c.Name, s.Name))
  1860  			continue
  1861  		}
  1862  
  1863  		if err := c.validate(); err != nil {
  1864  			mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err))
  1865  		}
  1866  	}
  1867  	return mErr.ErrorOrNil()
  1868  }
  1869  
  1870  // Hash calculates the hash of the check based on it's content and the service
  1871  // which owns it
  1872  func (s *Service) Hash() string {
  1873  	h := sha1.New()
  1874  	io.WriteString(h, s.Name)
  1875  	io.WriteString(h, strings.Join(s.Tags, ""))
  1876  	io.WriteString(h, s.PortLabel)
  1877  	return fmt.Sprintf("%x", h.Sum(nil))
  1878  }
  1879  
const (
	// DefaultKillTimeout is the default timeout between signaling a task it
	// will be killed and killing it. Task.Canonicalize applies it when a
	// task's KillTimeout is zero.
	DefaultKillTimeout = 5 * time.Second
)
  1885  
// LogConfig provides configuration for log rotation
type LogConfig struct {
	// MaxFiles is the number of log files; Validate requires at least 1.
	MaxFiles         int `mapstructure:"max_files"`
	// MaxFileSizeMB is the size of a log file in MB; Validate requires at
	// least 1. Task.Validate treats MaxFiles*MaxFileSizeMB as the task's log
	// storage requirement.
	MaxFileSizeMB    int `mapstructure:"max_file_size"`
	// LogShuttleConfig holds the log-shuttle delivery settings. It is left nil
	// by DefaultLogConfig.
	LogShuttleConfig *LogShuttleConfig
}
  1892  
// LogShuttleConfig configures log-shuttle log delivery
//
// NOTE(review): nothing in this part of the file reads these fields;
// presumably they mirror log-shuttle's own configuration options one to one —
// confirm against the log-shuttle documentation before relying on any of them.
type LogShuttleConfig struct {
	UseGzip       bool
	Drop          bool
	Prival        string
	Version       string
	Procid        string
	Appname       string
	LogplexToken  string
	Hostname      string
	Msgid         string
	LogsURL       string
	StatsSource   string
	StatsInterval time.Duration
	WaitDuration  time.Duration
	Timeout       time.Duration
	MaxAttempts   int
	NumOutlets    int
	BatchSize     int
	BackBuff      int
	MaxLineLength int
	KinesisShards int
}
  1916  
  1917  // DefaultLogConfig returns the default LogConfig values.
  1918  func DefaultLogConfig() *LogConfig {
  1919  	return &LogConfig{
  1920  		MaxFiles:      10,
  1921  		MaxFileSizeMB: 10,
  1922  	}
  1923  }
  1924  
  1925  // Validate returns an error if the log config specified are less than
  1926  // the minimum allowed.
  1927  func (l *LogConfig) Validate() error {
  1928  	var mErr multierror.Error
  1929  	if l.MaxFiles < 1 {
  1930  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
  1931  	}
  1932  	if l.MaxFileSizeMB < 1 {
  1933  		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
  1934  	}
  1935  	return mErr.ErrorOrNil()
  1936  }
  1937  
// Task is a single process typically that is executed as part of a task group.
type Task struct {
	// Name of the task. Validate requires it to be non-empty and free of
	// slashes and backslashes.
	Name string

	// Driver is used to control which driver is used. Validate requires it to
	// be non-empty.
	Driver string

	// User is used to determine which user will run the task. It defaults to
	// the same user the Nomad client is being run as.
	User string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// Only use explicitly set Env variables in task environment
	ExcludeNomadEnv bool

	// List of service definitions exposed by the Task
	Services []*Service

	// Vault is used to define the set of Vault policies that this task should
	// have access to.
	Vault *Vault

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Resources is the resources needed by this task. Validate requires it to
	// be set and rejects a DiskMB request here; disk is requested at the task
	// group level.
	Resources *Resources

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string

	// KillTimeout is the time between signaling a task that it will be
	// killed and killing it. Canonicalize replaces a zero value with
	// DefaultKillTimeout and Validate rejects negative values.
	KillTimeout time.Duration `mapstructure:"kill_timeout"`

	// LogConfig provides configuration for log rotation. Validate requires it
	// to be set.
	LogConfig *LogConfig `mapstructure:"logs"`

	// Artifacts is a list of artifacts to download and extract before running
	// the task.
	Artifacts []*TaskArtifact
}
  1988  
  1989  func (t *Task) Copy() *Task {
  1990  	if t == nil {
  1991  		return nil
  1992  	}
  1993  	nt := new(Task)
  1994  	*nt = *t
  1995  	nt.Env = CopyMapStringString(nt.Env)
  1996  
  1997  	if t.Services != nil {
  1998  		services := make([]*Service, len(nt.Services))
  1999  		for i, s := range nt.Services {
  2000  			services[i] = s.Copy()
  2001  		}
  2002  		nt.Services = services
  2003  	}
  2004  
  2005  	nt.Constraints = CopySliceConstraints(nt.Constraints)
  2006  
  2007  	nt.Vault = nt.Vault.Copy()
  2008  	nt.Resources = nt.Resources.Copy()
  2009  	nt.Meta = CopyMapStringString(nt.Meta)
  2010  
  2011  	if t.Artifacts != nil {
  2012  		artifacts := make([]*TaskArtifact, 0, len(t.Artifacts))
  2013  		for _, a := range nt.Artifacts {
  2014  			artifacts = append(artifacts, a.Copy())
  2015  		}
  2016  		nt.Artifacts = artifacts
  2017  	}
  2018  
  2019  	if i, err := copystructure.Copy(nt.Config); err != nil {
  2020  		nt.Config = i.(map[string]interface{})
  2021  	}
  2022  
  2023  	return nt
  2024  }
  2025  
  2026  // Canonicalize canonicalizes fields in the task.
  2027  func (t *Task) Canonicalize(job *Job, tg *TaskGroup) {
  2028  	// Ensure that an empty and nil map are treated the same to avoid scheduling
  2029  	// problems since we use reflect DeepEquals.
  2030  	if len(t.Meta) == 0 {
  2031  		t.Meta = nil
  2032  	}
  2033  	if len(t.Config) == 0 {
  2034  		t.Config = nil
  2035  	}
  2036  	if len(t.Env) == 0 {
  2037  		t.Env = nil
  2038  	}
  2039  
  2040  	for _, service := range t.Services {
  2041  		service.Canonicalize(job.Name, tg.Name, t.Name)
  2042  	}
  2043  
  2044  	if t.Resources != nil {
  2045  		t.Resources.Canonicalize()
  2046  	}
  2047  
  2048  	// Set the default timeout if it is not specified.
  2049  	if t.KillTimeout == 0 {
  2050  		t.KillTimeout = DefaultKillTimeout
  2051  	}
  2052  }
  2053  
  2054  func (t *Task) GoString() string {
  2055  	return fmt.Sprintf("*%#v", *t)
  2056  }
  2057  
  2058  func (t *Task) FindHostAndPortFor(portLabel string) (string, int) {
  2059  	for _, network := range t.Resources.Networks {
  2060  		if p, ok := network.MapLabelToValues(nil)[portLabel]; ok {
  2061  			return network.IP, p
  2062  		}
  2063  	}
  2064  	return "", 0
  2065  }
  2066  
  2067  // Validate is used to sanity check a task
  2068  func (t *Task) Validate(ephemeralDisk *EphemeralDisk) error {
  2069  	var mErr multierror.Error
  2070  	if t.Name == "" {
  2071  		mErr.Errors = append(mErr.Errors, errors.New("Missing task name"))
  2072  	}
  2073  	if strings.ContainsAny(t.Name, `/\`) {
  2074  		// We enforce this so that when creating the directory on disk it will
  2075  		// not have any slashes.
  2076  		mErr.Errors = append(mErr.Errors, errors.New("Task name can not include slashes"))
  2077  	}
  2078  	if t.Driver == "" {
  2079  		mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
  2080  	}
  2081  	if t.KillTimeout.Nanoseconds() < 0 {
  2082  		mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value"))
  2083  	}
  2084  
  2085  	// Validate the resources.
  2086  	if t.Resources == nil {
  2087  		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
  2088  	} else if err := t.Resources.MeetsMinResources(); err != nil {
  2089  		mErr.Errors = append(mErr.Errors, err)
  2090  	}
  2091  
  2092  	// Ensure the task isn't asking for disk resources
  2093  	if t.Resources != nil {
  2094  		if t.Resources.DiskMB > 0 {
  2095  			mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level."))
  2096  		}
  2097  	}
  2098  
  2099  	// Validate the log config
  2100  	if t.LogConfig == nil {
  2101  		mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config"))
  2102  	} else if err := t.LogConfig.Validate(); err != nil {
  2103  		mErr.Errors = append(mErr.Errors, err)
  2104  	}
  2105  
  2106  	for idx, constr := range t.Constraints {
  2107  		if err := constr.Validate(); err != nil {
  2108  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  2109  			mErr.Errors = append(mErr.Errors, outer)
  2110  		}
  2111  	}
  2112  
  2113  	// Validate Services
  2114  	if err := validateServices(t); err != nil {
  2115  		mErr.Errors = append(mErr.Errors, err)
  2116  	}
  2117  
  2118  	if t.LogConfig != nil && ephemeralDisk != nil {
  2119  		logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB)
  2120  		if ephemeralDisk.SizeMB <= logUsage {
  2121  			mErr.Errors = append(mErr.Errors,
  2122  				fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)",
  2123  					logUsage, ephemeralDisk.SizeMB))
  2124  		}
  2125  	}
  2126  
  2127  	for idx, artifact := range t.Artifacts {
  2128  		if err := artifact.Validate(); err != nil {
  2129  			outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err)
  2130  			mErr.Errors = append(mErr.Errors, outer)
  2131  		}
  2132  	}
  2133  
  2134  	if t.Vault != nil {
  2135  		if err := t.Vault.Validate(); err != nil {
  2136  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err))
  2137  		}
  2138  	}
  2139  
  2140  	return mErr.ErrorOrNil()
  2141  }
  2142  
  2143  // validateServices takes a task and validates the services within it are valid
  2144  // and reference ports that exist.
  2145  func validateServices(t *Task) error {
  2146  	var mErr multierror.Error
  2147  
  2148  	// Ensure that services don't ask for non-existent ports and their names are
  2149  	// unique.
  2150  	servicePorts := make(map[string][]string)
  2151  	knownServices := make(map[string]struct{})
  2152  	for i, service := range t.Services {
  2153  		if err := service.Validate(); err != nil {
  2154  			outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err)
  2155  			mErr.Errors = append(mErr.Errors, outer)
  2156  		}
  2157  		if _, ok := knownServices[service.Name]; ok {
  2158  			mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name))
  2159  		}
  2160  		knownServices[service.Name] = struct{}{}
  2161  
  2162  		if service.PortLabel != "" {
  2163  			servicePorts[service.PortLabel] = append(servicePorts[service.PortLabel], service.Name)
  2164  		}
  2165  
  2166  		// Ensure that check names are unique.
  2167  		knownChecks := make(map[string]struct{})
  2168  		for _, check := range service.Checks {
  2169  			if _, ok := knownChecks[check.Name]; ok {
  2170  				mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name))
  2171  			}
  2172  			knownChecks[check.Name] = struct{}{}
  2173  		}
  2174  	}
  2175  
  2176  	// Get the set of port labels.
  2177  	portLabels := make(map[string]struct{})
  2178  	if t.Resources != nil {
  2179  		for _, network := range t.Resources.Networks {
  2180  			ports := network.MapLabelToValues(nil)
  2181  			for portLabel, _ := range ports {
  2182  				portLabels[portLabel] = struct{}{}
  2183  			}
  2184  		}
  2185  	}
  2186  
  2187  	// Ensure all ports referenced in services exist.
  2188  	for servicePort, services := range servicePorts {
  2189  		_, ok := portLabels[servicePort]
  2190  		if !ok {
  2191  			joined := strings.Join(services, ", ")
  2192  			err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined)
  2193  			mErr.Errors = append(mErr.Errors, err)
  2194  		}
  2195  	}
  2196  	return mErr.ErrorOrNil()
  2197  }
  2198  
// Set of possible states for a task. TaskState.Failed and
// TaskState.Successful only ever return true for TaskStateDead.
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)
  2205  
// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// The current state of the task.
	State string

	// Series of task events that transition the state of the task. Failed and
	// Successful inspect only the last entry.
	Events []*TaskEvent
}
  2215  
  2216  func (ts *TaskState) Copy() *TaskState {
  2217  	if ts == nil {
  2218  		return nil
  2219  	}
  2220  	copy := new(TaskState)
  2221  	copy.State = ts.State
  2222  
  2223  	if ts.Events != nil {
  2224  		copy.Events = make([]*TaskEvent, len(ts.Events))
  2225  		for i, e := range ts.Events {
  2226  			copy.Events[i] = e.Copy()
  2227  		}
  2228  	}
  2229  	return copy
  2230  }
  2231  
  2232  // Failed returns true if the task has has failed.
  2233  func (ts *TaskState) Failed() bool {
  2234  	l := len(ts.Events)
  2235  	if ts.State != TaskStateDead || l == 0 {
  2236  		return false
  2237  	}
  2238  
  2239  	switch ts.Events[l-1].Type {
  2240  	case TaskDiskExceeded, TaskNotRestarting, TaskArtifactDownloadFailed, TaskFailedValidation:
  2241  		return true
  2242  	default:
  2243  		return false
  2244  	}
  2245  }
  2246  
  2247  // Successful returns whether a task finished successfully.
  2248  func (ts *TaskState) Successful() bool {
  2249  	l := len(ts.Events)
  2250  	if ts.State != TaskStateDead || l == 0 {
  2251  		return false
  2252  	}
  2253  
  2254  	e := ts.Events[l-1]
  2255  	if e.Type != TaskTerminated {
  2256  		return false
  2257  	}
  2258  
  2259  	return e.ExitCode == 0
  2260  }
  2261  
// Task event types, stored in TaskEvent.Type.
const (
	// TaskDriverFailure indicates that the task could not be started due to a
	// failure in the driver.
	TaskDriverFailure = "Driver Failure"

	// TaskReceived signals that the task has been pulled by the client at the
	// given timestamp.
	TaskReceived = "Received"

	// TaskFailedValidation indicates the task was invalid and as such was not
	// run. TaskState.Failed treats it as a failure.
	TaskFailedValidation = "Failed Validation"

	// TaskStarted signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// TaskTerminated indicates that the task was started and exited.
	// TaskState.Successful requires this to be the last event, with exit
	// code 0.
	TaskTerminated = "Terminated"

	// TaskKilling indicates a kill signal has been sent to the task.
	TaskKilling = "Killing"

	// TaskKilled indicates a user has killed the task.
	TaskKilled = "Killed"

	// TaskRestarting indicates that task terminated and is being restarted.
	TaskRestarting = "Restarting"

	// TaskNotRestarting indicates that the task has failed and is not being
	// restarted because it has exceeded its restart policy. TaskState.Failed
	// treats it as a failure.
	TaskNotRestarting = "Not Restarting"

	// TaskDownloadingArtifacts means the task is downloading the artifacts
	// specified in the task.
	TaskDownloadingArtifacts = "Downloading Artifacts"

	// TaskArtifactDownloadFailed indicates that downloading the artifacts
	// failed. TaskState.Failed treats it as a failure.
	TaskArtifactDownloadFailed = "Failed Artifact Download"

	// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
	// exceeded the requested disk resources. TaskState.Failed treats it as a
	// failure.
	TaskDiskExceeded = "Disk Resources Exceeded"

	// TaskSiblingFailed indicates that a sibling task in the task group has
	// failed.
	TaskSiblingFailed = "Sibling task failed"
)
  2311  
// TaskEvent is an event that affects the state of a task and contains
// meta-data appropriate to the event's type. The Set* methods each fill one
// field and return the event so they can be chained.
type TaskEvent struct {
	Type string // One of the Task* event type constants.
	Time int64  // Unix Nanosecond timestamp

	// Restart fields.
	RestartReason string

	// Driver Failure fields.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.
	ExitCode int    // The exit code of the task.
	Signal   int    // The signal that terminated the task.
	Message  string // A possible message explaining the termination of the task.

	// Killing fields
	KillTimeout time.Duration

	// Task Killed Fields.
	KillError string // Error killing the task.

	// TaskRestarting fields.
	StartDelay int64 // The sleep period before restarting the task, in nanoseconds.

	// Artifact Download fields
	DownloadError string // Error downloading artifacts

	// Validation fields
	ValidationError string // Validation error

	// The maximum allowed task disk size.
	DiskLimit int64

	// The recorded task disk size.
	DiskSize int64

	// Name of the sibling task that caused termination of the task that
	// the TaskEvent refers to.
	FailedSibling string
}
  2354  
  2355  func (te *TaskEvent) GoString() string {
  2356  	return fmt.Sprintf("%v at %v", te.Type, te.Time)
  2357  }
  2358  
  2359  func (te *TaskEvent) Copy() *TaskEvent {
  2360  	if te == nil {
  2361  		return nil
  2362  	}
  2363  	copy := new(TaskEvent)
  2364  	*copy = *te
  2365  	return copy
  2366  }
  2367  
  2368  func NewTaskEvent(event string) *TaskEvent {
  2369  	return &TaskEvent{
  2370  		Type: event,
  2371  		Time: time.Now().UnixNano(),
  2372  	}
  2373  }
  2374  
  2375  func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
  2376  	if err != nil {
  2377  		e.DriverError = err.Error()
  2378  	}
  2379  	return e
  2380  }
  2381  
// SetExitCode records the task's exit code on the event and returns the event
// so calls can be chained.
func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
	e.ExitCode = c
	return e
}
  2386  
// SetSignal records the signal number on the event and returns the event so
// calls can be chained.
func (e *TaskEvent) SetSignal(s int) *TaskEvent {
	e.Signal = s
	return e
}
  2391  
  2392  func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
  2393  	if err != nil {
  2394  		e.Message = err.Error()
  2395  	}
  2396  	return e
  2397  }
  2398  
  2399  func (e *TaskEvent) SetKillError(err error) *TaskEvent {
  2400  	if err != nil {
  2401  		e.KillError = err.Error()
  2402  	}
  2403  	return e
  2404  }
  2405  
  2406  func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent {
  2407  	e.StartDelay = int64(delay)
  2408  	return e
  2409  }
  2410  
// SetRestartReason records the restart reason on the event and returns the
// event so calls can be chained.
func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent {
	e.RestartReason = reason
	return e
}
  2415  
  2416  func (e *TaskEvent) SetDownloadError(err error) *TaskEvent {
  2417  	if err != nil {
  2418  		e.DownloadError = err.Error()
  2419  	}
  2420  	return e
  2421  }
  2422  
  2423  func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
  2424  	if err != nil {
  2425  		e.ValidationError = err.Error()
  2426  	}
  2427  	return e
  2428  }
  2429  
// SetKillTimeout records the kill timeout on the event and returns the event
// so calls can be chained.
func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent {
	e.KillTimeout = timeout
	return e
}
  2434  
// SetDiskLimit records the maximum allowed task disk size on the event and
// returns the event so calls can be chained.
func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent {
	e.DiskLimit = limit
	return e
}
  2439  
// SetDiskSize records the measured task disk size on the event and returns the
// event so calls can be chained.
func (e *TaskEvent) SetDiskSize(size int64) *TaskEvent {
	e.DiskSize = size
	return e
}
  2444  
// SetFailedSibling records the name of the failed sibling task on the event
// and returns the event so calls can be chained.
func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent {
	e.FailedSibling = sibling
	return e
}
  2449  
// TaskArtifact is an artifact to download before running the task.
type TaskArtifact struct {
	// GetterSource is the source to download an artifact using go-getter.
	// Validate requires it to be non-empty.
	GetterSource string `mapstructure:"source"`

	// GetterOptions are options to use when downloading the artifact using
	// go-getter. Validate inspects the "checksum" option, which must have the
	// form "type:value".
	GetterOptions map[string]string `mapstructure:"options"`

	// RelativeDest is the download destination given relative to the task's
	// directory. Validate rejects destinations that escape that directory.
	RelativeDest string `mapstructure:"destination"`
}
  2463  
  2464  func (ta *TaskArtifact) Copy() *TaskArtifact {
  2465  	if ta == nil {
  2466  		return nil
  2467  	}
  2468  	nta := new(TaskArtifact)
  2469  	*nta = *ta
  2470  	nta.GetterOptions = CopyMapStringString(ta.GetterOptions)
  2471  	return nta
  2472  }
  2473  
// GoString renders the artifact by applying fmt's %+v verb to the pointer
// receiver, so the output includes field names.
func (ta *TaskArtifact) GoString() string {
	return fmt.Sprintf("%+v", ta)
}
  2477  
  2478  func (ta *TaskArtifact) Validate() error {
  2479  	// Verify the source
  2480  	var mErr multierror.Error
  2481  	if ta.GetterSource == "" {
  2482  		mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified"))
  2483  	}
  2484  
  2485  	// Verify the destination doesn't escape the tasks directory
  2486  	alloc, err := filepath.Abs(filepath.Join("/", "foo/", "bar/"))
  2487  	if err != nil {
  2488  		mErr.Errors = append(mErr.Errors, err)
  2489  		return mErr.ErrorOrNil()
  2490  	}
  2491  	abs, err := filepath.Abs(filepath.Join(alloc, ta.RelativeDest))
  2492  	if err != nil {
  2493  		mErr.Errors = append(mErr.Errors, err)
  2494  		return mErr.ErrorOrNil()
  2495  	}
  2496  	rel, err := filepath.Rel(alloc, abs)
  2497  	if err != nil {
  2498  		mErr.Errors = append(mErr.Errors, err)
  2499  		return mErr.ErrorOrNil()
  2500  	}
  2501  	if strings.HasPrefix(rel, "..") {
  2502  		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes task's directory"))
  2503  	}
  2504  
  2505  	// Verify the checksum
  2506  	if check, ok := ta.GetterOptions["checksum"]; ok {
  2507  		check = strings.TrimSpace(check)
  2508  		if check == "" {
  2509  			mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value can not be empty"))
  2510  			return mErr.ErrorOrNil()
  2511  		}
  2512  
  2513  		parts := strings.Split(check, ":")
  2514  		if l := len(parts); l != 2 {
  2515  			mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check))
  2516  			return mErr.ErrorOrNil()
  2517  		}
  2518  
  2519  		checksumVal := parts[1]
  2520  		checksumBytes, err := hex.DecodeString(checksumVal)
  2521  		if err != nil {
  2522  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err))
  2523  			return mErr.ErrorOrNil()
  2524  		}
  2525  
  2526  		checksumType := parts[0]
  2527  		expectedLength := 0
  2528  		switch checksumType {
  2529  		case "md5":
  2530  			expectedLength = md5.Size
  2531  		case "sha1":
  2532  			expectedLength = sha1.Size
  2533  		case "sha256":
  2534  			expectedLength = sha256.Size
  2535  		case "sha512":
  2536  			expectedLength = sha512.Size
  2537  		default:
  2538  			mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType))
  2539  			return mErr.ErrorOrNil()
  2540  		}
  2541  
  2542  		if len(checksumBytes) != expectedLength {
  2543  			mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal))
  2544  			return mErr.ErrorOrNil()
  2545  		}
  2546  	}
  2547  
  2548  	return mErr.ErrorOrNil()
  2549  }
  2550  
// Constraint operands with special handling. Constraint.Validate checks that
// the right-hand target compiles as a regular expression for ConstraintRegex
// and parses as a version constraint for ConstraintVersion.
const (
	ConstraintDistinctHosts = "distinct_hosts"
	ConstraintRegex         = "regexp"
	ConstraintVersion       = "version"
)
  2556  
// Constraint is used to restrict placement options.
type Constraint struct {
	LTarget string // Left-hand target
	RTarget string // Right-hand target
	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
	str     string // Memoized string; set on the first String call and not refreshed afterwards
}
  2564  
  2565  func (c *Constraint) Copy() *Constraint {
  2566  	if c == nil {
  2567  		return nil
  2568  	}
  2569  	nc := new(Constraint)
  2570  	*nc = *c
  2571  	return nc
  2572  }
  2573  
  2574  func (c *Constraint) String() string {
  2575  	if c.str != "" {
  2576  		return c.str
  2577  	}
  2578  	c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
  2579  	return c.str
  2580  }
  2581  
  2582  func (c *Constraint) Validate() error {
  2583  	var mErr multierror.Error
  2584  	if c.Operand == "" {
  2585  		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
  2586  	}
  2587  
  2588  	// Perform additional validation based on operand
  2589  	switch c.Operand {
  2590  	case ConstraintRegex:
  2591  		if _, err := regexp.Compile(c.RTarget); err != nil {
  2592  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
  2593  		}
  2594  	case ConstraintVersion:
  2595  		if _, err := version.NewConstraint(c.RTarget); err != nil {
  2596  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
  2597  		}
  2598  	}
  2599  	return mErr.ErrorOrNil()
  2600  }
  2601  
// EphemeralDisk is an ephemeral disk object
type EphemeralDisk struct {
	// Sticky indicates whether the allocation is sticky to a node
	Sticky bool

	// SizeMB is the size of the local disk. Validate requires at least 10 and
	// DefaultEphemeralDisk sets 300.
	SizeMB int `mapstructure:"size"`
}
  2610  
  2611  // DefaultEphemeralDisk returns a EphemeralDisk with default configurations
  2612  func DefaultEphemeralDisk() *EphemeralDisk {
  2613  	return &EphemeralDisk{
  2614  		SizeMB: 300,
  2615  	}
  2616  }
  2617  
  2618  // Validate validates EphemeralDisk
  2619  func (d *EphemeralDisk) Validate() error {
  2620  	if d.SizeMB < 10 {
  2621  		return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB)
  2622  	}
  2623  	return nil
  2624  }
  2625  
  2626  // Copy copies the EphemeralDisk struct and returns a new one
  2627  func (d *EphemeralDisk) Copy() *EphemeralDisk {
  2628  	ld := new(EphemeralDisk)
  2629  	*ld = *d
  2630  	return ld
  2631  }
  2632  
// Vault stores the set of permissions a task needs access to from Vault.
type Vault struct {
	// Policies is the set of policies that the task needs access to. Validate
	// rejects an empty list.
	Policies []string
}
  2638  
  2639  // Copy returns a copy of this Vault block.
  2640  func (v *Vault) Copy() *Vault {
  2641  	if v == nil {
  2642  		return nil
  2643  	}
  2644  
  2645  	nv := new(Vault)
  2646  	*nv = *v
  2647  	return nv
  2648  }
  2649  
  2650  // Validate returns if the Vault block is valid.
  2651  func (v *Vault) Validate() error {
  2652  	if v == nil {
  2653  		return nil
  2654  	}
  2655  
  2656  	if len(v.Policies) == 0 {
  2657  		return fmt.Errorf("Policy list can not be empty")
  2658  	}
  2659  
  2660  	return nil
  2661  }
  2662  
// Desired statuses of an allocation, stored in Allocation.DesiredStatus.
// Allocation.TerminalStatus treats stop and evict as terminal.
const (
	AllocDesiredStatusRun   = "run"   // Allocation should run
	AllocDesiredStatusStop  = "stop"  // Allocation should stop
	AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted
)
  2668  
// Client statuses of an allocation, stored in Allocation.ClientStatus.
// Allocation.Terminated and Allocation.TerminalStatus treat complete, failed
// and lost as terminal.
const (
	AllocClientStatusPending  = "pending"
	AllocClientStatusRunning  = "running"
	AllocClientStatusComplete = "complete"
	AllocClientStatusFailed   = "failed"
	AllocClientStatusLost     = "lost"
)
  2676  
// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// ID of the allocation (UUID)
	ID string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// Job is the parent job of the task group being allocated.
	// This is copied at allocation time to avoid issues if the job
	// definition is updated.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// Resources is the total set of resources allocated as part
	// of this allocation of the task group.
	Resources *Resources

	// SharedResources are the resources that are shared by all the tasks in an
	// allocation
	SharedResources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources.
	TaskResources map[string]*Resources

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// DesiredStatus is the desired status of the allocation on the client
	// (one of the AllocDesiredStatus* constants).
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	// about the desired status.
	DesiredDescription string

	// ClientStatus is the status of the allocation on the client (one of the
	// AllocClientStatus* constants).
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	// about the client status.
	ClientDescription string

	// TaskStates stores the state of each task, keyed by a string that is
	// presumably the task name — confirm against the code that populates it.
	TaskStates map[string]*TaskState

	// PreviousAllocation is the allocation that this allocation is replacing
	PreviousAllocation string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64

	// AllocModifyIndex is not updated when the client updates allocations. This
	// lets the client pull only the allocs updated by the server.
	AllocModifyIndex uint64

	// CreateTime is the time the allocation has finished scheduling and been
	// verified by the plan applier.
	CreateTime int64
}
  2745  
  2746  func (a *Allocation) Copy() *Allocation {
  2747  	if a == nil {
  2748  		return nil
  2749  	}
  2750  	na := new(Allocation)
  2751  	*na = *a
  2752  
  2753  	na.Job = na.Job.Copy()
  2754  	na.Resources = na.Resources.Copy()
  2755  	na.SharedResources = na.SharedResources.Copy()
  2756  
  2757  	if a.TaskResources != nil {
  2758  		tr := make(map[string]*Resources, len(na.TaskResources))
  2759  		for task, resource := range na.TaskResources {
  2760  			tr[task] = resource.Copy()
  2761  		}
  2762  		na.TaskResources = tr
  2763  	}
  2764  
  2765  	na.Metrics = na.Metrics.Copy()
  2766  
  2767  	if a.TaskStates != nil {
  2768  		ts := make(map[string]*TaskState, len(na.TaskStates))
  2769  		for task, state := range na.TaskStates {
  2770  			ts[task] = state.Copy()
  2771  		}
  2772  		na.TaskStates = ts
  2773  	}
  2774  	return na
  2775  }
  2776  
  2777  // TerminalStatus returns if the desired or actual status is terminal and
  2778  // will no longer transition.
  2779  func (a *Allocation) TerminalStatus() bool {
  2780  	// First check the desired state and if that isn't terminal, check client
  2781  	// state.
  2782  	switch a.DesiredStatus {
  2783  	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
  2784  		return true
  2785  	default:
  2786  	}
  2787  
  2788  	switch a.ClientStatus {
  2789  	case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
  2790  		return true
  2791  	default:
  2792  		return false
  2793  	}
  2794  }
  2795  
  2796  // Terminated returns if the allocation is in a terminal state on a client.
  2797  func (a *Allocation) Terminated() bool {
  2798  	if a.ClientStatus == AllocClientStatusFailed ||
  2799  		a.ClientStatus == AllocClientStatusComplete ||
  2800  		a.ClientStatus == AllocClientStatusLost {
  2801  		return true
  2802  	}
  2803  	return false
  2804  }
  2805  
  2806  // RanSuccessfully returns whether the client has ran the allocation and all
  2807  // tasks finished successfully
  2808  func (a *Allocation) RanSuccessfully() bool {
  2809  	// Handle the case the client hasn't started the allocation.
  2810  	if len(a.TaskStates) == 0 {
  2811  		return false
  2812  	}
  2813  
  2814  	// Check to see if all the tasks finised successfully in the allocation
  2815  	allSuccess := true
  2816  	for _, state := range a.TaskStates {
  2817  		allSuccess = allSuccess && state.Successful()
  2818  	}
  2819  
  2820  	return allSuccess
  2821  }
  2822  
  2823  // Stub returns a list stub for the allocation
  2824  func (a *Allocation) Stub() *AllocListStub {
  2825  	return &AllocListStub{
  2826  		ID:                 a.ID,
  2827  		EvalID:             a.EvalID,
  2828  		Name:               a.Name,
  2829  		NodeID:             a.NodeID,
  2830  		JobID:              a.JobID,
  2831  		TaskGroup:          a.TaskGroup,
  2832  		DesiredStatus:      a.DesiredStatus,
  2833  		DesiredDescription: a.DesiredDescription,
  2834  		ClientStatus:       a.ClientStatus,
  2835  		ClientDescription:  a.ClientDescription,
  2836  		TaskStates:         a.TaskStates,
  2837  		CreateIndex:        a.CreateIndex,
  2838  		ModifyIndex:        a.ModifyIndex,
  2839  		CreateTime:         a.CreateTime,
  2840  	}
  2841  }
  2842  
  2843  var (
  2844  	// AllocationIndexRegex is a regular expression to find the allocation index.
  2845  	AllocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$")
  2846  )
  2847  
  2848  // Index returns the index of the allocation. If the allocation is from a task
  2849  // group with count greater than 1, there will be multiple allocations for it.
  2850  func (a *Allocation) Index() int {
  2851  	matches := AllocationIndexRegex.FindStringSubmatch(a.Name)
  2852  	if len(matches) != 2 {
  2853  		return -1
  2854  	}
  2855  
  2856  	index, err := strconv.Atoi(matches[1])
  2857  	if err != nil {
  2858  		return -1
  2859  	}
  2860  
  2861  	return index
  2862  }
  2863  
// AllocListStub is used to return a subset of alloc information.
//
// Allocation.Stub fills every field from the Allocation field of the same
// name. TaskStates is assigned directly rather than copied, so a stub produced
// by Stub shares that map with the allocation it came from.
type AllocListStub struct {
	ID                 string
	EvalID             string
	Name               string
	NodeID             string
	JobID              string
	TaskGroup          string
	DesiredStatus      string
	DesiredDescription string
	ClientStatus       string
	ClientDescription  string
	TaskStates         map[string]*TaskState
	CreateIndex        uint64
	ModifyIndex        uint64
	CreateTime         int64
}
  2881  
// AllocMetric is used to track various metrics while attempting
// to make an allocation. These are used to debug a job, or to better
// understand the pressure within the system.
//
// The counters and maps are maintained by the EvaluateNode, FilterNode,
// ExhaustedNode and ScoreNode methods, which create the maps on first use.
type AllocMetric struct {
	// NodesEvaluated is the number of nodes that were evaluated.
	// Incremented by EvaluateNode.
	NodesEvaluated int

	// NodesFiltered is the number of nodes filtered due to a constraint.
	// Incremented by FilterNode.
	NodesFiltered int

	// NodesAvailable is the number of nodes available for evaluation per DC.
	NodesAvailable map[string]int

	// ClassFiltered is the number of nodes filtered by class. FilterNode
	// keys it by the node's NodeClass and skips nodes with an empty class.
	ClassFiltered map[string]int

	// ConstraintFiltered is the number of failures caused by constraint.
	// FilterNode keys it by the constraint string it is given.
	ConstraintFiltered map[string]int

	// NodesExhausted is the number of nodes skipped due to being
	// exhausted of at least one resource. Incremented by ExhaustedNode.
	NodesExhausted int

	// ClassExhausted is the number of nodes exhausted by class. ExhaustedNode
	// keys it by the node's NodeClass and skips nodes with an empty class.
	ClassExhausted map[string]int

	// DimensionExhausted provides the count by dimension or reason.
	// ExhaustedNode keys it by the dimension string it is given.
	DimensionExhausted map[string]int

	// Scores is the scores of the final few nodes remaining
	// for placement. The top score is typically selected.
	// ScoreNode writes entries under the key "<node ID>.<name>".
	Scores map[string]float64

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}
  2925  
  2926  func (a *AllocMetric) Copy() *AllocMetric {
  2927  	if a == nil {
  2928  		return nil
  2929  	}
  2930  	na := new(AllocMetric)
  2931  	*na = *a
  2932  	na.NodesAvailable = CopyMapStringInt(na.NodesAvailable)
  2933  	na.ClassFiltered = CopyMapStringInt(na.ClassFiltered)
  2934  	na.ConstraintFiltered = CopyMapStringInt(na.ConstraintFiltered)
  2935  	na.ClassExhausted = CopyMapStringInt(na.ClassExhausted)
  2936  	na.DimensionExhausted = CopyMapStringInt(na.DimensionExhausted)
  2937  	na.Scores = CopyMapStringFloat64(na.Scores)
  2938  	return na
  2939  }
  2940  
  2941  func (a *AllocMetric) EvaluateNode() {
  2942  	a.NodesEvaluated += 1
  2943  }
  2944  
  2945  func (a *AllocMetric) FilterNode(node *Node, constraint string) {
  2946  	a.NodesFiltered += 1
  2947  	if node != nil && node.NodeClass != "" {
  2948  		if a.ClassFiltered == nil {
  2949  			a.ClassFiltered = make(map[string]int)
  2950  		}
  2951  		a.ClassFiltered[node.NodeClass] += 1
  2952  	}
  2953  	if constraint != "" {
  2954  		if a.ConstraintFiltered == nil {
  2955  			a.ConstraintFiltered = make(map[string]int)
  2956  		}
  2957  		a.ConstraintFiltered[constraint] += 1
  2958  	}
  2959  }
  2960  
  2961  func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
  2962  	a.NodesExhausted += 1
  2963  	if node != nil && node.NodeClass != "" {
  2964  		if a.ClassExhausted == nil {
  2965  			a.ClassExhausted = make(map[string]int)
  2966  		}
  2967  		a.ClassExhausted[node.NodeClass] += 1
  2968  	}
  2969  	if dimension != "" {
  2970  		if a.DimensionExhausted == nil {
  2971  			a.DimensionExhausted = make(map[string]int)
  2972  		}
  2973  		a.DimensionExhausted[dimension] += 1
  2974  	}
  2975  }
  2976  
  2977  func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
  2978  	if a.Scores == nil {
  2979  		a.Scores = make(map[string]float64)
  2980  	}
  2981  	key := fmt.Sprintf("%s.%s", node.ID, name)
  2982  	a.Scores[key] = score
  2983  }
  2984  
// Evaluation status values, as stored in Evaluation.Status.
//
// Evaluation.TerminalStatus treats complete, failed and canceled as terminal.
// Evaluation.ShouldEnqueue returns true only for pending, and
// Evaluation.ShouldBlock returns true only for blocked.
//
// Note that the identifier EvalStatusCancelled is spelled with two "l"s while
// its string value "canceled" has one.
const (
	EvalStatusBlocked   = "blocked"
	EvalStatusPending   = "pending"
	EvalStatusComplete  = "complete"
	EvalStatusFailed    = "failed"
	EvalStatusCancelled = "canceled"
)
  2992  
// Evaluation trigger values, as stored in Evaluation.TriggeredBy to give some
// insight into why an evaluation was created. Of these, only
// EvalTriggerRollingUpdate is assigned within this part of the file, by
// Evaluation.NextRollingEval.
const (
	EvalTriggerJobRegister   = "job-register"
	EvalTriggerJobDeregister = "job-deregister"
	EvalTriggerPeriodicJob   = "periodic-job"
	EvalTriggerNodeUpdate    = "node-update"
	EvalTriggerScheduled     = "scheduled"
	EvalTriggerRollingUpdate = "rolling-update"
	EvalTriggerMaxPlans      = "max-plan-attempts"
)
  3002  
// Names of the garbage-collection core jobs.
// NOTE(review): presumably these are matched against a job ID by the core
// scheduler — confirm against the callers, which are outside this file section.
const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
	// periodically scan garbage collectible jobs and check if both their
	// evaluations and allocations are terminal. If so, we delete these out of
	// the system.
	CoreJobJobGC = "job-gc"

	// CoreJobForceGC is used to force garbage collection of all GCable objects.
	CoreJobForceGC = "force-gc"
)
  3024  
// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	// The EvalTrigger* constants hold the values defined in this file.
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created.
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created.
	NodeModifyIndex uint64

	// Status of the evaluation. ShouldEnqueue and ShouldBlock panic on any
	// value other than the EvalStatus* constants.
	Status string

	// StatusDescription is meant to provide more human useful information.
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade.
	Wait time.Duration

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	// NextRollingEval and CreateBlockedEval both set it to the ID of the
	// evaluation they were called on.
	PreviousEval string

	// BlockedEval is the evaluation ID for a created blocked eval. A
	// blocked eval will be created if all allocations could not be placed due
	// to constraints or lacking resources.
	BlockedEval string

	// FailedTGAllocs are task groups which have allocations that could not be
	// made, but the metrics are persisted so that the user can use the feedback
	// to determine the cause.
	FailedTGAllocs map[string]*AllocMetric

	// ClassEligibility tracks computed node classes that have been explicitly
	// marked as eligible or ineligible.
	ClassEligibility map[string]bool

	// EscapedComputedClass marks whether the job has constraints that are not
	// captured by computed node classes.
	EscapedComputedClass bool

	// AnnotatePlan triggers the scheduler to provide additional annotations
	// during the evaluation. This should not be set during normal operations.
	AnnotatePlan bool

	// SnapshotIndex is the Raft index of the snapshot used to process the
	// evaluation. As such it will only be set once it has gone through the
	// scheduler.
	SnapshotIndex uint64

	// QueuedAllocations is the number of unplaced allocations at the time the
	// evaluation was processed. The map is keyed by Task Group names.
	QueuedAllocations map[string]int

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  3115  
  3116  // TerminalStatus returns if the current status is terminal and
  3117  // will no longer transition.
  3118  func (e *Evaluation) TerminalStatus() bool {
  3119  	switch e.Status {
  3120  	case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled:
  3121  		return true
  3122  	default:
  3123  		return false
  3124  	}
  3125  }
  3126  
  3127  func (e *Evaluation) GoString() string {
  3128  	return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID)
  3129  }
  3130  
  3131  func (e *Evaluation) Copy() *Evaluation {
  3132  	if e == nil {
  3133  		return nil
  3134  	}
  3135  	ne := new(Evaluation)
  3136  	*ne = *e
  3137  
  3138  	// Copy ClassEligibility
  3139  	if e.ClassEligibility != nil {
  3140  		classes := make(map[string]bool, len(e.ClassEligibility))
  3141  		for class, elig := range e.ClassEligibility {
  3142  			classes[class] = elig
  3143  		}
  3144  		ne.ClassEligibility = classes
  3145  	}
  3146  
  3147  	// Copy FailedTGAllocs
  3148  	if e.FailedTGAllocs != nil {
  3149  		failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs))
  3150  		for tg, metric := range e.FailedTGAllocs {
  3151  			failedTGs[tg] = metric.Copy()
  3152  		}
  3153  		ne.FailedTGAllocs = failedTGs
  3154  	}
  3155  
  3156  	// Copy queued allocations
  3157  	if e.QueuedAllocations != nil {
  3158  		queuedAllocations := make(map[string]int, len(e.QueuedAllocations))
  3159  		for tg, num := range e.QueuedAllocations {
  3160  			queuedAllocations[tg] = num
  3161  		}
  3162  		ne.QueuedAllocations = queuedAllocations
  3163  	}
  3164  
  3165  	return ne
  3166  }
  3167  
  3168  // ShouldEnqueue checks if a given evaluation should be enqueued into the
  3169  // eval_broker
  3170  func (e *Evaluation) ShouldEnqueue() bool {
  3171  	switch e.Status {
  3172  	case EvalStatusPending:
  3173  		return true
  3174  	case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled:
  3175  		return false
  3176  	default:
  3177  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  3178  	}
  3179  }
  3180  
  3181  // ShouldBlock checks if a given evaluation should be entered into the blocked
  3182  // eval tracker.
  3183  func (e *Evaluation) ShouldBlock() bool {
  3184  	switch e.Status {
  3185  	case EvalStatusBlocked:
  3186  		return true
  3187  	case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled:
  3188  		return false
  3189  	default:
  3190  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  3191  	}
  3192  }
  3193  
  3194  // MakePlan is used to make a plan from the given evaluation
  3195  // for a given Job
  3196  func (e *Evaluation) MakePlan(j *Job) *Plan {
  3197  	p := &Plan{
  3198  		EvalID:         e.ID,
  3199  		Priority:       e.Priority,
  3200  		Job:            j,
  3201  		NodeUpdate:     make(map[string][]*Allocation),
  3202  		NodeAllocation: make(map[string][]*Allocation),
  3203  	}
  3204  	if j != nil {
  3205  		p.AllAtOnce = j.AllAtOnce
  3206  	}
  3207  	return p
  3208  }
  3209  
  3210  // NextRollingEval creates an evaluation to followup this eval for rolling updates
  3211  func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
  3212  	return &Evaluation{
  3213  		ID:             GenerateUUID(),
  3214  		Priority:       e.Priority,
  3215  		Type:           e.Type,
  3216  		TriggeredBy:    EvalTriggerRollingUpdate,
  3217  		JobID:          e.JobID,
  3218  		JobModifyIndex: e.JobModifyIndex,
  3219  		Status:         EvalStatusPending,
  3220  		Wait:           wait,
  3221  		PreviousEval:   e.ID,
  3222  	}
  3223  }
  3224  
  3225  // CreateBlockedEval creates a blocked evaluation to followup this eval to place any
  3226  // failed allocations. It takes the classes marked explicitly eligible or
  3227  // ineligible and whether the job has escaped computed node classes.
  3228  func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation {
  3229  	return &Evaluation{
  3230  		ID:                   GenerateUUID(),
  3231  		Priority:             e.Priority,
  3232  		Type:                 e.Type,
  3233  		TriggeredBy:          e.TriggeredBy,
  3234  		JobID:                e.JobID,
  3235  		JobModifyIndex:       e.JobModifyIndex,
  3236  		Status:               EvalStatusBlocked,
  3237  		PreviousEval:         e.ID,
  3238  		ClassEligibility:     classEligibility,
  3239  		EscapedComputedClass: escaped,
  3240  	}
  3241  }
  3242  
// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// EvalID is the evaluation ID this plan is associated with.
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job.
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// Job is the parent job of all the allocations in the Plan.
	// Since a Plan only involves a single Job, we can reduce the size
	// of the plan by only including it once. AppendUpdate fills it in from
	// the allocation when it is still nil.
	Job *Job

	// NodeUpdate contains all the allocations for each node. For each node,
	// this is a list of the allocations to update to either stop or evict.
	// Keyed by node ID; entries are added by AppendUpdate and removed by
	// PopUpdate.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	// Keyed by node ID; entries are added by AppendAlloc.
	NodeAllocation map[string][]*Allocation

	// Annotations contains annotations by the scheduler to be used by operators
	// to understand the decisions made by the scheduler.
	Annotations *PlanAnnotations
}
  3283  
  3284  // AppendUpdate marks the allocation for eviction. The clientStatus of the
  3285  // allocation may be optionally set by passing in a non-empty value.
  3286  func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) {
  3287  	newAlloc := new(Allocation)
  3288  	*newAlloc = *alloc
  3289  
  3290  	// If the job is not set in the plan we are deregistering a job so we
  3291  	// extract the job from the allocation.
  3292  	if p.Job == nil && newAlloc.Job != nil {
  3293  		p.Job = newAlloc.Job
  3294  	}
  3295  
  3296  	// Normalize the job
  3297  	newAlloc.Job = nil
  3298  
  3299  	// Strip the resources as it can be rebuilt.
  3300  	newAlloc.Resources = nil
  3301  
  3302  	newAlloc.DesiredStatus = desiredStatus
  3303  	newAlloc.DesiredDescription = desiredDesc
  3304  
  3305  	if clientStatus != "" {
  3306  		newAlloc.ClientStatus = clientStatus
  3307  	}
  3308  
  3309  	node := alloc.NodeID
  3310  	existing := p.NodeUpdate[node]
  3311  	p.NodeUpdate[node] = append(existing, newAlloc)
  3312  }
  3313  
  3314  func (p *Plan) PopUpdate(alloc *Allocation) {
  3315  	existing := p.NodeUpdate[alloc.NodeID]
  3316  	n := len(existing)
  3317  	if n > 0 && existing[n-1].ID == alloc.ID {
  3318  		existing = existing[:n-1]
  3319  		if len(existing) > 0 {
  3320  			p.NodeUpdate[alloc.NodeID] = existing
  3321  		} else {
  3322  			delete(p.NodeUpdate, alloc.NodeID)
  3323  		}
  3324  	}
  3325  }
  3326  
  3327  func (p *Plan) AppendAlloc(alloc *Allocation) {
  3328  	node := alloc.NodeID
  3329  	existing := p.NodeAllocation[node]
  3330  	p.NodeAllocation[node] = append(existing, alloc)
  3331  }
  3332  
  3333  // IsNoOp checks if this plan would do nothing
  3334  func (p *Plan) IsNoOp() bool {
  3335  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0
  3336  }
  3337  
// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	// FullCommit compares it against Plan.NodeAllocation using the same keys.
	NodeAllocation map[string][]*Allocation

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// over committed) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}
  3356  
  3357  // IsNoOp checks if this plan result would do nothing
  3358  func (p *PlanResult) IsNoOp() bool {
  3359  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0
  3360  }
  3361  
  3362  // FullCommit is used to check if all the allocations in a plan
  3363  // were committed as part of the result. Returns if there was
  3364  // a match, and the number of expected and actual allocations.
  3365  func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
  3366  	expected := 0
  3367  	actual := 0
  3368  	for name, allocList := range plan.NodeAllocation {
  3369  		didAlloc, _ := p.NodeAllocation[name]
  3370  		expected += len(allocList)
  3371  		actual += len(didAlloc)
  3372  	}
  3373  	return actual == expected, expected, actual
  3374  }
  3375  
// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators. It is attached to a plan through Plan.Annotations.
type PlanAnnotations struct {
	// DesiredTGUpdates is the set of desired updates per task group.
	// NOTE(review): presumably keyed by task group name — confirm against
	// the scheduler code that fills it in.
	DesiredTGUpdates map[string]*DesiredUpdates
}
  3382  
// DesiredUpdates is the set of changes the scheduler would like to make given
// sufficient resources and cluster capacity.
//
// NOTE(review): each field looks like a count of allocations the scheduler
// wants to handle in the named way (ignore, place, migrate, stop, update in
// place, update destructively) — confirm against the scheduler code, which is
// not part of this file section.
type DesiredUpdates struct {
	Ignore            uint64
	Place             uint64
	Migrate           uint64
	Stop              uint64
	InPlaceUpdate     uint64
	DestructiveUpdate uint64
}
  3393  
  3394  // msgpackHandle is a shared handle for encoding/decoding of structs
  3395  var MsgpackHandle = func() *codec.MsgpackHandle {
  3396  	h := &codec.MsgpackHandle{RawToString: true}
  3397  
  3398  	// Sets the default type for decoding a map into a nil interface{}.
  3399  	// This is necessary in particular because we store the driver configs as a
  3400  	// nil interface{}.
  3401  	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
  3402  	return h
  3403  }()
  3404  
  3405  var HashiMsgpackHandle = func() *hcodec.MsgpackHandle {
  3406  	h := &hcodec.MsgpackHandle{RawToString: true}
  3407  
  3408  	// Sets the default type for decoding a map into a nil interface{}.
  3409  	// This is necessary in particular because we store the driver configs as a
  3410  	// nil interface{}.
  3411  	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
  3412  	return h
  3413  }()
  3414  
  3415  // Decode is used to decode a MsgPack encoded object
  3416  func Decode(buf []byte, out interface{}) error {
  3417  	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
  3418  }
  3419  
  3420  // Encode is used to encode a MsgPack object with type prefix
  3421  func Encode(t MessageType, msg interface{}) ([]byte, error) {
  3422  	var buf bytes.Buffer
  3423  	buf.WriteByte(uint8(t))
  3424  	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
  3425  	return buf.Bytes(), err
  3426  }