github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/nomad/structs/structs.go

     1  package structs
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"regexp"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/hashicorp/go-msgpack/codec"
    12  	"github.com/hashicorp/go-multierror"
    13  	"github.com/hashicorp/go-version"
    14  )
    15  
    16  var (
    17  	ErrNoLeader     = fmt.Errorf("No cluster leader")
    18  	ErrNoRegionPath = fmt.Errorf("No path to region")
    19  )
    20  
    21  type MessageType uint8
    22  
    23  const (
    24  	NodeRegisterRequestType MessageType = iota
    25  	NodeDeregisterRequestType
    26  	NodeUpdateStatusRequestType
    27  	NodeUpdateDrainRequestType
    28  	JobRegisterRequestType
    29  	JobDeregisterRequestType
    30  	EvalUpdateRequestType
    31  	EvalDeleteRequestType
    32  	AllocUpdateRequestType
    33  	AllocClientUpdateRequestType
    34  )
    35  
    36  const (
    37  	// IgnoreUnknownTypeFlag is set along with a MessageType
    38  	// to indicate that the message type can be safely ignored
    39  	// if it is not recognized. This is for future proofing, so
    40  	// that new commands can be added in a way that won't cause
    41  	// old servers to crash when the FSM attempts to process them.
    42  	IgnoreUnknownTypeFlag MessageType = 128
    43  )
    44  
    45  // RPCInfo is used to describe common information about a query
    46  type RPCInfo interface {
    47  	RequestRegion() string
    48  	IsRead() bool
    49  	AllowStaleRead() bool
    50  }
    51  
    52  // QueryOptions is used to specify various flags for read queries
    53  type QueryOptions struct {
    54  	// The target region for this query
    55  	Region string
    56  
    57  	// If set, block until the query's result index exceeds the given index.
    58  	// Must be provided together with MaxQueryTime.
    59  	MinQueryIndex uint64
    60  
    61  	// Provided with MinQueryIndex to wait for change.
    62  	MaxQueryTime time.Duration
    63  
    64  	// If set, any follower can service the request. Results
    65  	// may be arbitrarily stale.
    66  	AllowStale bool
    67  }
    68  
    69  func (q QueryOptions) RequestRegion() string {
    70  	return q.Region
    71  }
    72  
    73  // QueryOptions only applies to reads, so this is always true
    74  func (q QueryOptions) IsRead() bool {
    75  	return true
    76  }
    77  
    78  func (q QueryOptions) AllowStaleRead() bool {
    79  	return q.AllowStale
    80  }
    81  
    82  type WriteRequest struct {
    83  	// The target region for this write
    84  	Region string
    85  }
    86  
    87  func (w WriteRequest) RequestRegion() string {
    88  	// The target region for this request
    89  	return w.Region
    90  }
    91  
    92  // WriteRequest only applies to writes, so this is always false
    93  func (w WriteRequest) IsRead() bool {
    94  	return false
    95  }
    96  
    97  func (w WriteRequest) AllowStaleRead() bool {
    98  	return false
    99  }
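
        // Illustrative sketch, not part of the original file: both QueryOptions and
        // WriteRequest satisfy RPCInfo, so request routing can be written once against
        // the interface. The helper name and its parameters (localRegion, isLeader)
        // are hypothetical.
        func canServeLocally(info RPCInfo, localRegion string, isLeader bool) bool {
        	// Requests targeting another region must be forwarded there.
        	if info.RequestRegion() != localRegion {
        		return false
        	}
        	// Writes, and reads that do not allow stale results, need the leader.
        	if !info.IsRead() || !info.AllowStaleRead() {
        		return isLeader
        	}
        	// Stale reads may be served by any server in the region.
        	return true
        }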
   100  
   101  // QueryMeta allows a query response to include potentially
   102  // useful metadata about a query
   103  type QueryMeta struct {
   104  	// This is the index associated with the read
   105  	Index uint64
   106  
   107  	// If AllowStale is used, this is time elapsed since
   108  	// last contact between the follower and leader. This
   109  	// can be used to gauge staleness.
   110  	LastContact time.Duration
   111  
   112  	// Used to indicate if there is a known leader node
   113  	KnownLeader bool
   114  }
   115  
    116  // WriteMeta allows a write response to include potentially
   117  // useful metadata about the write
   118  type WriteMeta struct {
   119  	// This is the index associated with the write
   120  	Index uint64
   121  }
   122  
   123  // NodeRegisterRequest is used for Node.Register endpoint
   124  // to register a node as being a schedulable entity.
   125  type NodeRegisterRequest struct {
   126  	Node *Node
   127  	WriteRequest
   128  }
   129  
   130  // NodeDeregisterRequest is used for Node.Deregister endpoint
   131  // to deregister a node as being a schedulable entity.
   132  type NodeDeregisterRequest struct {
   133  	NodeID string
   134  	WriteRequest
   135  }
   136  
   137  // NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
   138  // to update the status of a node.
   139  type NodeUpdateStatusRequest struct {
   140  	NodeID string
   141  	Status string
   142  	WriteRequest
   143  }
   144  
    145  // NodeUpdateDrainRequest is used for updating the drain status
   146  type NodeUpdateDrainRequest struct {
   147  	NodeID string
   148  	Drain  bool
   149  	WriteRequest
   150  }
   151  
    152  // NodeEvaluateRequest is used to re-evaluate the node
   153  type NodeEvaluateRequest struct {
   154  	NodeID string
   155  	WriteRequest
   156  }
   157  
   158  // NodeSpecificRequest is used when we just need to specify a target node
   159  type NodeSpecificRequest struct {
   160  	NodeID string
   161  	QueryOptions
   162  }
   163  
   164  // JobRegisterRequest is used for Job.Register endpoint
   165  // to register a job as being a schedulable entity.
   166  type JobRegisterRequest struct {
   167  	Job *Job
   168  	WriteRequest
   169  }
   170  
   171  // JobDeregisterRequest is used for Job.Deregister endpoint
   172  // to deregister a job as being a schedulable entity.
   173  type JobDeregisterRequest struct {
   174  	JobID string
   175  	WriteRequest
   176  }
   177  
   178  // JobEvaluateRequest is used when we just need to re-evaluate a target job
   179  type JobEvaluateRequest struct {
   180  	JobID string
   181  	WriteRequest
   182  }
   183  
   184  // JobSpecificRequest is used when we just need to specify a target job
   185  type JobSpecificRequest struct {
   186  	JobID string
   187  	QueryOptions
   188  }
   189  
   190  // JobListRequest is used to parameterize a list request
   191  type JobListRequest struct {
   192  	QueryOptions
   193  }
   194  
   195  // NodeListRequest is used to parameterize a list request
   196  type NodeListRequest struct {
   197  	QueryOptions
   198  }
   199  
   200  // EvalUpdateRequest is used for upserting evaluations.
   201  type EvalUpdateRequest struct {
   202  	Evals     []*Evaluation
   203  	EvalToken string
   204  	WriteRequest
   205  }
   206  
   207  // EvalDeleteRequest is used for deleting an evaluation.
   208  type EvalDeleteRequest struct {
   209  	Evals  []string
   210  	Allocs []string
   211  	WriteRequest
   212  }
   213  
   214  // EvalSpecificRequest is used when we just need to specify a target evaluation
   215  type EvalSpecificRequest struct {
   216  	EvalID string
   217  	QueryOptions
   218  }
   219  
   220  // EvalAckRequest is used to Ack/Nack a specific evaluation
   221  type EvalAckRequest struct {
   222  	EvalID string
   223  	Token  string
   224  	WriteRequest
   225  }
   226  
   227  // EvalDequeueRequest is used when we want to dequeue an evaluation
   228  type EvalDequeueRequest struct {
   229  	Schedulers []string
   230  	Timeout    time.Duration
   231  	WriteRequest
   232  }
   233  
   234  // EvalListRequest is used to list the evaluations
   235  type EvalListRequest struct {
   236  	QueryOptions
   237  }
   238  
   239  // PlanRequest is used to submit an allocation plan to the leader
   240  type PlanRequest struct {
   241  	Plan *Plan
   242  	WriteRequest
   243  }
   244  
   245  // AllocUpdateRequest is used to submit changes to allocations, either
    246  // to cause evictions or to assign new allocations. Both can be done
    247  // within a single transaction.
   248  type AllocUpdateRequest struct {
   249  	// Alloc is the list of new allocations to assign
   250  	Alloc []*Allocation
   251  	WriteRequest
   252  }
   253  
   254  // AllocListRequest is used to request a list of allocations
   255  type AllocListRequest struct {
   256  	QueryOptions
   257  }
   258  
   259  // AllocSpecificRequest is used to query a specific allocation
   260  type AllocSpecificRequest struct {
   261  	AllocID string
   262  	QueryOptions
   263  }
   264  
    265  // GenericRequest is used for a request where no
   266  // specific information is needed.
   267  type GenericRequest struct {
   268  	QueryOptions
   269  }
   270  
   271  // GenericResponse is used to respond to a request where no
   272  // specific response information is needed.
   273  type GenericResponse struct {
   274  	WriteMeta
   275  }
   276  
   277  const (
   278  	ProtocolVersion = "protocol"
   279  	APIMajorVersion = "api.major"
   280  	APIMinorVersion = "api.minor"
   281  )
   282  
    283  // VersionResponse is used for the Status.Version response
   284  type VersionResponse struct {
   285  	Build    string
   286  	Versions map[string]int
   287  	QueryMeta
   288  }
   289  
   290  // JobRegisterResponse is used to respond to a job registration
   291  type JobRegisterResponse struct {
   292  	EvalID          string
   293  	EvalCreateIndex uint64
   294  	JobModifyIndex  uint64
   295  	QueryMeta
   296  }
   297  
   298  // JobDeregisterResponse is used to respond to a job deregistration
   299  type JobDeregisterResponse struct {
   300  	EvalID          string
   301  	EvalCreateIndex uint64
   302  	JobModifyIndex  uint64
   303  	QueryMeta
   304  }
   305  
   306  // NodeUpdateResponse is used to respond to a node update
   307  type NodeUpdateResponse struct {
   308  	HeartbeatTTL    time.Duration
   309  	EvalIDs         []string
   310  	EvalCreateIndex uint64
   311  	NodeModifyIndex uint64
   312  	QueryMeta
   313  }
   314  
   315  // NodeDrainUpdateResponse is used to respond to a node drain update
   316  type NodeDrainUpdateResponse struct {
   317  	EvalIDs         []string
   318  	EvalCreateIndex uint64
   319  	NodeModifyIndex uint64
   320  	QueryMeta
   321  }
   322  
   323  // NodeAllocsResponse is used to return allocs for a single node
   324  type NodeAllocsResponse struct {
   325  	Allocs []*Allocation
   326  	QueryMeta
   327  }
   328  
   329  // SingleNodeResponse is used to return a single node
   330  type SingleNodeResponse struct {
   331  	Node *Node
   332  	QueryMeta
   333  }
   334  
    335  // NodeListResponse is used for a list request
   336  type NodeListResponse struct {
   337  	Nodes []*NodeListStub
   338  	QueryMeta
   339  }
   340  
   341  // SingleJobResponse is used to return a single job
   342  type SingleJobResponse struct {
   343  	Job *Job
   344  	QueryMeta
   345  }
   346  
   347  // JobListResponse is used for a list request
   348  type JobListResponse struct {
   349  	Jobs []*JobListStub
   350  	QueryMeta
   351  }
   352  
   353  // SingleAllocResponse is used to return a single allocation
   354  type SingleAllocResponse struct {
   355  	Alloc *Allocation
   356  	QueryMeta
   357  }
   358  
   359  // JobAllocationsResponse is used to return the allocations for a job
   360  type JobAllocationsResponse struct {
   361  	Allocations []*AllocListStub
   362  	QueryMeta
   363  }
   364  
   365  // JobEvaluationsResponse is used to return the evaluations for a job
   366  type JobEvaluationsResponse struct {
   367  	Evaluations []*Evaluation
   368  	QueryMeta
   369  }
   370  
   371  // SingleEvalResponse is used to return a single evaluation
   372  type SingleEvalResponse struct {
   373  	Eval *Evaluation
   374  	QueryMeta
   375  }
   376  
   377  // EvalDequeueResponse is used to return from a dequeue
   378  type EvalDequeueResponse struct {
   379  	Eval  *Evaluation
   380  	Token string
   381  	QueryMeta
   382  }
   383  
   384  // PlanResponse is used to return from a PlanRequest
   385  type PlanResponse struct {
   386  	Result *PlanResult
   387  	WriteMeta
   388  }
   389  
   390  // AllocListResponse is used for a list request
   391  type AllocListResponse struct {
   392  	Allocations []*AllocListStub
   393  	QueryMeta
   394  }
   395  
   396  // EvalListResponse is used for a list request
   397  type EvalListResponse struct {
   398  	Evaluations []*Evaluation
   399  	QueryMeta
   400  }
   401  
   402  // EvalAllocationsResponse is used to return the allocations for an evaluation
   403  type EvalAllocationsResponse struct {
   404  	Allocations []*AllocListStub
   405  	QueryMeta
   406  }
   407  
   408  const (
   409  	NodeStatusInit  = "initializing"
   410  	NodeStatusReady = "ready"
   411  	NodeStatusDown  = "down"
   412  )
   413  
   414  // ShouldDrainNode checks if a given node status should trigger an
   415  // evaluation. Some states don't require any further action.
   416  func ShouldDrainNode(status string) bool {
   417  	switch status {
   418  	case NodeStatusInit, NodeStatusReady:
   419  		return false
   420  	case NodeStatusDown:
   421  		return true
   422  	default:
   423  		panic(fmt.Sprintf("unhandled node status %s", status))
   424  	}
   425  }
   426  
   427  // ValidNodeStatus is used to check if a node status is valid
   428  func ValidNodeStatus(status string) bool {
   429  	switch status {
   430  	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
   431  		return true
   432  	default:
   433  		return false
   434  	}
   435  }
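
        // Illustrative sketch, not part of the original file: how a status-update
        // handler might combine the two helpers above. The function name and the
        // returned "needs evaluation" flag are hypothetical.
        func applyStatusUpdate(node *Node, newStatus string) (bool, error) {
        	if !ValidNodeStatus(newStatus) {
        		return false, fmt.Errorf("invalid node status %q", newStatus)
        	}
        	node.Status = newStatus
        	// A transition such as "ready" -> "down" should trigger an evaluation so
        	// that allocations on the node can be rescheduled elsewhere.
        	return ShouldDrainNode(newStatus), nil
        }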
   436  
   437  // Node is a representation of a schedulable client node
   438  type Node struct {
   439  	// ID is a unique identifier for the node. It can be constructed
   440  	// by doing a concatenation of the Name and Datacenter as a simple
   441  	// approach. Alternatively a UUID may be used.
   442  	ID string
   443  
   444  	// Datacenter for this node
   445  	Datacenter string
   446  
   447  	// Node name
   448  	Name string
   449  
   450  	// Attributes is an arbitrary set of key/value
   451  	// data that can be used for constraints. Examples
   452  	// include "kernel.name=linux", "arch=386", "driver.docker=1",
   453  	// "docker.runtime=1.8.3"
   454  	Attributes map[string]string
   455  
   456  	// Resources is the available resources on the client.
   457  	// For example 'cpu=2' 'memory=2048'
   458  	Resources *Resources
   459  
   460  	// Reserved is the set of resources that are reserved,
   461  	// and should be subtracted from the total resources for
    462  	// the purposes of scheduling. This may be used to provide certain
    463  	// high-watermark tolerances, or to account for external schedulers
    464  	// consuming resources.
   465  	Reserved *Resources
   466  
   467  	// Links are used to 'link' this client to external
   468  	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
   469  	// 'ami=ami-123'
   470  	Links map[string]string
   471  
   472  	// Meta is used to associate arbitrary metadata with this
   473  	// client. This is opaque to Nomad.
   474  	Meta map[string]string
   475  
   476  	// NodeClass is an opaque identifier used to group nodes
   477  	// together for the purpose of determining scheduling pressure.
   478  	NodeClass string
   479  
   480  	// Drain is controlled by the servers, and not the client.
   481  	// If true, no jobs will be scheduled to this node, and existing
   482  	// allocations will be drained.
   483  	Drain bool
   484  
   485  	// Status of this node
   486  	Status string
   487  
    488  	// StatusDescription is meant to provide more useful, human-readable information
   489  	StatusDescription string
   490  
   491  	// Raft Indexes
   492  	CreateIndex uint64
   493  	ModifyIndex uint64
   494  }
   495  
   496  // TerminalStatus returns if the current status is terminal and
   497  // will no longer transition.
   498  func (n *Node) TerminalStatus() bool {
   499  	switch n.Status {
   500  	case NodeStatusDown:
   501  		return true
   502  	default:
   503  		return false
   504  	}
   505  }
   506  
   507  // Stub returns a summarized version of the node
   508  func (n *Node) Stub() *NodeListStub {
   509  	return &NodeListStub{
   510  		ID:                n.ID,
   511  		Datacenter:        n.Datacenter,
   512  		Name:              n.Name,
   513  		NodeClass:         n.NodeClass,
   514  		Drain:             n.Drain,
   515  		Status:            n.Status,
   516  		StatusDescription: n.StatusDescription,
   517  		CreateIndex:       n.CreateIndex,
   518  		ModifyIndex:       n.ModifyIndex,
   519  	}
   520  }
   521  
    522  // NodeListStub is used to return a subset of node information
    523  // for the node list
   524  type NodeListStub struct {
   525  	ID                string
   526  	Datacenter        string
   527  	Name              string
   528  	NodeClass         string
   529  	Drain             bool
   530  	Status            string
   531  	StatusDescription string
   532  	CreateIndex       uint64
   533  	ModifyIndex       uint64
   534  }
   535  
   536  // Resources is used to define the resources available
   537  // on a client
   538  type Resources struct {
   539  	CPU      int
   540  	MemoryMB int `mapstructure:"memory"`
   541  	DiskMB   int `mapstructure:"disk"`
   542  	IOPS     int
   543  	Networks []*NetworkResource
   544  }
   545  
   546  // Copy returns a deep copy of the resources
   547  func (r *Resources) Copy() *Resources {
   548  	newR := new(Resources)
   549  	*newR = *r
   550  	n := len(r.Networks)
   551  	newR.Networks = make([]*NetworkResource, n)
   552  	for i := 0; i < n; i++ {
   553  		newR.Networks[i] = r.Networks[i].Copy()
   554  	}
   555  	return newR
   556  }
   557  
   558  // NetIndex finds the matching net index using device name
   559  func (r *Resources) NetIndex(n *NetworkResource) int {
   560  	for idx, net := range r.Networks {
   561  		if net.Device == n.Device {
   562  			return idx
   563  		}
   564  	}
   565  	return -1
   566  }
   567  
   568  // Superset checks if one set of resources is a superset
   569  // of another. This ignores network resources, and the NetworkIndex
   570  // should be used for that.
   571  func (r *Resources) Superset(other *Resources) (bool, string) {
   572  	if r.CPU < other.CPU {
   573  		return false, "cpu exhausted"
   574  	}
   575  	if r.MemoryMB < other.MemoryMB {
   576  		return false, "memory exhausted"
   577  	}
   578  	if r.DiskMB < other.DiskMB {
   579  		return false, "disk exhausted"
   580  	}
   581  	if r.IOPS < other.IOPS {
   582  		return false, "iops exhausted"
   583  	}
   584  	return true, ""
   585  }
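
        // Illustrative sketch, not part of the original file: a feasibility check in
        // the style a scheduler might perform, ignoring networks as documented above.
        // The function name is hypothetical.
        func fitsOnNode(available, ask *Resources) error {
        	if ok, dimension := available.Superset(ask); !ok {
        		return fmt.Errorf("allocation does not fit: %s", dimension)
        	}
        	return nil
        }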
   586  
   587  // Add adds the resources of the delta to this, potentially
   588  // returning an error if not possible.
   589  func (r *Resources) Add(delta *Resources) error {
   590  	if delta == nil {
   591  		return nil
   592  	}
   593  	r.CPU += delta.CPU
   594  	r.MemoryMB += delta.MemoryMB
   595  	r.DiskMB += delta.DiskMB
   596  	r.IOPS += delta.IOPS
   597  
   598  	for _, n := range delta.Networks {
    599  		// Find the matching interface by device name
   600  		idx := r.NetIndex(n)
   601  		if idx == -1 {
   602  			r.Networks = append(r.Networks, n.Copy())
   603  		} else {
   604  			r.Networks[idx].Add(n)
   605  		}
   606  	}
   607  	return nil
   608  }
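
        // Illustrative sketch, not part of the original file: Add makes it easy to
        // roll per-task asks up into a single total, much like an Allocation's
        // Resources relates to its TaskResources. The helper name is hypothetical.
        func sumTaskResources(tg *TaskGroup) (*Resources, error) {
        	total := new(Resources)
        	for _, task := range tg.Tasks {
        		if err := total.Add(task.Resources); err != nil {
        			return nil, err
        		}
        	}
        	return total, nil
        }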
   609  
   610  func (r *Resources) GoString() string {
   611  	return fmt.Sprintf("*%#v", *r)
   612  }
   613  
   614  // NetworkResource is used to represent available network
   615  // resources
   616  type NetworkResource struct {
   617  	Device        string   // Name of the device
   618  	CIDR          string   // CIDR block of addresses
   619  	IP            string   // IP address
   620  	MBits         int      // Throughput
   621  	ReservedPorts []int    `mapstructure:"reserved_ports"` // Reserved ports
   622  	DynamicPorts  []string `mapstructure:"dynamic_ports"`  // Dynamically assigned ports
   623  }
   624  
   625  // Copy returns a deep copy of the network resource
   626  func (n *NetworkResource) Copy() *NetworkResource {
   627  	newR := new(NetworkResource)
   628  	*newR = *n
   629  	if n.ReservedPorts != nil {
   630  		newR.ReservedPorts = make([]int, len(n.ReservedPorts))
   631  		copy(newR.ReservedPorts, n.ReservedPorts)
   632  	}
   633  	return newR
   634  }
   635  
   636  // Add adds the resources of the delta to this, potentially
   637  // returning an error if not possible.
   638  func (n *NetworkResource) Add(delta *NetworkResource) {
   639  	if len(delta.ReservedPorts) > 0 {
   640  		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
   641  	}
   642  	n.MBits += delta.MBits
   643  	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
   644  }
   645  
   646  func (n *NetworkResource) GoString() string {
   647  	return fmt.Sprintf("*%#v", *n)
   648  }
   649  
   650  // MapDynamicPorts returns a mapping of Label:PortNumber for dynamic ports
   651  // allocated on this NetworkResource. The ordering of Label:Port pairs is
   652  // random.
   653  //
   654  // Details:
   655  //
   656  // The jobspec lets us ask for two types of ports: Reserved ports and Dynamic
   657  // ports. Reserved ports are identified by the port number, while Dynamic ports
   658  // are identified by a Label.
   659  //
   660  // When we ask nomad to run a job it checks to see if the Reserved ports we
   661  // requested are available. If they are, it then tries to provision any Dynamic
   662  // ports that we have requested. When available ports are found to satisfy our
   663  // dynamic port requirements, they are APPENDED to the reserved ports list. In
   664  // effect, the reserved ports list serves double-duty. First it indicates the
   665  // ports we *want*, and then it indicates the ports we are *using*.
   666  //
    667  // After the offer process is complete and the job is scheduled we want to
   668  // see which ports were made available to us. To see the dynamic ports that
   669  // were allocated to us we look at the last N ports in our reservation, where N
   670  // is how many dynamic ports we requested.
   671  //
   672  // MapDynamicPorts matches these port numbers with their labels and gives you
   673  // the port mapping.
   674  //
   675  // Also, be aware that this is intended to be called in the context of
   676  // task.Resources after an offer has been made. If you call it in some other
   677  // context the behavior is unspecified, including maybe crashing. So don't do that.
   678  func (n *NetworkResource) MapDynamicPorts() map[string]int {
   679  	ports := n.ReservedPorts[len(n.ReservedPorts)-len(n.DynamicPorts):]
   680  	mapping := make(map[string]int, len(n.DynamicPorts))
   681  
   682  	for idx, label := range n.DynamicPorts {
   683  		mapping[label] = ports[idx]
   684  	}
   685  
   686  	return mapping
   687  }
   688  
   689  // ListStaticPorts returns the list of Static ports allocated to this
   690  // NetworkResource. These are presumed to have known semantics so there is no
   691  // mapping information.
   692  func (n *NetworkResource) ListStaticPorts() []int {
   693  	return n.ReservedPorts[:len(n.ReservedPorts)-len(n.DynamicPorts)]
   694  }
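
        // Illustrative sketch, not part of the original file: a concrete instance of
        // the append semantics described above. Two reserved ports were requested
        // (8080, 9090) and two dynamic ports labeled "http" and "admin" were later
        // provisioned and appended (20100, 20101). All values are made up.
        func exampleDynamicPortMapping() {
        	net := &NetworkResource{
        		Device:        "eth0",
        		ReservedPorts: []int{8080, 9090, 20100, 20101},
        		DynamicPorts:  []string{"http", "admin"},
        	}
        	// Prints the statically reserved ports: [8080 9090]
        	fmt.Println(net.ListStaticPorts())
        	// Prints the label-to-port mapping, e.g. map[http:20100 admin:20101]
        	fmt.Println(net.MapDynamicPorts())
        }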
   695  
   696  const (
    697  	// JobTypeCore is reserved for internal system tasks and is
   698  	// always handled by the CoreScheduler.
   699  	JobTypeCore    = "_core"
   700  	JobTypeService = "service"
   701  	JobTypeBatch   = "batch"
   702  	JobTypeSystem  = "system"
   703  )
   704  
   705  const (
   706  	JobStatusPending  = "pending"  // Pending means the job is waiting on scheduling
   707  	JobStatusRunning  = "running"  // Running means the entire job is running
   708  	JobStatusComplete = "complete" // Complete means there was a clean termination
   709  	JobStatusDead     = "dead"     // Dead means there was abnormal termination
   710  )
   711  
   712  const (
   713  	// JobMinPriority is the minimum allowed priority
   714  	JobMinPriority = 1
   715  
   716  	// JobDefaultPriority is the default priority if not
    717  	// specified.
   718  	JobDefaultPriority = 50
   719  
   720  	// JobMaxPriority is the maximum allowed priority
   721  	JobMaxPriority = 100
   722  
   723  	// Ensure CoreJobPriority is higher than any user
   724  	// specified job so that it gets priority. This is important
   725  	// for the system to remain healthy.
   726  	CoreJobPriority = JobMaxPriority * 2
   727  )
   728  
   729  // Job is the scope of a scheduling request to Nomad. It is the largest
   730  // scoped object, and is a named collection of task groups. Each task group
    731  // is further composed of tasks. However, a task group (TG) is the unit of
    732  // scheduling.
   733  type Job struct {
   734  	// Region is the Nomad region that handles scheduling this job
   735  	Region string
   736  
   737  	// ID is a unique identifier for the job per region. It can be
   738  	// specified hierarchically like LineOfBiz/OrgName/Team/Project
   739  	ID string
   740  
   741  	// Name is the logical name of the job used to refer to it. This is unique
   742  	// per region, but not unique globally.
   743  	Name string
   744  
   745  	// Type is used to control various behaviors about the job. Most jobs
   746  	// are service jobs, meaning they are expected to be long lived.
   747  	// Some jobs are batch oriented meaning they run and then terminate.
   748  	// This can be extended in the future to support custom schedulers.
   749  	Type string
   750  
   751  	// Priority is used to control scheduling importance and if this job
   752  	// can preempt other jobs.
   753  	Priority int
   754  
   755  	// AllAtOnce is used to control if incremental scheduling of task groups
   756  	// is allowed or if we must do a gang scheduling of the entire job. This
   757  	// can slow down larger jobs if resources are not available.
   758  	AllAtOnce bool `mapstructure:"all_at_once"`
   759  
   760  	// Datacenters contains all the datacenters this job is allowed to span
   761  	Datacenters []string
   762  
   763  	// Constraints can be specified at a job level and apply to
   764  	// all the task groups and tasks.
   765  	Constraints []*Constraint
   766  
   767  	// TaskGroups are the collections of task groups that this job needs
   768  	// to run. Each task group is an atomic unit of scheduling and placement.
   769  	TaskGroups []*TaskGroup
   770  
   771  	// Update is used to control the update strategy
   772  	Update UpdateStrategy
   773  
   774  	// Meta is used to associate arbitrary metadata with this
   775  	// job. This is opaque to Nomad.
   776  	Meta map[string]string
   777  
   778  	// Job status
   779  	Status string
   780  
    781  	// StatusDescription is meant to provide more useful, human-readable information
   782  	StatusDescription string
   783  
   784  	// Raft Indexes
   785  	CreateIndex uint64
   786  	ModifyIndex uint64
   787  }
   788  
   789  // Validate is used to sanity check a job input
   790  func (j *Job) Validate() error {
   791  	var mErr multierror.Error
   792  	if j.Region == "" {
   793  		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
   794  	}
   795  	if j.ID == "" {
   796  		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
   797  	} else if strings.Contains(j.ID, " ") {
   798  		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
   799  	}
   800  	if j.Name == "" {
   801  		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
   802  	}
   803  	if j.Type == "" {
   804  		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
   805  	}
   806  	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
   807  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
   808  	}
   809  	if len(j.Datacenters) == 0 {
   810  		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
   811  	}
   812  	if len(j.TaskGroups) == 0 {
   813  		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
   814  	}
   815  	for idx, constr := range j.Constraints {
   816  		if err := constr.Validate(); err != nil {
   817  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
   818  			mErr.Errors = append(mErr.Errors, outer)
   819  		}
   820  	}
   821  
   822  	// Check for duplicate task groups
   823  	taskGroups := make(map[string]int)
   824  	for idx, tg := range j.TaskGroups {
   825  		if tg.Name == "" {
   826  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
   827  		} else if existing, ok := taskGroups[tg.Name]; ok {
   828  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
   829  		} else {
   830  			taskGroups[tg.Name] = idx
   831  		}
   832  
   833  		if j.Type == "system" && tg.Count != 1 {
   834  			mErr.Errors = append(mErr.Errors,
   835  				fmt.Errorf("Job task group %d has count %d. Only count of 1 is supported with system scheduler",
   836  					idx+1, tg.Count))
   837  		}
   838  	}
   839  
   840  	// Validate the task group
   841  	for idx, tg := range j.TaskGroups {
   842  		if err := tg.Validate(); err != nil {
   843  			outer := fmt.Errorf("Task group %d validation failed: %s", idx+1, err)
   844  			mErr.Errors = append(mErr.Errors, outer)
   845  		}
   846  	}
   847  	return mErr.ErrorOrNil()
   848  }
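
        // Illustrative sketch, not part of the original file: roughly the smallest job
        // that passes Validate. The IDs, driver, image, and resource figures are all
        // made-up example values.
        func exampleMinimalJob() error {
        	job := &Job{
        		Region:      "global",
        		ID:          "example/cache",
        		Name:        "cache",
        		Type:        JobTypeService,
        		Priority:    JobDefaultPriority,
        		Datacenters: []string{"dc1"},
        		TaskGroups: []*TaskGroup{
        			{
        				Name:  "cache",
        				Count: 1,
        				Tasks: []*Task{
        					{
        						Name:      "redis",
        						Driver:    "docker",
        						Config:    map[string]string{"image": "redis:latest"},
        						Resources: &Resources{CPU: 500, MemoryMB: 256},
        					},
        				},
        			},
        		},
        	}
        	return job.Validate()
        }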
   849  
   850  // LookupTaskGroup finds a task group by name
   851  func (j *Job) LookupTaskGroup(name string) *TaskGroup {
   852  	for _, tg := range j.TaskGroups {
   853  		if tg.Name == name {
   854  			return tg
   855  		}
   856  	}
   857  	return nil
   858  }
   859  
   860  // Stub is used to return a summary of the job
   861  func (j *Job) Stub() *JobListStub {
   862  	return &JobListStub{
   863  		ID:                j.ID,
   864  		Name:              j.Name,
   865  		Type:              j.Type,
   866  		Priority:          j.Priority,
   867  		Status:            j.Status,
   868  		StatusDescription: j.StatusDescription,
   869  		CreateIndex:       j.CreateIndex,
   870  		ModifyIndex:       j.ModifyIndex,
   871  	}
   872  }
   873  
   874  // JobListStub is used to return a subset of job information
   875  // for the job list
   876  type JobListStub struct {
   877  	ID                string
   878  	Name              string
   879  	Type              string
   880  	Priority          int
   881  	Status            string
   882  	StatusDescription string
   883  	CreateIndex       uint64
   884  	ModifyIndex       uint64
   885  }
   886  
   887  // UpdateStrategy is used to modify how updates are done
   888  type UpdateStrategy struct {
   889  	// Stagger is the amount of time between the updates
   890  	Stagger time.Duration
   891  
   892  	// MaxParallel is how many updates can be done in parallel
   893  	MaxParallel int `mapstructure:"max_parallel"`
   894  }
   895  
   896  // Rolling returns if a rolling strategy should be used
   897  func (u *UpdateStrategy) Rolling() bool {
   898  	return u.Stagger > 0 && u.MaxParallel > 0
   899  }
   900  
   901  // TaskGroup is an atomic unit of placement. Each task group belongs to
    902  // a job and may contain any number of tasks. A task group supports running
    903  // many replicas using the same configuration.
   904  type TaskGroup struct {
   905  	// Name of the task group
   906  	Name string
   907  
   908  	// Count is the number of replicas of this task group that should
   909  	// be scheduled.
   910  	Count int
   911  
   912  	// Constraints can be specified at a task group level and apply to
   913  	// all the tasks contained.
   914  	Constraints []*Constraint
   915  
   916  	// Tasks are the collection of tasks that this task group needs to run
   917  	Tasks []*Task
   918  
   919  	// Meta is used to associate arbitrary metadata with this
   920  	// task group. This is opaque to Nomad.
   921  	Meta map[string]string
   922  }
   923  
   924  // Validate is used to sanity check a task group
   925  func (tg *TaskGroup) Validate() error {
   926  	var mErr multierror.Error
   927  	if tg.Name == "" {
   928  		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
   929  	}
   930  	if tg.Count <= 0 {
   931  		mErr.Errors = append(mErr.Errors, errors.New("Task group count must be positive"))
   932  	}
   933  	if len(tg.Tasks) == 0 {
   934  		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
   935  	}
   936  	for idx, constr := range tg.Constraints {
   937  		if err := constr.Validate(); err != nil {
   938  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
   939  			mErr.Errors = append(mErr.Errors, outer)
   940  		}
   941  	}
   942  
   943  	// Check for duplicate tasks
   944  	tasks := make(map[string]int)
   945  	for idx, task := range tg.Tasks {
   946  		if task.Name == "" {
   947  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
   948  		} else if existing, ok := tasks[task.Name]; ok {
   949  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
   950  		} else {
   951  			tasks[task.Name] = idx
   952  		}
   953  	}
   954  
   955  	// Validate the tasks
   956  	for idx, task := range tg.Tasks {
   957  		if err := task.Validate(); err != nil {
   958  			outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err)
   959  			mErr.Errors = append(mErr.Errors, outer)
   960  		}
   961  	}
   962  	return mErr.ErrorOrNil()
   963  }
   964  
   965  // LookupTask finds a task by name
   966  func (tg *TaskGroup) LookupTask(name string) *Task {
   967  	for _, t := range tg.Tasks {
   968  		if t.Name == name {
   969  			return t
   970  		}
   971  	}
   972  	return nil
   973  }
   974  
   975  func (tg *TaskGroup) GoString() string {
   976  	return fmt.Sprintf("*%#v", *tg)
   977  }
   978  
    979  // Task is a single process that is typically executed as part of a task group.
   980  type Task struct {
   981  	// Name of the task
   982  	Name string
   983  
   984  	// Driver is used to control which driver is used
   985  	Driver string
   986  
   987  	// Config is provided to the driver to initialize
   988  	Config map[string]string
   989  
   990  	// Map of environment variables to be used by the driver
   991  	Env map[string]string
   992  
   993  	// Constraints can be specified at a task level and apply only to
   994  	// the particular task.
   995  	Constraints []*Constraint
   996  
   997  	// Resources is the resources needed by this task
   998  	Resources *Resources
   999  
  1000  	// Meta is used to associate arbitrary metadata with this
  1001  	// task. This is opaque to Nomad.
  1002  	Meta map[string]string
  1003  }
  1004  
  1005  func (t *Task) GoString() string {
  1006  	return fmt.Sprintf("*%#v", *t)
  1007  }
  1008  
   1009  // Validate is used to sanity check a task
  1010  func (t *Task) Validate() error {
  1011  	var mErr multierror.Error
  1012  	if t.Name == "" {
  1013  		mErr.Errors = append(mErr.Errors, errors.New("Missing task name"))
  1014  	}
  1015  	if t.Driver == "" {
  1016  		mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
  1017  	}
  1018  	if t.Resources == nil {
  1019  		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
  1020  	}
  1021  	for idx, constr := range t.Constraints {
  1022  		if err := constr.Validate(); err != nil {
  1023  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  1024  			mErr.Errors = append(mErr.Errors, outer)
  1025  		}
  1026  	}
  1027  	return mErr.ErrorOrNil()
  1028  }
  1029  
  1030  // Constraints are used to restrict placement options in the case of
  1031  // a hard constraint, and used to prefer a placement in the case of
  1032  // a soft constraint.
  1033  type Constraint struct {
  1034  	Hard    bool   // Hard or soft constraint
  1035  	LTarget string // Left-hand target
  1036  	RTarget string // Right-hand target
   1037  	Operand string // Constraint operand (<=, <, =, !=, >, >=, contains, near)
  1038  	Weight  int    // Soft constraints can vary the weight
  1039  }
  1040  
  1041  func (c *Constraint) String() string {
  1042  	return fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
  1043  }
  1044  
  1045  func (c *Constraint) Validate() error {
  1046  	var mErr multierror.Error
  1047  	if c.Operand == "" {
  1048  		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
  1049  	}
  1050  
  1051  	// Perform additional validation based on operand
  1052  	switch c.Operand {
  1053  	case "regexp":
  1054  		if _, err := regexp.Compile(c.RTarget); err != nil {
  1055  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
  1056  		}
  1057  	case "version":
  1058  		if _, err := version.NewConstraint(c.RTarget); err != nil {
  1059  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
  1060  		}
  1061  	}
  1062  	return mErr.ErrorOrNil()
  1063  }
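
        // Illustrative sketch, not part of the original file: "regexp" and "version"
        // are the two operands given extra validation above. The attribute names and
        // values are made up.
        func exampleConstraints() error {
        	kernel := &Constraint{
        		Hard:    true,
        		LTarget: "$attr.kernel.name",
        		RTarget: "^linux$",
        		Operand: "regexp",
        	}
        	docker := &Constraint{
        		Hard:    true,
        		LTarget: "$attr.driver.docker",
        		RTarget: ">= 1.8.3",
        		Operand: "version",
        	}
        	var mErr multierror.Error
        	for _, c := range []*Constraint{kernel, docker} {
        		if err := c.Validate(); err != nil {
        			mErr.Errors = append(mErr.Errors, err)
        		}
        	}
        	return mErr.ErrorOrNil()
        }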
  1064  
  1065  const (
  1066  	AllocDesiredStatusRun    = "run"    // Allocation should run
  1067  	AllocDesiredStatusStop   = "stop"   // Allocation should stop
  1068  	AllocDesiredStatusEvict  = "evict"  // Allocation should stop, and was evicted
  1069  	AllocDesiredStatusFailed = "failed" // Allocation failed to be done
  1070  )
  1071  
  1072  const (
  1073  	AllocClientStatusPending = "pending"
  1074  	AllocClientStatusRunning = "running"
  1075  	AllocClientStatusDead    = "dead"
  1076  	AllocClientStatusFailed  = "failed"
  1077  )
  1078  
  1079  // Allocation is used to allocate the placement of a task group to a node.
  1080  type Allocation struct {
  1081  	// ID of the allocation (UUID)
  1082  	ID string
  1083  
  1084  	// ID of the evaluation that generated this allocation
  1085  	EvalID string
  1086  
  1087  	// Name is a logical name of the allocation.
  1088  	Name string
  1089  
  1090  	// NodeID is the node this is being placed on
  1091  	NodeID string
  1092  
  1093  	// Job is the parent job of the task group being allocated.
  1094  	// This is copied at allocation time to avoid issues if the job
  1095  	// definition is updated.
  1096  	JobID string
  1097  	Job   *Job
  1098  
  1099  	// TaskGroup is the name of the task group that should be run
  1100  	TaskGroup string
  1101  
  1102  	// Resources is the total set of resources allocated as part
  1103  	// of this allocation of the task group.
  1104  	Resources *Resources
  1105  
  1106  	// TaskResources is the set of resources allocated to each
  1107  	// task. These should sum to the total Resources.
  1108  	TaskResources map[string]*Resources
  1109  
  1110  	// Metrics associated with this allocation
  1111  	Metrics *AllocMetric
  1112  
  1113  	// Desired Status of the allocation on the client
  1114  	DesiredStatus string
  1115  
   1116  	// DesiredDescription is meant to provide more useful, human-readable information
  1117  	DesiredDescription string
  1118  
  1119  	// Status of the allocation on the client
  1120  	ClientStatus string
  1121  
   1122  	// ClientDescription is meant to provide more useful, human-readable information
  1123  	ClientDescription string
  1124  
  1125  	// Raft Indexes
  1126  	CreateIndex uint64
  1127  	ModifyIndex uint64
  1128  }
  1129  
  1130  // TerminalStatus returns if the desired status is terminal and
  1131  // will no longer transition. This is not based on the current client status.
  1132  func (a *Allocation) TerminalStatus() bool {
  1133  	switch a.DesiredStatus {
  1134  	case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed:
  1135  		return true
  1136  	default:
  1137  		return false
  1138  	}
  1139  }
  1140  
  1141  // Stub returns a list stub for the allocation
  1142  func (a *Allocation) Stub() *AllocListStub {
  1143  	return &AllocListStub{
  1144  		ID:                 a.ID,
  1145  		EvalID:             a.EvalID,
  1146  		Name:               a.Name,
  1147  		NodeID:             a.NodeID,
  1148  		JobID:              a.JobID,
  1149  		TaskGroup:          a.TaskGroup,
  1150  		DesiredStatus:      a.DesiredStatus,
  1151  		DesiredDescription: a.DesiredDescription,
  1152  		ClientStatus:       a.ClientStatus,
  1153  		ClientDescription:  a.ClientDescription,
  1154  		CreateIndex:        a.CreateIndex,
  1155  		ModifyIndex:        a.ModifyIndex,
  1156  	}
  1157  }
  1158  
  1159  // AllocListStub is used to return a subset of alloc information
  1160  type AllocListStub struct {
  1161  	ID                 string
  1162  	EvalID             string
  1163  	Name               string
  1164  	NodeID             string
  1165  	JobID              string
  1166  	TaskGroup          string
  1167  	DesiredStatus      string
  1168  	DesiredDescription string
  1169  	ClientStatus       string
  1170  	ClientDescription  string
  1171  	CreateIndex        uint64
  1172  	ModifyIndex        uint64
  1173  }
  1174  
  1175  // AllocMetric is used to track various metrics while attempting
  1176  // to make an allocation. These are used to debug a job, or to better
  1177  // understand the pressure within the system.
  1178  type AllocMetric struct {
  1179  	// NodesEvaluated is the number of nodes that were evaluated
  1180  	NodesEvaluated int
  1181  
  1182  	// NodesFiltered is the number of nodes filtered due to
  1183  	// a hard constraint
  1184  	NodesFiltered int
  1185  
  1186  	// ClassFiltered is the number of nodes filtered by class
  1187  	ClassFiltered map[string]int
  1188  
  1189  	// ConstraintFiltered is the number of failures caused by constraint
  1190  	ConstraintFiltered map[string]int
  1191  
  1192  	// NodesExhausted is the number of nodes skipped due to being
  1193  	// exhausted of at least one resource
  1194  	NodesExhausted int
  1195  
  1196  	// ClassExhausted is the number of nodes exhausted by class
  1197  	ClassExhausted map[string]int
  1198  
  1199  	// DimensionExhausted provides the count by dimension or reason
  1200  	DimensionExhausted map[string]int
  1201  
  1202  	// Scores is the scores of the final few nodes remaining
  1203  	// for placement. The top score is typically selected.
  1204  	Scores map[string]float64
  1205  
  1206  	// AllocationTime is a measure of how long the allocation
  1207  	// attempt took. This can affect performance and SLAs.
  1208  	AllocationTime time.Duration
  1209  
  1210  	// CoalescedFailures indicates the number of other
  1211  	// allocations that were coalesced into this failed allocation.
  1212  	// This is to prevent creating many failed allocations for a
  1213  	// single task group.
  1214  	CoalescedFailures int
  1215  }
  1216  
  1217  func (a *AllocMetric) EvaluateNode() {
  1218  	a.NodesEvaluated += 1
  1219  }
  1220  
  1221  func (a *AllocMetric) FilterNode(node *Node, constraint string) {
  1222  	a.NodesFiltered += 1
  1223  	if node != nil && node.NodeClass != "" {
  1224  		if a.ClassFiltered == nil {
  1225  			a.ClassFiltered = make(map[string]int)
  1226  		}
  1227  		a.ClassFiltered[node.NodeClass] += 1
  1228  	}
  1229  	if constraint != "" {
  1230  		if a.ConstraintFiltered == nil {
  1231  			a.ConstraintFiltered = make(map[string]int)
  1232  		}
  1233  		a.ConstraintFiltered[constraint] += 1
  1234  	}
  1235  }
  1236  
  1237  func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
  1238  	a.NodesExhausted += 1
  1239  	if node != nil && node.NodeClass != "" {
  1240  		if a.ClassExhausted == nil {
  1241  			a.ClassExhausted = make(map[string]int)
  1242  		}
  1243  		a.ClassExhausted[node.NodeClass] += 1
  1244  	}
  1245  	if dimension != "" {
  1246  		if a.DimensionExhausted == nil {
  1247  			a.DimensionExhausted = make(map[string]int)
  1248  		}
  1249  		a.DimensionExhausted[dimension] += 1
  1250  	}
  1251  }
  1252  
  1253  func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
  1254  	if a.Scores == nil {
  1255  		a.Scores = make(map[string]float64)
  1256  	}
  1257  	key := fmt.Sprintf("%s.%s", node.ID, name)
  1258  	a.Scores[key] = score
  1259  }
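
        // Illustrative sketch, not part of the original file: the rough order in which
        // a placement loop would drive the methods above. The function, the filter
        // reasons, and the "binpack" score name are hypothetical.
        func recordPlacementMetrics(m *AllocMetric, nodes []*Node, ask *Resources) {
        	for _, node := range nodes {
        		m.EvaluateNode()
        		if node.Drain || node.TerminalStatus() {
        			m.FilterNode(node, "node is draining or down")
        			continue
        		}
        		if node.Resources == nil {
        			m.FilterNode(node, "node has no resources")
        			continue
        		}
        		if ok, dimension := node.Resources.Superset(ask); !ok {
        			m.ExhaustedNode(node, dimension)
        			continue
        		}
        		// Score the remaining candidates; the top score is typically selected.
        		m.ScoreNode(node, "binpack", float64(ask.CPU)/float64(node.Resources.CPU))
        	}
        }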
  1260  
  1261  const (
  1262  	EvalStatusPending  = "pending"
  1263  	EvalStatusComplete = "complete"
  1264  	EvalStatusFailed   = "failed"
  1265  )
  1266  
  1267  const (
  1268  	EvalTriggerJobRegister   = "job-register"
  1269  	EvalTriggerJobDeregister = "job-deregister"
  1270  	EvalTriggerNodeUpdate    = "node-update"
  1271  	EvalTriggerScheduled     = "scheduled"
  1272  	EvalTriggerRollingUpdate = "rolling-update"
  1273  )
  1274  
  1275  const (
  1276  	// CoreJobEvalGC is used for the garbage collection of evaluations
  1277  	// and allocations. We periodically scan evaluations in a terminal state,
  1278  	// in which all the corresponding allocations are also terminal. We
  1279  	// delete these out of the system to bound the state.
  1280  	CoreJobEvalGC = "eval-gc"
  1281  
  1282  	// CoreJobNodeGC is used for the garbage collection of failed nodes.
  1283  	// We periodically scan nodes in a terminal state, and if they have no
  1284  	// corresponding allocations we delete these out of the system.
  1285  	CoreJobNodeGC = "node-gc"
  1286  )
  1287  
  1288  // Evaluation is used anytime we need to apply business logic as a result
  1289  // of a change to our desired state (job specification) or the emergent state
  1290  // (registered nodes). When the inputs change, we need to "evaluate" them,
  1291  // potentially taking action (allocation of work) or doing nothing if the state
  1292  // of the world does not require it.
  1293  type Evaluation struct {
   1294  	// ID is a randomly generated UUID used for this evaluation. This
  1295  	// is assigned upon the creation of the evaluation.
  1296  	ID string
  1297  
  1298  	// Priority is used to control scheduling importance and if this job
  1299  	// can preempt other jobs.
  1300  	Priority int
  1301  
  1302  	// Type is used to control which schedulers are available to handle
  1303  	// this evaluation.
  1304  	Type string
  1305  
  1306  	// TriggeredBy is used to give some insight into why this Eval
  1307  	// was created. (Job change, node failure, alloc failure, etc).
  1308  	TriggeredBy string
  1309  
  1310  	// JobID is the job this evaluation is scoped to. Evaluations cannot
  1311  	// be run in parallel for a given JobID, so we serialize on this.
  1312  	JobID string
  1313  
  1314  	// JobModifyIndex is the modify index of the job at the time
  1315  	// the evaluation was created
  1316  	JobModifyIndex uint64
  1317  
  1318  	// NodeID is the node that was affected triggering the evaluation.
  1319  	NodeID string
  1320  
  1321  	// NodeModifyIndex is the modify index of the node at the time
  1322  	// the evaluation was created
  1323  	NodeModifyIndex uint64
  1324  
  1325  	// Status of the evaluation
  1326  	Status string
  1327  
   1328  	// StatusDescription is meant to provide more useful, human-readable information
  1329  	StatusDescription string
  1330  
  1331  	// Wait is a minimum wait time for running the eval. This is used to
  1332  	// support a rolling upgrade.
  1333  	Wait time.Duration
  1334  
  1335  	// NextEval is the evaluation ID for the eval created to do a followup.
  1336  	// This is used to support rolling upgrades, where we need a chain of evaluations.
  1337  	NextEval string
  1338  
   1339  	// PreviousEval is the ID of the eval that created this one as a followup.
  1340  	// This is used to support rolling upgrades, where we need a chain of evaluations.
  1341  	PreviousEval string
  1342  
  1343  	// Raft Indexes
  1344  	CreateIndex uint64
  1345  	ModifyIndex uint64
  1346  }
  1347  
  1348  // TerminalStatus returns if the current status is terminal and
  1349  // will no longer transition.
  1350  func (e *Evaluation) TerminalStatus() bool {
  1351  	switch e.Status {
  1352  	case EvalStatusComplete, EvalStatusFailed:
  1353  		return true
  1354  	default:
  1355  		return false
  1356  	}
  1357  }
  1358  
  1359  func (e *Evaluation) GoString() string {
  1360  	return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID)
  1361  }
  1362  
  1363  func (e *Evaluation) Copy() *Evaluation {
  1364  	ne := new(Evaluation)
  1365  	*ne = *e
  1366  	return ne
  1367  }
  1368  
  1369  // ShouldEnqueue checks if a given evaluation should be enqueued
  1370  func (e *Evaluation) ShouldEnqueue() bool {
  1371  	switch e.Status {
  1372  	case EvalStatusPending:
  1373  		return true
  1374  	case EvalStatusComplete, EvalStatusFailed:
  1375  		return false
  1376  	default:
  1377  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  1378  	}
  1379  }
  1380  
  1381  // MakePlan is used to make a plan from the given evaluation
  1382  // for a given Job
  1383  func (e *Evaluation) MakePlan(j *Job) *Plan {
  1384  	p := &Plan{
  1385  		EvalID:         e.ID,
  1386  		Priority:       e.Priority,
  1387  		NodeUpdate:     make(map[string][]*Allocation),
  1388  		NodeAllocation: make(map[string][]*Allocation),
  1389  	}
  1390  	if j != nil {
  1391  		p.AllAtOnce = j.AllAtOnce
  1392  	}
  1393  	return p
  1394  }
  1395  
  1396  // NextRollingEval creates an evaluation to followup this eval for rolling updates
  1397  func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
  1398  	return &Evaluation{
  1399  		ID:             GenerateUUID(),
  1400  		Priority:       e.Priority,
  1401  		Type:           e.Type,
  1402  		TriggeredBy:    EvalTriggerRollingUpdate,
  1403  		JobID:          e.JobID,
  1404  		JobModifyIndex: e.JobModifyIndex,
  1405  		Status:         EvalStatusPending,
  1406  		Wait:           wait,
  1407  		PreviousEval:   e.ID,
  1408  	}
  1409  }
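
        // Illustrative sketch, not part of the original file: how a job's Update
        // strategy and NextRollingEval combine into a chain of evaluations. The
        // function name is hypothetical.
        func nextEvalForRollingUpdate(e *Evaluation, j *Job) *Evaluation {
        	if j == nil || !j.Update.Rolling() {
        		return nil
        	}
        	// The follow-up eval waits for the configured stagger and records the eval
        	// that spawned it in PreviousEval; link the chain forward as well.
        	next := e.NextRollingEval(j.Update.Stagger)
        	e.NextEval = next.ID
        	return next
        }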
  1410  
  1411  // Plan is used to submit a commit plan for task allocations. These
  1412  // are submitted to the leader which verifies that resources have
   1413  // not been overcommitted before admitting the plan.
  1414  type Plan struct {
  1415  	// EvalID is the evaluation ID this plan is associated with
  1416  	EvalID string
  1417  
  1418  	// EvalToken is used to prevent a split-brain processing of
  1419  	// an evaluation. There should only be a single scheduler running
  1420  	// an Eval at a time, but this could be violated after a leadership
  1421  	// transition. This unique token is used to reject plans that are
  1422  	// being submitted from a different leader.
  1423  	EvalToken string
  1424  
  1425  	// Priority is the priority of the upstream job
  1426  	Priority int
  1427  
  1428  	// AllAtOnce is used to control if incremental scheduling of task groups
  1429  	// is allowed or if we must do a gang scheduling of the entire job.
  1430  	// If this is false, a plan may be partially applied. Otherwise, the
  1431  	// entire plan must be able to make progress.
  1432  	AllAtOnce bool
  1433  
  1434  	// NodeUpdate contains all the allocations for each node. For each node,
  1435  	// this is a list of the allocations to update to either stop or evict.
  1436  	NodeUpdate map[string][]*Allocation
  1437  
  1438  	// NodeAllocation contains all the allocations for each node.
  1439  	// The evicts must be considered prior to the allocations.
  1440  	NodeAllocation map[string][]*Allocation
  1441  
  1442  	// FailedAllocs are allocations that could not be made,
  1443  	// but are persisted so that the user can use the feedback
  1444  	// to determine the cause.
  1445  	FailedAllocs []*Allocation
  1446  }
  1447  
  1448  func (p *Plan) AppendUpdate(alloc *Allocation, status, desc string) {
  1449  	newAlloc := new(Allocation)
  1450  	*newAlloc = *alloc
  1451  	newAlloc.DesiredStatus = status
  1452  	newAlloc.DesiredDescription = desc
  1453  	node := alloc.NodeID
  1454  	existing := p.NodeUpdate[node]
  1455  	p.NodeUpdate[node] = append(existing, newAlloc)
  1456  }
  1457  
  1458  func (p *Plan) PopUpdate(alloc *Allocation) {
  1459  	existing := p.NodeUpdate[alloc.NodeID]
  1460  	n := len(existing)
  1461  	if n > 0 && existing[n-1].ID == alloc.ID {
  1462  		existing = existing[:n-1]
  1463  		if len(existing) > 0 {
  1464  			p.NodeUpdate[alloc.NodeID] = existing
  1465  		} else {
  1466  			delete(p.NodeUpdate, alloc.NodeID)
  1467  		}
  1468  	}
  1469  }
  1470  
  1471  func (p *Plan) AppendAlloc(alloc *Allocation) {
  1472  	node := alloc.NodeID
  1473  	existing := p.NodeAllocation[node]
  1474  	p.NodeAllocation[node] = append(existing, alloc)
  1475  }
  1476  
  1477  func (p *Plan) AppendFailed(alloc *Allocation) {
  1478  	p.FailedAllocs = append(p.FailedAllocs, alloc)
  1479  }
  1480  
  1481  // IsNoOp checks if this plan would do nothing
  1482  func (p *Plan) IsNoOp() bool {
  1483  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
  1484  }
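
        // Illustrative sketch, not part of the original file: a scheduler-style use of
        // the plan builders above, stopping one allocation and placing another, then
        // skipping submission when the plan is a no-op. The function name and inputs
        // are hypothetical.
        func buildExamplePlan(eval *Evaluation, job *Job, stop, place *Allocation) *Plan {
        	plan := eval.MakePlan(job)
        	if stop != nil {
        		plan.AppendUpdate(stop, AllocDesiredStatusStop, "node is draining")
        	}
        	if place != nil {
        		plan.AppendAlloc(place)
        	}
        	if plan.IsNoOp() {
        		return nil
        	}
        	return plan
        }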
  1485  
  1486  // PlanResult is the result of a plan submitted to the leader.
  1487  type PlanResult struct {
  1488  	// NodeUpdate contains all the updates that were committed.
  1489  	NodeUpdate map[string][]*Allocation
  1490  
  1491  	// NodeAllocation contains all the allocations that were committed.
  1492  	NodeAllocation map[string][]*Allocation
  1493  
  1494  	// FailedAllocs are allocations that could not be made,
  1495  	// but are persisted so that the user can use the feedback
  1496  	// to determine the cause.
  1497  	FailedAllocs []*Allocation
  1498  
  1499  	// RefreshIndex is the index the worker should refresh state up to.
  1500  	// This allows all evictions and allocations to be materialized.
  1501  	// If any allocations were rejected due to stale data (node state,
  1502  	// over committed) this can be used to force a worker refresh.
  1503  	RefreshIndex uint64
  1504  
  1505  	// AllocIndex is the Raft index in which the evictions and
  1506  	// allocations took place. This is used for the write index.
  1507  	AllocIndex uint64
  1508  }
  1509  
  1510  // IsNoOp checks if this plan result would do nothing
  1511  func (p *PlanResult) IsNoOp() bool {
  1512  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
  1513  }
  1514  
  1515  // FullCommit is used to check if all the allocations in a plan
  1516  // were committed as part of the result. Returns if there was
  1517  // a match, and the number of expected and actual allocations.
  1518  func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
  1519  	expected := 0
  1520  	actual := 0
  1521  	for name, allocList := range plan.NodeAllocation {
   1522  		didAlloc := p.NodeAllocation[name]
  1523  		expected += len(allocList)
  1524  		actual += len(didAlloc)
  1525  	}
  1526  	return actual == expected, expected, actual
  1527  }
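
        // Illustrative sketch, not part of the original file: how a worker might act
        // on a PlanResult. The function name and the refresh callback are hypothetical.
        func handlePlanResult(plan *Plan, result *PlanResult, refresh func(index uint64) error) error {
        	full, expected, actual := result.FullCommit(plan)
        	if full {
        		return nil
        	}
        	// Some allocations were rejected, likely due to stale state; refresh up to
        	// RefreshIndex before computing a new plan for the remainder.
        	if err := refresh(result.RefreshIndex); err != nil {
        		return err
        	}
        	return fmt.Errorf("plan partially committed: %d of %d allocations placed", actual, expected)
        }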
  1528  
  1529  // msgpackHandle is a shared handle for encoding/decoding of structs
  1530  var msgpackHandle = &codec.MsgpackHandle{}
  1531  
  1532  // Decode is used to decode a MsgPack encoded object
  1533  func Decode(buf []byte, out interface{}) error {
  1534  	return codec.NewDecoder(bytes.NewReader(buf), msgpackHandle).Decode(out)
  1535  }
  1536  
  1537  // Encode is used to encode a MsgPack object with type prefix
  1538  func Encode(t MessageType, msg interface{}) ([]byte, error) {
  1539  	var buf bytes.Buffer
  1540  	buf.WriteByte(uint8(t))
  1541  	err := codec.NewEncoder(&buf, msgpackHandle).Encode(msg)
  1542  	return buf.Bytes(), err
  1543  }
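
        // Illustrative sketch, not part of the original file: the type byte written by
        // Encode is consumed before the payload is decoded, mirroring how an FSM would
        // dispatch on the message type. The function name is hypothetical.
        func exampleEncodeDecode(req *NodeRegisterRequest) error {
        	buf, err := Encode(NodeRegisterRequestType, req)
        	if err != nil {
        		return err
        	}

        	// The first byte carries the MessageType; IgnoreUnknownTypeFlag may be OR'd
        	// in so that old servers can skip types they do not understand.
        	msgType := MessageType(buf[0])
        	msgType &^= IgnoreUnknownTypeFlag

        	if msgType != NodeRegisterRequestType {
        		return fmt.Errorf("unexpected message type %d", msgType)
        	}
        	var out NodeRegisterRequest
        	return Decode(buf[1:], &out)
        }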