github.com/jmitchell/nomad@v0.1.3-0.20151007230021-7ab84c2862d8/nomad/structs/structs.go

github.com/jmitchell/nomad@v0.1.3-0.20151007230021-7ab84c2862d8/nomad/structs/structs.go (about)

     1  package structs
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/hashicorp/go-msgpack/codec"
    11  	"github.com/hashicorp/go-multierror"
    12  )
    13  
// Sentinel errors exported by this package. ErrNoLeader carries the message
// "No cluster leader" and ErrNoRegionPath the message "No path to region".
// The call sites that return them are not visible in this part of the file.
var (
	ErrNoLeader     = fmt.Errorf("No cluster leader")
	ErrNoRegionPath = fmt.Errorf("No path to region")
)
    18  
// MessageType is a one-byte tag; the constants below name the request kinds
// it can identify.
type MessageType uint8

// Known message types. Values are assigned sequentially by iota starting at
// zero, so the position of each entry fixes its numeric value: inserting or
// reordering entries renumbers every constant after the change.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
)
    33  
const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	//
	// The value 128 is the highest bit of the uint8 that backs MessageType.
	IgnoreUnknownTypeFlag MessageType = 128
)
    42  
// RPCInfo is used to describe common information about a query or write.
// In this file it is implemented by QueryOptions and WriteRequest.
type RPCInfo interface {
	// RequestRegion returns the region the request targets.
	RequestRegion() string
	// IsRead reports whether the request is a read (QueryOptions returns
	// true, WriteRequest returns false).
	IsRead() bool
	// AllowStaleRead reports whether a stale read is acceptable
	// (WriteRequest always returns false).
	AllowStaleRead() bool
}
    49  
    50  // QueryOptions is used to specify various flags for read queries
    51  type QueryOptions struct {
    52  	// The target region for this query
    53  	Region string
    54  
    55  	// If set, wait until query exceeds given index. Must be provided
    56  	// with MaxQueryTime.
    57  	MinQueryIndex uint64
    58  
    59  	// Provided with MinQueryIndex to wait for change.
    60  	MaxQueryTime time.Duration
    61  
    62  	// If set, any follower can service the request. Results
    63  	// may be arbitrarily stale.
    64  	AllowStale bool
    65  }
    66  
    67  func (q QueryOptions) RequestRegion() string {
    68  	return q.Region
    69  }
    70  
    71  // QueryOption only applies to reads, so always true
    72  func (q QueryOptions) IsRead() bool {
    73  	return true
    74  }
    75  
    76  func (q QueryOptions) AllowStaleRead() bool {
    77  	return q.AllowStale
    78  }
    79  
    80  type WriteRequest struct {
    81  	// The target region for this write
    82  	Region string
    83  }
    84  
    85  func (w WriteRequest) RequestRegion() string {
    86  	// The target region for this request
    87  	return w.Region
    88  }
    89  
    90  // WriteRequest only applies to writes, always false
    91  func (w WriteRequest) IsRead() bool {
    92  	return false
    93  }
    94  
    95  func (w WriteRequest) AllowStaleRead() bool {
    96  	return false
    97  }
    98  
// QueryMeta allows a query response to include potentially
// useful metadata about a query. It is embedded in the response
// types declared in this file.
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}
   113  
// WriteMeta allows a write response to include potentially
// useful metadata about the write.
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}
   120  
// NodeRegisterRequest is used for the Node.Register endpoint
// to register a node as being a schedulable entity.
// It carries the node itself plus the embedded write options.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}
   127  
// NodeDeregisterRequest is used for the Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
// The node is identified by NodeID.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}
   134  
// NodeUpdateStatusRequest is used for the Node.UpdateStatus endpoint
// to update the status of a node. Status is a plain string; the
// NodeStatus* constants and ValidNodeStatus in this file describe the
// recognized values.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	WriteRequest
}
   142  
// NodeUpdateDrainRequest is used for updating the drain status
// of the node identified by NodeID.
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}
   149  
// NodeEvaluateRequest is used to re-evaluate the node
// identified by NodeID.
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}
   155  
// NodeSpecificRequest is used when we just need to specify a target node.
// It embeds QueryOptions, so it is a read request.
type NodeSpecificRequest struct {
	NodeID string
	QueryOptions
}
   161  
// JobRegisterRequest is used for the Job.Register endpoint
// to register a job as being a schedulable entity.
// It carries the job itself plus the embedded write options.
type JobRegisterRequest struct {
	Job *Job
	WriteRequest
}
   168  
// JobDeregisterRequest is used for the Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
// The job is identified by JobID.
type JobDeregisterRequest struct {
	JobID string
	WriteRequest
}
   175  
// JobEvaluateRequest is used when we just need to re-evaluate a target job.
// It embeds WriteRequest, so it is treated as a write.
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}
   181  
// JobSpecificRequest is used when we just need to specify a target job.
// It embeds QueryOptions, so it is a read request.
type JobSpecificRequest struct {
	JobID string
	QueryOptions
}
   187  
// JobListRequest is used to parameterize a job list request.
// It carries only the embedded query options.
type JobListRequest struct {
	QueryOptions
}
   192  
// NodeListRequest is used to parameterize a node list request.
// It carries only the embedded query options.
type NodeListRequest struct {
	QueryOptions
}
   197  
// EvalUpdateRequest is used for upserting evaluations.
// Evals holds the evaluations to upsert; EvalToken is sent alongside them
// (its exact role is not visible in this part of the file).
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}
   204  
// EvalDeleteRequest is used for deleting evaluations. It carries two string
// lists, Evals and Allocs (presumably the IDs of the evaluations and
// allocations to remove; confirm against the handler).
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}
   211  
// EvalSpecificRequest is used when we just need to specify a target evaluation.
// It embeds QueryOptions, so it is a read request.
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}
   217  
// EvalAckRequest is used to Ack/Nack a specific evaluation.
// The evaluation is named by EvalID and accompanied by a Token.
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}
   224  
// EvalDequeueRequest is used when we want to dequeue an evaluation.
// Schedulers is a list of scheduler names and Timeout a duration
// (presumably the schedulers the caller can run and how long the dequeue may
// block; confirm against the handler). It embeds WriteRequest, so it is
// treated as a write.
type EvalDequeueRequest struct {
	Schedulers []string
	Timeout    time.Duration
	WriteRequest
}
   231  
// EvalListRequest is used to list the evaluations.
// It carries only the embedded query options.
type EvalListRequest struct {
	QueryOptions
}
   236  
// PlanRequest is used to submit an allocation plan to the leader.
// It carries the plan plus the embedded write options.
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}
   242  
// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction.
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation
	WriteRequest
}
   251  
// AllocListRequest is used to request a list of allocations.
// It carries only the embedded query options.
type AllocListRequest struct {
	QueryOptions
}
   256  
// AllocSpecificRequest is used to query a specific allocation,
// identified by AllocID.
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}
   262  
// GenericRequest is used for read requests where no
// specific information is needed beyond the query options.
type GenericRequest struct {
	QueryOptions
}
   268  
// GenericResponse is used to respond to a request where no
// specific response information is needed. It carries only the
// write metadata.
type GenericResponse struct {
	WriteMeta
}
   274  
// String keys naming version components. VersionResponse.Versions is a
// map[string]int, so these are presumably its keys (confirm against the
// Status.Version handler).
const (
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)
   280  
// VersionResponse is used for the Status.Version response.
// Build is a build string and Versions maps a component name to an
// integer version.
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}
   287  
// JobRegisterResponse is used to respond to a job registration.
// Besides the embedded QueryMeta it reports an evaluation ID and two indexes,
// EvalCreateIndex and JobModifyIndex.
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}
   295  
// JobDeregisterResponse is used to respond to a job deregistration.
// It has the same shape as JobRegisterResponse.
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}
   303  
// NodeUpdateResponse is used to respond to a node update.
// It reports a heartbeat TTL, a list of evaluation IDs and two indexes,
// EvalCreateIndex and NodeModifyIndex.
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}
   312  
// NodeDrainUpdateResponse is used to respond to a node drain update.
// It matches NodeUpdateResponse minus the heartbeat TTL.
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}
   320  
// NodeAllocsResponse is used to return the allocations for a single node.
// Allocs holds full Allocation values rather than list stubs.
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}
   326  
// SingleNodeResponse is used to return a single node
// together with the query metadata.
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}
   332  
// NodeListResponse is used for a node list request.
// Nodes holds summarized NodeListStub entries rather than full nodes.
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}
   338  
// SingleJobResponse is used to return a single job
// together with the query metadata.
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}
   344  
// JobListResponse is used for a job list request.
// Jobs holds summarized JobListStub entries rather than full jobs.
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}
   350  
// SingleAllocResponse is used to return a single allocation
// together with the query metadata.
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}
   356  
// JobAllocationsResponse is used to return the allocations for a job.
// Allocations holds summarized AllocListStub entries.
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}
   362  
// JobEvaluationsResponse is used to return the evaluations for a job.
// Evaluations holds full Evaluation values.
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}
   368  
// SingleEvalResponse is used to return a single evaluation
// together with the query metadata.
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}
   374  
// EvalDequeueResponse is used to return from a dequeue.
// It carries the dequeued evaluation and a Token (presumably the token later
// supplied in EvalAckRequest; confirm against the broker).
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string
	QueryMeta
}
   381  
// PlanResponse is used to return from a PlanRequest.
// It carries the plan result plus the write metadata.
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}
   387  
// AllocListResponse is used for an allocation list request.
// Allocations holds summarized AllocListStub entries.
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}
   393  
// EvalListResponse is used for an evaluation list request.
// Evaluations holds full Evaluation values.
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}
   399  
// EvalAllocationsResponse is used to return the allocations for an evaluation.
// Allocations holds summarized AllocListStub entries.
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}
   405  
// Node status values. These are exactly the statuses accepted by
// ValidNodeStatus; of the three, only NodeStatusDown makes ShouldDrainNode
// and Node.TerminalStatus return true.
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)
   411  
   412  // ShouldDrainNode checks if a given node status should trigger an
   413  // evaluation. Some states don't require any further action.
   414  func ShouldDrainNode(status string) bool {
   415  	switch status {
   416  	case NodeStatusInit, NodeStatusReady:
   417  		return false
   418  	case NodeStatusDown:
   419  		return true
   420  	default:
   421  		panic(fmt.Sprintf("unhandled node status %s", status))
   422  	}
   423  }
   424  
   425  // ValidNodeStatus is used to check if a node status is valid
   426  func ValidNodeStatus(status string) bool {
   427  	switch status {
   428  	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
   429  		return true
   430  	default:
   431  		return false
   432  	}
   433  }
   434  
// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may be to provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node; see the NodeStatus* constants.
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
   493  
   494  // TerminalStatus returns if the current status is terminal and
   495  // will no longer transition.
   496  func (n *Node) TerminalStatus() bool {
   497  	switch n.Status {
   498  	case NodeStatusDown:
   499  		return true
   500  	default:
   501  		return false
   502  	}
   503  }
   504  
   505  // Stub returns a summarized version of the node
   506  func (n *Node) Stub() *NodeListStub {
   507  	return &NodeListStub{
   508  		ID:                n.ID,
   509  		Datacenter:        n.Datacenter,
   510  		Name:              n.Name,
   511  		NodeClass:         n.NodeClass,
   512  		Drain:             n.Drain,
   513  		Status:            n.Status,
   514  		StatusDescription: n.StatusDescription,
   515  		CreateIndex:       n.CreateIndex,
   516  		ModifyIndex:       n.ModifyIndex,
   517  	}
   518  }
   519  
// NodeListStub is used to return a subset of node information
// for the node list. It is produced by Node.Stub, which copies each of
// these fields from the full Node.
type NodeListStub struct {
	ID                string
	Datacenter        string
	Name              string
	NodeClass         string
	Drain             bool
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}
   533  
   534  // Resources is used to define the resources available
   535  // on a client
   536  type Resources struct {
   537  	CPU      int
   538  	MemoryMB int `mapstructure:"memory"`
   539  	DiskMB   int `mapstructure:"disk"`
   540  	IOPS     int
   541  	Networks []*NetworkResource
   542  }
   543  
   544  // Copy returns a deep copy of the resources
   545  func (r *Resources) Copy() *Resources {
   546  	newR := new(Resources)
   547  	*newR = *r
   548  	n := len(r.Networks)
   549  	newR.Networks = make([]*NetworkResource, n)
   550  	for i := 0; i < n; i++ {
   551  		newR.Networks[i] = r.Networks[i].Copy()
   552  	}
   553  	return newR
   554  }
   555  
   556  // NetIndex finds the matching net index using device name
   557  func (r *Resources) NetIndex(n *NetworkResource) int {
   558  	for idx, net := range r.Networks {
   559  		if net.Device == n.Device {
   560  			return idx
   561  		}
   562  	}
   563  	return -1
   564  }
   565  
   566  // Superset checks if one set of resources is a superset
   567  // of another. This ignores network resources, and the NetworkIndex
   568  // should be used for that.
   569  func (r *Resources) Superset(other *Resources) (bool, string) {
   570  	if r.CPU < other.CPU {
   571  		return false, "cpu exhausted"
   572  	}
   573  	if r.MemoryMB < other.MemoryMB {
   574  		return false, "memory exhausted"
   575  	}
   576  	if r.DiskMB < other.DiskMB {
   577  		return false, "disk exhausted"
   578  	}
   579  	if r.IOPS < other.IOPS {
   580  		return false, "iops exhausted"
   581  	}
   582  	return true, ""
   583  }
   584  
   585  // Add adds the resources of the delta to this, potentially
   586  // returning an error if not possible.
   587  func (r *Resources) Add(delta *Resources) error {
   588  	if delta == nil {
   589  		return nil
   590  	}
   591  	r.CPU += delta.CPU
   592  	r.MemoryMB += delta.MemoryMB
   593  	r.DiskMB += delta.DiskMB
   594  	r.IOPS += delta.IOPS
   595  
   596  	for _, n := range delta.Networks {
   597  		// Find the matching interface by IP or CIDR
   598  		idx := r.NetIndex(n)
   599  		if idx == -1 {
   600  			r.Networks = append(r.Networks, n.Copy())
   601  		} else {
   602  			r.Networks[idx].Add(n)
   603  		}
   604  	}
   605  	return nil
   606  }
   607  
   608  func (r *Resources) GoString() string {
   609  	return fmt.Sprintf("*%#v", *r)
   610  }
   611  
   612  // NetworkResource is used to represent available network
   613  // resources
   614  type NetworkResource struct {
   615  	Device        string   // Name of the device
   616  	CIDR          string   // CIDR block of addresses
   617  	IP            string   // IP address
   618  	MBits         int      // Throughput
   619  	ReservedPorts []int    `mapstructure:"reserved_ports"` // Reserved ports
   620  	DynamicPorts  []string `mapstructure:"dynamic_ports"`  // Dynamically assigned ports
   621  }
   622  
   623  // Copy returns a deep copy of the network resource
   624  func (n *NetworkResource) Copy() *NetworkResource {
   625  	newR := new(NetworkResource)
   626  	*newR = *n
   627  	if n.ReservedPorts != nil {
   628  		newR.ReservedPorts = make([]int, len(n.ReservedPorts))
   629  		copy(newR.ReservedPorts, n.ReservedPorts)
   630  	}
   631  	return newR
   632  }
   633  
   634  // Add adds the resources of the delta to this, potentially
   635  // returning an error if not possible.
   636  func (n *NetworkResource) Add(delta *NetworkResource) {
   637  	if len(delta.ReservedPorts) > 0 {
   638  		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
   639  	}
   640  	n.MBits += delta.MBits
   641  	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
   642  }
   643  
   644  func (n *NetworkResource) GoString() string {
   645  	return fmt.Sprintf("*%#v", *n)
   646  }
   647  
   648  // MapDynamicPorts returns a mapping of Label:PortNumber for dynamic ports
   649  // allocated on this NetworkResource. The ordering of Label:Port pairs is
   650  // random.
   651  //
   652  // Details:
   653  //
   654  // The jobspec lets us ask for two types of ports: Reserved ports and Dynamic
   655  // ports. Reserved ports are identified by the port number, while Dynamic ports
   656  // are identified by a Label.
   657  //
   658  // When we ask nomad to run a job it checks to see if the Reserved ports we
   659  // requested are available. If they are, it then tries to provision any Dynamic
   660  // ports that we have requested. When available ports are found to satisfy our
   661  // dynamic port requirements, they are APPENDED to the reserved ports list. In
   662  // effect, the reserved ports list serves double-duty. First it indicates the
   663  // ports we *want*, and then it indicates the ports we are *using*.
   664  //
   665  // After the the offer process is complete and the job is scheduled we want to
   666  // see which ports were made available to us. To see the dynamic ports that
   667  // were allocated to us we look at the last N ports in our reservation, where N
   668  // is how many dynamic ports we requested.
   669  //
   670  // MapDynamicPorts matches these port numbers with their labels and gives you
   671  // the port mapping.
   672  //
   673  // Also, be aware that this is intended to be called in the context of
   674  // task.Resources after an offer has been made. If you call it in some other
   675  // context the behavior is unspecified, including maybe crashing. So don't do that.
   676  func (n *NetworkResource) MapDynamicPorts() map[string]int {
   677  	ports := n.ReservedPorts[len(n.ReservedPorts)-len(n.DynamicPorts):]
   678  	mapping := make(map[string]int, len(n.DynamicPorts))
   679  
   680  	for idx, label := range n.DynamicPorts {
   681  		mapping[label] = ports[idx]
   682  	}
   683  
   684  	return mapping
   685  }
   686  
   687  // ListStaticPorts returns the list of Static ports allocated to this
   688  // NetworkResource. These are presumed to have known semantics so there is no
   689  // mapping information.
   690  func (n *NetworkResource) ListStaticPorts() []int {
   691  	return n.ReservedPorts[:len(n.ReservedPorts)-len(n.DynamicPorts)]
   692  }
   693  
const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
)
   701  
// Job status values.
const (
	JobStatusPending  = "pending"  // Pending means the job is waiting on scheduling
	JobStatusRunning  = "running"  // Running means the entire job is running
	JobStatusComplete = "complete" // Complete means there was a clean termination
	JobStatusDead     = "dead"     // Dead means there was abnormal termination
)
   708  
const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if
	// not specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// Ensure CoreJobPriority is higher than any user
	// specified job so that it gets priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2
)
   725  
// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling,
// however.
type Job struct {
	// Region is the Nomad region that handles scheduling this job
	Region string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project.
	// Validate rejects an ID that is empty or contains a space.
	ID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs. Validate requires it to lie within
	// [JobMinPriority, JobMaxPriority].
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool `mapstructure:"all_at_once"`

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// Update is used to control the update strategy
	Update UpdateStrategy

	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// Job status
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
   785  
   786  // Validate is used to sanity check a job input
   787  func (j *Job) Validate() error {
   788  	var mErr multierror.Error
   789  	if j.Region == "" {
   790  		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
   791  	}
   792  	if j.ID == "" {
   793  		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
   794  	} else if strings.Contains(j.ID, " ") {
   795  		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
   796  	}
   797  	if j.Name == "" {
   798  		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
   799  	}
   800  	if j.Type == "" {
   801  		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
   802  	}
   803  	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
   804  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
   805  	}
   806  	if len(j.Datacenters) == 0 {
   807  		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
   808  	}
   809  	if len(j.TaskGroups) == 0 {
   810  		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
   811  	}
   812  
   813  	// Check for duplicate task groups
   814  	taskGroups := make(map[string]int)
   815  	for idx, tg := range j.TaskGroups {
   816  		if tg.Name == "" {
   817  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
   818  		} else if existing, ok := taskGroups[tg.Name]; ok {
   819  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
   820  		} else {
   821  			taskGroups[tg.Name] = idx
   822  		}
   823  	}
   824  
   825  	// Validate the task group
   826  	for idx, tg := range j.TaskGroups {
   827  		if err := tg.Validate(); err != nil {
   828  			outer := fmt.Errorf("Task group %d validation failed: %s", idx+1, err)
   829  			mErr.Errors = append(mErr.Errors, outer)
   830  		}
   831  	}
   832  	return mErr.ErrorOrNil()
   833  }
   834  
   835  // LookupTaskGroup finds a task group by name
   836  func (j *Job) LookupTaskGroup(name string) *TaskGroup {
   837  	for _, tg := range j.TaskGroups {
   838  		if tg.Name == name {
   839  			return tg
   840  		}
   841  	}
   842  	return nil
   843  }
   844  
   845  // Stub is used to return a summary of the job
   846  func (j *Job) Stub() *JobListStub {
   847  	return &JobListStub{
   848  		ID:                j.ID,
   849  		Name:              j.Name,
   850  		Type:              j.Type,
   851  		Priority:          j.Priority,
   852  		Status:            j.Status,
   853  		StatusDescription: j.StatusDescription,
   854  		CreateIndex:       j.CreateIndex,
   855  		ModifyIndex:       j.ModifyIndex,
   856  	}
   857  }
   858  
// JobListStub is used to return a subset of job information
// for the job list. It is produced by Job.Stub, which copies each of
// these fields from the full Job.
type JobListStub struct {
	ID                string
	Name              string
	Type              string
	Priority          int
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}
   871  
   872  // UpdateStrategy is used to modify how updates are done
   873  type UpdateStrategy struct {
   874  	// Stagger is the amount of time between the updates
   875  	Stagger time.Duration
   876  
   877  	// MaxParallel is how many updates can be done in parallel
   878  	MaxParallel int `mapstructure:"max_parallel"`
   879  }
   880  
   881  // Rolling returns if a rolling strategy should be used
   882  func (u *UpdateStrategy) Rolling() bool {
   883  	return u.Stagger > 0 && u.MaxParallel > 0
   884  }
   885  
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled. Validate requires it to be positive.
	Count int

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string
}
   908  
   909  // Validate is used to sanity check a task group
   910  func (tg *TaskGroup) Validate() error {
   911  	var mErr multierror.Error
   912  	if tg.Name == "" {
   913  		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
   914  	}
   915  	if tg.Count <= 0 {
   916  		mErr.Errors = append(mErr.Errors, errors.New("Task group count must be positive"))
   917  	}
   918  	if len(tg.Tasks) == 0 {
   919  		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
   920  	}
   921  
   922  	// Check for duplicate tasks
   923  	tasks := make(map[string]int)
   924  	for idx, task := range tg.Tasks {
   925  		if task.Name == "" {
   926  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
   927  		} else if existing, ok := tasks[task.Name]; ok {
   928  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
   929  		} else {
   930  			tasks[task.Name] = idx
   931  		}
   932  	}
   933  
   934  	// Validate the tasks
   935  	for idx, task := range tg.Tasks {
   936  		if err := task.Validate(); err != nil {
   937  			outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err)
   938  			mErr.Errors = append(mErr.Errors, outer)
   939  		}
   940  	}
   941  	return mErr.ErrorOrNil()
   942  }
   943  
   944  // LookupTask finds a task by name
   945  func (tg *TaskGroup) LookupTask(name string) *Task {
   946  	for _, t := range tg.Tasks {
   947  		if t.Name == name {
   948  			return t
   949  		}
   950  	}
   951  	return nil
   952  }
   953  
   954  func (tg *TaskGroup) GoString() string {
   955  	return fmt.Sprintf("*%#v", *tg)
   956  }
   957  
// Task is typically a single process that is executed as part of a task
// group.
type Task struct {
	// Name of the task. Validate requires it to be non-empty.
	Name string

	// Driver is used to control which driver is used. Validate requires
	// it to be non-empty.
	Driver string

	// Config is provided to the driver to initialize
	Config map[string]string

	// Map of environment variables to be used by the driver
	Env map[string]string

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Resources is the resources needed by this task. Validate requires
	// it to be non-nil.
	Resources *Resources

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string
}
   983  
   984  func (t *Task) GoString() string {
   985  	return fmt.Sprintf("*%#v", *t)
   986  }
   987  
   988  // Validate is used to sanity check a task group
   989  func (t *Task) Validate() error {
   990  	var mErr multierror.Error
   991  	if t.Name == "" {
   992  		mErr.Errors = append(mErr.Errors, errors.New("Missing task name"))
   993  	}
   994  	if t.Driver == "" {
   995  		mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
   996  	}
   997  	if t.Resources == nil {
   998  		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
   999  	}
  1000  	return mErr.ErrorOrNil()
  1001  }
  1002  
// Constraint is used to restrict placement options in the case of
// a hard constraint, and used to prefer a placement in the case of
// a soft constraint. Its String method renders it as
// "LTarget Operand RTarget".
type Constraint struct {
	Hard    bool   // Hard or soft constraint
	LTarget string // Left-hand target
	RTarget string // Right-hand target
	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
	Weight  int    // Soft constraints can vary the weight
}
  1013  
  1014  func (c *Constraint) String() string {
  1015  	return fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
  1016  }
  1017  
// Values for Allocation.DesiredStatus. Allocation.TerminalStatus treats
// stop, evict and failed as terminal.
const (
	AllocDesiredStatusRun    = "run"    // Allocation should run
	AllocDesiredStatusStop   = "stop"   // Allocation should stop
	AllocDesiredStatusEvict  = "evict"  // Allocation should stop, and was evicted
	AllocDesiredStatusFailed = "failed" // Allocation failed to be done
)
  1024  
// Client-side allocation status values; presumably these are the values
// stored in Allocation.ClientStatus (confirm against the client code).
const (
	AllocClientStatusPending = "pending"
	AllocClientStatusRunning = "running"
	AllocClientStatusDead    = "dead"
	AllocClientStatusFailed  = "failed"
)
  1031  
// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// ID of the allocation (UUID)
	ID string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// Job is the parent job of the task group being allocated.
	// This is copied at allocation time to avoid issues if the job
	// definition is updated. JobID is the ID of that job.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// Resources is the total set of resources allocated as part
	// of this allocation of the task group.
	Resources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources.
	TaskResources map[string]*Resources

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// Desired Status of the allocation on the client
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	// about the desired status
	DesiredDescription string

	// Status of the allocation on the client
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	// about the client status
	ClientDescription string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1082  
  1083  // TerminalStatus returns if the desired status is terminal and
  1084  // will no longer transition. This is not based on the current client status.
  1085  func (a *Allocation) TerminalStatus() bool {
  1086  	switch a.DesiredStatus {
  1087  	case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed:
  1088  		return true
  1089  	default:
  1090  		return false
  1091  	}
  1092  }
  1093  
  1094  // Stub returns a list stub for the allocation
  1095  func (a *Allocation) Stub() *AllocListStub {
  1096  	return &AllocListStub{
  1097  		ID:                 a.ID,
  1098  		EvalID:             a.EvalID,
  1099  		Name:               a.Name,
  1100  		NodeID:             a.NodeID,
  1101  		JobID:              a.JobID,
  1102  		TaskGroup:          a.TaskGroup,
  1103  		DesiredStatus:      a.DesiredStatus,
  1104  		DesiredDescription: a.DesiredDescription,
  1105  		ClientStatus:       a.ClientStatus,
  1106  		ClientDescription:  a.ClientDescription,
  1107  		CreateIndex:        a.CreateIndex,
  1108  		ModifyIndex:        a.ModifyIndex,
  1109  	}
  1110  }
  1111  
// AllocListStub is used to return a subset of alloc information. It is
// populated by Allocation.Stub, which copies each field from the
// allocation field of the same name.
type AllocListStub struct {
	ID                 string
	EvalID             string
	Name               string
	NodeID             string
	JobID              string
	TaskGroup          string
	DesiredStatus      string
	DesiredDescription string
	ClientStatus       string
	ClientDescription  string
	CreateIndex        uint64
	ModifyIndex        uint64
}
  1127  
// AllocMetric is used to track various metrics while attempting
// to make an allocation. These are used to debug a job, or to better
// understand the pressure within the system. The map fields are created
// lazily by the recording methods, so they may be nil when nothing has
// been recorded.
type AllocMetric struct {
	// NodesEvaluated is the number of nodes that were evaluated
	NodesEvaluated int

	// NodesFiltered is the number of nodes filtered due to
	// a hard constraint
	NodesFiltered int

	// ClassFiltered is the number of nodes filtered by class,
	// keyed by node class
	ClassFiltered map[string]int

	// ConstraintFiltered is the number of failures caused by constraint,
	// keyed by the constraint string passed to FilterNode
	ConstraintFiltered map[string]int

	// NodesExhausted is the number of nodes skipped due to being
	// exhausted of at least one resource
	NodesExhausted int

	// ClassExhausted is the number of nodes exhausted by class,
	// keyed by node class
	ClassExhausted map[string]int

	// DimensionExhausted provides the count by dimension or reason
	DimensionExhausted map[string]int

	// Scores is the scores of the final few nodes remaining
	// for placement. The top score is typically selected.
	// ScoreNode keys each entry as "<node ID>.<name>".
	Scores map[string]float64

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}
  1169  
  1170  func (a *AllocMetric) EvaluateNode() {
  1171  	a.NodesEvaluated += 1
  1172  }
  1173  
  1174  func (a *AllocMetric) FilterNode(node *Node, constraint string) {
  1175  	a.NodesFiltered += 1
  1176  	if node != nil && node.NodeClass != "" {
  1177  		if a.ClassFiltered == nil {
  1178  			a.ClassFiltered = make(map[string]int)
  1179  		}
  1180  		a.ClassFiltered[node.NodeClass] += 1
  1181  	}
  1182  	if constraint != "" {
  1183  		if a.ConstraintFiltered == nil {
  1184  			a.ConstraintFiltered = make(map[string]int)
  1185  		}
  1186  		a.ConstraintFiltered[constraint] += 1
  1187  	}
  1188  }
  1189  
  1190  func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
  1191  	a.NodesExhausted += 1
  1192  	if node != nil && node.NodeClass != "" {
  1193  		if a.ClassExhausted == nil {
  1194  			a.ClassExhausted = make(map[string]int)
  1195  		}
  1196  		a.ClassExhausted[node.NodeClass] += 1
  1197  	}
  1198  	if dimension != "" {
  1199  		if a.DimensionExhausted == nil {
  1200  			a.DimensionExhausted = make(map[string]int)
  1201  		}
  1202  		a.DimensionExhausted[dimension] += 1
  1203  	}
  1204  }
  1205  
  1206  func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
  1207  	if a.Scores == nil {
  1208  		a.Scores = make(map[string]float64)
  1209  	}
  1210  	key := fmt.Sprintf("%s.%s", node.ID, name)
  1211  	a.Scores[key] = score
  1212  }
  1213  
// Values for Evaluation.Status. Evaluation.TerminalStatus treats complete
// and failed as terminal, and Evaluation.ShouldEnqueue only enqueues
// pending evaluations.
const (
	EvalStatusPending  = "pending"
	EvalStatusComplete = "complete"
	EvalStatusFailed   = "failed"
)
  1219  
// Values for Evaluation.TriggeredBy, describing why an evaluation was
// created. NextRollingEval uses EvalTriggerRollingUpdate for the follow-up
// evaluations it builds.
const (
	EvalTriggerJobRegister   = "job-register"
	EvalTriggerJobDeregister = "job-deregister"
	EvalTriggerNodeUpdate    = "node-update"
	EvalTriggerScheduled     = "scheduled"
	EvalTriggerRollingUpdate = "rolling-update"
)
  1227  
// Identifiers of the internal garbage collection jobs described below.
// NOTE(review): presumably used as the job ID of core evaluations; confirm
// against the core scheduler.
const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"
)
  1240  
// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// Status of the evaluation
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade.
	Wait time.Duration

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	PreviousEval string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1300  
  1301  // TerminalStatus returns if the current status is terminal and
  1302  // will no longer transition.
  1303  func (e *Evaluation) TerminalStatus() bool {
  1304  	switch e.Status {
  1305  	case EvalStatusComplete, EvalStatusFailed:
  1306  		return true
  1307  	default:
  1308  		return false
  1309  	}
  1310  }
  1311  
  1312  func (e *Evaluation) GoString() string {
  1313  	return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID)
  1314  }
  1315  
  1316  func (e *Evaluation) Copy() *Evaluation {
  1317  	ne := new(Evaluation)
  1318  	*ne = *e
  1319  	return ne
  1320  }
  1321  
  1322  // ShouldEnqueue checks if a given evaluation should be enqueued
  1323  func (e *Evaluation) ShouldEnqueue() bool {
  1324  	switch e.Status {
  1325  	case EvalStatusPending:
  1326  		return true
  1327  	case EvalStatusComplete, EvalStatusFailed:
  1328  		return false
  1329  	default:
  1330  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  1331  	}
  1332  }
  1333  
  1334  // MakePlan is used to make a plan from the given evaluation
  1335  // for a given Job
  1336  func (e *Evaluation) MakePlan(j *Job) *Plan {
  1337  	p := &Plan{
  1338  		EvalID:         e.ID,
  1339  		Priority:       e.Priority,
  1340  		NodeUpdate:     make(map[string][]*Allocation),
  1341  		NodeAllocation: make(map[string][]*Allocation),
  1342  	}
  1343  	if j != nil {
  1344  		p.AllAtOnce = j.AllAtOnce
  1345  	}
  1346  	return p
  1347  }
  1348  
  1349  // NextRollingEval creates an evaluation to followup this eval for rolling updates
  1350  func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
  1351  	return &Evaluation{
  1352  		ID:             GenerateUUID(),
  1353  		Priority:       e.Priority,
  1354  		Type:           e.Type,
  1355  		TriggeredBy:    EvalTriggerRollingUpdate,
  1356  		JobID:          e.JobID,
  1357  		JobModifyIndex: e.JobModifyIndex,
  1358  		Status:         EvalStatusPending,
  1359  		Wait:           wait,
  1360  		PreviousEval:   e.ID,
  1361  	}
  1362  }
  1363  
// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// NodeUpdate contains all the allocations for each node, keyed by
	// node ID. For each node, this is a list of the allocations to update
	// to either stop or evict.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node, keyed by
	// node ID. The evicts must be considered prior to the allocations.
	NodeAllocation map[string][]*Allocation

	// FailedAllocs are allocations that could not be made,
	// but are persisted so that the user can use the feedback
	// to determine the cause.
	FailedAllocs []*Allocation
}
  1400  
  1401  func (p *Plan) AppendUpdate(alloc *Allocation, status, desc string) {
  1402  	newAlloc := new(Allocation)
  1403  	*newAlloc = *alloc
  1404  	newAlloc.DesiredStatus = status
  1405  	newAlloc.DesiredDescription = desc
  1406  	node := alloc.NodeID
  1407  	existing := p.NodeUpdate[node]
  1408  	p.NodeUpdate[node] = append(existing, newAlloc)
  1409  }
  1410  
  1411  func (p *Plan) PopUpdate(alloc *Allocation) {
  1412  	existing := p.NodeUpdate[alloc.NodeID]
  1413  	n := len(existing)
  1414  	if n > 0 && existing[n-1].ID == alloc.ID {
  1415  		existing = existing[:n-1]
  1416  		if len(existing) > 0 {
  1417  			p.NodeUpdate[alloc.NodeID] = existing
  1418  		} else {
  1419  			delete(p.NodeUpdate, alloc.NodeID)
  1420  		}
  1421  	}
  1422  }
  1423  
  1424  func (p *Plan) AppendAlloc(alloc *Allocation) {
  1425  	node := alloc.NodeID
  1426  	existing := p.NodeAllocation[node]
  1427  	p.NodeAllocation[node] = append(existing, alloc)
  1428  }
  1429  
  1430  func (p *Plan) AppendFailed(alloc *Allocation) {
  1431  	p.FailedAllocs = append(p.FailedAllocs, alloc)
  1432  }
  1433  
  1434  // IsNoOp checks if this plan would do nothing
  1435  func (p *Plan) IsNoOp() bool {
  1436  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
  1437  }
  1438  
// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	// FullCommit compares it, node by node, against the plan's
	// NodeAllocation.
	NodeAllocation map[string][]*Allocation

	// FailedAllocs are allocations that could not be made,
	// but are persisted so that the user can use the feedback
	// to determine the cause.
	FailedAllocs []*Allocation

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// over committed) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}
  1462  
  1463  // IsNoOp checks if this plan result would do nothing
  1464  func (p *PlanResult) IsNoOp() bool {
  1465  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
  1466  }
  1467  
  1468  // FullCommit is used to check if all the allocations in a plan
  1469  // were committed as part of the result. Returns if there was
  1470  // a match, and the number of expected and actual allocations.
  1471  func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
  1472  	expected := 0
  1473  	actual := 0
  1474  	for name, allocList := range plan.NodeAllocation {
  1475  		didAlloc, _ := p.NodeAllocation[name]
  1476  		expected += len(allocList)
  1477  		actual += len(didAlloc)
  1478  	}
  1479  	return actual == expected, expected, actual
  1480  }
  1481  
// msgpackHandle is a shared handle for encoding/decoding of structs. It
// uses the codec package's default MsgPack settings and is passed to every
// encoder and decoder created by Encode and Decode.
var msgpackHandle = &codec.MsgpackHandle{}
  1484  
  1485  // Decode is used to decode a MsgPack encoded object
  1486  func Decode(buf []byte, out interface{}) error {
  1487  	return codec.NewDecoder(bytes.NewReader(buf), msgpackHandle).Decode(out)
  1488  }
  1489  
  1490  // Encode is used to encode a MsgPack object with type prefix
  1491  func Encode(t MessageType, msg interface{}) ([]byte, error) {
  1492  	var buf bytes.Buffer
  1493  	buf.WriteByte(uint8(t))
  1494  	err := codec.NewEncoder(&buf, msgpackHandle).Encode(msg)
  1495  	return buf.Bytes(), err
  1496  }