github.com/huiliang/nomad@v0.2.1-0.20151124023127-7a8b664699ff/nomad/structs/structs.go (about)

     1  package structs
     2  
     3  import (
     4  	"bytes"
     5  	"errors"
     6  	"fmt"
     7  	"reflect"
     8  	"regexp"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/hashicorp/go-msgpack/codec"
    13  	"github.com/hashicorp/go-multierror"
    14  	"github.com/hashicorp/go-version"
    15  )
    16  
    17  var (
    18  	ErrNoLeader                    = fmt.Errorf("No cluster leader")
    19  	ErrNoRegionPath                = fmt.Errorf("No path to region")
    20  	defaultServiceJobRestartPolicy = RestartPolicy{
    21  		Delay:    15 * time.Second,
    22  		Attempts: 2,
    23  		Interval: 1 * time.Minute,
    24  	}
    25  	defaultBatchJobRestartPolicy = RestartPolicy{
    26  		Delay:    15 * time.Second,
    27  		Attempts: 15,
    28  	}
    29  )
    30  
// MessageType is a one-byte tag identifying the kind of request a message
// carries.
type MessageType uint8

// The message types are numbered sequentially from zero via iota, so the
// order of this list is significant: inserting or reordering entries
// changes the numeric value of every later type.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
)

const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	// Its value (128) is the high bit of the uint8 MessageType.
	IgnoreUnknownTypeFlag MessageType = 128
)
    54  
// RPCInfo describes the properties common to every RPC request: which
// region it targets, whether it is a read, and whether a stale read is
// acceptable. In this file it is implemented by QueryOptions and
// WriteRequest.
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
}
    61  
// QueryOptions is used to specify various flags for read queries.
// It implements RPCInfo.
type QueryOptions struct {
	// The target region for this query
	Region string

	// If set, wait until query exceeds given index. Must be provided
	// with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
	AllowStale bool
}

// RequestRegion returns the region this query targets.
func (q QueryOptions) RequestRegion() string {
	return q.Region
}

// IsRead always returns true: QueryOptions only applies to reads.
func (q QueryOptions) IsRead() bool {
	return true
}

// AllowStaleRead reports whether the caller set AllowStale.
func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}
    91  
// WriteRequest carries the options common to all write requests.
// It implements RPCInfo.
type WriteRequest struct {
	// The target region for this write
	Region string
}

// RequestRegion returns the target region for this request.
func (w WriteRequest) RequestRegion() string {
	return w.Region
}

// IsRead always returns false: WriteRequest only applies to writes.
func (w WriteRequest) IsRead() bool {
	return false
}

// AllowStaleRead always returns false: a write is never a stale read.
func (w WriteRequest) AllowStaleRead() bool {
	return false
}
   110  
// QueryMeta allows a query response to include potentially
// useful metadata about a query. It is embedded in the response
// types declared in this file.
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}
   125  
// WriteMeta allows a write response to include potentially
// useful metadata about the write.
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}
   132  
// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
// The embedded WriteRequest carries the target region and marks
// the request as a write.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}
   139  
// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
// The embedded WriteRequest carries the target region and marks
// the request as a write.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}
   146  
// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node. ValidNodeStatus lists the status
// strings this package recognises.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	WriteRequest
}
   154  
// NodeUpdateDrainRequest is used for updating the drain status
// of a node. The embedded WriteRequest carries the target region.
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}
   161  
// NodeEvaluateRequest is used to re-evaluate the node.
// The embedded WriteRequest carries the target region.
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}
   167  
// NodeSpecificRequest is used when we just need to specify a target node.
// The embedded QueryOptions carries the target region and the read options
// (MinQueryIndex, MaxQueryTime, AllowStale).
type NodeSpecificRequest struct {
	NodeID string
	QueryOptions
}
   173  
// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
// The embedded WriteRequest carries the target region and marks
// the request as a write.
type JobRegisterRequest struct {
	Job *Job
	WriteRequest
}
   180  
// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
// The embedded WriteRequest carries the target region and marks
// the request as a write.
type JobDeregisterRequest struct {
	JobID string
	WriteRequest
}
   187  
// JobEvaluateRequest is used when we just need to re-evaluate a target job.
// The embedded WriteRequest carries the target region.
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}
   193  
// JobSpecificRequest is used when we just need to specify a target job.
// The embedded QueryOptions carries the target region and the read options.
type JobSpecificRequest struct {
	JobID string
	QueryOptions
}
   199  
// JobListRequest is used to parameterize a job list request. It has no
// fields of its own beyond the embedded QueryOptions.
type JobListRequest struct {
	QueryOptions
}
   204  
// NodeListRequest is used to parameterize a node list request. It has no
// fields of its own beyond the embedded QueryOptions.
type NodeListRequest struct {
	QueryOptions
}
   209  
// EvalUpdateRequest is used for upserting evaluations. A single request
// can carry several evaluations. The embedded WriteRequest carries the
// target region.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}
   216  
// EvalDeleteRequest is used for deleting evaluations. Both Evals and
// Allocs are string slices — presumably evaluation and allocation IDs;
// TODO confirm against the consumer of this request.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}
   223  
// EvalSpecificRequest is used when we just need to specify a target evaluation.
// The embedded QueryOptions carries the target region and the read options.
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}
   229  
// EvalAckRequest is used to Ack/Nack a specific evaluation.
// The embedded WriteRequest carries the target region.
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}
   236  
// EvalDequeueRequest is used when we want to dequeue an evaluation.
// It embeds WriteRequest, so it is treated as a write (IsRead is false).
type EvalDequeueRequest struct {
	Schedulers []string
	Timeout    time.Duration
	WriteRequest
}
   243  
// EvalListRequest is used to list the evaluations. It has no fields of
// its own beyond the embedded QueryOptions.
type EvalListRequest struct {
	QueryOptions
}
   248  
// PlanRequest is used to submit an allocation plan to the leader.
// The embedded WriteRequest carries the target region.
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}
   254  
// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction.
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation
	WriteRequest
}
   263  
// AllocListRequest is used to request a list of allocations. It has no
// fields of its own beyond the embedded QueryOptions.
type AllocListRequest struct {
	QueryOptions
}
   268  
// AllocSpecificRequest is used to query a specific allocation.
// The embedded QueryOptions carries the target region and the read options.
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}
   274  
// GenericRequest is used for a request where no specific
// information is needed beyond the embedded QueryOptions.
type GenericRequest struct {
	QueryOptions
}
   280  
// GenericResponse is used to respond to a request where no
// specific response information is needed. Only the embedded
// WriteMeta (the index of the write) is returned.
type GenericResponse struct {
	WriteMeta
}
   286  
// Names of the individual version numbers.
// NOTE(review): presumably these are the keys of VersionResponse.Versions —
// confirm against the Status.Version endpoint.
const (
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)
   292  
// VersionResponse is used for the Status.Version response.
// Versions maps a version name to its integer value.
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}
   299  
// JobRegisterResponse is used to respond to a job registration.
// The embedded QueryMeta carries Index, LastContact and KnownLeader.
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}
   307  
// JobDeregisterResponse is used to respond to a job deregistration.
// It has the same fields as JobRegisterResponse.
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}
   315  
// NodeUpdateResponse is used to respond to a node update.
// The embedded QueryMeta carries Index, LastContact and KnownLeader.
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}
   324  
// NodeDrainUpdateResponse is used to respond to a node drain update.
// It has the fields of NodeUpdateResponse except for HeartbeatTTL.
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}
   332  
// NodeAllocsResponse is used to return allocs for a single node.
// Unlike the list responses, it carries full Allocation values rather
// than AllocListStub summaries.
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}
   338  
// SingleNodeResponse is used to return a single node,
// together with the embedded QueryMeta for the read.
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}
   344  
// NodeListResponse is used for a node list request. Nodes are returned
// as NodeListStub summaries rather than full Node values.
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}
   350  
// SingleJobResponse is used to return a single job,
// together with the embedded QueryMeta for the read.
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}
   356  
// JobListResponse is used for a job list request. Jobs are returned
// as JobListStub summaries rather than full Job values.
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}
   362  
// SingleAllocResponse is used to return a single allocation,
// together with the embedded QueryMeta for the read.
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}
   368  
// JobAllocationsResponse is used to return the allocations for a job,
// as AllocListStub summaries.
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}
   374  
// JobEvaluationsResponse is used to return the evaluations for a job,
// together with the embedded QueryMeta for the read.
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}
   380  
// SingleEvalResponse is used to return a single evaluation,
// together with the embedded QueryMeta for the read.
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}
   386  
// EvalDequeueResponse is used to return from a dequeue.
// NOTE(review): Token is presumably the value later supplied in
// EvalAckRequest.Token — confirm against the eval broker.
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string
	QueryMeta
}
   393  
// PlanResponse is used to return from a PlanRequest.
// The embedded WriteMeta carries the index of the write.
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}
   399  
// AllocListResponse is used for an allocation list request. Allocations
// are returned as AllocListStub summaries.
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}
   405  
// EvalListResponse is used for an evaluation list request.
// Evaluations are returned in full, not as stubs.
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}
   411  
// EvalAllocationsResponse is used to return the allocations for an
// evaluation, as AllocListStub summaries.
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}
   417  
// Node status values. These are the only statuses accepted by
// ValidNodeStatus; of them, only NodeStatusDown is terminal
// (see Node.TerminalStatus) and makes ShouldDrainNode return true.
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)
   423  
   424  // ShouldDrainNode checks if a given node status should trigger an
   425  // evaluation. Some states don't require any further action.
   426  func ShouldDrainNode(status string) bool {
   427  	switch status {
   428  	case NodeStatusInit, NodeStatusReady:
   429  		return false
   430  	case NodeStatusDown:
   431  		return true
   432  	default:
   433  		panic(fmt.Sprintf("unhandled node status %s", status))
   434  	}
   435  }
   436  
   437  // ValidNodeStatus is used to check if a node status is valid
   438  func ValidNodeStatus(status string) bool {
   439  	switch status {
   440  	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
   441  		return true
   442  	default:
   443  		return false
   444  	}
   445  }
   446  
// Node is a representation of a schedulable client node.
// Node.Stub produces the summarized NodeListStub form of it.
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may be to provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node; one of the NodeStatus* constants.
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
   505  
   506  // TerminalStatus returns if the current status is terminal and
   507  // will no longer transition.
   508  func (n *Node) TerminalStatus() bool {
   509  	switch n.Status {
   510  	case NodeStatusDown:
   511  		return true
   512  	default:
   513  		return false
   514  	}
   515  }
   516  
   517  // Stub returns a summarized version of the node
   518  func (n *Node) Stub() *NodeListStub {
   519  	return &NodeListStub{
   520  		ID:                n.ID,
   521  		Datacenter:        n.Datacenter,
   522  		Name:              n.Name,
   523  		NodeClass:         n.NodeClass,
   524  		Drain:             n.Drain,
   525  		Status:            n.Status,
   526  		StatusDescription: n.StatusDescription,
   527  		CreateIndex:       n.CreateIndex,
   528  		ModifyIndex:       n.ModifyIndex,
   529  	}
   530  }
   531  
// NodeListStub is used to return a subset of node information
// for the node list. It is produced by Node.Stub.
type NodeListStub struct {
	ID                string
	Datacenter        string
	Name              string
	NodeClass         string
	Drain             bool
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}
   545  
   546  // Resources is used to define the resources available
   547  // on a client
   548  type Resources struct {
   549  	CPU      int
   550  	MemoryMB int `mapstructure:"memory"`
   551  	DiskMB   int `mapstructure:"disk"`
   552  	IOPS     int
   553  	Networks []*NetworkResource
   554  }
   555  
   556  // Copy returns a deep copy of the resources
   557  func (r *Resources) Copy() *Resources {
   558  	newR := new(Resources)
   559  	*newR = *r
   560  	n := len(r.Networks)
   561  	newR.Networks = make([]*NetworkResource, n)
   562  	for i := 0; i < n; i++ {
   563  		newR.Networks[i] = r.Networks[i].Copy()
   564  	}
   565  	return newR
   566  }
   567  
   568  // NetIndex finds the matching net index using device name
   569  func (r *Resources) NetIndex(n *NetworkResource) int {
   570  	for idx, net := range r.Networks {
   571  		if net.Device == n.Device {
   572  			return idx
   573  		}
   574  	}
   575  	return -1
   576  }
   577  
   578  // Superset checks if one set of resources is a superset
   579  // of another. This ignores network resources, and the NetworkIndex
   580  // should be used for that.
   581  func (r *Resources) Superset(other *Resources) (bool, string) {
   582  	if r.CPU < other.CPU {
   583  		return false, "cpu exhausted"
   584  	}
   585  	if r.MemoryMB < other.MemoryMB {
   586  		return false, "memory exhausted"
   587  	}
   588  	if r.DiskMB < other.DiskMB {
   589  		return false, "disk exhausted"
   590  	}
   591  	if r.IOPS < other.IOPS {
   592  		return false, "iops exhausted"
   593  	}
   594  	return true, ""
   595  }
   596  
   597  // Add adds the resources of the delta to this, potentially
   598  // returning an error if not possible.
   599  func (r *Resources) Add(delta *Resources) error {
   600  	if delta == nil {
   601  		return nil
   602  	}
   603  	r.CPU += delta.CPU
   604  	r.MemoryMB += delta.MemoryMB
   605  	r.DiskMB += delta.DiskMB
   606  	r.IOPS += delta.IOPS
   607  
   608  	for _, n := range delta.Networks {
   609  		// Find the matching interface by IP or CIDR
   610  		idx := r.NetIndex(n)
   611  		if idx == -1 {
   612  			r.Networks = append(r.Networks, n.Copy())
   613  		} else {
   614  			r.Networks[idx].Add(n)
   615  		}
   616  	}
   617  	return nil
   618  }
   619  
   620  func (r *Resources) GoString() string {
   621  	return fmt.Sprintf("*%#v", *r)
   622  }
   623  
   624  type Port struct {
   625  	Label string
   626  	Value int `mapstructure:"static"`
   627  }
   628  
   629  // NetworkResource is used to represent available network
   630  // resources
   631  type NetworkResource struct {
   632  	Device        string // Name of the device
   633  	CIDR          string // CIDR block of addresses
   634  	IP            string // IP address
   635  	MBits         int    // Throughput
   636  	ReservedPorts []Port // Reserved ports
   637  	DynamicPorts  []Port // Dynamically assigned ports
   638  }
   639  
   640  // Copy returns a deep copy of the network resource
   641  func (n *NetworkResource) Copy() *NetworkResource {
   642  	newR := new(NetworkResource)
   643  	*newR = *n
   644  	if n.ReservedPorts != nil {
   645  		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
   646  		copy(newR.ReservedPorts, n.ReservedPorts)
   647  	}
   648  	if n.DynamicPorts != nil {
   649  		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
   650  		copy(newR.DynamicPorts, n.DynamicPorts)
   651  	}
   652  	return newR
   653  }
   654  
   655  // Add adds the resources of the delta to this, potentially
   656  // returning an error if not possible.
   657  func (n *NetworkResource) Add(delta *NetworkResource) {
   658  	if len(delta.ReservedPorts) > 0 {
   659  		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
   660  	}
   661  	n.MBits += delta.MBits
   662  	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
   663  }
   664  
   665  func (n *NetworkResource) GoString() string {
   666  	return fmt.Sprintf("*%#v", *n)
   667  }
   668  
   669  func (n *NetworkResource) MapLabelToValues(port_map map[string]int) map[string]int {
   670  	labelValues := make(map[string]int)
   671  	ports := append(n.ReservedPorts, n.DynamicPorts...)
   672  	for _, port := range ports {
   673  		if mapping, ok := port_map[port.Label]; ok {
   674  			labelValues[port.Label] = mapping
   675  		} else {
   676  			labelValues[port.Label] = port.Value
   677  		}
   678  	}
   679  	return labelValues
   680  }
   681  
const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)
   690  
// Job status values.
const (
	JobStatusPending  = "pending"  // Pending means the job is waiting on scheduling
	JobStatusRunning  = "running"  // Running means the entire job is running
	JobStatusComplete = "complete" // Complete means there was a clean termination
	JobStatusDead     = "dead"     // Dead means there was abnormal termination
)
   697  
const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if
	// not specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// Ensure CoreJobPriority is higher than any user
	// specified job so that it gets priority. This is important
	// for the system to remain healthy. Job.Validate rejects
	// priorities above JobMaxPriority, so a job carrying this
	// value does not pass that check.
	CoreJobPriority = JobMaxPriority * 2
)
   714  
// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling
// however.
//
// Job.Validate lists the constraints a well-formed job must satisfy.
type Job struct {
	// Region is the Nomad region that handles scheduling this job
	Region string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project.
	// Validate rejects IDs that contain a space.
	ID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs. Validate requires it to lie within
	// [JobMinPriority, JobMaxPriority].
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool `mapstructure:"all_at_once"`

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// Update is used to control the update strategy
	Update UpdateStrategy

	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// Job status
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
   774  
   775  // Validate is used to sanity check a job input
   776  func (j *Job) Validate() error {
   777  	var mErr multierror.Error
   778  	if j.Region == "" {
   779  		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
   780  	}
   781  	if j.ID == "" {
   782  		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
   783  	} else if strings.Contains(j.ID, " ") {
   784  		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
   785  	}
   786  	if j.Name == "" {
   787  		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
   788  	}
   789  	if j.Type == "" {
   790  		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
   791  	}
   792  	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
   793  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
   794  	}
   795  	if len(j.Datacenters) == 0 {
   796  		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
   797  	}
   798  	if len(j.TaskGroups) == 0 {
   799  		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
   800  	}
   801  	for idx, constr := range j.Constraints {
   802  		if err := constr.Validate(); err != nil {
   803  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
   804  			mErr.Errors = append(mErr.Errors, outer)
   805  		}
   806  	}
   807  
   808  	// Check for duplicate task groups
   809  	taskGroups := make(map[string]int)
   810  	for idx, tg := range j.TaskGroups {
   811  		if tg.Name == "" {
   812  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
   813  		} else if existing, ok := taskGroups[tg.Name]; ok {
   814  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
   815  		} else {
   816  			taskGroups[tg.Name] = idx
   817  		}
   818  
   819  		if j.Type == "system" && tg.Count != 1 {
   820  			mErr.Errors = append(mErr.Errors,
   821  				fmt.Errorf("Job task group %d has count %d. Only count of 1 is supported with system scheduler",
   822  					idx+1, tg.Count))
   823  		}
   824  	}
   825  
   826  	// Validate the task group
   827  	for idx, tg := range j.TaskGroups {
   828  		if err := tg.Validate(); err != nil {
   829  			outer := fmt.Errorf("Task group %d validation failed: %s", idx+1, err)
   830  			mErr.Errors = append(mErr.Errors, outer)
   831  		}
   832  	}
   833  	return mErr.ErrorOrNil()
   834  }
   835  
   836  // LookupTaskGroup finds a task group by name
   837  func (j *Job) LookupTaskGroup(name string) *TaskGroup {
   838  	for _, tg := range j.TaskGroups {
   839  		if tg.Name == name {
   840  			return tg
   841  		}
   842  	}
   843  	return nil
   844  }
   845  
   846  // Stub is used to return a summary of the job
   847  func (j *Job) Stub() *JobListStub {
   848  	return &JobListStub{
   849  		ID:                j.ID,
   850  		Name:              j.Name,
   851  		Type:              j.Type,
   852  		Priority:          j.Priority,
   853  		Status:            j.Status,
   854  		StatusDescription: j.StatusDescription,
   855  		CreateIndex:       j.CreateIndex,
   856  		ModifyIndex:       j.ModifyIndex,
   857  	}
   858  }
   859  
// JobListStub is used to return a subset of job information
// for the job list. It is produced by Job.Stub.
type JobListStub struct {
	ID                string
	Name              string
	Type              string
	Priority          int
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}
   872  
// UpdateStrategy is used to modify how updates are done
type UpdateStrategy struct {
	// Stagger is the amount of time between the updates
	Stagger time.Duration

	// MaxParallel is how many updates can be done in parallel
	MaxParallel int `mapstructure:"max_parallel"`
}

// Rolling returns if a rolling strategy should be used, which is the
// case only when both Stagger and MaxParallel are positive. The zero
// value of UpdateStrategy is therefore not rolling.
func (u *UpdateStrategy) Rolling() bool {
	return u.Stagger > 0 && u.MaxParallel > 0
}
   886  
   887  // RestartPolicy influences how Nomad restarts Tasks when they
   888  // crash or fail.
   889  type RestartPolicy struct {
   890  	Attempts int
   891  	Interval time.Duration
   892  	Delay    time.Duration
   893  }
   894  
   895  func (r *RestartPolicy) Validate() error {
   896  	if r.Interval == 0 {
   897  		return nil
   898  	}
   899  	if time.Duration(r.Attempts)*r.Delay > r.Interval {
   900  		return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)
   901  	}
   902  	return nil
   903  }
   904  
   905  func NewRestartPolicy(jobType string) *RestartPolicy {
   906  	switch jobType {
   907  	case JobTypeService, JobTypeSystem:
   908  		rp := defaultServiceJobRestartPolicy
   909  		return &rp
   910  	case JobTypeBatch:
   911  		rp := defaultBatchJobRestartPolicy
   912  		return &rp
   913  	}
   914  	return nil
   915  }
   916  
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled. Validate requires it to be positive.
	Count int

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// RestartPolicy of a TaskGroup. Validate reports an error when
	// it is nil.
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string
}
   942  
   943  // Validate is used to sanity check a task group
   944  func (tg *TaskGroup) Validate() error {
   945  	var mErr multierror.Error
   946  	if tg.Name == "" {
   947  		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
   948  	}
   949  	if tg.Count <= 0 {
   950  		mErr.Errors = append(mErr.Errors, errors.New("Task group count must be positive"))
   951  	}
   952  	if len(tg.Tasks) == 0 {
   953  		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
   954  	}
   955  	for idx, constr := range tg.Constraints {
   956  		if err := constr.Validate(); err != nil {
   957  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
   958  			mErr.Errors = append(mErr.Errors, outer)
   959  		}
   960  	}
   961  
   962  	if tg.RestartPolicy != nil {
   963  		if err := tg.RestartPolicy.Validate(); err != nil {
   964  			mErr.Errors = append(mErr.Errors, err)
   965  		}
   966  	} else {
   967  		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
   968  	}
   969  
   970  	// Check for duplicate tasks
   971  	tasks := make(map[string]int)
   972  	for idx, task := range tg.Tasks {
   973  		if task.Name == "" {
   974  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
   975  		} else if existing, ok := tasks[task.Name]; ok {
   976  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
   977  		} else {
   978  			tasks[task.Name] = idx
   979  		}
   980  	}
   981  
   982  	// Validate the tasks
   983  	for idx, task := range tg.Tasks {
   984  		if err := task.Validate(); err != nil {
   985  			outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err)
   986  			mErr.Errors = append(mErr.Errors, outer)
   987  		}
   988  	}
   989  	return mErr.ErrorOrNil()
   990  }
   991  
   992  // LookupTask finds a task by name
   993  func (tg *TaskGroup) LookupTask(name string) *Task {
   994  	for _, t := range tg.Tasks {
   995  		if t.Name == name {
   996  			return t
   997  		}
   998  	}
   999  	return nil
  1000  }
  1001  
  1002  func (tg *TaskGroup) GoString() string {
  1003  	return fmt.Sprintf("*%#v", *tg)
  1004  }
  1005  
// Types of health check that a ServiceCheck can name in its Type field.
// ServiceCheck.Validate only accepts the tcp and http types.
const (
	ServiceCheckHTTP   = "http"
	ServiceCheckTCP    = "tcp"
	ServiceCheckDocker = "docker"
	ServiceCheckScript = "script"
)
  1012  
// The ServiceCheck data model represents the Consul health check that
// Nomad registers for a Task.
type ServiceCheck struct {
	Id       string        // Id of the check, must be unique and it is autogenerated
	Name     string        // Name of the check, defaults to id
	Type     string        // Type of the check - tcp, http, docker and script
	Script   string        // Script to invoke for script check
	Path     string        // Path of the health check url for http type check
	Protocol string        // Protocol to use if check is http, defaults to http
	Interval time.Duration // Interval of the check
	Timeout  time.Duration // Timeout of the response from the check before consul fails the check
}
  1025  
  1026  func (sc *ServiceCheck) Validate() error {
  1027  	t := strings.ToLower(sc.Type)
  1028  	if t != ServiceCheckTCP && t != ServiceCheckHTTP {
  1029  		return fmt.Errorf("Check with name %v has invalid check type: %s ", sc.Name, sc.Type)
  1030  	}
  1031  	if sc.Type == ServiceCheckHTTP && sc.Path == "" {
  1032  		return fmt.Errorf("http checks needs the Http path information.")
  1033  	}
  1034  
  1035  	if sc.Type == ServiceCheckScript && sc.Script == "" {
  1036  		return fmt.Errorf("Script checks need the script to invoke")
  1037  	}
  1038  	return nil
  1039  }
  1040  
// The Service model represents a Consul service definition.
type Service struct {
	Id        string         // Id of the service, this needs to be unique on a local machine
	Name      string         // Name of the service, defaults to id
	Tags      []string       // List of tags for the service
	PortLabel string         `mapstructure:"port"` // Port for the service
	Checks    []ServiceCheck // List of checks associated with the service
}
  1049  
  1050  func (s *Service) Validate() error {
  1051  	var mErr multierror.Error
  1052  	for _, c := range s.Checks {
  1053  		if err := c.Validate(); err != nil {
  1054  			mErr.Errors = append(mErr.Errors, err)
  1055  		}
  1056  	}
  1057  	return mErr.ErrorOrNil()
  1058  }
  1059  
// Task is typically a single process that is executed as part of a task group.
type Task struct {
	// Name of the task. Validate reports an error when it is empty.
	Name string

	// Driver is used to control which driver is used. Validate reports an
	// error when it is empty.
	Driver string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// List of service definitions exposed by the Task
	Services []*Service

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Resources is the resources needed by this task. Validate reports an
	// error when it is nil.
	Resources *Resources

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string
}
  1088  
  1089  func (t *Task) GoString() string {
  1090  	return fmt.Sprintf("*%#v", *t)
  1091  }
  1092  
// Set of possible states for a task (presumably the values held in
// TaskState.State; confirm against callers).
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)
  1099  
// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// The current state of the task.
	State string

	// Series of task events that transition the state of the task.
	Events []*TaskEvent
}
  1109  
// Types of task event.
const (
	// A Driver failure indicates that the task could not be started due to a
	// failure in the driver.
	TaskDriverFailure = "Driver Failure"

	// Task Started signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// Task terminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// Task Killed indicates a user has killed the task.
	TaskKilled = "Killed"
)
  1125  
// TaskEvent is an event that affects the state of a task and contains meta-data
// appropriate to the event's type.
type TaskEvent struct {
	Type string
	Time int64 // Unix Nanosecond timestamp

	// Driver Failure fields.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.
	ExitCode int    // The exit code of the task.
	Signal   int    // The signal that terminated the task.
	Message  string // A possible message explaining the termination of the task.

	// Task Killed Fields.
	KillError string // Error killing the task.
}
  1143  
  1144  func NewTaskEvent(event string) *TaskEvent {
  1145  	return &TaskEvent{
  1146  		Type: event,
  1147  		Time: time.Now().UnixNano(),
  1148  	}
  1149  }
  1150  
  1151  func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
  1152  	if err != nil {
  1153  		e.DriverError = err.Error()
  1154  	}
  1155  	return e
  1156  }
  1157  
// SetExitCode stores c in ExitCode and returns the event so that calls can
// be chained.
func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
	e.ExitCode = c
	return e
}
  1162  
// SetSignal stores s in Signal and returns the event so that calls can be
// chained.
func (e *TaskEvent) SetSignal(s int) *TaskEvent {
	e.Signal = s
	return e
}
  1167  
  1168  func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
  1169  	if err != nil {
  1170  		e.Message = err.Error()
  1171  	}
  1172  	return e
  1173  }
  1174  
  1175  func (e *TaskEvent) SetKillError(err error) *TaskEvent {
  1176  	if err != nil {
  1177  		e.KillError = err.Error()
  1178  	}
  1179  	return e
  1180  }
  1181  
  1182  // Validate is used to sanity check a task group
  1183  func (t *Task) Validate() error {
  1184  	var mErr multierror.Error
  1185  	if t.Name == "" {
  1186  		mErr.Errors = append(mErr.Errors, errors.New("Missing task name"))
  1187  	}
  1188  	if t.Driver == "" {
  1189  		mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
  1190  	}
  1191  	if t.Resources == nil {
  1192  		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
  1193  	}
  1194  	for idx, constr := range t.Constraints {
  1195  		if err := constr.Validate(); err != nil {
  1196  			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
  1197  			mErr.Errors = append(mErr.Errors, outer)
  1198  		}
  1199  	}
  1200  
  1201  	for _, service := range t.Services {
  1202  		if err := service.Validate(); err != nil {
  1203  			mErr.Errors = append(mErr.Errors, err)
  1204  		}
  1205  	}
  1206  	return mErr.ErrorOrNil()
  1207  }
  1208  
// Named constraint operands. Constraint.Validate checks that RTarget compiles
// as a regular expression for the regexp operand and parses as a version
// constraint for the version operand.
const (
	ConstraintDistinctHosts = "distinct_hosts"
	ConstraintRegex         = "regexp"
	ConstraintVersion       = "version"
)
  1214  
// Constraint is used to restrict placement options.
type Constraint struct {
	LTarget string // Left-hand target
	RTarget string // Right-hand target
	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
}
  1221  
  1222  func (c *Constraint) String() string {
  1223  	return fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
  1224  }
  1225  
  1226  func (c *Constraint) Validate() error {
  1227  	var mErr multierror.Error
  1228  	if c.Operand == "" {
  1229  		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
  1230  	}
  1231  
  1232  	// Perform additional validation based on operand
  1233  	switch c.Operand {
  1234  	case ConstraintRegex:
  1235  		if _, err := regexp.Compile(c.RTarget); err != nil {
  1236  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
  1237  		}
  1238  	case ConstraintVersion:
  1239  		if _, err := version.NewConstraint(c.RTarget); err != nil {
  1240  			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
  1241  		}
  1242  	}
  1243  	return mErr.ErrorOrNil()
  1244  }
  1245  
// Desired statuses of an allocation. Allocation.TerminalStatus treats stop,
// evict and failed as terminal.
const (
	AllocDesiredStatusRun    = "run"    // Allocation should run
	AllocDesiredStatusStop   = "stop"   // Allocation should stop
	AllocDesiredStatusEvict  = "evict"  // Allocation should stop, and was evicted
	AllocDesiredStatusFailed = "failed" // Allocation failed to be done
)
  1252  
// Client statuses of an allocation (presumably the values held in
// Allocation.ClientStatus; confirm against callers).
const (
	AllocClientStatusPending = "pending"
	AllocClientStatusRunning = "running"
	AllocClientStatusDead    = "dead"
	AllocClientStatusFailed  = "failed"
)
  1259  
// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// ID of the allocation (UUID)
	ID string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// Job is the parent job of the task group being allocated.
	// This is copied at allocation time to avoid issues if the job
	// definition is updated.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// Resources is the total set of resources allocated as part
	// of this allocation of the task group.
	Resources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources.
	TaskResources map[string]*Resources

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// DesiredStatus is the desired status of the allocation on the client
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	DesiredDescription string

	// ClientStatus is the status of the allocation on the client
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	ClientDescription string

	// TaskStates stores the state of each task.
	TaskStates map[string]*TaskState

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1313  
  1314  // TerminalStatus returns if the desired status is terminal and
  1315  // will no longer transition. This is not based on the current client status.
  1316  func (a *Allocation) TerminalStatus() bool {
  1317  	switch a.DesiredStatus {
  1318  	case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed:
  1319  		return true
  1320  	default:
  1321  		return false
  1322  	}
  1323  }
  1324  
  1325  // Stub returns a list stub for the allocation
  1326  func (a *Allocation) Stub() *AllocListStub {
  1327  	return &AllocListStub{
  1328  		ID:                 a.ID,
  1329  		EvalID:             a.EvalID,
  1330  		Name:               a.Name,
  1331  		NodeID:             a.NodeID,
  1332  		JobID:              a.JobID,
  1333  		TaskGroup:          a.TaskGroup,
  1334  		DesiredStatus:      a.DesiredStatus,
  1335  		DesiredDescription: a.DesiredDescription,
  1336  		ClientStatus:       a.ClientStatus,
  1337  		ClientDescription:  a.ClientDescription,
  1338  		TaskStates:         a.TaskStates,
  1339  		CreateIndex:        a.CreateIndex,
  1340  		ModifyIndex:        a.ModifyIndex,
  1341  	}
  1342  }
  1343  
// AllocListStub is used to return a subset of alloc information.
// Allocation.Stub fills every field from the same-named Allocation field.
type AllocListStub struct {
	ID                 string
	EvalID             string
	Name               string
	NodeID             string
	JobID              string
	TaskGroup          string
	DesiredStatus      string
	DesiredDescription string
	ClientStatus       string
	ClientDescription  string
	TaskStates         map[string]*TaskState // Same map as the source Allocation when built by Stub
	CreateIndex        uint64
	ModifyIndex        uint64
}
  1360  
// AllocMetric is used to track various metrics while attempting
// to make an allocation. These are used to debug a job, or to better
// understand the pressure within the system.
type AllocMetric struct {
	// NodesEvaluated is the number of nodes that were evaluated
	NodesEvaluated int

	// NodesFiltered is the number of nodes filtered due to a constraint
	NodesFiltered int

	// ClassFiltered is the number of nodes filtered by class, keyed by
	// node class. Nodes without a class are not counted here.
	ClassFiltered map[string]int

	// ConstraintFiltered is the number of failures caused by constraint,
	// keyed by the constraint string passed to FilterNode.
	ConstraintFiltered map[string]int

	// NodesExhausted is the number of nodes skipped due to being
	// exhausted of at least one resource
	NodesExhausted int

	// ClassExhausted is the number of nodes exhausted by class, keyed by
	// node class. Nodes without a class are not counted here.
	ClassExhausted map[string]int

	// DimensionExhausted provides the count by dimension or reason
	DimensionExhausted map[string]int

	// Scores is the scores of the final few nodes remaining
	// for placement. The top score is typically selected.
	// ScoreNode keys each entry as "<node ID>.<name>".
	Scores map[string]float64

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}
  1401  
  1402  func (a *AllocMetric) EvaluateNode() {
  1403  	a.NodesEvaluated += 1
  1404  }
  1405  
  1406  func (a *AllocMetric) FilterNode(node *Node, constraint string) {
  1407  	a.NodesFiltered += 1
  1408  	if node != nil && node.NodeClass != "" {
  1409  		if a.ClassFiltered == nil {
  1410  			a.ClassFiltered = make(map[string]int)
  1411  		}
  1412  		a.ClassFiltered[node.NodeClass] += 1
  1413  	}
  1414  	if constraint != "" {
  1415  		if a.ConstraintFiltered == nil {
  1416  			a.ConstraintFiltered = make(map[string]int)
  1417  		}
  1418  		a.ConstraintFiltered[constraint] += 1
  1419  	}
  1420  }
  1421  
  1422  func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
  1423  	a.NodesExhausted += 1
  1424  	if node != nil && node.NodeClass != "" {
  1425  		if a.ClassExhausted == nil {
  1426  			a.ClassExhausted = make(map[string]int)
  1427  		}
  1428  		a.ClassExhausted[node.NodeClass] += 1
  1429  	}
  1430  	if dimension != "" {
  1431  		if a.DimensionExhausted == nil {
  1432  			a.DimensionExhausted = make(map[string]int)
  1433  		}
  1434  		a.DimensionExhausted[dimension] += 1
  1435  	}
  1436  }
  1437  
  1438  func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
  1439  	if a.Scores == nil {
  1440  		a.Scores = make(map[string]float64)
  1441  	}
  1442  	key := fmt.Sprintf("%s.%s", node.ID, name)
  1443  	a.Scores[key] = score
  1444  }
  1445  
// Statuses of an evaluation. Complete and failed are terminal; only pending
// evaluations should be enqueued.
const (
	EvalStatusPending  = "pending"
	EvalStatusComplete = "complete"
	EvalStatusFailed   = "failed"
)
  1451  
// Reasons an evaluation was created, recorded in Evaluation.TriggeredBy.
const (
	EvalTriggerJobRegister   = "job-register"
	EvalTriggerJobDeregister = "job-deregister"
	EvalTriggerNodeUpdate    = "node-update"
	EvalTriggerScheduled     = "scheduled"
	EvalTriggerRollingUpdate = "rolling-update"
)
  1459  
// Names of the core garbage-collection jobs.
const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"
)
  1472  
// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// Status of the evaluation
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade.
	Wait time.Duration

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	PreviousEval string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
  1532  
  1533  // TerminalStatus returns if the current status is terminal and
  1534  // will no longer transition.
  1535  func (e *Evaluation) TerminalStatus() bool {
  1536  	switch e.Status {
  1537  	case EvalStatusComplete, EvalStatusFailed:
  1538  		return true
  1539  	default:
  1540  		return false
  1541  	}
  1542  }
  1543  
  1544  func (e *Evaluation) GoString() string {
  1545  	return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID)
  1546  }
  1547  
  1548  func (e *Evaluation) Copy() *Evaluation {
  1549  	ne := new(Evaluation)
  1550  	*ne = *e
  1551  	return ne
  1552  }
  1553  
  1554  // ShouldEnqueue checks if a given evaluation should be enqueued
  1555  func (e *Evaluation) ShouldEnqueue() bool {
  1556  	switch e.Status {
  1557  	case EvalStatusPending:
  1558  		return true
  1559  	case EvalStatusComplete, EvalStatusFailed:
  1560  		return false
  1561  	default:
  1562  		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
  1563  	}
  1564  }
  1565  
  1566  // MakePlan is used to make a plan from the given evaluation
  1567  // for a given Job
  1568  func (e *Evaluation) MakePlan(j *Job) *Plan {
  1569  	p := &Plan{
  1570  		EvalID:         e.ID,
  1571  		Priority:       e.Priority,
  1572  		NodeUpdate:     make(map[string][]*Allocation),
  1573  		NodeAllocation: make(map[string][]*Allocation),
  1574  	}
  1575  	if j != nil {
  1576  		p.AllAtOnce = j.AllAtOnce
  1577  	}
  1578  	return p
  1579  }
  1580  
  1581  // NextRollingEval creates an evaluation to followup this eval for rolling updates
  1582  func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
  1583  	return &Evaluation{
  1584  		ID:             GenerateUUID(),
  1585  		Priority:       e.Priority,
  1586  		Type:           e.Type,
  1587  		TriggeredBy:    EvalTriggerRollingUpdate,
  1588  		JobID:          e.JobID,
  1589  		JobModifyIndex: e.JobModifyIndex,
  1590  		Status:         EvalStatusPending,
  1591  		Wait:           wait,
  1592  		PreviousEval:   e.ID,
  1593  	}
  1594  }
  1595  
// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// NodeUpdate contains all the allocations for each node, keyed by node
	// ID. For each node, this is a list of the allocations to update to
	// either stop or evict.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node, keyed by
	// node ID. The evicts must be considered prior to the allocations.
	NodeAllocation map[string][]*Allocation

	// FailedAllocs are allocations that could not be made,
	// but are persisted so that the user can use the feedback
	// to determine the cause.
	FailedAllocs []*Allocation
}
  1632  
  1633  func (p *Plan) AppendUpdate(alloc *Allocation, status, desc string) {
  1634  	newAlloc := new(Allocation)
  1635  	*newAlloc = *alloc
  1636  	newAlloc.DesiredStatus = status
  1637  	newAlloc.DesiredDescription = desc
  1638  	node := alloc.NodeID
  1639  	existing := p.NodeUpdate[node]
  1640  	p.NodeUpdate[node] = append(existing, newAlloc)
  1641  }
  1642  
  1643  func (p *Plan) PopUpdate(alloc *Allocation) {
  1644  	existing := p.NodeUpdate[alloc.NodeID]
  1645  	n := len(existing)
  1646  	if n > 0 && existing[n-1].ID == alloc.ID {
  1647  		existing = existing[:n-1]
  1648  		if len(existing) > 0 {
  1649  			p.NodeUpdate[alloc.NodeID] = existing
  1650  		} else {
  1651  			delete(p.NodeUpdate, alloc.NodeID)
  1652  		}
  1653  	}
  1654  }
  1655  
  1656  func (p *Plan) AppendAlloc(alloc *Allocation) {
  1657  	node := alloc.NodeID
  1658  	existing := p.NodeAllocation[node]
  1659  	p.NodeAllocation[node] = append(existing, alloc)
  1660  }
  1661  
// AppendFailed adds alloc to the plan's list of failed allocations.
func (p *Plan) AppendFailed(alloc *Allocation) {
	p.FailedAllocs = append(p.FailedAllocs, alloc)
}
  1665  
  1666  // IsNoOp checks if this plan would do nothing
  1667  func (p *Plan) IsNoOp() bool {
  1668  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
  1669  }
  1670  
// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	// FullCommit compares it against the plan's NodeAllocation.
	NodeAllocation map[string][]*Allocation

	// FailedAllocs are allocations that could not be made,
	// but are persisted so that the user can use the feedback
	// to determine the cause.
	FailedAllocs []*Allocation

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// over committed) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}
  1694  
  1695  // IsNoOp checks if this plan result would do nothing
  1696  func (p *PlanResult) IsNoOp() bool {
  1697  	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
  1698  }
  1699  
  1700  // FullCommit is used to check if all the allocations in a plan
  1701  // were committed as part of the result. Returns if there was
  1702  // a match, and the number of expected and actual allocations.
  1703  func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
  1704  	expected := 0
  1705  	actual := 0
  1706  	for name, allocList := range plan.NodeAllocation {
  1707  		didAlloc, _ := p.NodeAllocation[name]
  1708  		expected += len(allocList)
  1709  		actual += len(didAlloc)
  1710  	}
  1711  	return actual == expected, expected, actual
  1712  }
  1713  
// MsgpackHandle is a shared handle for encoding/decoding of structs. It is
// built once at package initialisation and used by Decode and Encode.
var MsgpackHandle = func() *codec.MsgpackHandle {
	h := &codec.MsgpackHandle{RawToString: true}

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
	return h
}()
  1724  
  1725  // Decode is used to decode a MsgPack encoded object
  1726  func Decode(buf []byte, out interface{}) error {
  1727  	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
  1728  }
  1729  
  1730  // Encode is used to encode a MsgPack object with type prefix
  1731  func Encode(t MessageType, msg interface{}) ([]byte, error) {
  1732  	var buf bytes.Buffer
  1733  	buf.WriteByte(uint8(t))
  1734  	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
  1735  	return buf.Bytes(), err
  1736  }