github.com/ranjib/nomad@v0.1.1-0.20160225204057-97751b02f70b/nomad/structs/structs.go (about) 1 package structs 2 3 import ( 4 "bytes" 5 "crypto/sha1" 6 "errors" 7 "fmt" 8 "io" 9 "reflect" 10 "regexp" 11 "strconv" 12 "strings" 13 "time" 14 15 "github.com/gorhill/cronexpr" 16 "github.com/hashicorp/go-multierror" 17 "github.com/hashicorp/go-version" 18 "github.com/hashicorp/nomad/helper/args" 19 "github.com/mitchellh/copystructure" 20 "github.com/ugorji/go/codec" 21 22 hcodec "github.com/hashicorp/go-msgpack/codec" 23 ) 24 25 var ( 26 ErrNoLeader = fmt.Errorf("No cluster leader") 27 ErrNoRegionPath = fmt.Errorf("No path to region") 28 ) 29 30 type MessageType uint8 31 32 const ( 33 NodeRegisterRequestType MessageType = iota 34 NodeDeregisterRequestType 35 NodeUpdateStatusRequestType 36 NodeUpdateDrainRequestType 37 JobRegisterRequestType 38 JobDeregisterRequestType 39 EvalUpdateRequestType 40 EvalDeleteRequestType 41 AllocUpdateRequestType 42 AllocClientUpdateRequestType 43 ) 44 45 const ( 46 // IgnoreUnknownTypeFlag is set along with a MessageType 47 // to indicate that the message type can be safely ignored 48 // if it is not recognized. This is for future proofing, so 49 // that new commands can be added in a way that won't cause 50 // old servers to crash when the FSM attempts to process them. 51 IgnoreUnknownTypeFlag MessageType = 128 52 ) 53 54 // RPCInfo is used to describe common information about query 55 type RPCInfo interface { 56 RequestRegion() string 57 IsRead() bool 58 AllowStaleRead() bool 59 } 60 61 // QueryOptions is used to specify various flags for read queries 62 type QueryOptions struct { 63 // The target region for this query 64 Region string 65 66 // If set, wait until query exceeds given index. Must be provided 67 // with MaxQueryTime. 68 MinQueryIndex uint64 69 70 // Provided with MinQueryIndex to wait for change. 71 MaxQueryTime time.Duration 72 73 // If set, any follower can service the request. Results 74 // may be arbitrarily stale. 75 AllowStale bool 76 77 // If set, used as prefix for resource list searches 78 Prefix string 79 } 80 81 func (q QueryOptions) RequestRegion() string { 82 return q.Region 83 } 84 85 // QueryOption only applies to reads, so always true 86 func (q QueryOptions) IsRead() bool { 87 return true 88 } 89 90 func (q QueryOptions) AllowStaleRead() bool { 91 return q.AllowStale 92 } 93 94 type WriteRequest struct { 95 // The target region for this write 96 Region string 97 } 98 99 func (w WriteRequest) RequestRegion() string { 100 // The target region for this request 101 return w.Region 102 } 103 104 // WriteRequest only applies to writes, always false 105 func (w WriteRequest) IsRead() bool { 106 return false 107 } 108 109 func (w WriteRequest) AllowStaleRead() bool { 110 return false 111 } 112 113 // QueryMeta allows a query response to include potentially 114 // useful metadata about a query 115 type QueryMeta struct { 116 // This is the index associated with the read 117 Index uint64 118 119 // If AllowStale is used, this is time elapsed since 120 // last contact between the follower and leader. This 121 // can be used to gauge staleness. 122 LastContact time.Duration 123 124 // Used to indicate if there is a known leader node 125 KnownLeader bool 126 } 127 128 // WriteMeta allows a write response to include potentially 129 // useful metadata about the write 130 type WriteMeta struct { 131 // This is the index associated with the write 132 Index uint64 133 } 134 135 // NodeRegisterRequest is used for Node.Register endpoint 136 // to register a node as being a schedulable entity. 137 type NodeRegisterRequest struct { 138 Node *Node 139 WriteRequest 140 } 141 142 // NodeDeregisterRequest is used for Node.Deregister endpoint 143 // to deregister a node as being a schedulable entity. 144 type NodeDeregisterRequest struct { 145 NodeID string 146 WriteRequest 147 } 148 149 // NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint 150 // to update the status of a node. 151 type NodeUpdateStatusRequest struct { 152 NodeID string 153 Status string 154 WriteRequest 155 } 156 157 // NodeUpdateDrainRequest is used for updatin the drain status 158 type NodeUpdateDrainRequest struct { 159 NodeID string 160 Drain bool 161 WriteRequest 162 } 163 164 // NodeEvaluateRequest is used to re-evaluate the ndoe 165 type NodeEvaluateRequest struct { 166 NodeID string 167 WriteRequest 168 } 169 170 // NodeSpecificRequest is used when we just need to specify a target node 171 type NodeSpecificRequest struct { 172 NodeID string 173 QueryOptions 174 } 175 176 // JobRegisterRequest is used for Job.Register endpoint 177 // to register a job as being a schedulable entity. 178 type JobRegisterRequest struct { 179 Job *Job 180 WriteRequest 181 } 182 183 // JobDeregisterRequest is used for Job.Deregister endpoint 184 // to deregister a job as being a schedulable entity. 185 type JobDeregisterRequest struct { 186 JobID string 187 WriteRequest 188 } 189 190 // JobEvaluateRequest is used when we just need to re-evaluate a target job 191 type JobEvaluateRequest struct { 192 JobID string 193 WriteRequest 194 } 195 196 // JobSpecificRequest is used when we just need to specify a target job 197 type JobSpecificRequest struct { 198 JobID string 199 QueryOptions 200 } 201 202 // JobListRequest is used to parameterize a list request 203 type JobListRequest struct { 204 QueryOptions 205 } 206 207 // NodeListRequest is used to parameterize a list request 208 type NodeListRequest struct { 209 QueryOptions 210 } 211 212 // EvalUpdateRequest is used for upserting evaluations. 213 type EvalUpdateRequest struct { 214 Evals []*Evaluation 215 EvalToken string 216 WriteRequest 217 } 218 219 // EvalDeleteRequest is used for deleting an evaluation. 220 type EvalDeleteRequest struct { 221 Evals []string 222 Allocs []string 223 WriteRequest 224 } 225 226 // EvalSpecificRequest is used when we just need to specify a target evaluation 227 type EvalSpecificRequest struct { 228 EvalID string 229 QueryOptions 230 } 231 232 // EvalAckRequest is used to Ack/Nack a specific evaluation 233 type EvalAckRequest struct { 234 EvalID string 235 Token string 236 WriteRequest 237 } 238 239 // EvalDequeueRequest is used when we want to dequeue an evaluation 240 type EvalDequeueRequest struct { 241 Schedulers []string 242 Timeout time.Duration 243 WriteRequest 244 } 245 246 // EvalListRequest is used to list the evaluations 247 type EvalListRequest struct { 248 QueryOptions 249 } 250 251 // PlanRequest is used to submit an allocation plan to the leader 252 type PlanRequest struct { 253 Plan *Plan 254 WriteRequest 255 } 256 257 // AllocUpdateRequest is used to submit changes to allocations, either 258 // to cause evictions or to assign new allocaitons. Both can be done 259 // within a single transaction 260 type AllocUpdateRequest struct { 261 // Alloc is the list of new allocations to assign 262 Alloc []*Allocation 263 264 // Job is the shared parent job of the allocations. 265 // It is pulled out since it is common to reduce payload size. 266 Job *Job 267 268 WriteRequest 269 } 270 271 // AllocListRequest is used to request a list of allocations 272 type AllocListRequest struct { 273 QueryOptions 274 } 275 276 // AllocSpecificRequest is used to query a specific allocation 277 type AllocSpecificRequest struct { 278 AllocID string 279 QueryOptions 280 } 281 282 // AllocsGetcRequest is used to query a set of allocations 283 type AllocsGetRequest struct { 284 AllocIDs []string 285 QueryOptions 286 } 287 288 // PeriodicForceReqeuest is used to force a specific periodic job. 289 type PeriodicForceRequest struct { 290 JobID string 291 WriteRequest 292 } 293 294 // GenericRequest is used to request where no 295 // specific information is needed. 296 type GenericRequest struct { 297 QueryOptions 298 } 299 300 // GenericResponse is used to respond to a request where no 301 // specific response information is needed. 302 type GenericResponse struct { 303 WriteMeta 304 } 305 306 const ( 307 ProtocolVersion = "protocol" 308 APIMajorVersion = "api.major" 309 APIMinorVersion = "api.minor" 310 ) 311 312 // VersionResponse is used for the Status.Version reseponse 313 type VersionResponse struct { 314 Build string 315 Versions map[string]int 316 QueryMeta 317 } 318 319 // JobRegisterResponse is used to respond to a job registration 320 type JobRegisterResponse struct { 321 EvalID string 322 EvalCreateIndex uint64 323 JobModifyIndex uint64 324 QueryMeta 325 } 326 327 // JobDeregisterResponse is used to respond to a job deregistration 328 type JobDeregisterResponse struct { 329 EvalID string 330 EvalCreateIndex uint64 331 JobModifyIndex uint64 332 QueryMeta 333 } 334 335 // NodeUpdateResponse is used to respond to a node update 336 type NodeUpdateResponse struct { 337 HeartbeatTTL time.Duration 338 EvalIDs []string 339 EvalCreateIndex uint64 340 NodeModifyIndex uint64 341 QueryMeta 342 } 343 344 // NodeDrainUpdateResponse is used to respond to a node drain update 345 type NodeDrainUpdateResponse struct { 346 EvalIDs []string 347 EvalCreateIndex uint64 348 NodeModifyIndex uint64 349 QueryMeta 350 } 351 352 // NodeAllocsResponse is used to return allocs for a single node 353 type NodeAllocsResponse struct { 354 Allocs []*Allocation 355 QueryMeta 356 } 357 358 // NodeClientAllocsResponse is used to return allocs meta data for a single node 359 type NodeClientAllocsResponse struct { 360 Allocs map[string]uint64 361 QueryMeta 362 } 363 364 // SingleNodeResponse is used to return a single node 365 type SingleNodeResponse struct { 366 Node *Node 367 QueryMeta 368 } 369 370 // JobListResponse is used for a list request 371 type NodeListResponse struct { 372 Nodes []*NodeListStub 373 QueryMeta 374 } 375 376 // SingleJobResponse is used to return a single job 377 type SingleJobResponse struct { 378 Job *Job 379 QueryMeta 380 } 381 382 // JobListResponse is used for a list request 383 type JobListResponse struct { 384 Jobs []*JobListStub 385 QueryMeta 386 } 387 388 // SingleAllocResponse is used to return a single allocation 389 type SingleAllocResponse struct { 390 Alloc *Allocation 391 QueryMeta 392 } 393 394 // AllocsGetResponse is used to return a set of allocations 395 type AllocsGetResponse struct { 396 Allocs []*Allocation 397 QueryMeta 398 } 399 400 // JobAllocationsResponse is used to return the allocations for a job 401 type JobAllocationsResponse struct { 402 Allocations []*AllocListStub 403 QueryMeta 404 } 405 406 // JobEvaluationsResponse is used to return the evaluations for a job 407 type JobEvaluationsResponse struct { 408 Evaluations []*Evaluation 409 QueryMeta 410 } 411 412 // SingleEvalResponse is used to return a single evaluation 413 type SingleEvalResponse struct { 414 Eval *Evaluation 415 QueryMeta 416 } 417 418 // EvalDequeueResponse is used to return from a dequeue 419 type EvalDequeueResponse struct { 420 Eval *Evaluation 421 Token string 422 QueryMeta 423 } 424 425 // PlanResponse is used to return from a PlanRequest 426 type PlanResponse struct { 427 Result *PlanResult 428 WriteMeta 429 } 430 431 // AllocListResponse is used for a list request 432 type AllocListResponse struct { 433 Allocations []*AllocListStub 434 QueryMeta 435 } 436 437 // EvalListResponse is used for a list request 438 type EvalListResponse struct { 439 Evaluations []*Evaluation 440 QueryMeta 441 } 442 443 // EvalAllocationsResponse is used to return the allocations for an evaluation 444 type EvalAllocationsResponse struct { 445 Allocations []*AllocListStub 446 QueryMeta 447 } 448 449 // PeriodicForceResponse is used to respond to a periodic job force launch 450 type PeriodicForceResponse struct { 451 EvalID string 452 EvalCreateIndex uint64 453 WriteMeta 454 } 455 456 const ( 457 NodeStatusInit = "initializing" 458 NodeStatusReady = "ready" 459 NodeStatusDown = "down" 460 ) 461 462 // ShouldDrainNode checks if a given node status should trigger an 463 // evaluation. Some states don't require any further action. 464 func ShouldDrainNode(status string) bool { 465 switch status { 466 case NodeStatusInit, NodeStatusReady: 467 return false 468 case NodeStatusDown: 469 return true 470 default: 471 panic(fmt.Sprintf("unhandled node status %s", status)) 472 } 473 } 474 475 // ValidNodeStatus is used to check if a node status is valid 476 func ValidNodeStatus(status string) bool { 477 switch status { 478 case NodeStatusInit, NodeStatusReady, NodeStatusDown: 479 return true 480 default: 481 return false 482 } 483 } 484 485 // Node is a representation of a schedulable client node 486 type Node struct { 487 // ID is a unique identifier for the node. It can be constructed 488 // by doing a concatenation of the Name and Datacenter as a simple 489 // approach. Alternatively a UUID may be used. 490 ID string 491 492 // Datacenter for this node 493 Datacenter string 494 495 // Node name 496 Name string 497 498 // HTTPAddr is the address on which the Nomad client is listening for http 499 // requests 500 HTTPAddr string 501 502 // Attributes is an arbitrary set of key/value 503 // data that can be used for constraints. Examples 504 // include "kernel.name=linux", "arch=386", "driver.docker=1", 505 // "docker.runtime=1.8.3" 506 Attributes map[string]string 507 508 // Resources is the available resources on the client. 509 // For example 'cpu=2' 'memory=2048' 510 Resources *Resources 511 512 // Reserved is the set of resources that are reserved, 513 // and should be subtracted from the total resources for 514 // the purposes of scheduling. This may be provide certain 515 // high-watermark tolerances or because of external schedulers 516 // consuming resources. 517 Reserved *Resources 518 519 // Links are used to 'link' this client to external 520 // systems. For example 'consul=foo.dc1' 'aws=i-83212' 521 // 'ami=ami-123' 522 Links map[string]string 523 524 // Meta is used to associate arbitrary metadata with this 525 // client. This is opaque to Nomad. 526 Meta map[string]string 527 528 // NodeClass is an opaque identifier used to group nodes 529 // together for the purpose of determining scheduling pressure. 530 NodeClass string 531 532 // ComputedClass is a unique id that identifies nodes with a common set of 533 // attributes and capabilities. 534 ComputedClass string 535 536 // Drain is controlled by the servers, and not the client. 537 // If true, no jobs will be scheduled to this node, and existing 538 // allocations will be drained. 539 Drain bool 540 541 // Status of this node 542 Status string 543 544 // StatusDescription is meant to provide more human useful information 545 StatusDescription string 546 547 // Raft Indexes 548 CreateIndex uint64 549 ModifyIndex uint64 550 } 551 552 func (n *Node) Copy() *Node { 553 if n == nil { 554 return nil 555 } 556 nn := new(Node) 557 *nn = *n 558 nn.Attributes = CopyMapStringString(nn.Attributes) 559 nn.Resources = nn.Resources.Copy() 560 nn.Reserved = nn.Reserved.Copy() 561 nn.Links = CopyMapStringString(nn.Links) 562 nn.Meta = CopyMapStringString(nn.Meta) 563 return nn 564 } 565 566 // TerminalStatus returns if the current status is terminal and 567 // will no longer transition. 568 func (n *Node) TerminalStatus() bool { 569 switch n.Status { 570 case NodeStatusDown: 571 return true 572 default: 573 return false 574 } 575 } 576 577 // Stub returns a summarized version of the node 578 func (n *Node) Stub() *NodeListStub { 579 return &NodeListStub{ 580 ID: n.ID, 581 Datacenter: n.Datacenter, 582 Name: n.Name, 583 NodeClass: n.NodeClass, 584 Drain: n.Drain, 585 Status: n.Status, 586 StatusDescription: n.StatusDescription, 587 CreateIndex: n.CreateIndex, 588 ModifyIndex: n.ModifyIndex, 589 } 590 } 591 592 // NodeListStub is used to return a subset of job information 593 // for the job list 594 type NodeListStub struct { 595 ID string 596 Datacenter string 597 Name string 598 NodeClass string 599 Drain bool 600 Status string 601 StatusDescription string 602 CreateIndex uint64 603 ModifyIndex uint64 604 } 605 606 // Resources is used to define the resources available 607 // on a client 608 type Resources struct { 609 CPU int 610 MemoryMB int `mapstructure:"memory"` 611 DiskMB int `mapstructure:"disk"` 612 IOPS int 613 Networks []*NetworkResource 614 } 615 616 // DefaultResources returns the minimum resources a task can use and be valid. 617 func DefaultResources() *Resources { 618 return &Resources{ 619 CPU: 100, 620 MemoryMB: 10, 621 DiskMB: 300, 622 IOPS: 0, 623 } 624 } 625 626 // Merge merges this resource with another resource. 627 func (r *Resources) Merge(other *Resources) { 628 if other.CPU != 0 { 629 r.CPU = other.CPU 630 } 631 if other.MemoryMB != 0 { 632 r.MemoryMB = other.MemoryMB 633 } 634 if other.DiskMB != 0 { 635 r.DiskMB = other.DiskMB 636 } 637 if other.IOPS != 0 { 638 r.IOPS = other.IOPS 639 } 640 if len(other.Networks) != 0 { 641 r.Networks = other.Networks 642 } 643 } 644 645 // MeetsMinResources returns an error if the resources specified are less than 646 // the minimum allowed. 647 func (r *Resources) MeetsMinResources() error { 648 var mErr multierror.Error 649 if r.CPU < 20 { 650 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is 20; got %d", r.CPU)) 651 } 652 if r.MemoryMB < 10 { 653 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is 10; got %d", r.MemoryMB)) 654 } 655 if r.DiskMB < 10 { 656 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum DiskMB value is 10; got %d", r.DiskMB)) 657 } 658 if r.IOPS < 0 { 659 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is 0; got %d", r.IOPS)) 660 } 661 for i, n := range r.Networks { 662 if err := n.MeetsMinResources(); err != nil { 663 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 664 } 665 } 666 667 return mErr.ErrorOrNil() 668 } 669 670 // Copy returns a deep copy of the resources 671 func (r *Resources) Copy() *Resources { 672 if r == nil { 673 return nil 674 } 675 newR := new(Resources) 676 *newR = *r 677 n := len(r.Networks) 678 newR.Networks = make([]*NetworkResource, n) 679 for i := 0; i < n; i++ { 680 newR.Networks[i] = r.Networks[i].Copy() 681 } 682 return newR 683 } 684 685 // NetIndex finds the matching net index using device name 686 func (r *Resources) NetIndex(n *NetworkResource) int { 687 for idx, net := range r.Networks { 688 if net.Device == n.Device { 689 return idx 690 } 691 } 692 return -1 693 } 694 695 // Superset checks if one set of resources is a superset 696 // of another. This ignores network resources, and the NetworkIndex 697 // should be used for that. 698 func (r *Resources) Superset(other *Resources) (bool, string) { 699 if r.CPU < other.CPU { 700 return false, "cpu exhausted" 701 } 702 if r.MemoryMB < other.MemoryMB { 703 return false, "memory exhausted" 704 } 705 if r.DiskMB < other.DiskMB { 706 return false, "disk exhausted" 707 } 708 if r.IOPS < other.IOPS { 709 return false, "iops exhausted" 710 } 711 return true, "" 712 } 713 714 // Add adds the resources of the delta to this, potentially 715 // returning an error if not possible. 716 func (r *Resources) Add(delta *Resources) error { 717 if delta == nil { 718 return nil 719 } 720 r.CPU += delta.CPU 721 r.MemoryMB += delta.MemoryMB 722 r.DiskMB += delta.DiskMB 723 r.IOPS += delta.IOPS 724 725 for _, n := range delta.Networks { 726 // Find the matching interface by IP or CIDR 727 idx := r.NetIndex(n) 728 if idx == -1 { 729 r.Networks = append(r.Networks, n.Copy()) 730 } else { 731 r.Networks[idx].Add(n) 732 } 733 } 734 return nil 735 } 736 737 func (r *Resources) GoString() string { 738 return fmt.Sprintf("*%#v", *r) 739 } 740 741 type Port struct { 742 Label string 743 Value int `mapstructure:"static"` 744 } 745 746 // NetworkResource is used to represent available network 747 // resources 748 type NetworkResource struct { 749 Device string // Name of the device 750 CIDR string // CIDR block of addresses 751 IP string // IP address 752 MBits int // Throughput 753 ReservedPorts []Port // Reserved ports 754 DynamicPorts []Port // Dynamically assigned ports 755 } 756 757 // MeetsMinResources returns an error if the resources specified are less than 758 // the minimum allowed. 759 func (n *NetworkResource) MeetsMinResources() error { 760 var mErr multierror.Error 761 if n.MBits < 1 { 762 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits)) 763 } 764 return mErr.ErrorOrNil() 765 } 766 767 // Copy returns a deep copy of the network resource 768 func (n *NetworkResource) Copy() *NetworkResource { 769 if n == nil { 770 return nil 771 } 772 newR := new(NetworkResource) 773 *newR = *n 774 if n.ReservedPorts != nil { 775 newR.ReservedPorts = make([]Port, len(n.ReservedPorts)) 776 copy(newR.ReservedPorts, n.ReservedPorts) 777 } 778 if n.DynamicPorts != nil { 779 newR.DynamicPorts = make([]Port, len(n.DynamicPorts)) 780 copy(newR.DynamicPorts, n.DynamicPorts) 781 } 782 return newR 783 } 784 785 // Add adds the resources of the delta to this, potentially 786 // returning an error if not possible. 787 func (n *NetworkResource) Add(delta *NetworkResource) { 788 if len(delta.ReservedPorts) > 0 { 789 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...) 790 } 791 n.MBits += delta.MBits 792 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...) 793 } 794 795 func (n *NetworkResource) GoString() string { 796 return fmt.Sprintf("*%#v", *n) 797 } 798 799 func (n *NetworkResource) MapLabelToValues(port_map map[string]int) map[string]int { 800 labelValues := make(map[string]int) 801 ports := append(n.ReservedPorts, n.DynamicPorts...) 802 for _, port := range ports { 803 if mapping, ok := port_map[port.Label]; ok { 804 labelValues[port.Label] = mapping 805 } else { 806 labelValues[port.Label] = port.Value 807 } 808 } 809 return labelValues 810 } 811 812 const ( 813 // JobTypeNomad is reserved for internal system tasks and is 814 // always handled by the CoreScheduler. 815 JobTypeCore = "_core" 816 JobTypeService = "service" 817 JobTypeBatch = "batch" 818 JobTypeSystem = "system" 819 ) 820 821 const ( 822 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 823 JobStatusRunning = "running" // Running means the job has non-terminal allocations 824 JobStatusDead = "dead" // Dead means all evaluation's and allocations are terminal 825 ) 826 827 const ( 828 // JobMinPriority is the minimum allowed priority 829 JobMinPriority = 1 830 831 // JobDefaultPriority is the default priority if not 832 // not specified. 833 JobDefaultPriority = 50 834 835 // JobMaxPriority is the maximum allowed priority 836 JobMaxPriority = 100 837 838 // Ensure CoreJobPriority is higher than any user 839 // specified job so that it gets priority. This is important 840 // for the system to remain healthy. 841 CoreJobPriority = JobMaxPriority * 2 842 ) 843 844 // Job is the scope of a scheduling request to Nomad. It is the largest 845 // scoped object, and is a named collection of task groups. Each task group 846 // is further composed of tasks. A task group (TG) is the unit of scheduling 847 // however. 848 type Job struct { 849 // Region is the Nomad region that handles scheduling this job 850 Region string 851 852 // ID is a unique identifier for the job per region. It can be 853 // specified hierarchically like LineOfBiz/OrgName/Team/Project 854 ID string 855 856 // ParentID is the unique identifier of the job that spawned this job. 857 ParentID string 858 859 // Name is the logical name of the job used to refer to it. This is unique 860 // per region, but not unique globally. 861 Name string 862 863 // Type is used to control various behaviors about the job. Most jobs 864 // are service jobs, meaning they are expected to be long lived. 865 // Some jobs are batch oriented meaning they run and then terminate. 866 // This can be extended in the future to support custom schedulers. 867 Type string 868 869 // Priority is used to control scheduling importance and if this job 870 // can preempt other jobs. 871 Priority int 872 873 // AllAtOnce is used to control if incremental scheduling of task groups 874 // is allowed or if we must do a gang scheduling of the entire job. This 875 // can slow down larger jobs if resources are not available. 876 AllAtOnce bool `mapstructure:"all_at_once"` 877 878 // Datacenters contains all the datacenters this job is allowed to span 879 Datacenters []string 880 881 // Constraints can be specified at a job level and apply to 882 // all the task groups and tasks. 883 Constraints []*Constraint 884 885 // TaskGroups are the collections of task groups that this job needs 886 // to run. Each task group is an atomic unit of scheduling and placement. 887 TaskGroups []*TaskGroup 888 889 // Update is used to control the update strategy 890 Update UpdateStrategy 891 892 // Periodic is used to define the interval the job is run at. 893 Periodic *PeriodicConfig 894 895 // GC is used to mark the job as available for garbage collection after it 896 // has no outstanding evaluations or allocations. 897 GC bool 898 899 // Meta is used to associate arbitrary metadata with this 900 // job. This is opaque to Nomad. 901 Meta map[string]string 902 903 // Job status 904 Status string 905 906 // StatusDescription is meant to provide more human useful information 907 StatusDescription string 908 909 // Raft Indexes 910 CreateIndex uint64 911 ModifyIndex uint64 912 JobModifyIndex uint64 913 } 914 915 // InitFields is used to initialize fields in the Job. This should be called 916 // when registering a Job. 917 func (j *Job) InitFields() { 918 for _, tg := range j.TaskGroups { 919 tg.InitFields(j) 920 } 921 922 // If the job is batch then make it GC. 923 if j.Type == JobTypeBatch { 924 j.GC = true 925 } 926 } 927 928 // Copy returns a deep copy of the Job. It is expected that callers use recover. 929 // This job can panic if the deep copy failed as it uses reflection. 930 func (j *Job) Copy() *Job { 931 if j == nil { 932 return nil 933 } 934 nj := new(Job) 935 *nj = *j 936 nj.Datacenters = CopySliceString(nj.Datacenters) 937 nj.Constraints = CopySliceConstraints(nj.Constraints) 938 939 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 940 for i, tg := range nj.TaskGroups { 941 tgs[i] = tg.Copy() 942 } 943 nj.TaskGroups = tgs 944 945 nj.Periodic = nj.Periodic.Copy() 946 nj.Meta = CopyMapStringString(nj.Meta) 947 return nj 948 } 949 950 // Validate is used to sanity check a job input 951 func (j *Job) Validate() error { 952 var mErr multierror.Error 953 if j.Region == "" { 954 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 955 } 956 if j.ID == "" { 957 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 958 } else if strings.Contains(j.ID, " ") { 959 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 960 } 961 if j.Name == "" { 962 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 963 } 964 if j.Type == "" { 965 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 966 } 967 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 968 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 969 } 970 if len(j.Datacenters) == 0 { 971 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 972 } 973 if len(j.TaskGroups) == 0 { 974 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 975 } 976 for idx, constr := range j.Constraints { 977 if err := constr.Validate(); err != nil { 978 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 979 mErr.Errors = append(mErr.Errors, outer) 980 } 981 } 982 983 // Check for duplicate task groups 984 taskGroups := make(map[string]int) 985 for idx, tg := range j.TaskGroups { 986 if tg.Name == "" { 987 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 988 } else if existing, ok := taskGroups[tg.Name]; ok { 989 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 990 } else { 991 taskGroups[tg.Name] = idx 992 } 993 994 if j.Type == "system" && tg.Count != 1 { 995 mErr.Errors = append(mErr.Errors, 996 fmt.Errorf("Job task group %d has count %d. Only count of 1 is supported with system scheduler", 997 idx+1, tg.Count)) 998 } 999 } 1000 1001 // Validate the task group 1002 for idx, tg := range j.TaskGroups { 1003 if err := tg.Validate(); err != nil { 1004 outer := fmt.Errorf("Task group %d validation failed: %s", idx+1, err) 1005 mErr.Errors = append(mErr.Errors, outer) 1006 } 1007 } 1008 1009 // Validate periodic is only used with batch jobs. 1010 if j.IsPeriodic() { 1011 if j.Type != JobTypeBatch { 1012 mErr.Errors = append(mErr.Errors, 1013 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 1014 } 1015 1016 if err := j.Periodic.Validate(); err != nil { 1017 mErr.Errors = append(mErr.Errors, err) 1018 } 1019 } 1020 1021 return mErr.ErrorOrNil() 1022 } 1023 1024 // LookupTaskGroup finds a task group by name 1025 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 1026 for _, tg := range j.TaskGroups { 1027 if tg.Name == name { 1028 return tg 1029 } 1030 } 1031 return nil 1032 } 1033 1034 // Stub is used to return a summary of the job 1035 func (j *Job) Stub() *JobListStub { 1036 return &JobListStub{ 1037 ID: j.ID, 1038 ParentID: j.ParentID, 1039 Name: j.Name, 1040 Type: j.Type, 1041 Priority: j.Priority, 1042 Status: j.Status, 1043 StatusDescription: j.StatusDescription, 1044 CreateIndex: j.CreateIndex, 1045 ModifyIndex: j.ModifyIndex, 1046 } 1047 } 1048 1049 // IsPeriodic returns whether a job is periodic. 1050 func (j *Job) IsPeriodic() bool { 1051 return j.Periodic != nil 1052 } 1053 1054 // JobListStub is used to return a subset of job information 1055 // for the job list 1056 type JobListStub struct { 1057 ID string 1058 ParentID string 1059 Name string 1060 Type string 1061 Priority int 1062 Status string 1063 StatusDescription string 1064 CreateIndex uint64 1065 ModifyIndex uint64 1066 } 1067 1068 // UpdateStrategy is used to modify how updates are done 1069 type UpdateStrategy struct { 1070 // Stagger is the amount of time between the updates 1071 Stagger time.Duration 1072 1073 // MaxParallel is how many updates can be done in parallel 1074 MaxParallel int `mapstructure:"max_parallel"` 1075 } 1076 1077 // Rolling returns if a rolling strategy should be used 1078 func (u *UpdateStrategy) Rolling() bool { 1079 return u.Stagger > 0 && u.MaxParallel > 0 1080 } 1081 1082 const ( 1083 // PeriodicSpecCron is used for a cron spec. 1084 PeriodicSpecCron = "cron" 1085 1086 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 1087 // seperated list of unix timestamps at which to launch. 1088 PeriodicSpecTest = "_internal_test" 1089 ) 1090 1091 // Periodic defines the interval a job should be run at. 1092 type PeriodicConfig struct { 1093 // Enabled determines if the job should be run periodically. 1094 Enabled bool 1095 1096 // Spec specifies the interval the job should be run as. It is parsed based 1097 // on the SpecType. 1098 Spec string 1099 1100 // SpecType defines the format of the spec. 1101 SpecType string 1102 1103 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 1104 ProhibitOverlap bool `mapstructure:"prohibit_overlap"` 1105 } 1106 1107 func (p *PeriodicConfig) Copy() *PeriodicConfig { 1108 if p == nil { 1109 return nil 1110 } 1111 np := new(PeriodicConfig) 1112 *np = *p 1113 return np 1114 } 1115 1116 func (p *PeriodicConfig) Validate() error { 1117 if !p.Enabled { 1118 return nil 1119 } 1120 1121 if p.Spec == "" { 1122 return fmt.Errorf("Must specify a spec") 1123 } 1124 1125 switch p.SpecType { 1126 case PeriodicSpecCron: 1127 // Validate the cron spec 1128 if _, err := cronexpr.Parse(p.Spec); err != nil { 1129 return fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err) 1130 } 1131 case PeriodicSpecTest: 1132 // No-op 1133 default: 1134 return fmt.Errorf("Unknown periodic specification type %q", p.SpecType) 1135 } 1136 1137 return nil 1138 } 1139 1140 // Next returns the closest time instant matching the spec that is after the 1141 // passed time. If no matching instance exists, the zero value of time.Time is 1142 // returned. The `time.Location` of the returned value matches that of the 1143 // passed time. 1144 func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { 1145 switch p.SpecType { 1146 case PeriodicSpecCron: 1147 if e, err := cronexpr.Parse(p.Spec); err == nil { 1148 return e.Next(fromTime) 1149 } 1150 case PeriodicSpecTest: 1151 split := strings.Split(p.Spec, ",") 1152 if len(split) == 1 && split[0] == "" { 1153 return time.Time{} 1154 } 1155 1156 // Parse the times 1157 times := make([]time.Time, len(split)) 1158 for i, s := range split { 1159 unix, err := strconv.Atoi(s) 1160 if err != nil { 1161 return time.Time{} 1162 } 1163 1164 times[i] = time.Unix(int64(unix), 0) 1165 } 1166 1167 // Find the next match 1168 for _, next := range times { 1169 if fromTime.Before(next) { 1170 return next 1171 } 1172 } 1173 } 1174 1175 return time.Time{} 1176 } 1177 1178 const ( 1179 // PeriodicLaunchSuffix is the string appended to the periodic jobs ID 1180 // when launching derived instances of it. 1181 PeriodicLaunchSuffix = "/periodic-" 1182 ) 1183 1184 // PeriodicLaunch tracks the last launch time of a periodic job. 1185 type PeriodicLaunch struct { 1186 ID string // ID of the periodic job. 1187 Launch time.Time // The last launch time. 1188 1189 // Raft Indexes 1190 CreateIndex uint64 1191 ModifyIndex uint64 1192 } 1193 1194 var ( 1195 defaultServiceJobRestartPolicy = RestartPolicy{ 1196 Delay: 15 * time.Second, 1197 Attempts: 2, 1198 Interval: 1 * time.Minute, 1199 Mode: RestartPolicyModeDelay, 1200 } 1201 defaultBatchJobRestartPolicy = RestartPolicy{ 1202 Delay: 15 * time.Second, 1203 Attempts: 15, 1204 Interval: 7 * 24 * time.Hour, 1205 Mode: RestartPolicyModeDelay, 1206 } 1207 ) 1208 1209 const ( 1210 // RestartPolicyModeDelay causes an artificial delay till the next interval is 1211 // reached when the specified attempts have been reached in the interval. 1212 RestartPolicyModeDelay = "delay" 1213 1214 // RestartPolicyModeFail causes a job to fail if the specified number of 1215 // attempts are reached within an interval. 1216 RestartPolicyModeFail = "fail" 1217 ) 1218 1219 // RestartPolicy configures how Tasks are restarted when they crash or fail. 1220 type RestartPolicy struct { 1221 // Attempts is the number of restart that will occur in an interval. 1222 Attempts int 1223 1224 // Interval is a duration in which we can limit the number of restarts 1225 // within. 1226 Interval time.Duration 1227 1228 // Delay is the time between a failure and a restart. 1229 Delay time.Duration 1230 1231 // Mode controls what happens when the task restarts more than attempt times 1232 // in an interval. 1233 Mode string 1234 } 1235 1236 func (r *RestartPolicy) Copy() *RestartPolicy { 1237 if r == nil { 1238 return nil 1239 } 1240 nrp := new(RestartPolicy) 1241 *nrp = *r 1242 return nrp 1243 } 1244 1245 func (r *RestartPolicy) Validate() error { 1246 switch r.Mode { 1247 case RestartPolicyModeDelay, RestartPolicyModeFail: 1248 default: 1249 return fmt.Errorf("Unsupported restart mode: %q", r.Mode) 1250 } 1251 1252 // Check for ambiguous/confusing settings 1253 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 1254 return fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts) 1255 } 1256 1257 if r.Interval == 0 { 1258 return nil 1259 } 1260 if time.Duration(r.Attempts)*r.Delay > r.Interval { 1261 return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay) 1262 } 1263 return nil 1264 } 1265 1266 func NewRestartPolicy(jobType string) *RestartPolicy { 1267 switch jobType { 1268 case JobTypeService, JobTypeSystem: 1269 rp := defaultServiceJobRestartPolicy 1270 return &rp 1271 case JobTypeBatch: 1272 rp := defaultBatchJobRestartPolicy 1273 return &rp 1274 } 1275 return nil 1276 } 1277 1278 // TaskGroup is an atomic unit of placement. Each task group belongs to 1279 // a job and may contain any number of tasks. A task group support running 1280 // in many replicas using the same configuration.. 1281 type TaskGroup struct { 1282 // Name of the task group 1283 Name string 1284 1285 // Count is the number of replicas of this task group that should 1286 // be scheduled. 1287 Count int 1288 1289 // Constraints can be specified at a task group level and apply to 1290 // all the tasks contained. 1291 Constraints []*Constraint 1292 1293 //RestartPolicy of a TaskGroup 1294 RestartPolicy *RestartPolicy 1295 1296 // Tasks are the collection of tasks that this task group needs to run 1297 Tasks []*Task 1298 1299 // Meta is used to associate arbitrary metadata with this 1300 // task group. This is opaque to Nomad. 1301 Meta map[string]string 1302 } 1303 1304 func (tg *TaskGroup) Copy() *TaskGroup { 1305 if tg == nil { 1306 return nil 1307 } 1308 ntg := new(TaskGroup) 1309 *ntg = *tg 1310 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 1311 1312 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 1313 1314 tasks := make([]*Task, len(ntg.Tasks)) 1315 for i, t := range ntg.Tasks { 1316 tasks[i] = t.Copy() 1317 } 1318 ntg.Tasks = tasks 1319 1320 ntg.Meta = CopyMapStringString(ntg.Meta) 1321 return ntg 1322 } 1323 1324 // InitFields is used to initialize fields in the TaskGroup. 1325 func (tg *TaskGroup) InitFields(job *Job) { 1326 // Set the default restart policy. 1327 if tg.RestartPolicy == nil { 1328 tg.RestartPolicy = NewRestartPolicy(job.Type) 1329 } 1330 1331 for _, task := range tg.Tasks { 1332 task.InitFields(job, tg) 1333 } 1334 } 1335 1336 // Validate is used to sanity check a task group 1337 func (tg *TaskGroup) Validate() error { 1338 var mErr multierror.Error 1339 if tg.Name == "" { 1340 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 1341 } 1342 if tg.Count <= 0 { 1343 mErr.Errors = append(mErr.Errors, errors.New("Task group count must be positive")) 1344 } 1345 if len(tg.Tasks) == 0 { 1346 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 1347 } 1348 for idx, constr := range tg.Constraints { 1349 if err := constr.Validate(); err != nil { 1350 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1351 mErr.Errors = append(mErr.Errors, outer) 1352 } 1353 } 1354 1355 if tg.RestartPolicy != nil { 1356 if err := tg.RestartPolicy.Validate(); err != nil { 1357 mErr.Errors = append(mErr.Errors, err) 1358 } 1359 } else { 1360 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 1361 } 1362 1363 // Check for duplicate tasks 1364 tasks := make(map[string]int) 1365 for idx, task := range tg.Tasks { 1366 if task.Name == "" { 1367 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 1368 } else if existing, ok := tasks[task.Name]; ok { 1369 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 1370 } else { 1371 tasks[task.Name] = idx 1372 } 1373 } 1374 1375 // Validate the tasks 1376 for idx, task := range tg.Tasks { 1377 if err := task.Validate(); err != nil { 1378 outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err) 1379 mErr.Errors = append(mErr.Errors, outer) 1380 } 1381 } 1382 return mErr.ErrorOrNil() 1383 } 1384 1385 // LookupTask finds a task by name 1386 func (tg *TaskGroup) LookupTask(name string) *Task { 1387 for _, t := range tg.Tasks { 1388 if t.Name == name { 1389 return t 1390 } 1391 } 1392 return nil 1393 } 1394 1395 func (tg *TaskGroup) GoString() string { 1396 return fmt.Sprintf("*%#v", *tg) 1397 } 1398 1399 const ( 1400 ServiceCheckHTTP = "http" 1401 ServiceCheckTCP = "tcp" 1402 ServiceCheckDocker = "docker" 1403 ServiceCheckScript = "script" 1404 ) 1405 1406 // The ServiceCheck data model represents the consul health check that 1407 // Nomad registers for a Task 1408 type ServiceCheck struct { 1409 Name string // Name of the check, defaults to id 1410 Type string // Type of the check - tcp, http, docker and script 1411 Script string // Script to invoke for script check 1412 Path string // path of the health check url for http type check 1413 Protocol string // Protocol to use if check is http, defaults to http 1414 Interval time.Duration // Interval of the check 1415 Timeout time.Duration // Timeout of the response from the check before consul fails the check 1416 } 1417 1418 func (sc *ServiceCheck) Copy() *ServiceCheck { 1419 if sc == nil { 1420 return nil 1421 } 1422 nsc := new(ServiceCheck) 1423 *nsc = *sc 1424 return nsc 1425 } 1426 1427 func (sc *ServiceCheck) Validate() error { 1428 t := strings.ToLower(sc.Type) 1429 if t != ServiceCheckTCP && t != ServiceCheckHTTP { 1430 return fmt.Errorf("service check must be either http or tcp type") 1431 } 1432 if sc.Type == ServiceCheckHTTP && sc.Path == "" { 1433 return fmt.Errorf("service checks of http type must have a valid http path") 1434 } 1435 1436 if sc.Type == ServiceCheckScript && sc.Script == "" { 1437 return fmt.Errorf("service checks of script type must have a valid script path") 1438 } 1439 1440 if sc.Interval <= 0 { 1441 return fmt.Errorf("service checks must have positive time intervals") 1442 } 1443 return nil 1444 } 1445 1446 func (sc *ServiceCheck) Hash(serviceID string) string { 1447 h := sha1.New() 1448 io.WriteString(h, serviceID) 1449 io.WriteString(h, sc.Name) 1450 io.WriteString(h, sc.Type) 1451 io.WriteString(h, sc.Script) 1452 io.WriteString(h, sc.Path) 1453 io.WriteString(h, sc.Path) 1454 io.WriteString(h, sc.Protocol) 1455 io.WriteString(h, sc.Interval.String()) 1456 io.WriteString(h, sc.Timeout.String()) 1457 return fmt.Sprintf("%x", h.Sum(nil)) 1458 } 1459 1460 const ( 1461 NomadConsulPrefix = "nomad-registered-service" 1462 ) 1463 1464 // The Service model represents a Consul service defintion 1465 type Service struct { 1466 Name string // Name of the service, defaults to id 1467 Tags []string // List of tags for the service 1468 PortLabel string `mapstructure:"port"` // port for the service 1469 Checks []*ServiceCheck // List of checks associated with the service 1470 } 1471 1472 func (s *Service) Copy() *Service { 1473 if s == nil { 1474 return nil 1475 } 1476 ns := new(Service) 1477 *ns = *s 1478 ns.Tags = CopySliceString(ns.Tags) 1479 1480 var checks []*ServiceCheck 1481 if l := len(ns.Checks); l != 0 { 1482 checks = make([]*ServiceCheck, len(ns.Checks)) 1483 for i, c := range ns.Checks { 1484 checks[i] = c.Copy() 1485 } 1486 } 1487 ns.Checks = checks 1488 return ns 1489 } 1490 1491 // InitFields interpolates values of Job, Task Group and Task in the Service 1492 // Name. This also generates check names, service id and check ids. 1493 func (s *Service) InitFields(job string, taskGroup string, task string) { 1494 s.Name = args.ReplaceEnv(s.Name, map[string]string{ 1495 "JOB": job, 1496 "TASKGROUP": taskGroup, 1497 "TASK": task, 1498 "BASE": fmt.Sprintf("%s-%s-%s", job, taskGroup, task), 1499 }, 1500 ) 1501 1502 for _, check := range s.Checks { 1503 if check.Name == "" { 1504 check.Name = fmt.Sprintf("service: %q check", s.Name) 1505 } 1506 } 1507 } 1508 1509 // Validate checks if the Check definition is valid 1510 func (s *Service) Validate() error { 1511 var mErr multierror.Error 1512 1513 // Ensure the name does not have a period in it. 1514 // RFC-2782: https://tools.ietf.org/html/rfc2782 1515 if strings.Contains(s.Name, ".") { 1516 mErr.Errors = append(mErr.Errors, fmt.Errorf("service name can't contain periods: %q", s.Name)) 1517 } 1518 1519 for _, c := range s.Checks { 1520 if err := c.Validate(); err != nil { 1521 mErr.Errors = append(mErr.Errors, err) 1522 } 1523 } 1524 return mErr.ErrorOrNil() 1525 } 1526 1527 // Hash calculates the hash of the check based on it's content and the service 1528 // which owns it 1529 func (s *Service) Hash() string { 1530 h := sha1.New() 1531 io.WriteString(h, s.Name) 1532 io.WriteString(h, strings.Join(s.Tags, "")) 1533 io.WriteString(h, s.PortLabel) 1534 return fmt.Sprintf("%x", h.Sum(nil)) 1535 } 1536 1537 const ( 1538 // DefaultKillTimeout is the default timeout between signaling a task it 1539 // will be killed and killing it. 1540 DefaultKillTimeout = 5 * time.Second 1541 ) 1542 1543 // LogConfig provides configuration for log rotation 1544 type LogConfig struct { 1545 MaxFiles int `mapstructure:"max_files"` 1546 MaxFileSizeMB int `mapstructure:"max_file_size"` 1547 } 1548 1549 func DefaultLogConfig() *LogConfig { 1550 return &LogConfig{ 1551 MaxFiles: 10, 1552 MaxFileSizeMB: 10, 1553 } 1554 } 1555 1556 // Validate returns an error if the log config specified are less than 1557 // the minimum allowed. 1558 func (l *LogConfig) Validate() error { 1559 var mErr multierror.Error 1560 if l.MaxFiles < 1 { 1561 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles)) 1562 } 1563 if l.MaxFileSizeMB < 1 { 1564 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB)) 1565 } 1566 return mErr.ErrorOrNil() 1567 } 1568 1569 // Task is a single process typically that is executed as part of a task group. 1570 type Task struct { 1571 // Name of the task 1572 Name string 1573 1574 // Driver is used to control which driver is used 1575 Driver string 1576 1577 // Config is provided to the driver to initialize 1578 Config map[string]interface{} 1579 1580 // Map of environment variables to be used by the driver 1581 Env map[string]string 1582 1583 // List of service definitions exposed by the Task 1584 Services []*Service 1585 1586 // Constraints can be specified at a task level and apply only to 1587 // the particular task. 1588 Constraints []*Constraint 1589 1590 // Resources is the resources needed by this task 1591 Resources *Resources 1592 1593 // Meta is used to associate arbitrary metadata with this 1594 // task. This is opaque to Nomad. 1595 Meta map[string]string 1596 1597 // KillTimeout is the time between signaling a task that it will be 1598 // killed and killing it. 1599 KillTimeout time.Duration `mapstructure:"kill_timeout"` 1600 1601 // LogConfig provides configuration for log rotation 1602 LogConfig *LogConfig `mapstructure:"logs"` 1603 } 1604 1605 func (t *Task) Copy() *Task { 1606 if t == nil { 1607 return nil 1608 } 1609 nt := new(Task) 1610 *nt = *t 1611 nt.Env = CopyMapStringString(nt.Env) 1612 1613 services := make([]*Service, len(nt.Services)) 1614 for i, s := range nt.Services { 1615 services[i] = s.Copy() 1616 } 1617 nt.Services = services 1618 nt.Constraints = CopySliceConstraints(nt.Constraints) 1619 1620 nt.Resources = nt.Resources.Copy() 1621 nt.Meta = CopyMapStringString(nt.Meta) 1622 1623 if i, err := copystructure.Copy(nt.Config); err != nil { 1624 nt.Config = i.(map[string]interface{}) 1625 } 1626 1627 return nt 1628 } 1629 1630 // InitFields initializes fields in the task. 1631 func (t *Task) InitFields(job *Job, tg *TaskGroup) { 1632 t.InitServiceFields(job.Name, tg.Name) 1633 1634 // Set the default timeout if it is not specified. 1635 if t.KillTimeout == 0 { 1636 t.KillTimeout = DefaultKillTimeout 1637 } 1638 } 1639 1640 // InitServiceFields interpolates values of Job, Task Group 1641 // and Tasks in all the service Names of a Task. This also generates the service 1642 // id, check id and check names. 1643 func (t *Task) InitServiceFields(job string, taskGroup string) { 1644 for _, service := range t.Services { 1645 service.InitFields(job, taskGroup, t.Name) 1646 } 1647 } 1648 1649 func (t *Task) GoString() string { 1650 return fmt.Sprintf("*%#v", *t) 1651 } 1652 1653 func (t *Task) FindHostAndPortFor(portLabel string) (string, int) { 1654 for _, network := range t.Resources.Networks { 1655 if p, ok := network.MapLabelToValues(nil)[portLabel]; ok { 1656 return network.IP, p 1657 } 1658 } 1659 return "", 0 1660 } 1661 1662 // Set of possible states for a task. 1663 const ( 1664 TaskStatePending = "pending" // The task is waiting to be run. 1665 TaskStateRunning = "running" // The task is currently running. 1666 TaskStateDead = "dead" // Terminal state of task. 1667 ) 1668 1669 // TaskState tracks the current state of a task and events that caused state 1670 // transistions. 1671 type TaskState struct { 1672 // The current state of the task. 1673 State string 1674 1675 // Series of task events that transistion the state of the task. 1676 Events []*TaskEvent 1677 } 1678 1679 func (ts *TaskState) Copy() *TaskState { 1680 if ts == nil { 1681 return nil 1682 } 1683 copy := new(TaskState) 1684 copy.State = ts.State 1685 copy.Events = make([]*TaskEvent, len(ts.Events)) 1686 for i, e := range ts.Events { 1687 copy.Events[i] = e.Copy() 1688 } 1689 return copy 1690 } 1691 1692 const ( 1693 // A Driver failure indicates that the task could not be started due to a 1694 // failure in the driver. 1695 TaskDriverFailure = "Driver Failure" 1696 1697 // Task Received signals that the task has been pulled by the client at the 1698 // given timestamp. 1699 TaskReceived = "Received" 1700 1701 // Task Started signals that the task was started and its timestamp can be 1702 // used to determine the running length of the task. 1703 TaskStarted = "Started" 1704 1705 // Task terminated indicates that the task was started and exited. 1706 TaskTerminated = "Terminated" 1707 1708 // Task Killed indicates a user has killed the task. 1709 TaskKilled = "Killed" 1710 ) 1711 1712 // TaskEvent is an event that effects the state of a task and contains meta-data 1713 // appropriate to the events type. 1714 type TaskEvent struct { 1715 Type string 1716 Time int64 // Unix Nanosecond timestamp 1717 1718 // Driver Failure fields. 1719 DriverError string // A driver error occured while starting the task. 1720 1721 // Task Terminated Fields. 1722 ExitCode int // The exit code of the task. 1723 Signal int // The signal that terminated the task. 1724 Message string // A possible message explaining the termination of the task. 1725 1726 // Task Killed Fields. 1727 KillError string // Error killing the task. 1728 } 1729 1730 func (te *TaskEvent) Copy() *TaskEvent { 1731 if te == nil { 1732 return nil 1733 } 1734 copy := new(TaskEvent) 1735 *copy = *te 1736 return copy 1737 } 1738 1739 func NewTaskEvent(event string) *TaskEvent { 1740 return &TaskEvent{ 1741 Type: event, 1742 Time: time.Now().UnixNano(), 1743 } 1744 } 1745 1746 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 1747 if err != nil { 1748 e.DriverError = err.Error() 1749 } 1750 return e 1751 } 1752 1753 func (e *TaskEvent) SetExitCode(c int) *TaskEvent { 1754 e.ExitCode = c 1755 return e 1756 } 1757 1758 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 1759 e.Signal = s 1760 return e 1761 } 1762 1763 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 1764 if err != nil { 1765 e.Message = err.Error() 1766 } 1767 return e 1768 } 1769 1770 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 1771 if err != nil { 1772 e.KillError = err.Error() 1773 } 1774 return e 1775 } 1776 1777 // Validate is used to sanity check a task group 1778 func (t *Task) Validate() error { 1779 var mErr multierror.Error 1780 if t.Name == "" { 1781 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 1782 } 1783 if t.Driver == "" { 1784 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 1785 } 1786 if t.KillTimeout.Nanoseconds() < 0 { 1787 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 1788 } 1789 1790 // Validate the resources. 1791 if t.Resources == nil { 1792 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 1793 } else if err := t.Resources.MeetsMinResources(); err != nil { 1794 mErr.Errors = append(mErr.Errors, err) 1795 } 1796 1797 // Validate the log config 1798 if t.LogConfig == nil { 1799 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 1800 } else if err := t.LogConfig.Validate(); err != nil { 1801 mErr.Errors = append(mErr.Errors, err) 1802 } 1803 1804 for idx, constr := range t.Constraints { 1805 if err := constr.Validate(); err != nil { 1806 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1807 mErr.Errors = append(mErr.Errors, outer) 1808 } 1809 } 1810 1811 for _, service := range t.Services { 1812 if err := service.Validate(); err != nil { 1813 mErr.Errors = append(mErr.Errors, err) 1814 } 1815 } 1816 1817 if t.LogConfig != nil && t.Resources != nil { 1818 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 1819 if t.Resources.DiskMB <= logUsage { 1820 mErr.Errors = append(mErr.Errors, 1821 fmt.Errorf("log storage (%d MB) exceeds requested disk capacity (%d MB)", 1822 logUsage, t.Resources.DiskMB)) 1823 } 1824 } 1825 return mErr.ErrorOrNil() 1826 } 1827 1828 const ( 1829 ConstraintDistinctHosts = "distinct_hosts" 1830 ConstraintRegex = "regexp" 1831 ConstraintVersion = "version" 1832 ) 1833 1834 // Constraints are used to restrict placement options. 1835 type Constraint struct { 1836 LTarget string // Left-hand target 1837 RTarget string // Right-hand target 1838 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 1839 str string // Memoized string 1840 } 1841 1842 func (c *Constraint) Copy() *Constraint { 1843 if c == nil { 1844 return nil 1845 } 1846 nc := new(Constraint) 1847 *nc = *c 1848 return nc 1849 } 1850 1851 func (c *Constraint) String() string { 1852 if c.str != "" { 1853 return c.str 1854 } 1855 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 1856 return c.str 1857 } 1858 1859 func (c *Constraint) Validate() error { 1860 var mErr multierror.Error 1861 if c.Operand == "" { 1862 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 1863 } 1864 1865 // Perform additional validation based on operand 1866 switch c.Operand { 1867 case ConstraintRegex: 1868 if _, err := regexp.Compile(c.RTarget); err != nil { 1869 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 1870 } 1871 case ConstraintVersion: 1872 if _, err := version.NewConstraint(c.RTarget); err != nil { 1873 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 1874 } 1875 } 1876 return mErr.ErrorOrNil() 1877 } 1878 1879 const ( 1880 AllocDesiredStatusRun = "run" // Allocation should run 1881 AllocDesiredStatusStop = "stop" // Allocation should stop 1882 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 1883 AllocDesiredStatusFailed = "failed" // Allocation failed to be done 1884 ) 1885 1886 const ( 1887 AllocClientStatusPending = "pending" 1888 AllocClientStatusRunning = "running" 1889 AllocClientStatusDead = "dead" 1890 AllocClientStatusFailed = "failed" 1891 ) 1892 1893 // Allocation is used to allocate the placement of a task group to a node. 1894 type Allocation struct { 1895 // ID of the allocation (UUID) 1896 ID string 1897 1898 // ID of the evaluation that generated this allocation 1899 EvalID string 1900 1901 // Name is a logical name of the allocation. 1902 Name string 1903 1904 // NodeID is the node this is being placed on 1905 NodeID string 1906 1907 // Job is the parent job of the task group being allocated. 1908 // This is copied at allocation time to avoid issues if the job 1909 // definition is updated. 1910 JobID string 1911 Job *Job 1912 1913 // TaskGroup is the name of the task group that should be run 1914 TaskGroup string 1915 1916 // Resources is the total set of resources allocated as part 1917 // of this allocation of the task group. 1918 Resources *Resources 1919 1920 // TaskResources is the set of resources allocated to each 1921 // task. These should sum to the total Resources. 1922 TaskResources map[string]*Resources 1923 1924 // Services is a map of service names to service ids 1925 Services map[string]string 1926 1927 // Metrics associated with this allocation 1928 Metrics *AllocMetric 1929 1930 // Desired Status of the allocation on the client 1931 DesiredStatus string 1932 1933 // DesiredStatusDescription is meant to provide more human useful information 1934 DesiredDescription string 1935 1936 // Status of the allocation on the client 1937 ClientStatus string 1938 1939 // ClientStatusDescription is meant to provide more human useful information 1940 ClientDescription string 1941 1942 // TaskStates stores the state of each task, 1943 TaskStates map[string]*TaskState 1944 1945 // Raft Indexes 1946 CreateIndex uint64 1947 ModifyIndex uint64 1948 1949 // AllocModifyIndex is not updated when the client updates allocations. This 1950 // lets the client pull only the allocs updated by the server. 1951 AllocModifyIndex uint64 1952 1953 // CreateTime is the time the allocation has finished scheduling and been 1954 // verified by the plan applier. 1955 CreateTime int64 1956 } 1957 1958 func (a *Allocation) Copy() *Allocation { 1959 if a == nil { 1960 return nil 1961 } 1962 na := new(Allocation) 1963 *na = *a 1964 1965 na.Job = na.Job.Copy() 1966 na.Resources = na.Resources.Copy() 1967 1968 tr := make(map[string]*Resources, len(na.TaskResources)) 1969 for task, resource := range na.TaskResources { 1970 tr[task] = resource.Copy() 1971 } 1972 na.TaskResources = tr 1973 1974 s := make(map[string]string, len(na.Services)) 1975 for service, id := range na.Services { 1976 s[service] = id 1977 } 1978 na.Services = s 1979 1980 na.Metrics = na.Metrics.Copy() 1981 1982 ts := make(map[string]*TaskState, len(na.TaskStates)) 1983 for task, state := range na.TaskStates { 1984 ts[task] = state.Copy() 1985 } 1986 na.TaskStates = ts 1987 return na 1988 } 1989 1990 // TerminalStatus returns if the desired or actual status is terminal and 1991 // will no longer transition. 1992 func (a *Allocation) TerminalStatus() bool { 1993 // First check the desired state and if that isn't terminal, check client 1994 // state. 1995 switch a.DesiredStatus { 1996 case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed: 1997 return true 1998 default: 1999 } 2000 2001 switch a.ClientStatus { 2002 case AllocClientStatusDead, AllocClientStatusFailed: 2003 return true 2004 default: 2005 return false 2006 } 2007 } 2008 2009 // Stub returns a list stub for the allocation 2010 func (a *Allocation) Stub() *AllocListStub { 2011 return &AllocListStub{ 2012 ID: a.ID, 2013 EvalID: a.EvalID, 2014 Name: a.Name, 2015 NodeID: a.NodeID, 2016 JobID: a.JobID, 2017 TaskGroup: a.TaskGroup, 2018 DesiredStatus: a.DesiredStatus, 2019 DesiredDescription: a.DesiredDescription, 2020 ClientStatus: a.ClientStatus, 2021 ClientDescription: a.ClientDescription, 2022 TaskStates: a.TaskStates, 2023 CreateIndex: a.CreateIndex, 2024 ModifyIndex: a.ModifyIndex, 2025 CreateTime: a.CreateTime, 2026 } 2027 } 2028 2029 // PopulateServiceIDs generates the service IDs for all the service definitions 2030 // in that Allocation 2031 func (a *Allocation) PopulateServiceIDs(tg *TaskGroup) { 2032 // Retain the old services, and re-initialize. We may be removing 2033 // services, so we cannot update the existing map. 2034 previous := a.Services 2035 a.Services = make(map[string]string) 2036 2037 for _, task := range tg.Tasks { 2038 for _, service := range task.Services { 2039 // Retain the service if an ID is already generated 2040 if id, ok := previous[service.Name]; ok { 2041 a.Services[service.Name] = id 2042 continue 2043 } 2044 2045 // If the service hasn't been generated an ID, we generate one. 2046 // We add a prefix to the Service ID so that we can know that this service 2047 // is managed by Nomad since Consul can also have service which are not 2048 // managed by Nomad 2049 a.Services[service.Name] = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID()) 2050 } 2051 } 2052 } 2053 2054 // AllocListStub is used to return a subset of alloc information 2055 type AllocListStub struct { 2056 ID string 2057 EvalID string 2058 Name string 2059 NodeID string 2060 JobID string 2061 TaskGroup string 2062 DesiredStatus string 2063 DesiredDescription string 2064 ClientStatus string 2065 ClientDescription string 2066 TaskStates map[string]*TaskState 2067 CreateIndex uint64 2068 ModifyIndex uint64 2069 CreateTime int64 2070 } 2071 2072 // AllocMetric is used to track various metrics while attempting 2073 // to make an allocation. These are used to debug a job, or to better 2074 // understand the pressure within the system. 2075 type AllocMetric struct { 2076 // NodesEvaluated is the number of nodes that were evaluated 2077 NodesEvaluated int 2078 2079 // NodesFiltered is the number of nodes filtered due to a constraint 2080 NodesFiltered int 2081 2082 // NodesAvailable is the number of nodes available for evaluation per DC. 2083 NodesAvailable map[string]int 2084 2085 // ClassFiltered is the number of nodes filtered by class 2086 ClassFiltered map[string]int 2087 2088 // ConstraintFiltered is the number of failures caused by constraint 2089 ConstraintFiltered map[string]int 2090 2091 // NodesExhausted is the number of nodes skipped due to being 2092 // exhausted of at least one resource 2093 NodesExhausted int 2094 2095 // ClassExhausted is the number of nodes exhausted by class 2096 ClassExhausted map[string]int 2097 2098 // DimensionExhausted provides the count by dimension or reason 2099 DimensionExhausted map[string]int 2100 2101 // Scores is the scores of the final few nodes remaining 2102 // for placement. The top score is typically selected. 2103 Scores map[string]float64 2104 2105 // AllocationTime is a measure of how long the allocation 2106 // attempt took. This can affect performance and SLAs. 2107 AllocationTime time.Duration 2108 2109 // CoalescedFailures indicates the number of other 2110 // allocations that were coalesced into this failed allocation. 2111 // This is to prevent creating many failed allocations for a 2112 // single task group. 2113 CoalescedFailures int 2114 } 2115 2116 func (a *AllocMetric) Copy() *AllocMetric { 2117 if a == nil { 2118 return nil 2119 } 2120 na := new(AllocMetric) 2121 *na = *a 2122 na.NodesAvailable = CopyMapStringInt(na.NodesAvailable) 2123 na.ClassFiltered = CopyMapStringInt(na.ClassFiltered) 2124 na.ConstraintFiltered = CopyMapStringInt(na.ConstraintFiltered) 2125 na.ClassExhausted = CopyMapStringInt(na.ClassExhausted) 2126 na.DimensionExhausted = CopyMapStringInt(na.DimensionExhausted) 2127 na.Scores = CopyMapStringFloat64(na.Scores) 2128 return na 2129 } 2130 2131 func (a *AllocMetric) EvaluateNode() { 2132 a.NodesEvaluated += 1 2133 } 2134 2135 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 2136 a.NodesFiltered += 1 2137 if node != nil && node.NodeClass != "" { 2138 if a.ClassFiltered == nil { 2139 a.ClassFiltered = make(map[string]int) 2140 } 2141 a.ClassFiltered[node.NodeClass] += 1 2142 } 2143 if constraint != "" { 2144 if a.ConstraintFiltered == nil { 2145 a.ConstraintFiltered = make(map[string]int) 2146 } 2147 a.ConstraintFiltered[constraint] += 1 2148 } 2149 } 2150 2151 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 2152 a.NodesExhausted += 1 2153 if node != nil && node.NodeClass != "" { 2154 if a.ClassExhausted == nil { 2155 a.ClassExhausted = make(map[string]int) 2156 } 2157 a.ClassExhausted[node.NodeClass] += 1 2158 } 2159 if dimension != "" { 2160 if a.DimensionExhausted == nil { 2161 a.DimensionExhausted = make(map[string]int) 2162 } 2163 a.DimensionExhausted[dimension] += 1 2164 } 2165 } 2166 2167 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 2168 if a.Scores == nil { 2169 a.Scores = make(map[string]float64) 2170 } 2171 key := fmt.Sprintf("%s.%s", node.ID, name) 2172 a.Scores[key] = score 2173 } 2174 2175 const ( 2176 EvalStatusBlocked = "blocked" 2177 EvalStatusPending = "pending" 2178 EvalStatusComplete = "complete" 2179 EvalStatusFailed = "failed" 2180 EvalStatusCancelled = "canceled" 2181 ) 2182 2183 const ( 2184 EvalTriggerJobRegister = "job-register" 2185 EvalTriggerJobDeregister = "job-deregister" 2186 EvalTriggerPeriodicJob = "periodic-job" 2187 EvalTriggerNodeUpdate = "node-update" 2188 EvalTriggerScheduled = "scheduled" 2189 EvalTriggerForceGC = "force-gc" 2190 EvalTriggerRollingUpdate = "rolling-update" 2191 ) 2192 2193 const ( 2194 // CoreJobEvalGC is used for the garbage collection of evaluations 2195 // and allocations. We periodically scan evaluations in a terminal state, 2196 // in which all the corresponding allocations are also terminal. We 2197 // delete these out of the system to bound the state. 2198 CoreJobEvalGC = "eval-gc" 2199 2200 // CoreJobNodeGC is used for the garbage collection of failed nodes. 2201 // We periodically scan nodes in a terminal state, and if they have no 2202 // corresponding allocations we delete these out of the system. 2203 CoreJobNodeGC = "node-gc" 2204 2205 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 2206 // periodically scan garbage collectible jobs and check if both their 2207 // evaluations and allocations are terminal. If so, we delete these out of 2208 // the system. 2209 CoreJobJobGC = "job-gc" 2210 ) 2211 2212 // Evaluation is used anytime we need to apply business logic as a result 2213 // of a change to our desired state (job specification) or the emergent state 2214 // (registered nodes). When the inputs change, we need to "evaluate" them, 2215 // potentially taking action (allocation of work) or doing nothing if the state 2216 // of the world does not require it. 2217 type Evaluation struct { 2218 // ID is a randonly generated UUID used for this evaluation. This 2219 // is assigned upon the creation of the evaluation. 2220 ID string 2221 2222 // Priority is used to control scheduling importance and if this job 2223 // can preempt other jobs. 2224 Priority int 2225 2226 // Type is used to control which schedulers are available to handle 2227 // this evaluation. 2228 Type string 2229 2230 // TriggeredBy is used to give some insight into why this Eval 2231 // was created. (Job change, node failure, alloc failure, etc). 2232 TriggeredBy string 2233 2234 // JobID is the job this evaluation is scoped to. Evaluations cannot 2235 // be run in parallel for a given JobID, so we serialize on this. 2236 JobID string 2237 2238 // JobModifyIndex is the modify index of the job at the time 2239 // the evaluation was created 2240 JobModifyIndex uint64 2241 2242 // NodeID is the node that was affected triggering the evaluation. 2243 NodeID string 2244 2245 // NodeModifyIndex is the modify index of the node at the time 2246 // the evaluation was created 2247 NodeModifyIndex uint64 2248 2249 // Status of the evaluation 2250 Status string 2251 2252 // StatusDescription is meant to provide more human useful information 2253 StatusDescription string 2254 2255 // Wait is a minimum wait time for running the eval. This is used to 2256 // support a rolling upgrade. 2257 Wait time.Duration 2258 2259 // NextEval is the evaluation ID for the eval created to do a followup. 2260 // This is used to support rolling upgrades, where we need a chain of evaluations. 2261 NextEval string 2262 2263 // PreviousEval is the evaluation ID for the eval creating this one to do a followup. 2264 // This is used to support rolling upgrades, where we need a chain of evaluations. 2265 PreviousEval string 2266 2267 // ClassEligibility tracks computed node classes that have been explicitely 2268 // marked as eligible or ineligible. 2269 ClassEligibility map[string]bool 2270 2271 // EscapedComputedClass marks whether the job has constraints that are not 2272 // captured by computed node classes. 2273 EscapedComputedClass bool 2274 2275 // Raft Indexes 2276 CreateIndex uint64 2277 ModifyIndex uint64 2278 } 2279 2280 // TerminalStatus returns if the current status is terminal and 2281 // will no longer transition. 2282 func (e *Evaluation) TerminalStatus() bool { 2283 switch e.Status { 2284 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 2285 return true 2286 default: 2287 return false 2288 } 2289 } 2290 2291 func (e *Evaluation) GoString() string { 2292 return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID) 2293 } 2294 2295 func (e *Evaluation) Copy() *Evaluation { 2296 if e == nil { 2297 return nil 2298 } 2299 ne := new(Evaluation) 2300 *ne = *e 2301 return ne 2302 } 2303 2304 // ShouldEnqueue checks if a given evaluation should be enqueued into the 2305 // eval_broker 2306 func (e *Evaluation) ShouldEnqueue() bool { 2307 switch e.Status { 2308 case EvalStatusPending: 2309 return true 2310 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 2311 return false 2312 default: 2313 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 2314 } 2315 } 2316 2317 // ShouldBlock checks if a given evaluation should be entered into the blocked 2318 // eval tracker. 2319 func (e *Evaluation) ShouldBlock() bool { 2320 switch e.Status { 2321 case EvalStatusBlocked: 2322 return true 2323 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 2324 return false 2325 default: 2326 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 2327 } 2328 } 2329 2330 // MakePlan is used to make a plan from the given evaluation 2331 // for a given Job 2332 func (e *Evaluation) MakePlan(j *Job) *Plan { 2333 p := &Plan{ 2334 EvalID: e.ID, 2335 Priority: e.Priority, 2336 Job: j, 2337 NodeUpdate: make(map[string][]*Allocation), 2338 NodeAllocation: make(map[string][]*Allocation), 2339 } 2340 if j != nil { 2341 p.AllAtOnce = j.AllAtOnce 2342 } 2343 return p 2344 } 2345 2346 // NextRollingEval creates an evaluation to followup this eval for rolling updates 2347 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 2348 return &Evaluation{ 2349 ID: GenerateUUID(), 2350 Priority: e.Priority, 2351 Type: e.Type, 2352 TriggeredBy: EvalTriggerRollingUpdate, 2353 JobID: e.JobID, 2354 JobModifyIndex: e.JobModifyIndex, 2355 Status: EvalStatusPending, 2356 Wait: wait, 2357 PreviousEval: e.ID, 2358 } 2359 } 2360 2361 // BlockedEval creates a blocked evaluation to followup this eval to place any 2362 // failed allocations. It takes the classes marked explicitely eligible or 2363 // ineligible and whether the job has escaped computed node classes. 2364 func (e *Evaluation) BlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation { 2365 return &Evaluation{ 2366 ID: GenerateUUID(), 2367 Priority: e.Priority, 2368 Type: e.Type, 2369 TriggeredBy: e.TriggeredBy, 2370 JobID: e.JobID, 2371 JobModifyIndex: e.JobModifyIndex, 2372 Status: EvalStatusBlocked, 2373 PreviousEval: e.ID, 2374 ClassEligibility: classEligibility, 2375 EscapedComputedClass: escaped, 2376 } 2377 } 2378 2379 // Plan is used to submit a commit plan for task allocations. These 2380 // are submitted to the leader which verifies that resources have 2381 // not been overcommitted before admiting the plan. 2382 type Plan struct { 2383 // EvalID is the evaluation ID this plan is associated with 2384 EvalID string 2385 2386 // EvalToken is used to prevent a split-brain processing of 2387 // an evaluation. There should only be a single scheduler running 2388 // an Eval at a time, but this could be violated after a leadership 2389 // transition. This unique token is used to reject plans that are 2390 // being submitted from a different leader. 2391 EvalToken string 2392 2393 // Priority is the priority of the upstream job 2394 Priority int 2395 2396 // AllAtOnce is used to control if incremental scheduling of task groups 2397 // is allowed or if we must do a gang scheduling of the entire job. 2398 // If this is false, a plan may be partially applied. Otherwise, the 2399 // entire plan must be able to make progress. 2400 AllAtOnce bool 2401 2402 // Job is the parent job of all the allocations in the Plan. 2403 // Since a Plan only involves a single Job, we can reduce the size 2404 // of the plan by only including it once. 2405 Job *Job 2406 2407 // NodeUpdate contains all the allocations for each node. For each node, 2408 // this is a list of the allocations to update to either stop or evict. 2409 NodeUpdate map[string][]*Allocation 2410 2411 // NodeAllocation contains all the allocations for each node. 2412 // The evicts must be considered prior to the allocations. 2413 NodeAllocation map[string][]*Allocation 2414 2415 // FailedAllocs are allocations that could not be made, 2416 // but are persisted so that the user can use the feedback 2417 // to determine the cause. 2418 FailedAllocs []*Allocation 2419 } 2420 2421 func (p *Plan) AppendUpdate(alloc *Allocation, status, desc string) { 2422 newAlloc := new(Allocation) 2423 *newAlloc = *alloc 2424 2425 // If the job is not set in the plan we are deregistering a job so we 2426 // extract the job from the allocation. 2427 if p.Job == nil && newAlloc.Job != nil { 2428 p.Job = newAlloc.Job 2429 } 2430 2431 // Normalize the job 2432 newAlloc.Job = nil 2433 newAlloc.DesiredStatus = status 2434 newAlloc.DesiredDescription = desc 2435 node := alloc.NodeID 2436 existing := p.NodeUpdate[node] 2437 p.NodeUpdate[node] = append(existing, newAlloc) 2438 } 2439 2440 func (p *Plan) PopUpdate(alloc *Allocation) { 2441 existing := p.NodeUpdate[alloc.NodeID] 2442 n := len(existing) 2443 if n > 0 && existing[n-1].ID == alloc.ID { 2444 existing = existing[:n-1] 2445 if len(existing) > 0 { 2446 p.NodeUpdate[alloc.NodeID] = existing 2447 } else { 2448 delete(p.NodeUpdate, alloc.NodeID) 2449 } 2450 } 2451 } 2452 2453 func (p *Plan) AppendAlloc(alloc *Allocation) { 2454 node := alloc.NodeID 2455 existing := p.NodeAllocation[node] 2456 p.NodeAllocation[node] = append(existing, alloc) 2457 } 2458 2459 func (p *Plan) AppendFailed(alloc *Allocation) { 2460 p.FailedAllocs = append(p.FailedAllocs, alloc) 2461 } 2462 2463 // IsNoOp checks if this plan would do nothing 2464 func (p *Plan) IsNoOp() bool { 2465 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0 2466 } 2467 2468 // PlanResult is the result of a plan submitted to the leader. 2469 type PlanResult struct { 2470 // NodeUpdate contains all the updates that were committed. 2471 NodeUpdate map[string][]*Allocation 2472 2473 // NodeAllocation contains all the allocations that were committed. 2474 NodeAllocation map[string][]*Allocation 2475 2476 // FailedAllocs are allocations that could not be made, 2477 // but are persisted so that the user can use the feedback 2478 // to determine the cause. 2479 FailedAllocs []*Allocation 2480 2481 // RefreshIndex is the index the worker should refresh state up to. 2482 // This allows all evictions and allocations to be materialized. 2483 // If any allocations were rejected due to stale data (node state, 2484 // over committed) this can be used to force a worker refresh. 2485 RefreshIndex uint64 2486 2487 // AllocIndex is the Raft index in which the evictions and 2488 // allocations took place. This is used for the write index. 2489 AllocIndex uint64 2490 } 2491 2492 // IsNoOp checks if this plan result would do nothing 2493 func (p *PlanResult) IsNoOp() bool { 2494 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0 2495 } 2496 2497 // FullCommit is used to check if all the allocations in a plan 2498 // were committed as part of the result. Returns if there was 2499 // a match, and the number of expected and actual allocations. 2500 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) { 2501 expected := 0 2502 actual := 0 2503 for name, allocList := range plan.NodeAllocation { 2504 didAlloc, _ := p.NodeAllocation[name] 2505 expected += len(allocList) 2506 actual += len(didAlloc) 2507 } 2508 return actual == expected, expected, actual 2509 } 2510 2511 // msgpackHandle is a shared handle for encoding/decoding of structs 2512 var MsgpackHandle = func() *codec.MsgpackHandle { 2513 h := &codec.MsgpackHandle{RawToString: true} 2514 2515 // Sets the default type for decoding a map into a nil interface{}. 2516 // This is necessary in particular because we store the driver configs as a 2517 // nil interface{}. 2518 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 2519 return h 2520 }() 2521 2522 var HashiMsgpackHandle = func() *hcodec.MsgpackHandle { 2523 h := &hcodec.MsgpackHandle{RawToString: true} 2524 2525 // Sets the default type for decoding a map into a nil interface{}. 2526 // This is necessary in particular because we store the driver configs as a 2527 // nil interface{}. 2528 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 2529 return h 2530 }() 2531 2532 // Decode is used to decode a MsgPack encoded object 2533 func Decode(buf []byte, out interface{}) error { 2534 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out) 2535 } 2536 2537 // Encode is used to encode a MsgPack object with type prefix 2538 func Encode(t MessageType, msg interface{}) ([]byte, error) { 2539 var buf bytes.Buffer 2540 buf.WriteByte(uint8(t)) 2541 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg) 2542 return buf.Bytes(), err 2543 }