github.com/ryanslade/nomad@v0.2.4-0.20160128061903-fc95782f2089/nomad/structs/structs.go

package structs

import (
	"bytes"
	"crypto/sha1"
	"errors"
	"fmt"
	"io"
	"reflect"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/gorhill/cronexpr"
	"github.com/hashicorp/go-msgpack/codec"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/go-version"
	"github.com/hashicorp/nomad/helper/args"
	"github.com/mitchellh/copystructure"
)

var (
	ErrNoLeader     = fmt.Errorf("No cluster leader")
	ErrNoRegionPath = fmt.Errorf("No path to region")
)

type MessageType uint8

const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
)

const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128
)

// RPCInfo is used to describe common information about query
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// If set, wait until query exceeds given index. Must be provided
	// with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
	AllowStale bool

	// If set, used as prefix for resource list searches
	Prefix string
}

func (q QueryOptions) RequestRegion() string {
	return q.Region
}

// QueryOptions only applies to reads, so always true
func (q QueryOptions) IsRead() bool {
	return true
}

func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}

type WriteRequest struct {
	// The target region for this write
	Region string
}

func (w WriteRequest) RequestRegion() string {
	// The target region for this request
	return w.Region
}

// WriteRequest only applies to writes, always false
func (w WriteRequest) IsRead() bool {
	return false
}

func (w WriteRequest) AllowStaleRead() bool {
	return false
}
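// Illustrative sketch (not part of the original file): both QueryOptions and
// WriteRequest satisfy RPCInfo, which lets the RPC layer decide whether a
// request may be served by a stale follower and how long a blocking query may
// wait. The region, index, and wait time below are made up.
func exampleBlockingQueryOptions() RPCInfo {
	return QueryOptions{
		Region:        "global",
		MinQueryIndex: 100,
		MaxQueryTime:  5 * time.Minute,
		AllowStale:    true,
	}
}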
// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}

// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}

// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain status
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	NodeID string
	QueryOptions
}

// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job
	WriteRequest
}

// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string
	WriteRequest
}

// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}

// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
	JobID string
	QueryOptions
}

// JobListRequest is used to parameterize a list request
type JobListRequest struct {
	QueryOptions
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
	QueryOptions
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalDeleteRequest is used for deleting an evaluation.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers []string
	Timeout    time.Duration
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation
	WriteRequest
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}

// GenericRequest is used to request where no
// specific information is needed.
type GenericRequest struct {
	QueryOptions
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

const (
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobListResponse is used for a list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string
	QueryMeta
}

// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}

// AllocListResponse is used for a list request
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// EvalListResponse is used for a list request
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}

const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)

// ShouldDrainNode checks if a given node status should trigger an
// evaluation. Some states don't require any further action.
func ShouldDrainNode(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady:
		return false
	case NodeStatusDown:
		return true
	default:
		panic(fmt.Sprintf("unhandled node status %s", status))
	}
}

// ValidNodeStatus is used to check if a node status is valid
func ValidNodeStatus(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
		return true
	default:
		return false
	}
}

// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass uint64

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
func (n *Node) TerminalStatus() bool {
	switch n.Status {
	case NodeStatusDown:
		return true
	default:
		return false
	}
}

// Stub returns a summarized version of the node
func (n *Node) Stub() *NodeListStub {
	return &NodeListStub{
		ID:                n.ID,
		Datacenter:        n.Datacenter,
		Name:              n.Name,
		NodeClass:         n.NodeClass,
		Drain:             n.Drain,
		Status:            n.Status,
		StatusDescription: n.StatusDescription,
		CreateIndex:       n.CreateIndex,
		ModifyIndex:       n.ModifyIndex,
	}
}

// NodeListStub is used to return a subset of node information
// for the node list
type NodeListStub struct {
	ID                string
	Datacenter        string
	Name              string
	NodeClass         string
	Drain             bool
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}

// Resources is used to define the resources available
// on a client
type Resources struct {
	CPU      int
	MemoryMB int `mapstructure:"memory"`
	DiskMB   int `mapstructure:"disk"`
	IOPS     int
	Networks []*NetworkResource
}

// Copy returns a deep copy of the resources
func (r *Resources) Copy() *Resources {
	newR := new(Resources)
	*newR = *r
	n := len(r.Networks)
	newR.Networks = make([]*NetworkResource, n)
	for i := 0; i < n; i++ {
		newR.Networks[i] = r.Networks[i].Copy()
	}
	return newR
}

// NetIndex finds the matching net index using device name
func (r *Resources) NetIndex(n *NetworkResource) int {
	for idx, net := range r.Networks {
		if net.Device == n.Device {
			return idx
		}
	}
	return -1
}

// Superset checks if one set of resources is a superset
// of another. This ignores network resources, and the NetworkIndex
// should be used for that.
func (r *Resources) Superset(other *Resources) (bool, string) {
	if r.CPU < other.CPU {
		return false, "cpu exhausted"
	}
	if r.MemoryMB < other.MemoryMB {
		return false, "memory exhausted"
	}
	if r.DiskMB < other.DiskMB {
		return false, "disk exhausted"
	}
	if r.IOPS < other.IOPS {
		return false, "iops exhausted"
	}
	return true, ""
}
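// Illustrative sketch (not part of the original file): Superset reports the
// first exhausted dimension when a node's available Resources cannot cover a
// requested ask. The concrete values are made up for the example.
func exampleResourcesSuperset() {
	node := &Resources{CPU: 500, MemoryMB: 1024, DiskMB: 2048, IOPS: 50}
	ask := &Resources{CPU: 250, MemoryMB: 2048, DiskMB: 512, IOPS: 10}

	if ok, dimension := node.Superset(ask); !ok {
		// Prints "memory exhausted" since 1024 < 2048.
		fmt.Println(dimension)
	}
}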
// Add adds the resources of the delta to this, potentially
// returning an error if not possible.
func (r *Resources) Add(delta *Resources) error {
	if delta == nil {
		return nil
	}
	r.CPU += delta.CPU
	r.MemoryMB += delta.MemoryMB
	r.DiskMB += delta.DiskMB
	r.IOPS += delta.IOPS

	for _, n := range delta.Networks {
		// Find the matching interface by IP or CIDR
		idx := r.NetIndex(n)
		if idx == -1 {
			r.Networks = append(r.Networks, n.Copy())
		} else {
			r.Networks[idx].Add(n)
		}
	}
	return nil
}

func (r *Resources) GoString() string {
	return fmt.Sprintf("*%#v", *r)
}

type Port struct {
	Label string
	Value int `mapstructure:"static"`
}

// NetworkResource is used to represent available network
// resources
type NetworkResource struct {
	Device        string // Name of the device
	CIDR          string // CIDR block of addresses
	IP            string // IP address
	MBits         int    // Throughput
	ReservedPorts []Port // Reserved ports
	DynamicPorts  []Port // Dynamically assigned ports
}

// Copy returns a deep copy of the network resource
func (n *NetworkResource) Copy() *NetworkResource {
	newR := new(NetworkResource)
	*newR = *n
	if n.ReservedPorts != nil {
		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
		copy(newR.ReservedPorts, n.ReservedPorts)
	}
	if n.DynamicPorts != nil {
		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
		copy(newR.DynamicPorts, n.DynamicPorts)
	}
	return newR
}

// Add adds the resources of the delta to this.
func (n *NetworkResource) Add(delta *NetworkResource) {
	if len(delta.ReservedPorts) > 0 {
		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
	}
	n.MBits += delta.MBits
	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
}

func (n *NetworkResource) GoString() string {
	return fmt.Sprintf("*%#v", *n)
}

func (n *NetworkResource) MapLabelToValues(port_map map[string]int) map[string]int {
	labelValues := make(map[string]int)
	ports := append(n.ReservedPorts, n.DynamicPorts...)
	for _, port := range ports {
		if mapping, ok := port_map[port.Label]; ok {
			labelValues[port.Label] = mapping
		} else {
			labelValues[port.Label] = port.Value
		}
	}
	return labelValues
}

const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)

const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluations and allocations are terminal
)

const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if not
	// specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// Ensure CoreJobPriority is higher than any user
	// specified job so that it gets priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2
)
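// Illustrative sketch (not part of the original file): MapLabelToValues
// resolves port labels to concrete port numbers, preferring an explicit
// port_map entry over the statically recorded value. The labels and port
// numbers below are made up for the example.
func exampleMapLabelToValues() {
	network := &NetworkResource{
		Device:        "eth0",
		IP:            "10.0.0.5",
		ReservedPorts: []Port{{Label: "admin", Value: 8080}},
		DynamicPorts:  []Port{{Label: "http", Value: 23456}},
	}

	// Override the dynamically assigned "http" port with a driver-provided mapping.
	mapped := network.MapLabelToValues(map[string]int{"http": 80})
	fmt.Println(mapped["admin"], mapped["http"]) // 8080 80
}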
// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling
// however.
type Job struct {
	// Region is the Nomad region that handles scheduling this job
	Region string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project
	ID string

	// ParentID is the unique identifier of the job that spawned this job.
	ParentID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool `mapstructure:"all_at_once"`

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// Update is used to control the update strategy
	Update UpdateStrategy

	// Periodic is used to define the interval the job is run at.
	Periodic *PeriodicConfig

	// GC is used to mark the job as available for garbage collection after it
	// has no outstanding evaluations or allocations.
	GC bool

	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// Job status
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Raft Indexes
	CreateIndex    uint64
	ModifyIndex    uint64
	JobModifyIndex uint64
}

// InitFields is used to initialize fields in the Job. This should be called
// when registering a Job.
func (j *Job) InitFields() {
	for _, tg := range j.TaskGroups {
		tg.InitFields(j)
	}

	// If the job is batch then make it GC.
	if j.Type == JobTypeBatch {
		j.GC = true
	}
}

// Copy returns a deep copy of the Job. It is expected that callers use recover.
// This method can panic if the deep copy fails as it uses reflection.
func (j *Job) Copy() *Job {
	i, err := copystructure.Copy(j)
	if err != nil {
		panic(err)
	}

	return i.(*Job)
}

// Validate is used to sanity check a job input
func (j *Job) Validate() error {
	var mErr multierror.Error
	if j.Region == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
	}
	if j.ID == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
	} else if strings.Contains(j.ID, " ") {
		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
	}
	if j.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
	}
	if j.Type == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
	}
	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
	}
	if len(j.Datacenters) == 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
	}
	if len(j.TaskGroups) == 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
	}
	for idx, constr := range j.Constraints {
		if err := constr.Validate(); err != nil {
			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	// Check for duplicate task groups
	taskGroups := make(map[string]int)
	for idx, tg := range j.TaskGroups {
		if tg.Name == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
		} else if existing, ok := taskGroups[tg.Name]; ok {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
		} else {
			taskGroups[tg.Name] = idx
		}

		if j.Type == "system" && tg.Count != 1 {
			mErr.Errors = append(mErr.Errors,
				fmt.Errorf("Job task group %d has count %d. Only count of 1 is supported with system scheduler",
					idx+1, tg.Count))
		}
	}

	// Validate the task group
	for idx, tg := range j.TaskGroups {
		if err := tg.Validate(); err != nil {
			outer := fmt.Errorf("Task group %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	// Validate periodic is only used with batch jobs.
	if j.IsPeriodic() {
		if j.Type != JobTypeBatch {
			mErr.Errors = append(mErr.Errors,
				fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch))
		}

		if err := j.Periodic.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	return mErr.ErrorOrNil()
}

// LookupTaskGroup finds a task group by name
func (j *Job) LookupTaskGroup(name string) *TaskGroup {
	for _, tg := range j.TaskGroups {
		if tg.Name == name {
			return tg
		}
	}
	return nil
}

// Stub is used to return a summary of the job
func (j *Job) Stub() *JobListStub {
	return &JobListStub{
		ID:                j.ID,
		ParentID:          j.ParentID,
		Name:              j.Name,
		Type:              j.Type,
		Priority:          j.Priority,
		Status:            j.Status,
		StatusDescription: j.StatusDescription,
		CreateIndex:       j.CreateIndex,
		ModifyIndex:       j.ModifyIndex,
	}
}
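// Illustrative sketch (not part of the original file): the smallest shape of
// Job that passes Validate above. The names, datacenter, and resource figures
// are made up; InitFields fills in the default restart policy and kill
// timeout before validation.
func exampleMinimalJob() error {
	job := &Job{
		Region:      "global",
		ID:          "example",
		Name:        "example",
		Type:        JobTypeService,
		Priority:    JobDefaultPriority,
		Datacenters: []string{"dc1"},
		TaskGroups: []*TaskGroup{
			{
				Name:  "web",
				Count: 1,
				Tasks: []*Task{
					{
						Name:      "frontend",
						Driver:    "docker",
						Resources: &Resources{CPU: 500, MemoryMB: 256},
					},
				},
			},
		},
	}
	job.InitFields()
	return job.Validate()
}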
// IsPeriodic returns whether a job is periodic.
func (j *Job) IsPeriodic() bool {
	return j.Periodic != nil
}

// JobListStub is used to return a subset of job information
// for the job list
type JobListStub struct {
	ID                string
	ParentID          string
	Name              string
	Type              string
	Priority          int
	Status            string
	StatusDescription string
	CreateIndex       uint64
	ModifyIndex       uint64
}

// UpdateStrategy is used to modify how updates are done
type UpdateStrategy struct {
	// Stagger is the amount of time between the updates
	Stagger time.Duration

	// MaxParallel is how many updates can be done in parallel
	MaxParallel int `mapstructure:"max_parallel"`
}

// Rolling returns if a rolling strategy should be used
func (u *UpdateStrategy) Rolling() bool {
	return u.Stagger > 0 && u.MaxParallel > 0
}

const (
	// PeriodicSpecCron is used for a cron spec.
	PeriodicSpecCron = "cron"

	// PeriodicSpecTest is only used by unit tests. It is a sorted, comma
	// separated list of unix timestamps at which to launch.
	PeriodicSpecTest = "_internal_test"
)

// PeriodicConfig defines the interval a job should be run at.
type PeriodicConfig struct {
	// Enabled determines if the job should be run periodically.
	Enabled bool

	// Spec specifies the interval the job should be run as. It is parsed based
	// on the SpecType.
	Spec string

	// SpecType defines the format of the spec.
	SpecType string

	// ProhibitOverlap enforces that spawned jobs do not run in parallel.
	ProhibitOverlap bool `mapstructure:"prohibit_overlap"`
}

func (p *PeriodicConfig) Validate() error {
	if !p.Enabled {
		return nil
	}

	if p.Spec == "" {
		return fmt.Errorf("Must specify a spec")
	}

	switch p.SpecType {
	case PeriodicSpecCron:
		// Validate the cron spec
		if _, err := cronexpr.Parse(p.Spec); err != nil {
			return fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err)
		}
	case PeriodicSpecTest:
		// No-op
	default:
		return fmt.Errorf("Unknown periodic specification type %q", p.SpecType)
	}

	return nil
}

// Next returns the closest time instant matching the spec that is after the
// passed time. If no matching instance exists, the zero value of time.Time is
// returned. The `time.Location` of the returned value matches that of the
// passed time.
func (p *PeriodicConfig) Next(fromTime time.Time) time.Time {
	switch p.SpecType {
	case PeriodicSpecCron:
		if e, err := cronexpr.Parse(p.Spec); err == nil {
			return e.Next(fromTime)
		}
	case PeriodicSpecTest:
		split := strings.Split(p.Spec, ",")
		if len(split) == 1 && split[0] == "" {
			return time.Time{}
		}

		// Parse the times
		times := make([]time.Time, len(split))
		for i, s := range split {
			unix, err := strconv.Atoi(s)
			if err != nil {
				return time.Time{}
			}

			times[i] = time.Unix(int64(unix), 0)
		}

		// Find the next match
		for _, next := range times {
			if fromTime.Before(next) {
				return next
			}
		}
	}

	return time.Time{}
}

const (
	// PeriodicLaunchSuffix is the string appended to the periodic jobs ID
	// when launching derived instances of it.
	PeriodicLaunchSuffix = "/periodic-"
)
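// Illustrative sketch (not part of the original file): a periodic config
// using the cron spec type. Validate checks that the expression parses, and
// Next walks forward from an arbitrary reference time. The schedule is made
// up for the example.
func examplePeriodicNext() {
	p := &PeriodicConfig{
		Enabled:  true,
		Spec:     "0 */6 * * *", // every six hours, on the hour
		SpecType: PeriodicSpecCron,
	}
	if err := p.Validate(); err != nil {
		fmt.Println("invalid spec:", err)
		return
	}

	now := time.Date(2016, time.January, 28, 10, 30, 0, 0, time.UTC)
	fmt.Println(p.Next(now)) // 2016-01-28 12:00:00 +0000 UTC
}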
// PeriodicLaunch tracks the last launch time of a periodic job.
type PeriodicLaunch struct {
	ID     string    // ID of the periodic job.
	Launch time.Time // The last launch time.

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

var (
	defaultServiceJobRestartPolicy = RestartPolicy{
		Delay:            15 * time.Second,
		Attempts:         2,
		Interval:         1 * time.Minute,
		RestartOnSuccess: true,
		Mode:             RestartPolicyModeDelay,
	}
	defaultBatchJobRestartPolicy = RestartPolicy{
		Delay:            15 * time.Second,
		Attempts:         15,
		Interval:         7 * 24 * time.Hour,
		RestartOnSuccess: false,
		Mode:             RestartPolicyModeDelay,
	}
)

const (
	// RestartPolicyModeDelay causes an artificial delay till the next interval is
	// reached when the specified attempts have been reached in the interval.
	RestartPolicyModeDelay = "delay"

	// RestartPolicyModeFail causes a job to fail if the specified number of
	// attempts are reached within an interval.
	RestartPolicyModeFail = "fail"
)

// RestartPolicy configures how Tasks are restarted when they crash or fail.
type RestartPolicy struct {
	// Attempts is the number of restarts that will occur in an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of restarts
	// within.
	Interval time.Duration

	// Delay is the time between a failure and a restart.
	Delay time.Duration

	// RestartOnSuccess determines whether a task should be restarted if it
	// exited successfully.
	RestartOnSuccess bool `mapstructure:"on_success"`

	// Mode controls what happens when the task restarts more than attempt times
	// in an interval.
	Mode string
}

func (r *RestartPolicy) Validate() error {
	switch r.Mode {
	case RestartPolicyModeDelay, RestartPolicyModeFail:
	default:
		return fmt.Errorf("Unsupported restart mode: %q", r.Mode)
	}

	if r.Interval == 0 {
		return nil
	}
	if time.Duration(r.Attempts)*r.Delay > r.Interval {
		return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)
	}
	return nil
}

func NewRestartPolicy(jobType string) *RestartPolicy {
	switch jobType {
	case JobTypeService, JobTypeSystem:
		rp := defaultServiceJobRestartPolicy
		return &rp
	case JobTypeBatch:
		rp := defaultBatchJobRestartPolicy
		return &rp
	}
	return nil
}
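// Illustrative sketch (not part of the original file): Validate rejects a
// policy whose attempts cannot fit into its interval given the delay, i.e.
// when Attempts*Delay exceeds Interval. The numbers are made up.
func exampleRestartPolicyValidate() {
	rp := &RestartPolicy{
		Attempts: 10,
		Interval: 1 * time.Minute,
		Delay:    15 * time.Second, // 10 * 15s = 150s > 60s, so Validate fails
		Mode:     RestartPolicyModeDelay,
	}
	if err := rp.Validate(); err != nil {
		fmt.Println(err)
	}
}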
// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled.
	Count int

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// RestartPolicy of a TaskGroup
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string
}

// InitFields is used to initialize fields in the TaskGroup.
func (tg *TaskGroup) InitFields(job *Job) {
	// Set the default restart policy.
	if tg.RestartPolicy == nil {
		tg.RestartPolicy = NewRestartPolicy(job.Type)
	}

	for _, task := range tg.Tasks {
		task.InitFields(job, tg)
	}
}

// Validate is used to sanity check a task group
func (tg *TaskGroup) Validate() error {
	var mErr multierror.Error
	if tg.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
	}
	if tg.Count <= 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Task group count must be positive"))
	}
	if len(tg.Tasks) == 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
	}
	for idx, constr := range tg.Constraints {
		if err := constr.Validate(); err != nil {
			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	if tg.RestartPolicy != nil {
		if err := tg.RestartPolicy.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	} else {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
	}

	// Check for duplicate tasks
	tasks := make(map[string]int)
	for idx, task := range tg.Tasks {
		if task.Name == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
		} else if existing, ok := tasks[task.Name]; ok {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
		} else {
			tasks[task.Name] = idx
		}
	}

	// Validate the tasks
	for idx, task := range tg.Tasks {
		if err := task.Validate(); err != nil {
			outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}
	return mErr.ErrorOrNil()
}

// LookupTask finds a task by name
func (tg *TaskGroup) LookupTask(name string) *Task {
	for _, t := range tg.Tasks {
		if t.Name == name {
			return t
		}
	}
	return nil
}

func (tg *TaskGroup) GoString() string {
	return fmt.Sprintf("*%#v", *tg)
}

const (
	ServiceCheckHTTP   = "http"
	ServiceCheckTCP    = "tcp"
	ServiceCheckDocker = "docker"
	ServiceCheckScript = "script"
)

// The ServiceCheck data model represents the consul health check that
// Nomad registers for a Task
type ServiceCheck struct {
	Name     string        // Name of the check, defaults to id
	Type     string        // Type of the check - tcp, http, docker and script
	Script   string        // Script to invoke for script check
	Path     string        // path of the health check url for http type check
	Protocol string        // Protocol to use if check is http, defaults to http
	Interval time.Duration // Interval of the check
	Timeout  time.Duration // Timeout of the response from the check before consul fails the check
}

func (sc *ServiceCheck) Validate() error {
	t := strings.ToLower(sc.Type)
	if t != ServiceCheckTCP && t != ServiceCheckHTTP {
		return fmt.Errorf("service check must be either http or tcp type")
	}
	if sc.Type == ServiceCheckHTTP && sc.Path == "" {
		return fmt.Errorf("service checks of http type must have a valid http path")
	}

	if sc.Type == ServiceCheckScript && sc.Script == "" {
		return fmt.Errorf("service checks of script type must have a valid script path")
	}

	if sc.Interval <= 0 {
		return fmt.Errorf("service checks must have positive time intervals")
	}
	return nil
}

func (sc *ServiceCheck) Hash(serviceID string) string {
	h := sha1.New()
	io.WriteString(h, serviceID)
	io.WriteString(h, sc.Name)
	io.WriteString(h, sc.Type)
	io.WriteString(h, sc.Script)
	io.WriteString(h, sc.Path)
	io.WriteString(h, sc.Protocol)
	io.WriteString(h, sc.Interval.String())
	io.WriteString(h, sc.Timeout.String())
	return fmt.Sprintf("%x", h.Sum(nil))
}

const (
	NomadConsulPrefix = "nomad-registered-service"
)

// The Service model represents a Consul service definition
type Service struct {
	Name      string          // Name of the service, defaults to id
	Tags      []string        // List of tags for the service
	PortLabel string          `mapstructure:"port"` // port for the service
	Checks    []*ServiceCheck // List of checks associated with the service
}

// InitFields interpolates values of Job, Task Group and Task in the Service
// Name. This also generates check names, service id and check ids.
func (s *Service) InitFields(job string, taskGroup string, task string) {
	s.Name = args.ReplaceEnv(s.Name, map[string]string{
		"JOB":       job,
		"TASKGROUP": taskGroup,
		"TASK":      task,
		"BASE":      fmt.Sprintf("%s-%s-%s", job, taskGroup, task),
	},
	)

	for _, check := range s.Checks {
		if check.Name == "" {
			check.Name = fmt.Sprintf("service: %q check", s.Name)
		}
	}
}

// Validate checks if the Check definition is valid
func (s *Service) Validate() error {
	var mErr multierror.Error
	for _, c := range s.Checks {
		if err := c.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}
	return mErr.ErrorOrNil()
}

// Hash calculates the hash of the check based on its content and the service
// which owns it
func (s *Service) Hash() string {
	h := sha1.New()
	io.WriteString(h, s.Name)
	io.WriteString(h, strings.Join(s.Tags, ""))
	io.WriteString(h, s.PortLabel)
	return fmt.Sprintf("%x", h.Sum(nil))
}
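// Illustrative sketch (not part of the original file): InitFields rewrites
// variables in the service name, assuming the ${VAR} syntax handled by
// helper/args.ReplaceEnv. The job, group, and task names are made up.
func exampleServiceInterpolation() {
	s := &Service{Name: "${JOB}-${TASK}-web", PortLabel: "http"}
	s.InitFields("cache", "frontend", "redis")
	fmt.Println(s.Name) // expected: cache-redis-web
}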
const (
	// DefaultKillTimeout is the default timeout between signaling a task it
	// will be killed and killing it.
	DefaultKillTimeout = 5 * time.Second
)

// Task is a single process that is typically executed as part of a task group.
type Task struct {
	// Name of the task
	Name string

	// Driver is used to control which driver is used
	Driver string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// List of service definitions exposed by the Task
	Services []*Service

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Resources is the resources needed by this task
	Resources *Resources

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string

	// KillTimeout is the time between signaling a task that it will be
	// killed and killing it.
	KillTimeout time.Duration `mapstructure:"kill_timeout"`
}

// InitFields initializes fields in the task.
func (t *Task) InitFields(job *Job, tg *TaskGroup) {
	t.InitServiceFields(job.Name, tg.Name)

	// Set the default timeout if it is not specified.
	if t.KillTimeout == 0 {
		t.KillTimeout = DefaultKillTimeout
	}
}

// InitServiceFields interpolates values of Job, Task Group
// and Tasks in all the service Names of a Task. This also generates the service
// id, check id and check names.
func (t *Task) InitServiceFields(job string, taskGroup string) {
	for _, service := range t.Services {
		service.InitFields(job, taskGroup, t.Name)
	}
}

func (t *Task) GoString() string {
	return fmt.Sprintf("*%#v", *t)
}

func (t *Task) FindHostAndPortFor(portLabel string) (string, int) {
	for _, network := range t.Resources.Networks {
		if p, ok := network.MapLabelToValues(nil)[portLabel]; ok {
			return network.IP, p
		}
	}
	return "", 0
}
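// Illustrative sketch (not part of the original file): FindHostAndPortFor
// walks the task's network resources and resolves a port label to the host
// IP and port number. The addresses and labels below are made up.
func exampleFindHostAndPortFor() {
	task := &Task{
		Name:   "web",
		Driver: "docker",
		Resources: &Resources{
			Networks: []*NetworkResource{
				{IP: "10.0.0.5", ReservedPorts: []Port{{Label: "http", Value: 8080}}},
			},
		},
	}
	host, port := task.FindHostAndPortFor("http")
	fmt.Println(host, port) // 10.0.0.5 8080
}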
// Set of possible states for a task.
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)

// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// The current state of the task.
	State string

	// Series of task events that transition the state of the task.
	Events []*TaskEvent
}

const (
	// A Driver failure indicates that the task could not be started due to a
	// failure in the driver.
	TaskDriverFailure = "Driver Failure"

	// Task Started signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// Task terminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// Task Killed indicates a user has killed the task.
	TaskKilled = "Killed"
)

// TaskEvent is an event that affects the state of a task and contains meta-data
// appropriate to the event's type.
type TaskEvent struct {
	Type string
	Time int64 // Unix Nanosecond timestamp

	// Driver Failure fields.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.
	ExitCode int    // The exit code of the task.
	Signal   int    // The signal that terminated the task.
	Message  string // A possible message explaining the termination of the task.

	// Task Killed Fields.
	KillError string // Error killing the task.
}

func NewTaskEvent(event string) *TaskEvent {
	return &TaskEvent{
		Type: event,
		Time: time.Now().UnixNano(),
	}
}

func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
	if err != nil {
		e.DriverError = err.Error()
	}
	return e
}

func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
	e.ExitCode = c
	return e
}

func (e *TaskEvent) SetSignal(s int) *TaskEvent {
	e.Signal = s
	return e
}

func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
	if err != nil {
		e.Message = err.Error()
	}
	return e
}

func (e *TaskEvent) SetKillError(err error) *TaskEvent {
	if err != nil {
		e.KillError = err.Error()
	}
	return e
}
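// Illustrative sketch (not part of the original file): the Set* methods
// return the event itself, so callers can build an event fluently. The exit
// details below are made up.
func exampleTaskEvent() *TaskEvent {
	return NewTaskEvent(TaskTerminated).
		SetExitCode(1).
		SetSignal(9).
		SetExitMessage(errors.New("task exceeded memory limit"))
}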
// Validate is used to sanity check a task
func (t *Task) Validate() error {
	var mErr multierror.Error
	if t.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task name"))
	}
	if t.Driver == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
	}
	if t.Resources == nil {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
	}
	if t.KillTimeout.Nanoseconds() < 0 {
		mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value"))
	}
	for idx, constr := range t.Constraints {
		if err := constr.Validate(); err != nil {
			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	for _, service := range t.Services {
		if err := service.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}
	return mErr.ErrorOrNil()
}

const (
	ConstraintDistinctHosts = "distinct_hosts"
	ConstraintRegex         = "regexp"
	ConstraintVersion       = "version"
)

// Constraints are used to restrict placement options.
type Constraint struct {
	LTarget string // Left-hand target
	RTarget string // Right-hand target
	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
}

func (c *Constraint) String() string {
	return fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
}

func (c *Constraint) Validate() error {
	var mErr multierror.Error
	if c.Operand == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
	}

	// Perform additional validation based on operand
	switch c.Operand {
	case ConstraintRegex:
		if _, err := regexp.Compile(c.RTarget); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
		}
	case ConstraintVersion:
		if _, err := version.NewConstraint(c.RTarget); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err))
		}
	}
	return mErr.ErrorOrNil()
}

const (
	AllocDesiredStatusRun    = "run"    // Allocation should run
	AllocDesiredStatusStop   = "stop"   // Allocation should stop
	AllocDesiredStatusEvict  = "evict"  // Allocation should stop, and was evicted
	AllocDesiredStatusFailed = "failed" // Allocation failed to be done
)

const (
	AllocClientStatusPending = "pending"
	AllocClientStatusRunning = "running"
	AllocClientStatusDead    = "dead"
	AllocClientStatusFailed  = "failed"
)

// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// ID of the allocation (UUID)
	ID string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// Job is the parent job of the task group being allocated.
	// This is copied at allocation time to avoid issues if the job
	// definition is updated.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// Resources is the total set of resources allocated as part
	// of this allocation of the task group.
	Resources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources.
	TaskResources map[string]*Resources

	// Services is a map of service names to service ids
	Services map[string]string

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// Desired Status of the allocation on the client
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	DesiredDescription string

	// Status of the allocation on the client
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	ClientDescription string

	// TaskStates stores the state of each task.
	TaskStates map[string]*TaskState

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}
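// Illustrative sketch (not part of the original file): a version constraint
// validated against the operand-specific rules above. The attribute name and
// version range are made up for the example.
func exampleConstraintValidate() {
	c := &Constraint{
		LTarget: "$attr.kernel.version",
		RTarget: ">= 3.19",
		Operand: ConstraintVersion,
	}
	if err := c.Validate(); err != nil {
		fmt.Println("invalid constraint:", err)
		return
	}
	fmt.Println(c) // $attr.kernel.version version >= 3.19
}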
// TerminalStatus returns if the desired or actual status is terminal and
// will no longer transition.
func (a *Allocation) TerminalStatus() bool {
	// First check the desired state and if that isn't terminal, check client
	// state.
	switch a.DesiredStatus {
	case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed:
		return true
	default:
	}

	switch a.ClientStatus {
	case AllocClientStatusDead, AllocClientStatusFailed:
		return true
	default:
		return false
	}
}

// Stub returns a list stub for the allocation
func (a *Allocation) Stub() *AllocListStub {
	return &AllocListStub{
		ID:                 a.ID,
		EvalID:             a.EvalID,
		Name:               a.Name,
		NodeID:             a.NodeID,
		JobID:              a.JobID,
		TaskGroup:          a.TaskGroup,
		DesiredStatus:      a.DesiredStatus,
		DesiredDescription: a.DesiredDescription,
		ClientStatus:       a.ClientStatus,
		ClientDescription:  a.ClientDescription,
		TaskStates:         a.TaskStates,
		CreateIndex:        a.CreateIndex,
		ModifyIndex:        a.ModifyIndex,
	}
}

// PopulateServiceIDs generates the service IDs for all the service definitions
// in that Allocation
func (a *Allocation) PopulateServiceIDs() {
	// Make a copy of the old map which contains the service names and their
	// generated IDs
	oldIDs := make(map[string]string)
	for k, v := range a.Services {
		oldIDs[k] = v
	}

	a.Services = make(map[string]string)
	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	for _, task := range tg.Tasks {
		for _, service := range task.Services {
			// If the ID for a service name is already generated then we re-use
			// it
			if ID, ok := oldIDs[service.Name]; ok {
				a.Services[service.Name] = ID
			} else {
				// If the service hasn't been generated an ID, we generate one.
				// We add a prefix to the Service ID so that we can know that this service
				// is managed by Nomad since Consul can also have services which are not
				// managed by Nomad
				a.Services[service.Name] = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID())
			}
		}
	}
}

// AllocListStub is used to return a subset of alloc information
type AllocListStub struct {
	ID                 string
	EvalID             string
	Name               string
	NodeID             string
	JobID              string
	TaskGroup          string
	DesiredStatus      string
	DesiredDescription string
	ClientStatus       string
	ClientDescription  string
	TaskStates         map[string]*TaskState
	CreateIndex        uint64
	ModifyIndex        uint64
}

// AllocMetric is used to track various metrics while attempting
// to make an allocation. These are used to debug a job, or to better
// understand the pressure within the system.
type AllocMetric struct {
	// NodesEvaluated is the number of nodes that were evaluated
	NodesEvaluated int

	// NodesFiltered is the number of nodes filtered due to a constraint
	NodesFiltered int

	// NodesAvailable is the number of nodes available for evaluation per DC.
	NodesAvailable map[string]int

	// ClassFiltered is the number of nodes filtered by class
	ClassFiltered map[string]int

	// ConstraintFiltered is the number of failures caused by constraint
	ConstraintFiltered map[string]int

	// NodesExhausted is the number of nodes skipped due to being
	// exhausted of at least one resource
	NodesExhausted int

	// ClassExhausted is the number of nodes exhausted by class
	ClassExhausted map[string]int

	// DimensionExhausted provides the count by dimension or reason
	DimensionExhausted map[string]int

	// Scores is the scores of the final few nodes remaining
	// for placement. The top score is typically selected.
	Scores map[string]float64

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}

func (a *AllocMetric) EvaluateNode() {
	a.NodesEvaluated += 1
}

func (a *AllocMetric) FilterNode(node *Node, constraint string) {
	a.NodesFiltered += 1
	if node != nil && node.NodeClass != "" {
		if a.ClassFiltered == nil {
			a.ClassFiltered = make(map[string]int)
		}
		a.ClassFiltered[node.NodeClass] += 1
	}
	if constraint != "" {
		if a.ConstraintFiltered == nil {
			a.ConstraintFiltered = make(map[string]int)
		}
		a.ConstraintFiltered[constraint] += 1
	}
}

func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
	a.NodesExhausted += 1
	if node != nil && node.NodeClass != "" {
		if a.ClassExhausted == nil {
			a.ClassExhausted = make(map[string]int)
		}
		a.ClassExhausted[node.NodeClass] += 1
	}
	if dimension != "" {
		if a.DimensionExhausted == nil {
			a.DimensionExhausted = make(map[string]int)
		}
		a.DimensionExhausted[dimension] += 1
	}
}

func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
	if a.Scores == nil {
		a.Scores = make(map[string]float64)
	}
	key := fmt.Sprintf("%s.%s", node.ID, name)
	a.Scores[key] = score
}
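// Illustrative sketch (not part of the original file): how a scheduler might
// record metrics while walking candidate nodes. The filtering reason and
// score are made up; only the AllocMetric API above is exercised.
func exampleAllocMetric(nodes []*Node) *AllocMetric {
	m := new(AllocMetric)
	for _, node := range nodes {
		m.EvaluateNode()
		if node.Drain {
			m.FilterNode(node, "node is draining")
			continue
		}
		m.ScoreNode(node, "binpack", 0.75)
	}
	return m
}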
const (
	EvalStatusPending  = "pending"
	EvalStatusComplete = "complete"
	EvalStatusFailed   = "failed"
)

const (
	EvalTriggerJobRegister   = "job-register"
	EvalTriggerJobDeregister = "job-deregister"
	EvalTriggerPeriodicJob   = "periodic-job"
	EvalTriggerNodeUpdate    = "node-update"
	EvalTriggerScheduled     = "scheduled"
	EvalTriggerRollingUpdate = "rolling-update"
)

const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
	// periodically scan garbage collectible jobs and check if both their
	// evaluations and allocations are terminal. If so, we delete these out of
	// the system.
	CoreJobJobGC = "job-gc"
)

// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// Status of the evaluation
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade.
	Wait time.Duration

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	PreviousEval string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
// TerminalStatus returns if the current status is terminal and
// will no longer transition.
func (e *Evaluation) TerminalStatus() bool {
	switch e.Status {
	case EvalStatusComplete, EvalStatusFailed:
		return true
	default:
		return false
	}
}

func (e *Evaluation) GoString() string {
	return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID)
}

func (e *Evaluation) Copy() *Evaluation {
	ne := new(Evaluation)
	*ne = *e
	return ne
}

// ShouldEnqueue checks if a given evaluation should be enqueued
func (e *Evaluation) ShouldEnqueue() bool {
	switch e.Status {
	case EvalStatusPending:
		return true
	case EvalStatusComplete, EvalStatusFailed:
		return false
	default:
		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
	}
}

// MakePlan is used to make a plan from the given evaluation
// for a given Job
func (e *Evaluation) MakePlan(j *Job) *Plan {
	p := &Plan{
		EvalID:         e.ID,
		Priority:       e.Priority,
		NodeUpdate:     make(map[string][]*Allocation),
		NodeAllocation: make(map[string][]*Allocation),
	}
	if j != nil {
		p.AllAtOnce = j.AllAtOnce
	}
	return p
}

// NextRollingEval creates an evaluation to follow up this eval for rolling updates
func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
	return &Evaluation{
		ID:             GenerateUUID(),
		Priority:       e.Priority,
		Type:           e.Type,
		TriggeredBy:    EvalTriggerRollingUpdate,
		JobID:          e.JobID,
		JobModifyIndex: e.JobModifyIndex,
		Status:         EvalStatusPending,
		Wait:           wait,
		PreviousEval:   e.ID,
	}
}

// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// NodeUpdate contains all the allocations for each node. For each node,
	// this is a list of the allocations to update to either stop or evict.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	NodeAllocation map[string][]*Allocation

	// FailedAllocs are allocations that could not be made,
	// but are persisted so that the user can use the feedback
	// to determine the cause.
	FailedAllocs []*Allocation
}
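// examplePlanForEval is an illustrative sketch, not part of the original
// source. It shows the typical flow of turning an Evaluation into a Plan: make
// an empty plan from the eval, queue evictions, append new placements, and
// skip submission when the plan is a no-op. The "stop" status and the
// description string are hypothetical placeholders, as are the stop/place
// allocation slices.
func examplePlanForEval(eval *Evaluation, job *Job, stop, place []*Allocation) *Plan {
	plan := eval.MakePlan(job)

	// Evictions are recorded per node with an updated desired status.
	for _, alloc := range stop {
		plan.AppendUpdate(alloc, "stop", "hypothetical: allocation no longer needed")
	}

	// New placements are grouped under the node they were assigned to.
	for _, alloc := range place {
		plan.AppendAlloc(alloc)
	}

	// An empty plan carries no work and would not be submitted to the leader.
	if plan.IsNoOp() {
		return nil
	}
	return plan
}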
func (p *Plan) AppendUpdate(alloc *Allocation, status, desc string) {
	newAlloc := new(Allocation)
	*newAlloc = *alloc
	newAlloc.DesiredStatus = status
	newAlloc.DesiredDescription = desc
	node := alloc.NodeID
	existing := p.NodeUpdate[node]
	p.NodeUpdate[node] = append(existing, newAlloc)
}

func (p *Plan) PopUpdate(alloc *Allocation) {
	existing := p.NodeUpdate[alloc.NodeID]
	n := len(existing)
	if n > 0 && existing[n-1].ID == alloc.ID {
		existing = existing[:n-1]
		if len(existing) > 0 {
			p.NodeUpdate[alloc.NodeID] = existing
		} else {
			delete(p.NodeUpdate, alloc.NodeID)
		}
	}
}

func (p *Plan) AppendAlloc(alloc *Allocation) {
	node := alloc.NodeID
	existing := p.NodeAllocation[node]
	p.NodeAllocation[node] = append(existing, alloc)
}

func (p *Plan) AppendFailed(alloc *Allocation) {
	p.FailedAllocs = append(p.FailedAllocs, alloc)
}

// IsNoOp checks if this plan would do nothing
func (p *Plan) IsNoOp() bool {
	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
}

// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	NodeAllocation map[string][]*Allocation

	// FailedAllocs are allocations that could not be made,
	// but are persisted so that the user can use the feedback
	// to determine the cause.
	FailedAllocs []*Allocation

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// overcommitted) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}

// IsNoOp checks if this plan result would do nothing
func (p *PlanResult) IsNoOp() bool {
	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0
}

// FullCommit is used to check if all the allocations in a plan
// were committed as part of the result. Returns if there was
// a match, and the number of expected and actual allocations.
func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
	expected := 0
	actual := 0
	for name, allocList := range plan.NodeAllocation {
		didAlloc := p.NodeAllocation[name]
		expected += len(allocList)
		actual += len(didAlloc)
	}
	return actual == expected, expected, actual
}
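// examplePlanResultCheck is an illustrative sketch, not part of the original
// source. It shows how a worker might interpret a PlanResult: a no-op result
// needs no follow-up, FullCommit compares the committed allocations against
// the submitted plan, and RefreshIndex tells the worker how far to refresh its
// state snapshot before retrying rejected work.
func examplePlanResultCheck(plan *Plan, result *PlanResult) (fullyCommitted bool, refreshTo uint64) {
	// A no-op result committed nothing and requires no further work.
	if result.IsNoOp() {
		return true, 0
	}

	// FullCommit reports whether every allocation in the submitted plan was
	// committed, along with the expected and actual counts (ignored here).
	full, _, _ := result.FullCommit(plan)

	// When allocations were rejected (for example due to stale node state),
	// the worker refreshes its state up to RefreshIndex before retrying.
	return full, result.RefreshIndex
}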
// MsgpackHandle is a shared handle for encoding/decoding of structs
var MsgpackHandle = func() *codec.MsgpackHandle {
	h := &codec.MsgpackHandle{RawToString: true}

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
	return h
}()

// Decode is used to decode a MsgPack encoded object
func Decode(buf []byte, out interface{}) error {
	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
}

// Encode is used to encode a MsgPack object with a type prefix
func Encode(t MessageType, msg interface{}) ([]byte, error) {
	var buf bytes.Buffer
	buf.WriteByte(uint8(t))
	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
	return buf.Bytes(), err
}
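// exampleEncodeDecode is an illustrative sketch, not part of the original
// source. It round-trips a request through Encode and Decode, showing that
// Encode prefixes the msgpack payload with the MessageType byte, which a
// consumer strips off before decoding the remainder.
func exampleEncodeDecode(req *NodeRegisterRequest) (*NodeRegisterRequest, error) {
	buf, err := Encode(NodeRegisterRequestType, req)
	if err != nil {
		return nil, err
	}

	// The first byte is the MessageType; the msgpack payload follows it.
	if MessageType(buf[0]) != NodeRegisterRequestType {
		return nil, fmt.Errorf("unexpected message type: %d", buf[0])
	}

	var out NodeRegisterRequest
	if err := Decode(buf[1:], &out); err != nil {
		return nil, err
	}
	return &out, nil
}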