// Scraped from github.com/huiliang/nomad@v0.2.1-0.20151124023127-7a8b664699ff/nomad/structs/structs.go

package structs

import (
	"bytes"
	"errors"
	"fmt"
	"reflect"
	"regexp"
	"strings"
	"time"

	"github.com/hashicorp/go-msgpack/codec"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/go-version"
)

var (
	// ErrNoLeader and ErrNoRegionPath are package-level sentinel errors;
	// the message is the only information they carry.
	ErrNoLeader     = fmt.Errorf("No cluster leader")
	ErrNoRegionPath = fmt.Errorf("No path to region")

	// defaultServiceJobRestartPolicy is the template NewRestartPolicy copies
	// for service and system jobs.
	defaultServiceJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 2,
		Interval: 1 * time.Minute,
	}

	// defaultBatchJobRestartPolicy is the template NewRestartPolicy copies
	// for batch jobs. It sets no Interval, which RestartPolicy.Validate
	// accepts without further checks.
	defaultBatchJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 15,
	}
)

// MessageType is a one-byte tag identifying the kind of a request message.
type MessageType uint8

// Known message types. The values are assigned with iota, so the order of
// this list fixes each type's numeric value; do not reorder or insert
// entries in the middle.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
)

const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128
)

// RPCInfo is used to describe common information about a query. Both
// QueryOptions and WriteRequest in this file provide these three methods.
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// If set, wait until query exceeds given index. Must be provided
	// with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
	AllowStale bool
}

// RequestRegion returns the region this query targets.
func (q QueryOptions) RequestRegion() string {
	return q.Region
}

// IsRead reports whether the request is a read. QueryOptions only applies
// to reads, so this is always true.
func (q QueryOptions) IsRead() bool {
	return true
}

// AllowStaleRead reports whether the caller opted in to stale results via
// the AllowStale field.
func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}

// WriteRequest carries the options common to every write request.
type WriteRequest struct {
	// The target region for this write
	Region string
}

// RequestRegion returns the target region for this request.
func (w WriteRequest) RequestRegion() string {
	return w.Region
}

// IsRead reports whether the request is a read. WriteRequest only applies
// to writes, so this is always false.
func (w WriteRequest) IsRead() bool {
	return false
}

// AllowStaleRead always returns false for a WriteRequest.
func (w WriteRequest) AllowStaleRead() bool {
	return false
}

// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}

// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}

// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string // presumably one of the NodeStatus* values; see ValidNodeStatus
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain status
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	NodeID string
	QueryOptions
}

// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job
	WriteRequest
}

// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string
	WriteRequest
}

// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}

// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
	JobID string
	QueryOptions
}

// JobListRequest is used to parameterize a list request
type JobListRequest struct {
	QueryOptions
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
	QueryOptions
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalDeleteRequest is used for deleting an evaluation.
type EvalDeleteRequest struct {
	// Evals and Allocs are plain strings; presumably the IDs of the
	// evaluations and allocations to remove (confirm against the handler).
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers []string
	Timeout    time.Duration
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation
	WriteRequest
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// GenericRequest is used to request where no
// specific information is needed.
type GenericRequest struct {
	QueryOptions
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

// Keys of the VersionResponse.Versions map.
const (
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobListResponse is used for a list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string
	QueryMeta
}

// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}

// AllocListResponse is used for a list request
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// EvalListResponse is used for a list request
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// Node statuses accepted by ValidNodeStatus.
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)

// ShouldDrainNode checks if a given node status should trigger an
// evaluation. Some states don't require any further action.
426 func ShouldDrainNode(status string) bool { 427 switch status { 428 case NodeStatusInit, NodeStatusReady: 429 return false 430 case NodeStatusDown: 431 return true 432 default: 433 panic(fmt.Sprintf("unhandled node status %s", status)) 434 } 435 } 436 437 // ValidNodeStatus is used to check if a node status is valid 438 func ValidNodeStatus(status string) bool { 439 switch status { 440 case NodeStatusInit, NodeStatusReady, NodeStatusDown: 441 return true 442 default: 443 return false 444 } 445 } 446 447 // Node is a representation of a schedulable client node 448 type Node struct { 449 // ID is a unique identifier for the node. It can be constructed 450 // by doing a concatenation of the Name and Datacenter as a simple 451 // approach. Alternatively a UUID may be used. 452 ID string 453 454 // Datacenter for this node 455 Datacenter string 456 457 // Node name 458 Name string 459 460 // Attributes is an arbitrary set of key/value 461 // data that can be used for constraints. Examples 462 // include "kernel.name=linux", "arch=386", "driver.docker=1", 463 // "docker.runtime=1.8.3" 464 Attributes map[string]string 465 466 // Resources is the available resources on the client. 467 // For example 'cpu=2' 'memory=2048' 468 Resources *Resources 469 470 // Reserved is the set of resources that are reserved, 471 // and should be subtracted from the total resources for 472 // the purposes of scheduling. This may be provide certain 473 // high-watermark tolerances or because of external schedulers 474 // consuming resources. 475 Reserved *Resources 476 477 // Links are used to 'link' this client to external 478 // systems. For example 'consul=foo.dc1' 'aws=i-83212' 479 // 'ami=ami-123' 480 Links map[string]string 481 482 // Meta is used to associate arbitrary metadata with this 483 // client. This is opaque to Nomad. 
484 Meta map[string]string 485 486 // NodeClass is an opaque identifier used to group nodes 487 // together for the purpose of determining scheduling pressure. 488 NodeClass string 489 490 // Drain is controlled by the servers, and not the client. 491 // If true, no jobs will be scheduled to this node, and existing 492 // allocations will be drained. 493 Drain bool 494 495 // Status of this node 496 Status string 497 498 // StatusDescription is meant to provide more human useful information 499 StatusDescription string 500 501 // Raft Indexes 502 CreateIndex uint64 503 ModifyIndex uint64 504 } 505 506 // TerminalStatus returns if the current status is terminal and 507 // will no longer transition. 508 func (n *Node) TerminalStatus() bool { 509 switch n.Status { 510 case NodeStatusDown: 511 return true 512 default: 513 return false 514 } 515 } 516 517 // Stub returns a summarized version of the node 518 func (n *Node) Stub() *NodeListStub { 519 return &NodeListStub{ 520 ID: n.ID, 521 Datacenter: n.Datacenter, 522 Name: n.Name, 523 NodeClass: n.NodeClass, 524 Drain: n.Drain, 525 Status: n.Status, 526 StatusDescription: n.StatusDescription, 527 CreateIndex: n.CreateIndex, 528 ModifyIndex: n.ModifyIndex, 529 } 530 } 531 532 // NodeListStub is used to return a subset of job information 533 // for the job list 534 type NodeListStub struct { 535 ID string 536 Datacenter string 537 Name string 538 NodeClass string 539 Drain bool 540 Status string 541 StatusDescription string 542 CreateIndex uint64 543 ModifyIndex uint64 544 } 545 546 // Resources is used to define the resources available 547 // on a client 548 type Resources struct { 549 CPU int 550 MemoryMB int `mapstructure:"memory"` 551 DiskMB int `mapstructure:"disk"` 552 IOPS int 553 Networks []*NetworkResource 554 } 555 556 // Copy returns a deep copy of the resources 557 func (r *Resources) Copy() *Resources { 558 newR := new(Resources) 559 *newR = *r 560 n := len(r.Networks) 561 newR.Networks = 
make([]*NetworkResource, n) 562 for i := 0; i < n; i++ { 563 newR.Networks[i] = r.Networks[i].Copy() 564 } 565 return newR 566 } 567 568 // NetIndex finds the matching net index using device name 569 func (r *Resources) NetIndex(n *NetworkResource) int { 570 for idx, net := range r.Networks { 571 if net.Device == n.Device { 572 return idx 573 } 574 } 575 return -1 576 } 577 578 // Superset checks if one set of resources is a superset 579 // of another. This ignores network resources, and the NetworkIndex 580 // should be used for that. 581 func (r *Resources) Superset(other *Resources) (bool, string) { 582 if r.CPU < other.CPU { 583 return false, "cpu exhausted" 584 } 585 if r.MemoryMB < other.MemoryMB { 586 return false, "memory exhausted" 587 } 588 if r.DiskMB < other.DiskMB { 589 return false, "disk exhausted" 590 } 591 if r.IOPS < other.IOPS { 592 return false, "iops exhausted" 593 } 594 return true, "" 595 } 596 597 // Add adds the resources of the delta to this, potentially 598 // returning an error if not possible. 
599 func (r *Resources) Add(delta *Resources) error { 600 if delta == nil { 601 return nil 602 } 603 r.CPU += delta.CPU 604 r.MemoryMB += delta.MemoryMB 605 r.DiskMB += delta.DiskMB 606 r.IOPS += delta.IOPS 607 608 for _, n := range delta.Networks { 609 // Find the matching interface by IP or CIDR 610 idx := r.NetIndex(n) 611 if idx == -1 { 612 r.Networks = append(r.Networks, n.Copy()) 613 } else { 614 r.Networks[idx].Add(n) 615 } 616 } 617 return nil 618 } 619 620 func (r *Resources) GoString() string { 621 return fmt.Sprintf("*%#v", *r) 622 } 623 624 type Port struct { 625 Label string 626 Value int `mapstructure:"static"` 627 } 628 629 // NetworkResource is used to represent available network 630 // resources 631 type NetworkResource struct { 632 Device string // Name of the device 633 CIDR string // CIDR block of addresses 634 IP string // IP address 635 MBits int // Throughput 636 ReservedPorts []Port // Reserved ports 637 DynamicPorts []Port // Dynamically assigned ports 638 } 639 640 // Copy returns a deep copy of the network resource 641 func (n *NetworkResource) Copy() *NetworkResource { 642 newR := new(NetworkResource) 643 *newR = *n 644 if n.ReservedPorts != nil { 645 newR.ReservedPorts = make([]Port, len(n.ReservedPorts)) 646 copy(newR.ReservedPorts, n.ReservedPorts) 647 } 648 if n.DynamicPorts != nil { 649 newR.DynamicPorts = make([]Port, len(n.DynamicPorts)) 650 copy(newR.DynamicPorts, n.DynamicPorts) 651 } 652 return newR 653 } 654 655 // Add adds the resources of the delta to this, potentially 656 // returning an error if not possible. 657 func (n *NetworkResource) Add(delta *NetworkResource) { 658 if len(delta.ReservedPorts) > 0 { 659 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...) 660 } 661 n.MBits += delta.MBits 662 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...) 
663 } 664 665 func (n *NetworkResource) GoString() string { 666 return fmt.Sprintf("*%#v", *n) 667 } 668 669 func (n *NetworkResource) MapLabelToValues(port_map map[string]int) map[string]int { 670 labelValues := make(map[string]int) 671 ports := append(n.ReservedPorts, n.DynamicPorts...) 672 for _, port := range ports { 673 if mapping, ok := port_map[port.Label]; ok { 674 labelValues[port.Label] = mapping 675 } else { 676 labelValues[port.Label] = port.Value 677 } 678 } 679 return labelValues 680 } 681 682 const ( 683 // JobTypeNomad is reserved for internal system tasks and is 684 // always handled by the CoreScheduler. 685 JobTypeCore = "_core" 686 JobTypeService = "service" 687 JobTypeBatch = "batch" 688 JobTypeSystem = "system" 689 ) 690 691 const ( 692 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 693 JobStatusRunning = "running" // Running means the entire job is running 694 JobStatusComplete = "complete" // Complete means there was a clean termination 695 JobStatusDead = "dead" // Dead means there was abnormal termination 696 ) 697 698 const ( 699 // JobMinPriority is the minimum allowed priority 700 JobMinPriority = 1 701 702 // JobDefaultPriority is the default priority if not 703 // not specified. 704 JobDefaultPriority = 50 705 706 // JobMaxPriority is the maximum allowed priority 707 JobMaxPriority = 100 708 709 // Ensure CoreJobPriority is higher than any user 710 // specified job so that it gets priority. This is important 711 // for the system to remain healthy. 712 CoreJobPriority = JobMaxPriority * 2 713 ) 714 715 // Job is the scope of a scheduling request to Nomad. It is the largest 716 // scoped object, and is a named collection of task groups. Each task group 717 // is further composed of tasks. A task group (TG) is the unit of scheduling 718 // however. 
719 type Job struct { 720 // Region is the Nomad region that handles scheduling this job 721 Region string 722 723 // ID is a unique identifier for the job per region. It can be 724 // specified hierarchically like LineOfBiz/OrgName/Team/Project 725 ID string 726 727 // Name is the logical name of the job used to refer to it. This is unique 728 // per region, but not unique globally. 729 Name string 730 731 // Type is used to control various behaviors about the job. Most jobs 732 // are service jobs, meaning they are expected to be long lived. 733 // Some jobs are batch oriented meaning they run and then terminate. 734 // This can be extended in the future to support custom schedulers. 735 Type string 736 737 // Priority is used to control scheduling importance and if this job 738 // can preempt other jobs. 739 Priority int 740 741 // AllAtOnce is used to control if incremental scheduling of task groups 742 // is allowed or if we must do a gang scheduling of the entire job. This 743 // can slow down larger jobs if resources are not available. 744 AllAtOnce bool `mapstructure:"all_at_once"` 745 746 // Datacenters contains all the datacenters this job is allowed to span 747 Datacenters []string 748 749 // Constraints can be specified at a job level and apply to 750 // all the task groups and tasks. 751 Constraints []*Constraint 752 753 // TaskGroups are the collections of task groups that this job needs 754 // to run. Each task group is an atomic unit of scheduling and placement. 755 TaskGroups []*TaskGroup 756 757 // Update is used to control the update strategy 758 Update UpdateStrategy 759 760 // Meta is used to associate arbitrary metadata with this 761 // job. This is opaque to Nomad. 
762 Meta map[string]string 763 764 // Job status 765 Status string 766 767 // StatusDescription is meant to provide more human useful information 768 StatusDescription string 769 770 // Raft Indexes 771 CreateIndex uint64 772 ModifyIndex uint64 773 } 774 775 // Validate is used to sanity check a job input 776 func (j *Job) Validate() error { 777 var mErr multierror.Error 778 if j.Region == "" { 779 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 780 } 781 if j.ID == "" { 782 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 783 } else if strings.Contains(j.ID, " ") { 784 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 785 } 786 if j.Name == "" { 787 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 788 } 789 if j.Type == "" { 790 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 791 } 792 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 793 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 794 } 795 if len(j.Datacenters) == 0 { 796 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 797 } 798 if len(j.TaskGroups) == 0 { 799 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 800 } 801 for idx, constr := range j.Constraints { 802 if err := constr.Validate(); err != nil { 803 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 804 mErr.Errors = append(mErr.Errors, outer) 805 } 806 } 807 808 // Check for duplicate task groups 809 taskGroups := make(map[string]int) 810 for idx, tg := range j.TaskGroups { 811 if tg.Name == "" { 812 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 813 } else if existing, ok := taskGroups[tg.Name]; ok { 814 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 815 } else { 816 taskGroups[tg.Name] = 
idx 817 } 818 819 if j.Type == "system" && tg.Count != 1 { 820 mErr.Errors = append(mErr.Errors, 821 fmt.Errorf("Job task group %d has count %d. Only count of 1 is supported with system scheduler", 822 idx+1, tg.Count)) 823 } 824 } 825 826 // Validate the task group 827 for idx, tg := range j.TaskGroups { 828 if err := tg.Validate(); err != nil { 829 outer := fmt.Errorf("Task group %d validation failed: %s", idx+1, err) 830 mErr.Errors = append(mErr.Errors, outer) 831 } 832 } 833 return mErr.ErrorOrNil() 834 } 835 836 // LookupTaskGroup finds a task group by name 837 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 838 for _, tg := range j.TaskGroups { 839 if tg.Name == name { 840 return tg 841 } 842 } 843 return nil 844 } 845 846 // Stub is used to return a summary of the job 847 func (j *Job) Stub() *JobListStub { 848 return &JobListStub{ 849 ID: j.ID, 850 Name: j.Name, 851 Type: j.Type, 852 Priority: j.Priority, 853 Status: j.Status, 854 StatusDescription: j.StatusDescription, 855 CreateIndex: j.CreateIndex, 856 ModifyIndex: j.ModifyIndex, 857 } 858 } 859 860 // JobListStub is used to return a subset of job information 861 // for the job list 862 type JobListStub struct { 863 ID string 864 Name string 865 Type string 866 Priority int 867 Status string 868 StatusDescription string 869 CreateIndex uint64 870 ModifyIndex uint64 871 } 872 873 // UpdateStrategy is used to modify how updates are done 874 type UpdateStrategy struct { 875 // Stagger is the amount of time between the updates 876 Stagger time.Duration 877 878 // MaxParallel is how many updates can be done in parallel 879 MaxParallel int `mapstructure:"max_parallel"` 880 } 881 882 // Rolling returns if a rolling strategy should be used 883 func (u *UpdateStrategy) Rolling() bool { 884 return u.Stagger > 0 && u.MaxParallel > 0 885 } 886 887 // RestartPolicy influences how Nomad restarts Tasks when they 888 // crash or fail. 
889 type RestartPolicy struct { 890 Attempts int 891 Interval time.Duration 892 Delay time.Duration 893 } 894 895 func (r *RestartPolicy) Validate() error { 896 if r.Interval == 0 { 897 return nil 898 } 899 if time.Duration(r.Attempts)*r.Delay > r.Interval { 900 return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay) 901 } 902 return nil 903 } 904 905 func NewRestartPolicy(jobType string) *RestartPolicy { 906 switch jobType { 907 case JobTypeService, JobTypeSystem: 908 rp := defaultServiceJobRestartPolicy 909 return &rp 910 case JobTypeBatch: 911 rp := defaultBatchJobRestartPolicy 912 return &rp 913 } 914 return nil 915 } 916 917 // TaskGroup is an atomic unit of placement. Each task group belongs to 918 // a job and may contain any number of tasks. A task group support running 919 // in many replicas using the same configuration.. 920 type TaskGroup struct { 921 // Name of the task group 922 Name string 923 924 // Count is the number of replicas of this task group that should 925 // be scheduled. 926 Count int 927 928 // Constraints can be specified at a task group level and apply to 929 // all the tasks contained. 930 Constraints []*Constraint 931 932 //RestartPolicy of a TaskGroup 933 RestartPolicy *RestartPolicy 934 935 // Tasks are the collection of tasks that this task group needs to run 936 Tasks []*Task 937 938 // Meta is used to associate arbitrary metadata with this 939 // task group. This is opaque to Nomad. 
940 Meta map[string]string 941 } 942 943 // Validate is used to sanity check a task group 944 func (tg *TaskGroup) Validate() error { 945 var mErr multierror.Error 946 if tg.Name == "" { 947 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 948 } 949 if tg.Count <= 0 { 950 mErr.Errors = append(mErr.Errors, errors.New("Task group count must be positive")) 951 } 952 if len(tg.Tasks) == 0 { 953 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 954 } 955 for idx, constr := range tg.Constraints { 956 if err := constr.Validate(); err != nil { 957 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 958 mErr.Errors = append(mErr.Errors, outer) 959 } 960 } 961 962 if tg.RestartPolicy != nil { 963 if err := tg.RestartPolicy.Validate(); err != nil { 964 mErr.Errors = append(mErr.Errors, err) 965 } 966 } else { 967 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 968 } 969 970 // Check for duplicate tasks 971 tasks := make(map[string]int) 972 for idx, task := range tg.Tasks { 973 if task.Name == "" { 974 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 975 } else if existing, ok := tasks[task.Name]; ok { 976 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 977 } else { 978 tasks[task.Name] = idx 979 } 980 } 981 982 // Validate the tasks 983 for idx, task := range tg.Tasks { 984 if err := task.Validate(); err != nil { 985 outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err) 986 mErr.Errors = append(mErr.Errors, outer) 987 } 988 } 989 return mErr.ErrorOrNil() 990 } 991 992 // LookupTask finds a task by name 993 func (tg *TaskGroup) LookupTask(name string) *Task { 994 for _, t := range tg.Tasks { 995 if t.Name == name { 996 return t 997 } 998 } 999 return nil 1000 } 1001 1002 func (tg *TaskGroup) GoString() string { 1003 return fmt.Sprintf("*%#v", *tg) 
1004 } 1005 1006 const ( 1007 ServiceCheckHTTP = "http" 1008 ServiceCheckTCP = "tcp" 1009 ServiceCheckDocker = "docker" 1010 ServiceCheckScript = "script" 1011 ) 1012 1013 // The ServiceCheck data model represents the consul health check that 1014 // Nomad registers for a Task 1015 type ServiceCheck struct { 1016 Id string // Id of the check, must be unique and it is autogenrated 1017 Name string // Name of the check, defaults to id 1018 Type string // Type of the check - tcp, http, docker and script 1019 Script string // Script to invoke for script check 1020 Path string // path of the health check url for http type check 1021 Protocol string // Protocol to use if check is http, defaults to http 1022 Interval time.Duration // Interval of the check 1023 Timeout time.Duration // Timeout of the response from the check before consul fails the check 1024 } 1025 1026 func (sc *ServiceCheck) Validate() error { 1027 t := strings.ToLower(sc.Type) 1028 if t != ServiceCheckTCP && t != ServiceCheckHTTP { 1029 return fmt.Errorf("Check with name %v has invalid check type: %s ", sc.Name, sc.Type) 1030 } 1031 if sc.Type == ServiceCheckHTTP && sc.Path == "" { 1032 return fmt.Errorf("http checks needs the Http path information.") 1033 } 1034 1035 if sc.Type == ServiceCheckScript && sc.Script == "" { 1036 return fmt.Errorf("Script checks need the script to invoke") 1037 } 1038 return nil 1039 } 1040 1041 // The Service model represents a Consul service defintion 1042 type Service struct { 1043 Id string // Id of the service, this needs to be unique on a local machine 1044 Name string // Name of the service, defaults to id 1045 Tags []string // List of tags for the service 1046 PortLabel string `mapstructure:"port"` // port for the service 1047 Checks []ServiceCheck // List of checks associated with the service 1048 } 1049 1050 func (s *Service) Validate() error { 1051 var mErr multierror.Error 1052 for _, c := range s.Checks { 1053 if err := c.Validate(); err != nil { 1054 
mErr.Errors = append(mErr.Errors, err) 1055 } 1056 } 1057 return mErr.ErrorOrNil() 1058 } 1059 1060 // Task is a single process typically that is executed as part of a task group. 1061 type Task struct { 1062 // Name of the task 1063 Name string 1064 1065 // Driver is used to control which driver is used 1066 Driver string 1067 1068 // Config is provided to the driver to initialize 1069 Config map[string]interface{} 1070 1071 // Map of environment variables to be used by the driver 1072 Env map[string]string 1073 1074 // List of service definitions exposed by the Task 1075 Services []*Service 1076 1077 // Constraints can be specified at a task level and apply only to 1078 // the particular task. 1079 Constraints []*Constraint 1080 1081 // Resources is the resources needed by this task 1082 Resources *Resources 1083 1084 // Meta is used to associate arbitrary metadata with this 1085 // task. This is opaque to Nomad. 1086 Meta map[string]string 1087 } 1088 1089 func (t *Task) GoString() string { 1090 return fmt.Sprintf("*%#v", *t) 1091 } 1092 1093 // Set of possible states for a task. 1094 const ( 1095 TaskStatePending = "pending" // The task is waiting to be run. 1096 TaskStateRunning = "running" // The task is currently running. 1097 TaskStateDead = "dead" // Terminal state of task. 1098 ) 1099 1100 // TaskState tracks the current state of a task and events that caused state 1101 // transistions. 1102 type TaskState struct { 1103 // The current state of the task. 1104 State string 1105 1106 // Series of task events that transistion the state of the task. 1107 Events []*TaskEvent 1108 } 1109 1110 const ( 1111 // A Driver failure indicates that the task could not be started due to a 1112 // failure in the driver. 1113 TaskDriverFailure = "Driver Failure" 1114 1115 // Task Started signals that the task was started and its timestamp can be 1116 // used to determine the running length of the task. 
1117 TaskStarted = "Started" 1118 1119 // Task terminated indicates that the task was started and exited. 1120 TaskTerminated = "Terminated" 1121 1122 // Task Killed indicates a user has killed the task. 1123 TaskKilled = "Killed" 1124 ) 1125 1126 // TaskEvent is an event that effects the state of a task and contains meta-data 1127 // appropriate to the events type. 1128 type TaskEvent struct { 1129 Type string 1130 Time int64 // Unix Nanosecond timestamp 1131 1132 // Driver Failure fields. 1133 DriverError string // A driver error occured while starting the task. 1134 1135 // Task Terminated Fields. 1136 ExitCode int // The exit code of the task. 1137 Signal int // The signal that terminated the task. 1138 Message string // A possible message explaining the termination of the task. 1139 1140 // Task Killed Fields. 1141 KillError string // Error killing the task. 1142 } 1143 1144 func NewTaskEvent(event string) *TaskEvent { 1145 return &TaskEvent{ 1146 Type: event, 1147 Time: time.Now().UnixNano(), 1148 } 1149 } 1150 1151 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 1152 if err != nil { 1153 e.DriverError = err.Error() 1154 } 1155 return e 1156 } 1157 1158 func (e *TaskEvent) SetExitCode(c int) *TaskEvent { 1159 e.ExitCode = c 1160 return e 1161 } 1162 1163 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 1164 e.Signal = s 1165 return e 1166 } 1167 1168 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 1169 if err != nil { 1170 e.Message = err.Error() 1171 } 1172 return e 1173 } 1174 1175 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 1176 if err != nil { 1177 e.KillError = err.Error() 1178 } 1179 return e 1180 } 1181 1182 // Validate is used to sanity check a task group 1183 func (t *Task) Validate() error { 1184 var mErr multierror.Error 1185 if t.Name == "" { 1186 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 1187 } 1188 if t.Driver == "" { 1189 mErr.Errors = append(mErr.Errors, errors.New("Missing task 
driver")) 1190 } 1191 if t.Resources == nil { 1192 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 1193 } 1194 for idx, constr := range t.Constraints { 1195 if err := constr.Validate(); err != nil { 1196 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1197 mErr.Errors = append(mErr.Errors, outer) 1198 } 1199 } 1200 1201 for _, service := range t.Services { 1202 if err := service.Validate(); err != nil { 1203 mErr.Errors = append(mErr.Errors, err) 1204 } 1205 } 1206 return mErr.ErrorOrNil() 1207 } 1208 1209 const ( 1210 ConstraintDistinctHosts = "distinct_hosts" 1211 ConstraintRegex = "regexp" 1212 ConstraintVersion = "version" 1213 ) 1214 1215 // Constraints are used to restrict placement options. 1216 type Constraint struct { 1217 LTarget string // Left-hand target 1218 RTarget string // Right-hand target 1219 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 1220 } 1221 1222 func (c *Constraint) String() string { 1223 return fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 1224 } 1225 1226 func (c *Constraint) Validate() error { 1227 var mErr multierror.Error 1228 if c.Operand == "" { 1229 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 1230 } 1231 1232 // Perform additional validation based on operand 1233 switch c.Operand { 1234 case ConstraintRegex: 1235 if _, err := regexp.Compile(c.RTarget); err != nil { 1236 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 1237 } 1238 case ConstraintVersion: 1239 if _, err := version.NewConstraint(c.RTarget); err != nil { 1240 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 1241 } 1242 } 1243 return mErr.ErrorOrNil() 1244 } 1245 1246 const ( 1247 AllocDesiredStatusRun = "run" // Allocation should run 1248 AllocDesiredStatusStop = "stop" // Allocation should stop 1249 AllocDesiredStatusEvict = "evict" // Allocation should stop, and 
was evicted 1250 AllocDesiredStatusFailed = "failed" // Allocation failed to be done 1251 ) 1252 1253 const ( 1254 AllocClientStatusPending = "pending" 1255 AllocClientStatusRunning = "running" 1256 AllocClientStatusDead = "dead" 1257 AllocClientStatusFailed = "failed" 1258 ) 1259 1260 // Allocation is used to allocate the placement of a task group to a node. 1261 type Allocation struct { 1262 // ID of the allocation (UUID) 1263 ID string 1264 1265 // ID of the evaluation that generated this allocation 1266 EvalID string 1267 1268 // Name is a logical name of the allocation. 1269 Name string 1270 1271 // NodeID is the node this is being placed on 1272 NodeID string 1273 1274 // Job is the parent job of the task group being allocated. 1275 // This is copied at allocation time to avoid issues if the job 1276 // definition is updated. 1277 JobID string 1278 Job *Job 1279 1280 // TaskGroup is the name of the task group that should be run 1281 TaskGroup string 1282 1283 // Resources is the total set of resources allocated as part 1284 // of this allocation of the task group. 1285 Resources *Resources 1286 1287 // TaskResources is the set of resources allocated to each 1288 // task. These should sum to the total Resources. 
1289 TaskResources map[string]*Resources 1290 1291 // Metrics associated with this allocation 1292 Metrics *AllocMetric 1293 1294 // Desired Status of the allocation on the client 1295 DesiredStatus string 1296 1297 // DesiredStatusDescription is meant to provide more human useful information 1298 DesiredDescription string 1299 1300 // Status of the allocation on the client 1301 ClientStatus string 1302 1303 // ClientStatusDescription is meant to provide more human useful information 1304 ClientDescription string 1305 1306 // TaskStates stores the state of each task, 1307 TaskStates map[string]*TaskState 1308 1309 // Raft Indexes 1310 CreateIndex uint64 1311 ModifyIndex uint64 1312 } 1313 1314 // TerminalStatus returns if the desired status is terminal and 1315 // will no longer transition. This is not based on the current client status. 1316 func (a *Allocation) TerminalStatus() bool { 1317 switch a.DesiredStatus { 1318 case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed: 1319 return true 1320 default: 1321 return false 1322 } 1323 } 1324 1325 // Stub returns a list stub for the allocation 1326 func (a *Allocation) Stub() *AllocListStub { 1327 return &AllocListStub{ 1328 ID: a.ID, 1329 EvalID: a.EvalID, 1330 Name: a.Name, 1331 NodeID: a.NodeID, 1332 JobID: a.JobID, 1333 TaskGroup: a.TaskGroup, 1334 DesiredStatus: a.DesiredStatus, 1335 DesiredDescription: a.DesiredDescription, 1336 ClientStatus: a.ClientStatus, 1337 ClientDescription: a.ClientDescription, 1338 TaskStates: a.TaskStates, 1339 CreateIndex: a.CreateIndex, 1340 ModifyIndex: a.ModifyIndex, 1341 } 1342 } 1343 1344 // AllocListStub is used to return a subset of alloc information 1345 type AllocListStub struct { 1346 ID string 1347 EvalID string 1348 Name string 1349 NodeID string 1350 JobID string 1351 TaskGroup string 1352 DesiredStatus string 1353 DesiredDescription string 1354 ClientStatus string 1355 ClientDescription string 1356 TaskStates map[string]*TaskState 1357 
CreateIndex uint64 1358 ModifyIndex uint64 1359 } 1360 1361 // AllocMetric is used to track various metrics while attempting 1362 // to make an allocation. These are used to debug a job, or to better 1363 // understand the pressure within the system. 1364 type AllocMetric struct { 1365 // NodesEvaluated is the number of nodes that were evaluated 1366 NodesEvaluated int 1367 1368 // NodesFiltered is the number of nodes filtered due to a constraint 1369 NodesFiltered int 1370 1371 // ClassFiltered is the number of nodes filtered by class 1372 ClassFiltered map[string]int 1373 1374 // ConstraintFiltered is the number of failures caused by constraint 1375 ConstraintFiltered map[string]int 1376 1377 // NodesExhausted is the number of nodes skipped due to being 1378 // exhausted of at least one resource 1379 NodesExhausted int 1380 1381 // ClassExhausted is the number of nodes exhausted by class 1382 ClassExhausted map[string]int 1383 1384 // DimensionExhausted provides the count by dimension or reason 1385 DimensionExhausted map[string]int 1386 1387 // Scores is the scores of the final few nodes remaining 1388 // for placement. The top score is typically selected. 1389 Scores map[string]float64 1390 1391 // AllocationTime is a measure of how long the allocation 1392 // attempt took. This can affect performance and SLAs. 1393 AllocationTime time.Duration 1394 1395 // CoalescedFailures indicates the number of other 1396 // allocations that were coalesced into this failed allocation. 1397 // This is to prevent creating many failed allocations for a 1398 // single task group. 
1399 CoalescedFailures int 1400 } 1401 1402 func (a *AllocMetric) EvaluateNode() { 1403 a.NodesEvaluated += 1 1404 } 1405 1406 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 1407 a.NodesFiltered += 1 1408 if node != nil && node.NodeClass != "" { 1409 if a.ClassFiltered == nil { 1410 a.ClassFiltered = make(map[string]int) 1411 } 1412 a.ClassFiltered[node.NodeClass] += 1 1413 } 1414 if constraint != "" { 1415 if a.ConstraintFiltered == nil { 1416 a.ConstraintFiltered = make(map[string]int) 1417 } 1418 a.ConstraintFiltered[constraint] += 1 1419 } 1420 } 1421 1422 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 1423 a.NodesExhausted += 1 1424 if node != nil && node.NodeClass != "" { 1425 if a.ClassExhausted == nil { 1426 a.ClassExhausted = make(map[string]int) 1427 } 1428 a.ClassExhausted[node.NodeClass] += 1 1429 } 1430 if dimension != "" { 1431 if a.DimensionExhausted == nil { 1432 a.DimensionExhausted = make(map[string]int) 1433 } 1434 a.DimensionExhausted[dimension] += 1 1435 } 1436 } 1437 1438 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 1439 if a.Scores == nil { 1440 a.Scores = make(map[string]float64) 1441 } 1442 key := fmt.Sprintf("%s.%s", node.ID, name) 1443 a.Scores[key] = score 1444 } 1445 1446 const ( 1447 EvalStatusPending = "pending" 1448 EvalStatusComplete = "complete" 1449 EvalStatusFailed = "failed" 1450 ) 1451 1452 const ( 1453 EvalTriggerJobRegister = "job-register" 1454 EvalTriggerJobDeregister = "job-deregister" 1455 EvalTriggerNodeUpdate = "node-update" 1456 EvalTriggerScheduled = "scheduled" 1457 EvalTriggerRollingUpdate = "rolling-update" 1458 ) 1459 1460 const ( 1461 // CoreJobEvalGC is used for the garbage collection of evaluations 1462 // and allocations. We periodically scan evaluations in a terminal state, 1463 // in which all the corresponding allocations are also terminal. We 1464 // delete these out of the system to bound the state. 
1465 CoreJobEvalGC = "eval-gc" 1466 1467 // CoreJobNodeGC is used for the garbage collection of failed nodes. 1468 // We periodically scan nodes in a terminal state, and if they have no 1469 // corresponding allocations we delete these out of the system. 1470 CoreJobNodeGC = "node-gc" 1471 ) 1472 1473 // Evaluation is used anytime we need to apply business logic as a result 1474 // of a change to our desired state (job specification) or the emergent state 1475 // (registered nodes). When the inputs change, we need to "evaluate" them, 1476 // potentially taking action (allocation of work) or doing nothing if the state 1477 // of the world does not require it. 1478 type Evaluation struct { 1479 // ID is a randonly generated UUID used for this evaluation. This 1480 // is assigned upon the creation of the evaluation. 1481 ID string 1482 1483 // Priority is used to control scheduling importance and if this job 1484 // can preempt other jobs. 1485 Priority int 1486 1487 // Type is used to control which schedulers are available to handle 1488 // this evaluation. 1489 Type string 1490 1491 // TriggeredBy is used to give some insight into why this Eval 1492 // was created. (Job change, node failure, alloc failure, etc). 1493 TriggeredBy string 1494 1495 // JobID is the job this evaluation is scoped to. Evaluations cannot 1496 // be run in parallel for a given JobID, so we serialize on this. 1497 JobID string 1498 1499 // JobModifyIndex is the modify index of the job at the time 1500 // the evaluation was created 1501 JobModifyIndex uint64 1502 1503 // NodeID is the node that was affected triggering the evaluation. 
1504 NodeID string 1505 1506 // NodeModifyIndex is the modify index of the node at the time 1507 // the evaluation was created 1508 NodeModifyIndex uint64 1509 1510 // Status of the evaluation 1511 Status string 1512 1513 // StatusDescription is meant to provide more human useful information 1514 StatusDescription string 1515 1516 // Wait is a minimum wait time for running the eval. This is used to 1517 // support a rolling upgrade. 1518 Wait time.Duration 1519 1520 // NextEval is the evaluation ID for the eval created to do a followup. 1521 // This is used to support rolling upgrades, where we need a chain of evaluations. 1522 NextEval string 1523 1524 // PreviousEval is the evaluation ID for the eval creating this one to do a followup. 1525 // This is used to support rolling upgrades, where we need a chain of evaluations. 1526 PreviousEval string 1527 1528 // Raft Indexes 1529 CreateIndex uint64 1530 ModifyIndex uint64 1531 } 1532 1533 // TerminalStatus returns if the current status is terminal and 1534 // will no longer transition. 
1535 func (e *Evaluation) TerminalStatus() bool { 1536 switch e.Status { 1537 case EvalStatusComplete, EvalStatusFailed: 1538 return true 1539 default: 1540 return false 1541 } 1542 } 1543 1544 func (e *Evaluation) GoString() string { 1545 return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID) 1546 } 1547 1548 func (e *Evaluation) Copy() *Evaluation { 1549 ne := new(Evaluation) 1550 *ne = *e 1551 return ne 1552 } 1553 1554 // ShouldEnqueue checks if a given evaluation should be enqueued 1555 func (e *Evaluation) ShouldEnqueue() bool { 1556 switch e.Status { 1557 case EvalStatusPending: 1558 return true 1559 case EvalStatusComplete, EvalStatusFailed: 1560 return false 1561 default: 1562 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 1563 } 1564 } 1565 1566 // MakePlan is used to make a plan from the given evaluation 1567 // for a given Job 1568 func (e *Evaluation) MakePlan(j *Job) *Plan { 1569 p := &Plan{ 1570 EvalID: e.ID, 1571 Priority: e.Priority, 1572 NodeUpdate: make(map[string][]*Allocation), 1573 NodeAllocation: make(map[string][]*Allocation), 1574 } 1575 if j != nil { 1576 p.AllAtOnce = j.AllAtOnce 1577 } 1578 return p 1579 } 1580 1581 // NextRollingEval creates an evaluation to followup this eval for rolling updates 1582 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 1583 return &Evaluation{ 1584 ID: GenerateUUID(), 1585 Priority: e.Priority, 1586 Type: e.Type, 1587 TriggeredBy: EvalTriggerRollingUpdate, 1588 JobID: e.JobID, 1589 JobModifyIndex: e.JobModifyIndex, 1590 Status: EvalStatusPending, 1591 Wait: wait, 1592 PreviousEval: e.ID, 1593 } 1594 } 1595 1596 // Plan is used to submit a commit plan for task allocations. These 1597 // are submitted to the leader which verifies that resources have 1598 // not been overcommitted before admiting the plan. 
1599 type Plan struct { 1600 // EvalID is the evaluation ID this plan is associated with 1601 EvalID string 1602 1603 // EvalToken is used to prevent a split-brain processing of 1604 // an evaluation. There should only be a single scheduler running 1605 // an Eval at a time, but this could be violated after a leadership 1606 // transition. This unique token is used to reject plans that are 1607 // being submitted from a different leader. 1608 EvalToken string 1609 1610 // Priority is the priority of the upstream job 1611 Priority int 1612 1613 // AllAtOnce is used to control if incremental scheduling of task groups 1614 // is allowed or if we must do a gang scheduling of the entire job. 1615 // If this is false, a plan may be partially applied. Otherwise, the 1616 // entire plan must be able to make progress. 1617 AllAtOnce bool 1618 1619 // NodeUpdate contains all the allocations for each node. For each node, 1620 // this is a list of the allocations to update to either stop or evict. 1621 NodeUpdate map[string][]*Allocation 1622 1623 // NodeAllocation contains all the allocations for each node. 1624 // The evicts must be considered prior to the allocations. 1625 NodeAllocation map[string][]*Allocation 1626 1627 // FailedAllocs are allocations that could not be made, 1628 // but are persisted so that the user can use the feedback 1629 // to determine the cause. 
1630 FailedAllocs []*Allocation 1631 } 1632 1633 func (p *Plan) AppendUpdate(alloc *Allocation, status, desc string) { 1634 newAlloc := new(Allocation) 1635 *newAlloc = *alloc 1636 newAlloc.DesiredStatus = status 1637 newAlloc.DesiredDescription = desc 1638 node := alloc.NodeID 1639 existing := p.NodeUpdate[node] 1640 p.NodeUpdate[node] = append(existing, newAlloc) 1641 } 1642 1643 func (p *Plan) PopUpdate(alloc *Allocation) { 1644 existing := p.NodeUpdate[alloc.NodeID] 1645 n := len(existing) 1646 if n > 0 && existing[n-1].ID == alloc.ID { 1647 existing = existing[:n-1] 1648 if len(existing) > 0 { 1649 p.NodeUpdate[alloc.NodeID] = existing 1650 } else { 1651 delete(p.NodeUpdate, alloc.NodeID) 1652 } 1653 } 1654 } 1655 1656 func (p *Plan) AppendAlloc(alloc *Allocation) { 1657 node := alloc.NodeID 1658 existing := p.NodeAllocation[node] 1659 p.NodeAllocation[node] = append(existing, alloc) 1660 } 1661 1662 func (p *Plan) AppendFailed(alloc *Allocation) { 1663 p.FailedAllocs = append(p.FailedAllocs, alloc) 1664 } 1665 1666 // IsNoOp checks if this plan would do nothing 1667 func (p *Plan) IsNoOp() bool { 1668 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0 1669 } 1670 1671 // PlanResult is the result of a plan submitted to the leader. 1672 type PlanResult struct { 1673 // NodeUpdate contains all the updates that were committed. 1674 NodeUpdate map[string][]*Allocation 1675 1676 // NodeAllocation contains all the allocations that were committed. 1677 NodeAllocation map[string][]*Allocation 1678 1679 // FailedAllocs are allocations that could not be made, 1680 // but are persisted so that the user can use the feedback 1681 // to determine the cause. 1682 FailedAllocs []*Allocation 1683 1684 // RefreshIndex is the index the worker should refresh state up to. 1685 // This allows all evictions and allocations to be materialized. 
1686 // If any allocations were rejected due to stale data (node state, 1687 // over committed) this can be used to force a worker refresh. 1688 RefreshIndex uint64 1689 1690 // AllocIndex is the Raft index in which the evictions and 1691 // allocations took place. This is used for the write index. 1692 AllocIndex uint64 1693 } 1694 1695 // IsNoOp checks if this plan result would do nothing 1696 func (p *PlanResult) IsNoOp() bool { 1697 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && len(p.FailedAllocs) == 0 1698 } 1699 1700 // FullCommit is used to check if all the allocations in a plan 1701 // were committed as part of the result. Returns if there was 1702 // a match, and the number of expected and actual allocations. 1703 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) { 1704 expected := 0 1705 actual := 0 1706 for name, allocList := range plan.NodeAllocation { 1707 didAlloc, _ := p.NodeAllocation[name] 1708 expected += len(allocList) 1709 actual += len(didAlloc) 1710 } 1711 return actual == expected, expected, actual 1712 } 1713 1714 // msgpackHandle is a shared handle for encoding/decoding of structs 1715 var MsgpackHandle = func() *codec.MsgpackHandle { 1716 h := &codec.MsgpackHandle{RawToString: true} 1717 1718 // Sets the default type for decoding a map into a nil interface{}. 1719 // This is necessary in particular because we store the driver configs as a 1720 // nil interface{}. 
1721 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 1722 return h 1723 }() 1724 1725 // Decode is used to decode a MsgPack encoded object 1726 func Decode(buf []byte, out interface{}) error { 1727 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out) 1728 } 1729 1730 // Encode is used to encode a MsgPack object with type prefix 1731 func Encode(t MessageType, msg interface{}) ([]byte, error) { 1732 var buf bytes.Buffer 1733 buf.WriteByte(uint8(t)) 1734 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg) 1735 return buf.Bytes(), err 1736 }