// Scraped from: github.com/mattyr/nomad@v0.3.3-0.20160919021406-3485a065154a/nomad/structs/structs.go

package structs

import (
	"bytes"
	"crypto/md5"
	"crypto/sha1"
	"crypto/sha256"
	"crypto/sha512"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"path/filepath"
	"reflect"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/gorhill/cronexpr"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/go-version"
	"github.com/hashicorp/nomad/helper/args"
	"github.com/mitchellh/copystructure"
	"github.com/ugorji/go/codec"

	hcodec "github.com/hashicorp/go-msgpack/codec"
)

// Sentinel errors shared by callers of this package.
var (
	ErrNoLeader     = fmt.Errorf("No cluster leader")
	ErrNoRegionPath = fmt.Errorf("No path to region")
)

// MessageType tags a message with the kind of request it carries; the
// known values are the *RequestType constants below.
type MessageType uint8

// The request types are numbered by iota, so the declaration order fixes
// each constant's numeric value. Add new types at the end only.
//
// NOTE(review): VaultAccessorDegisterRequestType looks like a misspelling of
// "Deregister"; renaming it would change the exported API, so it is left as is.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	VaultAccessorDegisterRequestType
)

const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented any time the APIs are changed in a way
	// that would break clients, to allow for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented any time the APIs are changed, to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	// NOTE(review): these look like the keys used in
	// VersionResponse.Versions; confirm against the Status.Version endpoint.
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)

// RPCInfo is used to describe common information about a query
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// If set, wait until query exceeds given index. Must be provided
	// with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
	AllowStale bool

	// If set, used as prefix for resource list searches
	Prefix string
}

// RequestRegion returns the region the query targets.
func (q QueryOptions) RequestRegion() string {
	return q.Region
}

// IsRead reports whether the request is a read. QueryOptions only applies
// to reads, so this is always true.
func (q QueryOptions) IsRead() bool {
	return true
}

// AllowStaleRead reports whether the AllowStale flag was set on the query.
func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}

// WriteRequest carries the options common to write requests.
type WriteRequest struct {
	// The target region for this write
	Region string
}

// RequestRegion returns the region the write targets.
func (w WriteRequest) RequestRegion() string {
	// The target region for this request
	return w.Region
}

// IsRead reports whether the request is a read. WriteRequest only applies
// to writes, so this is always false.
func (w WriteRequest) IsRead() bool {
	return false
}

// AllowStaleRead always returns false for a write request.
func (w WriteRequest) AllowStaleRead() bool {
	return false
}

// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is the time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}

// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}

// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to
	Datacenter string
}

// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain status
// of a node.
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	NodeID   string
	SecretID string
	QueryOptions
}

// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current job's index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	WriteRequest
}

// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string
	WriteRequest
}

// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}

// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
	JobID string
	QueryOptions
}

// JobListRequest is used to parameterize a list request
type JobListRequest struct {
	QueryOptions
}

// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
	Job  *Job
	Diff bool // Toggles an annotated diff
	WriteRequest
}

// JobSummaryRequest is used when we just need to get a specific job summary
type JobSummaryRequest struct {
	JobID string
	QueryOptions
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
	QueryOptions
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalDeleteRequest is used for deleting evaluations (Evals) and
// allocations (Allocs); both fields are lists of strings, presumably IDs.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers []string
	Timeout    time.Duration
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	WriteRequest
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}

// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	QueryOptions
}

// VaultAccessorsRequest is used to operate on a set of Vault accessors
type VaultAccessorsRequest struct {
	Accessors []*VaultAccessor
}

// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
	AllocID     string
	Task        string
	NodeID      string
	Accessor    string
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}

// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token
	Tasks map[string]string
	QueryMeta
}

// GenericRequest is used for requests where no
// specific information is needed.
type GenericRequest struct {
	QueryOptions
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader. If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response. This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
	// NOTE(review): presumably keyed by allocation ID with an index as the
	// value; confirm against the endpoint that fills it in.
	Allocs map[string]uint64
	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a node list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
	JobSummary *JobSummary
	QueryMeta
}

// JobListResponse is used for a job list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	// NextPeriodicLaunch is the point in time at which the job would next be
	// launched if submitted (a time.Time, not a duration).
	NextPeriodicLaunch time.Time

	WriteMeta
}

// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string
	QueryMeta
}

// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}

// AllocListResponse is used for a list request
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// EvalListResponse is used for a list request
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}

// The node statuses recognized by ValidNodeStatus and ShouldDrainNode.
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)

// ShouldDrainNode checks if a given node status should trigger an
// evaluation. Some states don't require any further action: only
// NodeStatusDown returns true.
//
// It panics when status is not one of the NodeStatus* constants, so callers
// handling unvalidated input should check ValidNodeStatus first.
func ShouldDrainNode(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady:
		return false
	case NodeStatusDown:
		return true
	default:
		panic(fmt.Sprintf("unhandled node status %s", status))
	}
}

// ValidNodeStatus is used to check if a node status is valid, i.e. it is
// one of the NodeStatus* constants.
func ValidNodeStatus(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
		return true
	default:
		return false
	}
}

// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may be to provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// StatusUpdatedAt is the time stamp at which the state of the node was
	// updated
	StatusUpdatedAt int64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// Copy returns a copy of the node, or nil when the receiver is nil.
//
// Scalar fields are copied by value. Attributes, Links and Meta are passed
// through CopyMapStringString, and Resources and Reserved through
// Resources.Copy, so the result is presumably independent of the receiver.
func (n *Node) Copy() *Node {
	if n == nil {
		return nil
	}
	nn := new(Node)
	// Shallow copy first; the reference-typed fields are replaced below.
	*nn = *n
	nn.Attributes = CopyMapStringString(nn.Attributes)
	nn.Resources = nn.Resources.Copy()
	nn.Reserved = nn.Reserved.Copy()
	nn.Links = CopyMapStringString(nn.Links)
	nn.Meta = CopyMapStringString(nn.Meta)
	return nn
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
719 func (n *Node) TerminalStatus() bool { 720 switch n.Status { 721 case NodeStatusDown: 722 return true 723 default: 724 return false 725 } 726 } 727 728 // Stub returns a summarized version of the node 729 func (n *Node) Stub() *NodeListStub { 730 return &NodeListStub{ 731 ID: n.ID, 732 Datacenter: n.Datacenter, 733 Name: n.Name, 734 NodeClass: n.NodeClass, 735 Drain: n.Drain, 736 Status: n.Status, 737 StatusDescription: n.StatusDescription, 738 CreateIndex: n.CreateIndex, 739 ModifyIndex: n.ModifyIndex, 740 } 741 } 742 743 // NodeListStub is used to return a subset of job information 744 // for the job list 745 type NodeListStub struct { 746 ID string 747 Datacenter string 748 Name string 749 NodeClass string 750 Drain bool 751 Status string 752 StatusDescription string 753 CreateIndex uint64 754 ModifyIndex uint64 755 } 756 757 // Resources is used to define the resources available 758 // on a client 759 type Resources struct { 760 CPU int 761 MemoryMB int `mapstructure:"memory"` 762 DiskMB int `mapstructure:"disk"` 763 IOPS int 764 Networks []*NetworkResource 765 } 766 767 const ( 768 BytesInMegabyte = 1024 * 1024 769 ) 770 771 // DefaultResources returns the default resources for a task. 772 func DefaultResources() *Resources { 773 return &Resources{ 774 CPU: 100, 775 MemoryMB: 10, 776 IOPS: 0, 777 } 778 } 779 780 // DiskInBytes returns the amount of disk resources in bytes. 781 func (r *Resources) DiskInBytes() int64 { 782 return int64(r.DiskMB * BytesInMegabyte) 783 } 784 785 // Merge merges this resource with another resource. 
786 func (r *Resources) Merge(other *Resources) { 787 if other.CPU != 0 { 788 r.CPU = other.CPU 789 } 790 if other.MemoryMB != 0 { 791 r.MemoryMB = other.MemoryMB 792 } 793 if other.DiskMB != 0 { 794 r.DiskMB = other.DiskMB 795 } 796 if other.IOPS != 0 { 797 r.IOPS = other.IOPS 798 } 799 if len(other.Networks) != 0 { 800 r.Networks = other.Networks 801 } 802 } 803 804 func (r *Resources) Canonicalize() { 805 // Ensure that an empty and nil slices are treated the same to avoid scheduling 806 // problems since we use reflect DeepEquals. 807 if len(r.Networks) == 0 { 808 r.Networks = nil 809 } 810 811 for _, n := range r.Networks { 812 n.Canonicalize() 813 } 814 } 815 816 // MeetsMinResources returns an error if the resources specified are less than 817 // the minimum allowed. 818 func (r *Resources) MeetsMinResources() error { 819 var mErr multierror.Error 820 if r.CPU < 20 { 821 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is 20; got %d", r.CPU)) 822 } 823 if r.MemoryMB < 10 { 824 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is 10; got %d", r.MemoryMB)) 825 } 826 if r.IOPS < 0 { 827 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is 0; got %d", r.IOPS)) 828 } 829 for i, n := range r.Networks { 830 if err := n.MeetsMinResources(); err != nil { 831 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 832 } 833 } 834 835 return mErr.ErrorOrNil() 836 } 837 838 // Copy returns a deep copy of the resources 839 func (r *Resources) Copy() *Resources { 840 if r == nil { 841 return nil 842 } 843 newR := new(Resources) 844 *newR = *r 845 if r.Networks != nil { 846 n := len(r.Networks) 847 newR.Networks = make([]*NetworkResource, n) 848 for i := 0; i < n; i++ { 849 newR.Networks[i] = r.Networks[i].Copy() 850 } 851 } 852 return newR 853 } 854 855 // NetIndex finds the matching net index using device name 856 func (r *Resources) NetIndex(n *NetworkResource) int { 857 for idx, 
net := range r.Networks { 858 if net.Device == n.Device { 859 return idx 860 } 861 } 862 return -1 863 } 864 865 // Superset checks if one set of resources is a superset 866 // of another. This ignores network resources, and the NetworkIndex 867 // should be used for that. 868 func (r *Resources) Superset(other *Resources) (bool, string) { 869 if r.CPU < other.CPU { 870 return false, "cpu exhausted" 871 } 872 if r.MemoryMB < other.MemoryMB { 873 return false, "memory exhausted" 874 } 875 if r.DiskMB < other.DiskMB { 876 return false, "disk exhausted" 877 } 878 if r.IOPS < other.IOPS { 879 return false, "iops exhausted" 880 } 881 return true, "" 882 } 883 884 // Add adds the resources of the delta to this, potentially 885 // returning an error if not possible. 886 func (r *Resources) Add(delta *Resources) error { 887 if delta == nil { 888 return nil 889 } 890 r.CPU += delta.CPU 891 r.MemoryMB += delta.MemoryMB 892 r.DiskMB += delta.DiskMB 893 r.IOPS += delta.IOPS 894 895 for _, n := range delta.Networks { 896 // Find the matching interface by IP or CIDR 897 idx := r.NetIndex(n) 898 if idx == -1 { 899 r.Networks = append(r.Networks, n.Copy()) 900 } else { 901 r.Networks[idx].Add(n) 902 } 903 } 904 return nil 905 } 906 907 func (r *Resources) GoString() string { 908 return fmt.Sprintf("*%#v", *r) 909 } 910 911 type Port struct { 912 Label string 913 Value int `mapstructure:"static"` 914 } 915 916 // NetworkResource is used to represent available network 917 // resources 918 type NetworkResource struct { 919 Device string // Name of the device 920 CIDR string // CIDR block of addresses 921 IP string // IP address 922 MBits int // Throughput 923 ReservedPorts []Port // Reserved ports 924 DynamicPorts []Port // Dynamically assigned ports 925 } 926 927 func (n *NetworkResource) Canonicalize() { 928 // Ensure that an empty and nil slices are treated the same to avoid scheduling 929 // problems since we use reflect DeepEquals. 
930 if len(n.ReservedPorts) == 0 { 931 n.ReservedPorts = nil 932 } 933 if len(n.DynamicPorts) == 0 { 934 n.DynamicPorts = nil 935 } 936 } 937 938 // MeetsMinResources returns an error if the resources specified are less than 939 // the minimum allowed. 940 func (n *NetworkResource) MeetsMinResources() error { 941 var mErr multierror.Error 942 if n.MBits < 1 { 943 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits)) 944 } 945 return mErr.ErrorOrNil() 946 } 947 948 // Copy returns a deep copy of the network resource 949 func (n *NetworkResource) Copy() *NetworkResource { 950 if n == nil { 951 return nil 952 } 953 newR := new(NetworkResource) 954 *newR = *n 955 if n.ReservedPorts != nil { 956 newR.ReservedPorts = make([]Port, len(n.ReservedPorts)) 957 copy(newR.ReservedPorts, n.ReservedPorts) 958 } 959 if n.DynamicPorts != nil { 960 newR.DynamicPorts = make([]Port, len(n.DynamicPorts)) 961 copy(newR.DynamicPorts, n.DynamicPorts) 962 } 963 return newR 964 } 965 966 // Add adds the resources of the delta to this, potentially 967 // returning an error if not possible. 968 func (n *NetworkResource) Add(delta *NetworkResource) { 969 if len(delta.ReservedPorts) > 0 { 970 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...) 971 } 972 n.MBits += delta.MBits 973 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...) 974 } 975 976 func (n *NetworkResource) GoString() string { 977 return fmt.Sprintf("*%#v", *n) 978 } 979 980 func (n *NetworkResource) MapLabelToValues(port_map map[string]int) map[string]int { 981 labelValues := make(map[string]int) 982 ports := append(n.ReservedPorts, n.DynamicPorts...) 
983 for _, port := range ports { 984 if mapping, ok := port_map[port.Label]; ok { 985 labelValues[port.Label] = mapping 986 } else { 987 labelValues[port.Label] = port.Value 988 } 989 } 990 return labelValues 991 } 992 993 const ( 994 // JobTypeNomad is reserved for internal system tasks and is 995 // always handled by the CoreScheduler. 996 JobTypeCore = "_core" 997 JobTypeService = "service" 998 JobTypeBatch = "batch" 999 JobTypeSystem = "system" 1000 ) 1001 1002 const ( 1003 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 1004 JobStatusRunning = "running" // Running means the job has non-terminal allocations 1005 JobStatusDead = "dead" // Dead means all evaluation's and allocations are terminal 1006 ) 1007 1008 const ( 1009 // JobMinPriority is the minimum allowed priority 1010 JobMinPriority = 1 1011 1012 // JobDefaultPriority is the default priority if not 1013 // not specified. 1014 JobDefaultPriority = 50 1015 1016 // JobMaxPriority is the maximum allowed priority 1017 JobMaxPriority = 100 1018 1019 // Ensure CoreJobPriority is higher than any user 1020 // specified job so that it gets priority. This is important 1021 // for the system to remain healthy. 
1022 CoreJobPriority = JobMaxPriority * 2 1023 ) 1024 1025 // JobSummary summarizes the state of the allocations of a job 1026 type JobSummary struct { 1027 JobID string 1028 Summary map[string]TaskGroupSummary 1029 1030 // Raft Indexes 1031 CreateIndex uint64 1032 ModifyIndex uint64 1033 } 1034 1035 // Copy returns a new copy of JobSummary 1036 func (js *JobSummary) Copy() *JobSummary { 1037 newJobSummary := new(JobSummary) 1038 *newJobSummary = *js 1039 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary)) 1040 for k, v := range js.Summary { 1041 newTGSummary[k] = v 1042 } 1043 newJobSummary.Summary = newTGSummary 1044 return newJobSummary 1045 } 1046 1047 // TaskGroup summarizes the state of all the allocations of a particular 1048 // TaskGroup 1049 type TaskGroupSummary struct { 1050 Queued int 1051 Complete int 1052 Failed int 1053 Running int 1054 Starting int 1055 Lost int 1056 } 1057 1058 // Job is the scope of a scheduling request to Nomad. It is the largest 1059 // scoped object, and is a named collection of task groups. Each task group 1060 // is further composed of tasks. A task group (TG) is the unit of scheduling 1061 // however. 1062 type Job struct { 1063 // Region is the Nomad region that handles scheduling this job 1064 Region string 1065 1066 // ID is a unique identifier for the job per region. It can be 1067 // specified hierarchically like LineOfBiz/OrgName/Team/Project 1068 ID string 1069 1070 // ParentID is the unique identifier of the job that spawned this job. 1071 ParentID string 1072 1073 // Name is the logical name of the job used to refer to it. This is unique 1074 // per region, but not unique globally. 1075 Name string 1076 1077 // Type is used to control various behaviors about the job. Most jobs 1078 // are service jobs, meaning they are expected to be long lived. 1079 // Some jobs are batch oriented meaning they run and then terminate. 1080 // This can be extended in the future to support custom schedulers. 
1081 Type string 1082 1083 // Priority is used to control scheduling importance and if this job 1084 // can preempt other jobs. 1085 Priority int 1086 1087 // AllAtOnce is used to control if incremental scheduling of task groups 1088 // is allowed or if we must do a gang scheduling of the entire job. This 1089 // can slow down larger jobs if resources are not available. 1090 AllAtOnce bool `mapstructure:"all_at_once"` 1091 1092 // Datacenters contains all the datacenters this job is allowed to span 1093 Datacenters []string 1094 1095 // Constraints can be specified at a job level and apply to 1096 // all the task groups and tasks. 1097 Constraints []*Constraint 1098 1099 // TaskGroups are the collections of task groups that this job needs 1100 // to run. Each task group is an atomic unit of scheduling and placement. 1101 TaskGroups []*TaskGroup 1102 1103 // Update is used to control the update strategy 1104 Update UpdateStrategy 1105 1106 // Periodic is used to define the interval the job is run at. 1107 Periodic *PeriodicConfig 1108 1109 // Meta is used to associate arbitrary metadata with this 1110 // job. This is opaque to Nomad. 1111 Meta map[string]string 1112 1113 // VaultToken is the Vault token that proves the submitter of the job has 1114 // access to the specified Vault policies. This field is only used to 1115 // transfer the token and is not stored after Job submission. 1116 VaultToken string `mapstructure:"vault_token"` 1117 1118 // Job status 1119 Status string 1120 1121 // StatusDescription is meant to provide more human useful information 1122 StatusDescription string 1123 1124 // Raft Indexes 1125 CreateIndex uint64 1126 ModifyIndex uint64 1127 JobModifyIndex uint64 1128 } 1129 1130 // Canonicalize is used to canonicalize fields in the Job. This should be called 1131 // when registering a Job. 
1132 func (j *Job) Canonicalize() { 1133 // Ensure that an empty and nil map are treated the same to avoid scheduling 1134 // problems since we use reflect DeepEquals. 1135 if len(j.Meta) == 0 { 1136 j.Meta = nil 1137 } 1138 1139 for _, tg := range j.TaskGroups { 1140 tg.Canonicalize(j) 1141 } 1142 } 1143 1144 // Copy returns a deep copy of the Job. It is expected that callers use recover. 1145 // This job can panic if the deep copy failed as it uses reflection. 1146 func (j *Job) Copy() *Job { 1147 if j == nil { 1148 return nil 1149 } 1150 nj := new(Job) 1151 *nj = *j 1152 nj.Datacenters = CopySliceString(nj.Datacenters) 1153 nj.Constraints = CopySliceConstraints(nj.Constraints) 1154 1155 if j.TaskGroups != nil { 1156 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 1157 for i, tg := range nj.TaskGroups { 1158 tgs[i] = tg.Copy() 1159 } 1160 nj.TaskGroups = tgs 1161 } 1162 1163 nj.Periodic = nj.Periodic.Copy() 1164 nj.Meta = CopyMapStringString(nj.Meta) 1165 return nj 1166 } 1167 1168 // Validate is used to sanity check a job input 1169 func (j *Job) Validate() error { 1170 var mErr multierror.Error 1171 if j.Region == "" { 1172 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 1173 } 1174 if j.ID == "" { 1175 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 1176 } else if strings.Contains(j.ID, " ") { 1177 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 1178 } 1179 if j.Name == "" { 1180 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 1181 } 1182 if j.Type == "" { 1183 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 1184 } 1185 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 1186 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 1187 } 1188 if len(j.Datacenters) == 0 { 1189 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 1190 } 1191 if len(j.TaskGroups) == 0 { 1192 
mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 1193 } 1194 for idx, constr := range j.Constraints { 1195 if err := constr.Validate(); err != nil { 1196 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1197 mErr.Errors = append(mErr.Errors, outer) 1198 } 1199 } 1200 1201 // Check for duplicate task groups 1202 taskGroups := make(map[string]int) 1203 for idx, tg := range j.TaskGroups { 1204 if tg.Name == "" { 1205 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 1206 } else if existing, ok := taskGroups[tg.Name]; ok { 1207 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 1208 } else { 1209 taskGroups[tg.Name] = idx 1210 } 1211 1212 if j.Type == "system" && tg.Count > 1 { 1213 mErr.Errors = append(mErr.Errors, 1214 fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler", 1215 tg.Name, tg.Count)) 1216 } 1217 } 1218 1219 // Validate the task group 1220 for _, tg := range j.TaskGroups { 1221 if err := tg.Validate(); err != nil { 1222 outer := fmt.Errorf("Task group %s validation failed: %s", tg.Name, err) 1223 mErr.Errors = append(mErr.Errors, outer) 1224 } 1225 } 1226 1227 // Validate periodic is only used with batch jobs. 
1228 if j.IsPeriodic() && j.Periodic.Enabled { 1229 if j.Type != JobTypeBatch { 1230 mErr.Errors = append(mErr.Errors, 1231 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 1232 } 1233 1234 if err := j.Periodic.Validate(); err != nil { 1235 mErr.Errors = append(mErr.Errors, err) 1236 } 1237 } 1238 1239 return mErr.ErrorOrNil() 1240 } 1241 1242 // LookupTaskGroup finds a task group by name 1243 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 1244 for _, tg := range j.TaskGroups { 1245 if tg.Name == name { 1246 return tg 1247 } 1248 } 1249 return nil 1250 } 1251 1252 // Stub is used to return a summary of the job 1253 func (j *Job) Stub(summary *JobSummary) *JobListStub { 1254 return &JobListStub{ 1255 ID: j.ID, 1256 ParentID: j.ParentID, 1257 Name: j.Name, 1258 Type: j.Type, 1259 Priority: j.Priority, 1260 Status: j.Status, 1261 StatusDescription: j.StatusDescription, 1262 CreateIndex: j.CreateIndex, 1263 ModifyIndex: j.ModifyIndex, 1264 JobModifyIndex: j.JobModifyIndex, 1265 JobSummary: summary, 1266 } 1267 } 1268 1269 // IsPeriodic returns whether a job is periodic. 
1270 func (j *Job) IsPeriodic() bool { 1271 return j.Periodic != nil 1272 } 1273 1274 // VaultPolicies returns the set of Vault policies per task group, per task 1275 func (j *Job) VaultPolicies() map[string]map[string]*Vault { 1276 policies := make(map[string]map[string]*Vault, len(j.TaskGroups)) 1277 1278 for _, tg := range j.TaskGroups { 1279 tgPolicies := make(map[string]*Vault, len(tg.Tasks)) 1280 policies[tg.Name] = tgPolicies 1281 1282 for _, task := range tg.Tasks { 1283 if task.Vault == nil { 1284 continue 1285 } 1286 1287 tgPolicies[task.Name] = task.Vault 1288 } 1289 } 1290 1291 return policies 1292 } 1293 1294 // JobListStub is used to return a subset of job information 1295 // for the job list 1296 type JobListStub struct { 1297 ID string 1298 ParentID string 1299 Name string 1300 Type string 1301 Priority int 1302 Status string 1303 StatusDescription string 1304 JobSummary *JobSummary 1305 CreateIndex uint64 1306 ModifyIndex uint64 1307 JobModifyIndex uint64 1308 } 1309 1310 // UpdateStrategy is used to modify how updates are done 1311 type UpdateStrategy struct { 1312 // Stagger is the amount of time between the updates 1313 Stagger time.Duration 1314 1315 // MaxParallel is how many updates can be done in parallel 1316 MaxParallel int `mapstructure:"max_parallel"` 1317 } 1318 1319 // Rolling returns if a rolling strategy should be used 1320 func (u *UpdateStrategy) Rolling() bool { 1321 return u.Stagger > 0 && u.MaxParallel > 0 1322 } 1323 1324 const ( 1325 // PeriodicSpecCron is used for a cron spec. 1326 PeriodicSpecCron = "cron" 1327 1328 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 1329 // separated list of unix timestamps at which to launch. 1330 PeriodicSpecTest = "_internal_test" 1331 ) 1332 1333 // Periodic defines the interval a job should be run at. 1334 type PeriodicConfig struct { 1335 // Enabled determines if the job should be run periodically. 
1336 Enabled bool 1337 1338 // Spec specifies the interval the job should be run as. It is parsed based 1339 // on the SpecType. 1340 Spec string 1341 1342 // SpecType defines the format of the spec. 1343 SpecType string 1344 1345 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 1346 ProhibitOverlap bool `mapstructure:"prohibit_overlap"` 1347 } 1348 1349 func (p *PeriodicConfig) Copy() *PeriodicConfig { 1350 if p == nil { 1351 return nil 1352 } 1353 np := new(PeriodicConfig) 1354 *np = *p 1355 return np 1356 } 1357 1358 func (p *PeriodicConfig) Validate() error { 1359 if !p.Enabled { 1360 return nil 1361 } 1362 1363 if p.Spec == "" { 1364 return fmt.Errorf("Must specify a spec") 1365 } 1366 1367 switch p.SpecType { 1368 case PeriodicSpecCron: 1369 // Validate the cron spec 1370 if _, err := cronexpr.Parse(p.Spec); err != nil { 1371 return fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err) 1372 } 1373 case PeriodicSpecTest: 1374 // No-op 1375 default: 1376 return fmt.Errorf("Unknown periodic specification type %q", p.SpecType) 1377 } 1378 1379 return nil 1380 } 1381 1382 // Next returns the closest time instant matching the spec that is after the 1383 // passed time. If no matching instance exists, the zero value of time.Time is 1384 // returned. The `time.Location` of the returned value matches that of the 1385 // passed time. 
1386 func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { 1387 switch p.SpecType { 1388 case PeriodicSpecCron: 1389 if e, err := cronexpr.Parse(p.Spec); err == nil { 1390 return e.Next(fromTime) 1391 } 1392 case PeriodicSpecTest: 1393 split := strings.Split(p.Spec, ",") 1394 if len(split) == 1 && split[0] == "" { 1395 return time.Time{} 1396 } 1397 1398 // Parse the times 1399 times := make([]time.Time, len(split)) 1400 for i, s := range split { 1401 unix, err := strconv.Atoi(s) 1402 if err != nil { 1403 return time.Time{} 1404 } 1405 1406 times[i] = time.Unix(int64(unix), 0) 1407 } 1408 1409 // Find the next match 1410 for _, next := range times { 1411 if fromTime.Before(next) { 1412 return next 1413 } 1414 } 1415 } 1416 1417 return time.Time{} 1418 } 1419 1420 const ( 1421 // PeriodicLaunchSuffix is the string appended to the periodic jobs ID 1422 // when launching derived instances of it. 1423 PeriodicLaunchSuffix = "/periodic-" 1424 ) 1425 1426 // PeriodicLaunch tracks the last launch time of a periodic job. 1427 type PeriodicLaunch struct { 1428 ID string // ID of the periodic job. 1429 Launch time.Time // The last launch time. 1430 1431 // Raft Indexes 1432 CreateIndex uint64 1433 ModifyIndex uint64 1434 } 1435 1436 var ( 1437 defaultServiceJobRestartPolicy = RestartPolicy{ 1438 Delay: 15 * time.Second, 1439 Attempts: 2, 1440 Interval: 1 * time.Minute, 1441 Mode: RestartPolicyModeDelay, 1442 } 1443 defaultBatchJobRestartPolicy = RestartPolicy{ 1444 Delay: 15 * time.Second, 1445 Attempts: 15, 1446 Interval: 7 * 24 * time.Hour, 1447 Mode: RestartPolicyModeDelay, 1448 } 1449 ) 1450 1451 const ( 1452 // RestartPolicyModeDelay causes an artificial delay till the next interval is 1453 // reached when the specified attempts have been reached in the interval. 1454 RestartPolicyModeDelay = "delay" 1455 1456 // RestartPolicyModeFail causes a job to fail if the specified number of 1457 // attempts are reached within an interval. 
1458 RestartPolicyModeFail = "fail" 1459 ) 1460 1461 // RestartPolicy configures how Tasks are restarted when they crash or fail. 1462 type RestartPolicy struct { 1463 // Attempts is the number of restart that will occur in an interval. 1464 Attempts int 1465 1466 // Interval is a duration in which we can limit the number of restarts 1467 // within. 1468 Interval time.Duration 1469 1470 // Delay is the time between a failure and a restart. 1471 Delay time.Duration 1472 1473 // Mode controls what happens when the task restarts more than attempt times 1474 // in an interval. 1475 Mode string 1476 } 1477 1478 func (r *RestartPolicy) Copy() *RestartPolicy { 1479 if r == nil { 1480 return nil 1481 } 1482 nrp := new(RestartPolicy) 1483 *nrp = *r 1484 return nrp 1485 } 1486 1487 func (r *RestartPolicy) Validate() error { 1488 switch r.Mode { 1489 case RestartPolicyModeDelay, RestartPolicyModeFail: 1490 default: 1491 return fmt.Errorf("Unsupported restart mode: %q", r.Mode) 1492 } 1493 1494 // Check for ambiguous/confusing settings 1495 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 1496 return fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts) 1497 } 1498 1499 if r.Interval == 0 { 1500 return nil 1501 } 1502 if time.Duration(r.Attempts)*r.Delay > r.Interval { 1503 return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay) 1504 } 1505 return nil 1506 } 1507 1508 func NewRestartPolicy(jobType string) *RestartPolicy { 1509 switch jobType { 1510 case JobTypeService, JobTypeSystem: 1511 rp := defaultServiceJobRestartPolicy 1512 return &rp 1513 case JobTypeBatch: 1514 rp := defaultBatchJobRestartPolicy 1515 return &rp 1516 } 1517 return nil 1518 } 1519 1520 // TaskGroup is an atomic unit of placement. Each task group belongs to 1521 // a job and may contain any number of tasks. A task group support running 1522 // in many replicas using the same configuration.. 
1523 type TaskGroup struct { 1524 // Name of the task group 1525 Name string 1526 1527 // Count is the number of replicas of this task group that should 1528 // be scheduled. 1529 Count int 1530 1531 // Constraints can be specified at a task group level and apply to 1532 // all the tasks contained. 1533 Constraints []*Constraint 1534 1535 //RestartPolicy of a TaskGroup 1536 RestartPolicy *RestartPolicy 1537 1538 // Tasks are the collection of tasks that this task group needs to run 1539 Tasks []*Task 1540 1541 // EphemeralDisk is the disk resources that the task group requests 1542 EphemeralDisk *EphemeralDisk 1543 1544 // Meta is used to associate arbitrary metadata with this 1545 // task group. This is opaque to Nomad. 1546 Meta map[string]string 1547 } 1548 1549 func (tg *TaskGroup) Copy() *TaskGroup { 1550 if tg == nil { 1551 return nil 1552 } 1553 ntg := new(TaskGroup) 1554 *ntg = *tg 1555 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 1556 1557 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 1558 1559 if tg.Tasks != nil { 1560 tasks := make([]*Task, len(ntg.Tasks)) 1561 for i, t := range ntg.Tasks { 1562 tasks[i] = t.Copy() 1563 } 1564 ntg.Tasks = tasks 1565 } 1566 1567 ntg.Meta = CopyMapStringString(ntg.Meta) 1568 1569 if tg.EphemeralDisk != nil { 1570 ntg.EphemeralDisk = tg.EphemeralDisk.Copy() 1571 } 1572 return ntg 1573 } 1574 1575 // Canonicalize is used to canonicalize fields in the TaskGroup. 1576 func (tg *TaskGroup) Canonicalize(job *Job) { 1577 // Ensure that an empty and nil map are treated the same to avoid scheduling 1578 // problems since we use reflect DeepEquals. 1579 if len(tg.Meta) == 0 { 1580 tg.Meta = nil 1581 } 1582 1583 // Set the default restart policy. 
1584 if tg.RestartPolicy == nil { 1585 tg.RestartPolicy = NewRestartPolicy(job.Type) 1586 } 1587 1588 for _, task := range tg.Tasks { 1589 task.Canonicalize(job, tg) 1590 } 1591 } 1592 1593 // Validate is used to sanity check a task group 1594 func (tg *TaskGroup) Validate() error { 1595 var mErr multierror.Error 1596 if tg.Name == "" { 1597 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 1598 } 1599 if tg.Count < 0 { 1600 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative")) 1601 } 1602 if len(tg.Tasks) == 0 { 1603 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 1604 } 1605 for idx, constr := range tg.Constraints { 1606 if err := constr.Validate(); err != nil { 1607 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1608 mErr.Errors = append(mErr.Errors, outer) 1609 } 1610 } 1611 1612 if tg.RestartPolicy != nil { 1613 if err := tg.RestartPolicy.Validate(); err != nil { 1614 mErr.Errors = append(mErr.Errors, err) 1615 } 1616 } else { 1617 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 1618 } 1619 1620 if tg.EphemeralDisk != nil { 1621 if err := tg.EphemeralDisk.Validate(); err != nil { 1622 mErr.Errors = append(mErr.Errors, err) 1623 } 1624 } else { 1625 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a local disk object", tg.Name)) 1626 } 1627 1628 // Check for duplicate tasks 1629 tasks := make(map[string]int) 1630 for idx, task := range tg.Tasks { 1631 if task.Name == "" { 1632 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 1633 } else if existing, ok := tasks[task.Name]; ok { 1634 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 1635 } else { 1636 tasks[task.Name] = idx 1637 } 1638 } 1639 1640 // Validate the tasks 1641 for _, task := range tg.Tasks { 1642 if err := 
task.Validate(tg.EphemeralDisk); err != nil { 1643 outer := fmt.Errorf("Task %s validation failed: %s", task.Name, err) 1644 mErr.Errors = append(mErr.Errors, outer) 1645 } 1646 } 1647 return mErr.ErrorOrNil() 1648 } 1649 1650 // LookupTask finds a task by name 1651 func (tg *TaskGroup) LookupTask(name string) *Task { 1652 for _, t := range tg.Tasks { 1653 if t.Name == name { 1654 return t 1655 } 1656 } 1657 return nil 1658 } 1659 1660 func (tg *TaskGroup) GoString() string { 1661 return fmt.Sprintf("*%#v", *tg) 1662 } 1663 1664 const ( 1665 // TODO add Consul TTL check 1666 ServiceCheckHTTP = "http" 1667 ServiceCheckTCP = "tcp" 1668 ServiceCheckScript = "script" 1669 1670 // minCheckInterval is the minimum check interval permitted. Consul 1671 // currently has its MinInterval set to 1s. Mirror that here for 1672 // consistency. 1673 minCheckInterval = 1 * time.Second 1674 1675 // minCheckTimeout is the minimum check timeout permitted for Consul 1676 // script TTL checks. 1677 minCheckTimeout = 1 * time.Second 1678 ) 1679 1680 // The ServiceCheck data model represents the consul health check that 1681 // Nomad registers for a Task 1682 type ServiceCheck struct { 1683 Name string // Name of the check, defaults to id 1684 Type string // Type of the check - tcp, http, docker and script 1685 Command string // Command is the command to run for script checks 1686 Args []string // Args is a list of argumes for script checks 1687 Path string // path of the health check url for http type check 1688 Protocol string // Protocol to use if check is http, defaults to http 1689 PortLabel string `mapstructure:"port"` // The port to use for tcp/http checks 1690 Interval time.Duration // Interval of the check 1691 Timeout time.Duration // Timeout of the response from the check before consul fails the check 1692 InitialStatus string `mapstructure:"initial_status"` // Initial status of the check 1693 } 1694 1695 func (sc *ServiceCheck) Copy() *ServiceCheck { 1696 if sc == nil { 1697 
return nil 1698 } 1699 nsc := new(ServiceCheck) 1700 *nsc = *sc 1701 return nsc 1702 } 1703 1704 func (sc *ServiceCheck) Canonicalize(serviceName string) { 1705 // Ensure empty slices are treated as null to avoid scheduling issues when 1706 // using DeepEquals. 1707 if len(sc.Args) == 0 { 1708 sc.Args = nil 1709 } 1710 1711 if sc.Name == "" { 1712 sc.Name = fmt.Sprintf("service: %q check", serviceName) 1713 } 1714 } 1715 1716 // validate a Service's ServiceCheck 1717 func (sc *ServiceCheck) validate() error { 1718 switch strings.ToLower(sc.Type) { 1719 case ServiceCheckTCP: 1720 if sc.Timeout < minCheckTimeout { 1721 return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval) 1722 } 1723 case ServiceCheckHTTP: 1724 if sc.Path == "" { 1725 return fmt.Errorf("http type must have a valid http path") 1726 } 1727 1728 if sc.Timeout < minCheckTimeout { 1729 return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval) 1730 } 1731 case ServiceCheckScript: 1732 if sc.Command == "" { 1733 return fmt.Errorf("script type must have a valid script path") 1734 } 1735 1736 // TODO: enforce timeout on the Client side and reenable 1737 // validation. 
1738 default: 1739 return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type) 1740 } 1741 1742 if sc.Interval < minCheckInterval { 1743 return fmt.Errorf("interval (%v) can not be lower than %v", sc.Interval, minCheckInterval) 1744 } 1745 1746 switch sc.InitialStatus { 1747 case "": 1748 case api.HealthUnknown: 1749 case api.HealthPassing: 1750 case api.HealthWarning: 1751 case api.HealthCritical: 1752 default: 1753 return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q, %q or empty`, sc.InitialStatus, api.HealthUnknown, api.HealthPassing, api.HealthWarning, api.HealthCritical) 1754 1755 } 1756 1757 return nil 1758 } 1759 1760 // RequiresPort returns whether the service check requires the task has a port. 1761 func (sc *ServiceCheck) RequiresPort() bool { 1762 switch sc.Type { 1763 case ServiceCheckHTTP, ServiceCheckTCP: 1764 return true 1765 default: 1766 return false 1767 } 1768 } 1769 1770 func (sc *ServiceCheck) Hash(serviceID string) string { 1771 h := sha1.New() 1772 io.WriteString(h, serviceID) 1773 io.WriteString(h, sc.Name) 1774 io.WriteString(h, sc.Type) 1775 io.WriteString(h, sc.Command) 1776 io.WriteString(h, strings.Join(sc.Args, "")) 1777 io.WriteString(h, sc.Path) 1778 io.WriteString(h, sc.Protocol) 1779 io.WriteString(h, sc.PortLabel) 1780 io.WriteString(h, sc.Interval.String()) 1781 io.WriteString(h, sc.Timeout.String()) 1782 return fmt.Sprintf("%x", h.Sum(nil)) 1783 } 1784 1785 // Service represents a Consul service definition in Nomad 1786 type Service struct { 1787 // Name of the service registered with Consul. Consul defaults the 1788 // Name to ServiceID if not specified. The Name if specified is used 1789 // as one of the seed values when generating a Consul ServiceID. 1790 Name string 1791 1792 // PortLabel is either the numeric port number or the `host:port`. 
1793 // To specify the port number using the host's Consul Advertise 1794 // address, specify an empty host in the PortLabel (e.g. `:port`). 1795 PortLabel string `mapstructure:"port"` 1796 Tags []string // List of tags for the service 1797 Checks []*ServiceCheck // List of checks associated with the service 1798 } 1799 1800 func (s *Service) Copy() *Service { 1801 if s == nil { 1802 return nil 1803 } 1804 ns := new(Service) 1805 *ns = *s 1806 ns.Tags = CopySliceString(ns.Tags) 1807 1808 if s.Checks != nil { 1809 checks := make([]*ServiceCheck, len(ns.Checks)) 1810 for i, c := range ns.Checks { 1811 checks[i] = c.Copy() 1812 } 1813 ns.Checks = checks 1814 } 1815 1816 return ns 1817 } 1818 1819 // Canonicalize interpolates values of Job, Task Group and Task in the Service 1820 // Name. This also generates check names, service id and check ids. 1821 func (s *Service) Canonicalize(job string, taskGroup string, task string) { 1822 // Ensure empty lists are treated as null to avoid scheduler issues when 1823 // using DeepEquals 1824 if len(s.Tags) == 0 { 1825 s.Tags = nil 1826 } 1827 if len(s.Checks) == 0 { 1828 s.Checks = nil 1829 } 1830 1831 s.Name = args.ReplaceEnv(s.Name, map[string]string{ 1832 "JOB": job, 1833 "TASKGROUP": taskGroup, 1834 "TASK": task, 1835 "BASE": fmt.Sprintf("%s-%s-%s", job, taskGroup, task), 1836 }, 1837 ) 1838 1839 for _, check := range s.Checks { 1840 check.Canonicalize(s.Name) 1841 } 1842 } 1843 1844 // Validate checks if the Check definition is valid 1845 func (s *Service) Validate() error { 1846 var mErr multierror.Error 1847 1848 // Ensure the service name is valid per RFC-952 §1 1849 // (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1 1850 // (https://tools.ietf.org/html/rfc1123), and RFC-2782 1851 // (https://tools.ietf.org/html/rfc2782). 
1852 re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`) 1853 if !re.MatchString(s.Name) { 1854 mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be less than 63 characters long: %q", s.Name)) 1855 } 1856 1857 for _, c := range s.Checks { 1858 if s.PortLabel == "" && c.RequiresPort() { 1859 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but the service %+q has no port", c.Name, s.Name)) 1860 continue 1861 } 1862 1863 if err := c.validate(); err != nil { 1864 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err)) 1865 } 1866 } 1867 return mErr.ErrorOrNil() 1868 } 1869 1870 // Hash calculates the hash of the check based on it's content and the service 1871 // which owns it 1872 func (s *Service) Hash() string { 1873 h := sha1.New() 1874 io.WriteString(h, s.Name) 1875 io.WriteString(h, strings.Join(s.Tags, "")) 1876 io.WriteString(h, s.PortLabel) 1877 return fmt.Sprintf("%x", h.Sum(nil)) 1878 } 1879 1880 const ( 1881 // DefaultKillTimeout is the default timeout between signaling a task it 1882 // will be killed and killing it. 
1883 DefaultKillTimeout = 5 * time.Second 1884 ) 1885 1886 // LogConfig provides configuration for log rotation 1887 type LogConfig struct { 1888 MaxFiles int `mapstructure:"max_files"` 1889 MaxFileSizeMB int `mapstructure:"max_file_size"` 1890 LogShuttleConfig *LogShuttleConfig 1891 } 1892 1893 // LogShuttleConfig configures log-shuttle log delivery 1894 type LogShuttleConfig struct { 1895 UseGzip bool 1896 Drop bool 1897 Prival string 1898 Version string 1899 Procid string 1900 Appname string 1901 LogplexToken string 1902 Hostname string 1903 Msgid string 1904 LogsURL string 1905 StatsSource string 1906 StatsInterval time.Duration 1907 WaitDuration time.Duration 1908 Timeout time.Duration 1909 MaxAttempts int 1910 NumOutlets int 1911 BatchSize int 1912 BackBuff int 1913 MaxLineLength int 1914 KinesisShards int 1915 } 1916 1917 // DefaultLogConfig returns the default LogConfig values. 1918 func DefaultLogConfig() *LogConfig { 1919 return &LogConfig{ 1920 MaxFiles: 10, 1921 MaxFileSizeMB: 10, 1922 } 1923 } 1924 1925 // Validate returns an error if the log config specified are less than 1926 // the minimum allowed. 1927 func (l *LogConfig) Validate() error { 1928 var mErr multierror.Error 1929 if l.MaxFiles < 1 { 1930 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles)) 1931 } 1932 if l.MaxFileSizeMB < 1 { 1933 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB)) 1934 } 1935 return mErr.ErrorOrNil() 1936 } 1937 1938 // Task is a single process typically that is executed as part of a task group. 1939 type Task struct { 1940 // Name of the task 1941 Name string 1942 1943 // Driver is used to control which driver is used 1944 Driver string 1945 1946 // User is used to determine which user will run the task. It defaults to 1947 // the same user the Nomad client is being run as. 
1948 User string 1949 1950 // Config is provided to the driver to initialize 1951 Config map[string]interface{} 1952 1953 // Map of environment variables to be used by the driver 1954 Env map[string]string 1955 1956 // Only use explicitly set Env variables in task environment 1957 ExcludeNomadEnv bool 1958 1959 // List of service definitions exposed by the Task 1960 Services []*Service 1961 1962 // Vault is used to define the set of Vault policies that this task should 1963 // have access to. 1964 Vault *Vault 1965 1966 // Constraints can be specified at a task level and apply only to 1967 // the particular task. 1968 Constraints []*Constraint 1969 1970 // Resources is the resources needed by this task 1971 Resources *Resources 1972 1973 // Meta is used to associate arbitrary metadata with this 1974 // task. This is opaque to Nomad. 1975 Meta map[string]string 1976 1977 // KillTimeout is the time between signaling a task that it will be 1978 // killed and killing it. 1979 KillTimeout time.Duration `mapstructure:"kill_timeout"` 1980 1981 // LogConfig provides configuration for log rotation 1982 LogConfig *LogConfig `mapstructure:"logs"` 1983 1984 // Artifacts is a list of artifacts to download and extract before running 1985 // the task. 
1986 Artifacts []*TaskArtifact 1987 } 1988 1989 func (t *Task) Copy() *Task { 1990 if t == nil { 1991 return nil 1992 } 1993 nt := new(Task) 1994 *nt = *t 1995 nt.Env = CopyMapStringString(nt.Env) 1996 1997 if t.Services != nil { 1998 services := make([]*Service, len(nt.Services)) 1999 for i, s := range nt.Services { 2000 services[i] = s.Copy() 2001 } 2002 nt.Services = services 2003 } 2004 2005 nt.Constraints = CopySliceConstraints(nt.Constraints) 2006 2007 nt.Vault = nt.Vault.Copy() 2008 nt.Resources = nt.Resources.Copy() 2009 nt.Meta = CopyMapStringString(nt.Meta) 2010 2011 if t.Artifacts != nil { 2012 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 2013 for _, a := range nt.Artifacts { 2014 artifacts = append(artifacts, a.Copy()) 2015 } 2016 nt.Artifacts = artifacts 2017 } 2018 2019 if i, err := copystructure.Copy(nt.Config); err != nil { 2020 nt.Config = i.(map[string]interface{}) 2021 } 2022 2023 return nt 2024 } 2025 2026 // Canonicalize canonicalizes fields in the task. 2027 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 2028 // Ensure that an empty and nil map are treated the same to avoid scheduling 2029 // problems since we use reflect DeepEquals. 2030 if len(t.Meta) == 0 { 2031 t.Meta = nil 2032 } 2033 if len(t.Config) == 0 { 2034 t.Config = nil 2035 } 2036 if len(t.Env) == 0 { 2037 t.Env = nil 2038 } 2039 2040 for _, service := range t.Services { 2041 service.Canonicalize(job.Name, tg.Name, t.Name) 2042 } 2043 2044 if t.Resources != nil { 2045 t.Resources.Canonicalize() 2046 } 2047 2048 // Set the default timeout if it is not specified. 
2049 if t.KillTimeout == 0 { 2050 t.KillTimeout = DefaultKillTimeout 2051 } 2052 } 2053 2054 func (t *Task) GoString() string { 2055 return fmt.Sprintf("*%#v", *t) 2056 } 2057 2058 func (t *Task) FindHostAndPortFor(portLabel string) (string, int) { 2059 for _, network := range t.Resources.Networks { 2060 if p, ok := network.MapLabelToValues(nil)[portLabel]; ok { 2061 return network.IP, p 2062 } 2063 } 2064 return "", 0 2065 } 2066 2067 // Validate is used to sanity check a task 2068 func (t *Task) Validate(ephemeralDisk *EphemeralDisk) error { 2069 var mErr multierror.Error 2070 if t.Name == "" { 2071 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 2072 } 2073 if strings.ContainsAny(t.Name, `/\`) { 2074 // We enforce this so that when creating the directory on disk it will 2075 // not have any slashes. 2076 mErr.Errors = append(mErr.Errors, errors.New("Task name can not include slashes")) 2077 } 2078 if t.Driver == "" { 2079 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 2080 } 2081 if t.KillTimeout.Nanoseconds() < 0 { 2082 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 2083 } 2084 2085 // Validate the resources. 
2086 if t.Resources == nil { 2087 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 2088 } else if err := t.Resources.MeetsMinResources(); err != nil { 2089 mErr.Errors = append(mErr.Errors, err) 2090 } 2091 2092 // Ensure the task isn't asking for disk resources 2093 if t.Resources != nil { 2094 if t.Resources.DiskMB > 0 { 2095 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 2096 } 2097 } 2098 2099 // Validate the log config 2100 if t.LogConfig == nil { 2101 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 2102 } else if err := t.LogConfig.Validate(); err != nil { 2103 mErr.Errors = append(mErr.Errors, err) 2104 } 2105 2106 for idx, constr := range t.Constraints { 2107 if err := constr.Validate(); err != nil { 2108 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 2109 mErr.Errors = append(mErr.Errors, outer) 2110 } 2111 } 2112 2113 // Validate Services 2114 if err := validateServices(t); err != nil { 2115 mErr.Errors = append(mErr.Errors, err) 2116 } 2117 2118 if t.LogConfig != nil && ephemeralDisk != nil { 2119 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 2120 if ephemeralDisk.SizeMB <= logUsage { 2121 mErr.Errors = append(mErr.Errors, 2122 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 2123 logUsage, ephemeralDisk.SizeMB)) 2124 } 2125 } 2126 2127 for idx, artifact := range t.Artifacts { 2128 if err := artifact.Validate(); err != nil { 2129 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 2130 mErr.Errors = append(mErr.Errors, outer) 2131 } 2132 } 2133 2134 if t.Vault != nil { 2135 if err := t.Vault.Validate(); err != nil { 2136 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 2137 } 2138 } 2139 2140 return mErr.ErrorOrNil() 2141 } 2142 2143 // validateServices takes a task and validates the services within it 
are valid 2144 // and reference ports that exist. 2145 func validateServices(t *Task) error { 2146 var mErr multierror.Error 2147 2148 // Ensure that services don't ask for non-existent ports and their names are 2149 // unique. 2150 servicePorts := make(map[string][]string) 2151 knownServices := make(map[string]struct{}) 2152 for i, service := range t.Services { 2153 if err := service.Validate(); err != nil { 2154 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 2155 mErr.Errors = append(mErr.Errors, outer) 2156 } 2157 if _, ok := knownServices[service.Name]; ok { 2158 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 2159 } 2160 knownServices[service.Name] = struct{}{} 2161 2162 if service.PortLabel != "" { 2163 servicePorts[service.PortLabel] = append(servicePorts[service.PortLabel], service.Name) 2164 } 2165 2166 // Ensure that check names are unique. 2167 knownChecks := make(map[string]struct{}) 2168 for _, check := range service.Checks { 2169 if _, ok := knownChecks[check.Name]; ok { 2170 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 2171 } 2172 knownChecks[check.Name] = struct{}{} 2173 } 2174 } 2175 2176 // Get the set of port labels. 2177 portLabels := make(map[string]struct{}) 2178 if t.Resources != nil { 2179 for _, network := range t.Resources.Networks { 2180 ports := network.MapLabelToValues(nil) 2181 for portLabel, _ := range ports { 2182 portLabels[portLabel] = struct{}{} 2183 } 2184 } 2185 } 2186 2187 // Ensure all ports referenced in services exist. 2188 for servicePort, services := range servicePorts { 2189 _, ok := portLabels[servicePort] 2190 if !ok { 2191 joined := strings.Join(services, ", ") 2192 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 2193 mErr.Errors = append(mErr.Errors, err) 2194 } 2195 } 2196 return mErr.ErrorOrNil() 2197 } 2198 2199 // Set of possible states for a task. 
2200 const ( 2201 TaskStatePending = "pending" // The task is waiting to be run. 2202 TaskStateRunning = "running" // The task is currently running. 2203 TaskStateDead = "dead" // Terminal state of task. 2204 ) 2205 2206 // TaskState tracks the current state of a task and events that caused state 2207 // transitions. 2208 type TaskState struct { 2209 // The current state of the task. 2210 State string 2211 2212 // Series of task events that transition the state of the task. 2213 Events []*TaskEvent 2214 } 2215 2216 func (ts *TaskState) Copy() *TaskState { 2217 if ts == nil { 2218 return nil 2219 } 2220 copy := new(TaskState) 2221 copy.State = ts.State 2222 2223 if ts.Events != nil { 2224 copy.Events = make([]*TaskEvent, len(ts.Events)) 2225 for i, e := range ts.Events { 2226 copy.Events[i] = e.Copy() 2227 } 2228 } 2229 return copy 2230 } 2231 2232 // Failed returns true if the task has has failed. 2233 func (ts *TaskState) Failed() bool { 2234 l := len(ts.Events) 2235 if ts.State != TaskStateDead || l == 0 { 2236 return false 2237 } 2238 2239 switch ts.Events[l-1].Type { 2240 case TaskDiskExceeded, TaskNotRestarting, TaskArtifactDownloadFailed, TaskFailedValidation: 2241 return true 2242 default: 2243 return false 2244 } 2245 } 2246 2247 // Successful returns whether a task finished successfully. 2248 func (ts *TaskState) Successful() bool { 2249 l := len(ts.Events) 2250 if ts.State != TaskStateDead || l == 0 { 2251 return false 2252 } 2253 2254 e := ts.Events[l-1] 2255 if e.Type != TaskTerminated { 2256 return false 2257 } 2258 2259 return e.ExitCode == 0 2260 } 2261 2262 const ( 2263 // TaskDriveFailure indicates that the task could not be started due to a 2264 // failure in the driver. 2265 TaskDriverFailure = "Driver Failure" 2266 2267 // TaskReceived signals that the task has been pulled by the client at the 2268 // given timestamp. 2269 TaskReceived = "Received" 2270 2271 // TaskFailedValidation indicates the task was invalid and as such was not 2272 // run. 
2273 TaskFailedValidation = "Failed Validation" 2274 2275 // TaskStarted signals that the task was started and its timestamp can be 2276 // used to determine the running length of the task. 2277 TaskStarted = "Started" 2278 2279 // TaskTerminated indicates that the task was started and exited. 2280 TaskTerminated = "Terminated" 2281 2282 // TaskKilling indicates a kill signal has been sent to the task. 2283 TaskKilling = "Killing" 2284 2285 // TaskKilled indicates a user has killed the task. 2286 TaskKilled = "Killed" 2287 2288 // TaskRestarting indicates that task terminated and is being restarted. 2289 TaskRestarting = "Restarting" 2290 2291 // TaskNotRestarting indicates that the task has failed and is not being 2292 // restarted because it has exceeded its restart policy. 2293 TaskNotRestarting = "Not Restarting" 2294 2295 // TaskDownloadingArtifacts means the task is downloading the artifacts 2296 // specified in the task. 2297 TaskDownloadingArtifacts = "Downloading Artifacts" 2298 2299 // TaskArtifactDownloadFailed indicates that downloading the artifacts 2300 // failed. 2301 TaskArtifactDownloadFailed = "Failed Artifact Download" 2302 2303 // TaskDiskExceeded indicates that one of the tasks in a taskgroup has 2304 // exceeded the requested disk resources. 2305 TaskDiskExceeded = "Disk Resources Exceeded" 2306 2307 // TaskSiblingFailed indicates that a sibling task in the task group has 2308 // failed. 2309 TaskSiblingFailed = "Sibling task failed" 2310 ) 2311 2312 // TaskEvent is an event that effects the state of a task and contains meta-data 2313 // appropriate to the events type. 2314 type TaskEvent struct { 2315 Type string 2316 Time int64 // Unix Nanosecond timestamp 2317 2318 // Restart fields. 2319 RestartReason string 2320 2321 // Driver Failure fields. 2322 DriverError string // A driver error occurred while starting the task. 2323 2324 // Task Terminated Fields. 2325 ExitCode int // The exit code of the task. 
2326 Signal int // The signal that terminated the task. 2327 Message string // A possible message explaining the termination of the task. 2328 2329 // Killing fields 2330 KillTimeout time.Duration 2331 2332 // Task Killed Fields. 2333 KillError string // Error killing the task. 2334 2335 // TaskRestarting fields. 2336 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 2337 2338 // Artifact Download fields 2339 DownloadError string // Error downloading artifacts 2340 2341 // Validation fields 2342 ValidationError string // Validation error 2343 2344 // The maximum allowed task disk size. 2345 DiskLimit int64 2346 2347 // The recorded task disk size. 2348 DiskSize int64 2349 2350 // Name of the sibling task that caused termination of the task that 2351 // the TaskEvent refers to. 2352 FailedSibling string 2353 } 2354 2355 func (te *TaskEvent) GoString() string { 2356 return fmt.Sprintf("%v at %v", te.Type, te.Time) 2357 } 2358 2359 func (te *TaskEvent) Copy() *TaskEvent { 2360 if te == nil { 2361 return nil 2362 } 2363 copy := new(TaskEvent) 2364 *copy = *te 2365 return copy 2366 } 2367 2368 func NewTaskEvent(event string) *TaskEvent { 2369 return &TaskEvent{ 2370 Type: event, 2371 Time: time.Now().UnixNano(), 2372 } 2373 } 2374 2375 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 2376 if err != nil { 2377 e.DriverError = err.Error() 2378 } 2379 return e 2380 } 2381 2382 func (e *TaskEvent) SetExitCode(c int) *TaskEvent { 2383 e.ExitCode = c 2384 return e 2385 } 2386 2387 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 2388 e.Signal = s 2389 return e 2390 } 2391 2392 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 2393 if err != nil { 2394 e.Message = err.Error() 2395 } 2396 return e 2397 } 2398 2399 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 2400 if err != nil { 2401 e.KillError = err.Error() 2402 } 2403 return e 2404 } 2405 2406 func (e *TaskEvent) SetRestartDelay(delay time.Duration) 
*TaskEvent { 2407 e.StartDelay = int64(delay) 2408 return e 2409 } 2410 2411 func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent { 2412 e.RestartReason = reason 2413 return e 2414 } 2415 2416 func (e *TaskEvent) SetDownloadError(err error) *TaskEvent { 2417 if err != nil { 2418 e.DownloadError = err.Error() 2419 } 2420 return e 2421 } 2422 2423 func (e *TaskEvent) SetValidationError(err error) *TaskEvent { 2424 if err != nil { 2425 e.ValidationError = err.Error() 2426 } 2427 return e 2428 } 2429 2430 func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { 2431 e.KillTimeout = timeout 2432 return e 2433 } 2434 2435 func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent { 2436 e.DiskLimit = limit 2437 return e 2438 } 2439 2440 func (e *TaskEvent) SetDiskSize(size int64) *TaskEvent { 2441 e.DiskSize = size 2442 return e 2443 } 2444 2445 func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent { 2446 e.FailedSibling = sibling 2447 return e 2448 } 2449 2450 // TaskArtifact is an artifact to download before running the task. 2451 type TaskArtifact struct { 2452 // GetterSource is the source to download an artifact using go-getter 2453 GetterSource string `mapstructure:"source"` 2454 2455 // GetterOptions are options to use when downloading the artifact using 2456 // go-getter. 2457 GetterOptions map[string]string `mapstructure:"options"` 2458 2459 // RelativeDest is the download destination given relative to the task's 2460 // directory. 
2461 RelativeDest string `mapstructure:"destination"` 2462 } 2463 2464 func (ta *TaskArtifact) Copy() *TaskArtifact { 2465 if ta == nil { 2466 return nil 2467 } 2468 nta := new(TaskArtifact) 2469 *nta = *ta 2470 nta.GetterOptions = CopyMapStringString(ta.GetterOptions) 2471 return nta 2472 } 2473 2474 func (ta *TaskArtifact) GoString() string { 2475 return fmt.Sprintf("%+v", ta) 2476 } 2477 2478 func (ta *TaskArtifact) Validate() error { 2479 // Verify the source 2480 var mErr multierror.Error 2481 if ta.GetterSource == "" { 2482 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 2483 } 2484 2485 // Verify the destination doesn't escape the tasks directory 2486 alloc, err := filepath.Abs(filepath.Join("/", "foo/", "bar/")) 2487 if err != nil { 2488 mErr.Errors = append(mErr.Errors, err) 2489 return mErr.ErrorOrNil() 2490 } 2491 abs, err := filepath.Abs(filepath.Join(alloc, ta.RelativeDest)) 2492 if err != nil { 2493 mErr.Errors = append(mErr.Errors, err) 2494 return mErr.ErrorOrNil() 2495 } 2496 rel, err := filepath.Rel(alloc, abs) 2497 if err != nil { 2498 mErr.Errors = append(mErr.Errors, err) 2499 return mErr.ErrorOrNil() 2500 } 2501 if strings.HasPrefix(rel, "..") { 2502 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes task's directory")) 2503 } 2504 2505 // Verify the checksum 2506 if check, ok := ta.GetterOptions["checksum"]; ok { 2507 check = strings.TrimSpace(check) 2508 if check == "" { 2509 mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value can not be empty")) 2510 return mErr.ErrorOrNil() 2511 } 2512 2513 parts := strings.Split(check, ":") 2514 if l := len(parts); l != 2 { 2515 mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check)) 2516 return mErr.ErrorOrNil() 2517 } 2518 2519 checksumVal := parts[1] 2520 checksumBytes, err := hex.DecodeString(checksumVal) 2521 if err != nil { 2522 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err)) 
2523 return mErr.ErrorOrNil() 2524 } 2525 2526 checksumType := parts[0] 2527 expectedLength := 0 2528 switch checksumType { 2529 case "md5": 2530 expectedLength = md5.Size 2531 case "sha1": 2532 expectedLength = sha1.Size 2533 case "sha256": 2534 expectedLength = sha256.Size 2535 case "sha512": 2536 expectedLength = sha512.Size 2537 default: 2538 mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType)) 2539 return mErr.ErrorOrNil() 2540 } 2541 2542 if len(checksumBytes) != expectedLength { 2543 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal)) 2544 return mErr.ErrorOrNil() 2545 } 2546 } 2547 2548 return mErr.ErrorOrNil() 2549 } 2550 2551 const ( 2552 ConstraintDistinctHosts = "distinct_hosts" 2553 ConstraintRegex = "regexp" 2554 ConstraintVersion = "version" 2555 ) 2556 2557 // Constraints are used to restrict placement options. 2558 type Constraint struct { 2559 LTarget string // Left-hand target 2560 RTarget string // Right-hand target 2561 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 2562 str string // Memoized string 2563 } 2564 2565 func (c *Constraint) Copy() *Constraint { 2566 if c == nil { 2567 return nil 2568 } 2569 nc := new(Constraint) 2570 *nc = *c 2571 return nc 2572 } 2573 2574 func (c *Constraint) String() string { 2575 if c.str != "" { 2576 return c.str 2577 } 2578 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 2579 return c.str 2580 } 2581 2582 func (c *Constraint) Validate() error { 2583 var mErr multierror.Error 2584 if c.Operand == "" { 2585 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 2586 } 2587 2588 // Perform additional validation based on operand 2589 switch c.Operand { 2590 case ConstraintRegex: 2591 if _, err := regexp.Compile(c.RTarget); err != nil { 2592 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 2593 } 2594 case 
ConstraintVersion: 2595 if _, err := version.NewConstraint(c.RTarget); err != nil { 2596 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 2597 } 2598 } 2599 return mErr.ErrorOrNil() 2600 } 2601 2602 // EphemeralDisk is an ephemeral disk object 2603 type EphemeralDisk struct { 2604 // Sticky indicates whether the allocation is sticky to a node 2605 Sticky bool 2606 2607 // SizeMB is the size of the local disk 2608 SizeMB int `mapstructure:"size"` 2609 } 2610 2611 // DefaultEphemeralDisk returns a EphemeralDisk with default configurations 2612 func DefaultEphemeralDisk() *EphemeralDisk { 2613 return &EphemeralDisk{ 2614 SizeMB: 300, 2615 } 2616 } 2617 2618 // Validate validates EphemeralDisk 2619 func (d *EphemeralDisk) Validate() error { 2620 if d.SizeMB < 10 { 2621 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 2622 } 2623 return nil 2624 } 2625 2626 // Copy copies the EphemeralDisk struct and returns a new one 2627 func (d *EphemeralDisk) Copy() *EphemeralDisk { 2628 ld := new(EphemeralDisk) 2629 *ld = *d 2630 return ld 2631 } 2632 2633 // Vault stores the set of premissions a task needs access to from Vault. 2634 type Vault struct { 2635 // Policies is the set of policies that the task needs access to 2636 Policies []string 2637 } 2638 2639 // Copy returns a copy of this Vault block. 2640 func (v *Vault) Copy() *Vault { 2641 if v == nil { 2642 return nil 2643 } 2644 2645 nv := new(Vault) 2646 *nv = *v 2647 return nv 2648 } 2649 2650 // Validate returns if the Vault block is valid. 
2651 func (v *Vault) Validate() error { 2652 if v == nil { 2653 return nil 2654 } 2655 2656 if len(v.Policies) == 0 { 2657 return fmt.Errorf("Policy list can not be empty") 2658 } 2659 2660 return nil 2661 } 2662 2663 const ( 2664 AllocDesiredStatusRun = "run" // Allocation should run 2665 AllocDesiredStatusStop = "stop" // Allocation should stop 2666 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 2667 ) 2668 2669 const ( 2670 AllocClientStatusPending = "pending" 2671 AllocClientStatusRunning = "running" 2672 AllocClientStatusComplete = "complete" 2673 AllocClientStatusFailed = "failed" 2674 AllocClientStatusLost = "lost" 2675 ) 2676 2677 // Allocation is used to allocate the placement of a task group to a node. 2678 type Allocation struct { 2679 // ID of the allocation (UUID) 2680 ID string 2681 2682 // ID of the evaluation that generated this allocation 2683 EvalID string 2684 2685 // Name is a logical name of the allocation. 2686 Name string 2687 2688 // NodeID is the node this is being placed on 2689 NodeID string 2690 2691 // Job is the parent job of the task group being allocated. 2692 // This is copied at allocation time to avoid issues if the job 2693 // definition is updated. 2694 JobID string 2695 Job *Job 2696 2697 // TaskGroup is the name of the task group that should be run 2698 TaskGroup string 2699 2700 // Resources is the total set of resources allocated as part 2701 // of this allocation of the task group. 2702 Resources *Resources 2703 2704 // SharedResources are the resources that are shared by all the tasks in an 2705 // allocation 2706 SharedResources *Resources 2707 2708 // TaskResources is the set of resources allocated to each 2709 // task. These should sum to the total Resources. 
2710 TaskResources map[string]*Resources 2711 2712 // Metrics associated with this allocation 2713 Metrics *AllocMetric 2714 2715 // Desired Status of the allocation on the client 2716 DesiredStatus string 2717 2718 // DesiredStatusDescription is meant to provide more human useful information 2719 DesiredDescription string 2720 2721 // Status of the allocation on the client 2722 ClientStatus string 2723 2724 // ClientStatusDescription is meant to provide more human useful information 2725 ClientDescription string 2726 2727 // TaskStates stores the state of each task, 2728 TaskStates map[string]*TaskState 2729 2730 // PreviousAllocation is the allocation that this allocation is replacing 2731 PreviousAllocation string 2732 2733 // Raft Indexes 2734 CreateIndex uint64 2735 ModifyIndex uint64 2736 2737 // AllocModifyIndex is not updated when the client updates allocations. This 2738 // lets the client pull only the allocs updated by the server. 2739 AllocModifyIndex uint64 2740 2741 // CreateTime is the time the allocation has finished scheduling and been 2742 // verified by the plan applier. 
2743 CreateTime int64 2744 } 2745 2746 func (a *Allocation) Copy() *Allocation { 2747 if a == nil { 2748 return nil 2749 } 2750 na := new(Allocation) 2751 *na = *a 2752 2753 na.Job = na.Job.Copy() 2754 na.Resources = na.Resources.Copy() 2755 na.SharedResources = na.SharedResources.Copy() 2756 2757 if a.TaskResources != nil { 2758 tr := make(map[string]*Resources, len(na.TaskResources)) 2759 for task, resource := range na.TaskResources { 2760 tr[task] = resource.Copy() 2761 } 2762 na.TaskResources = tr 2763 } 2764 2765 na.Metrics = na.Metrics.Copy() 2766 2767 if a.TaskStates != nil { 2768 ts := make(map[string]*TaskState, len(na.TaskStates)) 2769 for task, state := range na.TaskStates { 2770 ts[task] = state.Copy() 2771 } 2772 na.TaskStates = ts 2773 } 2774 return na 2775 } 2776 2777 // TerminalStatus returns if the desired or actual status is terminal and 2778 // will no longer transition. 2779 func (a *Allocation) TerminalStatus() bool { 2780 // First check the desired state and if that isn't terminal, check client 2781 // state. 2782 switch a.DesiredStatus { 2783 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 2784 return true 2785 default: 2786 } 2787 2788 switch a.ClientStatus { 2789 case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost: 2790 return true 2791 default: 2792 return false 2793 } 2794 } 2795 2796 // Terminated returns if the allocation is in a terminal state on a client. 2797 func (a *Allocation) Terminated() bool { 2798 if a.ClientStatus == AllocClientStatusFailed || 2799 a.ClientStatus == AllocClientStatusComplete || 2800 a.ClientStatus == AllocClientStatusLost { 2801 return true 2802 } 2803 return false 2804 } 2805 2806 // RanSuccessfully returns whether the client has ran the allocation and all 2807 // tasks finished successfully 2808 func (a *Allocation) RanSuccessfully() bool { 2809 // Handle the case the client hasn't started the allocation. 
2810 if len(a.TaskStates) == 0 { 2811 return false 2812 } 2813 2814 // Check to see if all the tasks finised successfully in the allocation 2815 allSuccess := true 2816 for _, state := range a.TaskStates { 2817 allSuccess = allSuccess && state.Successful() 2818 } 2819 2820 return allSuccess 2821 } 2822 2823 // Stub returns a list stub for the allocation 2824 func (a *Allocation) Stub() *AllocListStub { 2825 return &AllocListStub{ 2826 ID: a.ID, 2827 EvalID: a.EvalID, 2828 Name: a.Name, 2829 NodeID: a.NodeID, 2830 JobID: a.JobID, 2831 TaskGroup: a.TaskGroup, 2832 DesiredStatus: a.DesiredStatus, 2833 DesiredDescription: a.DesiredDescription, 2834 ClientStatus: a.ClientStatus, 2835 ClientDescription: a.ClientDescription, 2836 TaskStates: a.TaskStates, 2837 CreateIndex: a.CreateIndex, 2838 ModifyIndex: a.ModifyIndex, 2839 CreateTime: a.CreateTime, 2840 } 2841 } 2842 2843 var ( 2844 // AllocationIndexRegex is a regular expression to find the allocation index. 2845 AllocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$") 2846 ) 2847 2848 // Index returns the index of the allocation. If the allocation is from a task 2849 // group with count greater than 1, there will be multiple allocations for it. 
2850 func (a *Allocation) Index() int { 2851 matches := AllocationIndexRegex.FindStringSubmatch(a.Name) 2852 if len(matches) != 2 { 2853 return -1 2854 } 2855 2856 index, err := strconv.Atoi(matches[1]) 2857 if err != nil { 2858 return -1 2859 } 2860 2861 return index 2862 } 2863 2864 // AllocListStub is used to return a subset of alloc information 2865 type AllocListStub struct { 2866 ID string 2867 EvalID string 2868 Name string 2869 NodeID string 2870 JobID string 2871 TaskGroup string 2872 DesiredStatus string 2873 DesiredDescription string 2874 ClientStatus string 2875 ClientDescription string 2876 TaskStates map[string]*TaskState 2877 CreateIndex uint64 2878 ModifyIndex uint64 2879 CreateTime int64 2880 } 2881 2882 // AllocMetric is used to track various metrics while attempting 2883 // to make an allocation. These are used to debug a job, or to better 2884 // understand the pressure within the system. 2885 type AllocMetric struct { 2886 // NodesEvaluated is the number of nodes that were evaluated 2887 NodesEvaluated int 2888 2889 // NodesFiltered is the number of nodes filtered due to a constraint 2890 NodesFiltered int 2891 2892 // NodesAvailable is the number of nodes available for evaluation per DC. 2893 NodesAvailable map[string]int 2894 2895 // ClassFiltered is the number of nodes filtered by class 2896 ClassFiltered map[string]int 2897 2898 // ConstraintFiltered is the number of failures caused by constraint 2899 ConstraintFiltered map[string]int 2900 2901 // NodesExhausted is the number of nodes skipped due to being 2902 // exhausted of at least one resource 2903 NodesExhausted int 2904 2905 // ClassExhausted is the number of nodes exhausted by class 2906 ClassExhausted map[string]int 2907 2908 // DimensionExhausted provides the count by dimension or reason 2909 DimensionExhausted map[string]int 2910 2911 // Scores is the scores of the final few nodes remaining 2912 // for placement. The top score is typically selected. 
2913 Scores map[string]float64 2914 2915 // AllocationTime is a measure of how long the allocation 2916 // attempt took. This can affect performance and SLAs. 2917 AllocationTime time.Duration 2918 2919 // CoalescedFailures indicates the number of other 2920 // allocations that were coalesced into this failed allocation. 2921 // This is to prevent creating many failed allocations for a 2922 // single task group. 2923 CoalescedFailures int 2924 } 2925 2926 func (a *AllocMetric) Copy() *AllocMetric { 2927 if a == nil { 2928 return nil 2929 } 2930 na := new(AllocMetric) 2931 *na = *a 2932 na.NodesAvailable = CopyMapStringInt(na.NodesAvailable) 2933 na.ClassFiltered = CopyMapStringInt(na.ClassFiltered) 2934 na.ConstraintFiltered = CopyMapStringInt(na.ConstraintFiltered) 2935 na.ClassExhausted = CopyMapStringInt(na.ClassExhausted) 2936 na.DimensionExhausted = CopyMapStringInt(na.DimensionExhausted) 2937 na.Scores = CopyMapStringFloat64(na.Scores) 2938 return na 2939 } 2940 2941 func (a *AllocMetric) EvaluateNode() { 2942 a.NodesEvaluated += 1 2943 } 2944 2945 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 2946 a.NodesFiltered += 1 2947 if node != nil && node.NodeClass != "" { 2948 if a.ClassFiltered == nil { 2949 a.ClassFiltered = make(map[string]int) 2950 } 2951 a.ClassFiltered[node.NodeClass] += 1 2952 } 2953 if constraint != "" { 2954 if a.ConstraintFiltered == nil { 2955 a.ConstraintFiltered = make(map[string]int) 2956 } 2957 a.ConstraintFiltered[constraint] += 1 2958 } 2959 } 2960 2961 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 2962 a.NodesExhausted += 1 2963 if node != nil && node.NodeClass != "" { 2964 if a.ClassExhausted == nil { 2965 a.ClassExhausted = make(map[string]int) 2966 } 2967 a.ClassExhausted[node.NodeClass] += 1 2968 } 2969 if dimension != "" { 2970 if a.DimensionExhausted == nil { 2971 a.DimensionExhausted = make(map[string]int) 2972 } 2973 a.DimensionExhausted[dimension] += 1 2974 } 2975 } 2976 2977 func 
(a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 2978 if a.Scores == nil { 2979 a.Scores = make(map[string]float64) 2980 } 2981 key := fmt.Sprintf("%s.%s", node.ID, name) 2982 a.Scores[key] = score 2983 } 2984 2985 const ( 2986 EvalStatusBlocked = "blocked" 2987 EvalStatusPending = "pending" 2988 EvalStatusComplete = "complete" 2989 EvalStatusFailed = "failed" 2990 EvalStatusCancelled = "canceled" 2991 ) 2992 2993 const ( 2994 EvalTriggerJobRegister = "job-register" 2995 EvalTriggerJobDeregister = "job-deregister" 2996 EvalTriggerPeriodicJob = "periodic-job" 2997 EvalTriggerNodeUpdate = "node-update" 2998 EvalTriggerScheduled = "scheduled" 2999 EvalTriggerRollingUpdate = "rolling-update" 3000 EvalTriggerMaxPlans = "max-plan-attempts" 3001 ) 3002 3003 const ( 3004 // CoreJobEvalGC is used for the garbage collection of evaluations 3005 // and allocations. We periodically scan evaluations in a terminal state, 3006 // in which all the corresponding allocations are also terminal. We 3007 // delete these out of the system to bound the state. 3008 CoreJobEvalGC = "eval-gc" 3009 3010 // CoreJobNodeGC is used for the garbage collection of failed nodes. 3011 // We periodically scan nodes in a terminal state, and if they have no 3012 // corresponding allocations we delete these out of the system. 3013 CoreJobNodeGC = "node-gc" 3014 3015 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 3016 // periodically scan garbage collectible jobs and check if both their 3017 // evaluations and allocations are terminal. If so, we delete these out of 3018 // the system. 3019 CoreJobJobGC = "job-gc" 3020 3021 // CoreJobForceGC is used to force garbage collection of all GCable objects. 3022 CoreJobForceGC = "force-gc" 3023 ) 3024 3025 // Evaluation is used anytime we need to apply business logic as a result 3026 // of a change to our desired state (job specification) or the emergent state 3027 // (registered nodes). 
// When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// Status of the evaluation
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade.
	Wait time.Duration

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	PreviousEval string

	// BlockedEval is the evaluation ID for a created blocked eval. A
	// blocked eval will be created if all allocations could not be placed due
	// to constraints or lacking resources.
	BlockedEval string

	// FailedTGAllocs are task groups which have allocations that could not be
	// made, but the metrics are persisted so that the user can use the feedback
	// to determine the cause.
	FailedTGAllocs map[string]*AllocMetric

	// ClassEligibility tracks computed node classes that have been explicitly
	// marked as eligible or ineligible.
	ClassEligibility map[string]bool

	// EscapedComputedClass marks whether the job has constraints that are not
	// captured by computed node classes.
	EscapedComputedClass bool

	// AnnotatePlan triggers the scheduler to provide additional annotations
	// during the evaluation. This should not be set during normal operations.
	AnnotatePlan bool

	// SnapshotIndex is the Raft index of the snapshot used to process the
	// evaluation. As such it will only be set once it has gone through the
	// scheduler.
	SnapshotIndex uint64

	// QueuedAllocations is the number of unplaced allocations at the time the
	// evaluation was processed. The map is keyed by Task Group names.
	QueuedAllocations map[string]int

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
3118 func (e *Evaluation) TerminalStatus() bool { 3119 switch e.Status { 3120 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 3121 return true 3122 default: 3123 return false 3124 } 3125 } 3126 3127 func (e *Evaluation) GoString() string { 3128 return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID) 3129 } 3130 3131 func (e *Evaluation) Copy() *Evaluation { 3132 if e == nil { 3133 return nil 3134 } 3135 ne := new(Evaluation) 3136 *ne = *e 3137 3138 // Copy ClassEligibility 3139 if e.ClassEligibility != nil { 3140 classes := make(map[string]bool, len(e.ClassEligibility)) 3141 for class, elig := range e.ClassEligibility { 3142 classes[class] = elig 3143 } 3144 ne.ClassEligibility = classes 3145 } 3146 3147 // Copy FailedTGAllocs 3148 if e.FailedTGAllocs != nil { 3149 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 3150 for tg, metric := range e.FailedTGAllocs { 3151 failedTGs[tg] = metric.Copy() 3152 } 3153 ne.FailedTGAllocs = failedTGs 3154 } 3155 3156 // Copy queued allocations 3157 if e.QueuedAllocations != nil { 3158 queuedAllocations := make(map[string]int, len(e.QueuedAllocations)) 3159 for tg, num := range e.QueuedAllocations { 3160 queuedAllocations[tg] = num 3161 } 3162 ne.QueuedAllocations = queuedAllocations 3163 } 3164 3165 return ne 3166 } 3167 3168 // ShouldEnqueue checks if a given evaluation should be enqueued into the 3169 // eval_broker 3170 func (e *Evaluation) ShouldEnqueue() bool { 3171 switch e.Status { 3172 case EvalStatusPending: 3173 return true 3174 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 3175 return false 3176 default: 3177 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 3178 } 3179 } 3180 3181 // ShouldBlock checks if a given evaluation should be entered into the blocked 3182 // eval tracker. 
3183 func (e *Evaluation) ShouldBlock() bool { 3184 switch e.Status { 3185 case EvalStatusBlocked: 3186 return true 3187 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 3188 return false 3189 default: 3190 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 3191 } 3192 } 3193 3194 // MakePlan is used to make a plan from the given evaluation 3195 // for a given Job 3196 func (e *Evaluation) MakePlan(j *Job) *Plan { 3197 p := &Plan{ 3198 EvalID: e.ID, 3199 Priority: e.Priority, 3200 Job: j, 3201 NodeUpdate: make(map[string][]*Allocation), 3202 NodeAllocation: make(map[string][]*Allocation), 3203 } 3204 if j != nil { 3205 p.AllAtOnce = j.AllAtOnce 3206 } 3207 return p 3208 } 3209 3210 // NextRollingEval creates an evaluation to followup this eval for rolling updates 3211 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 3212 return &Evaluation{ 3213 ID: GenerateUUID(), 3214 Priority: e.Priority, 3215 Type: e.Type, 3216 TriggeredBy: EvalTriggerRollingUpdate, 3217 JobID: e.JobID, 3218 JobModifyIndex: e.JobModifyIndex, 3219 Status: EvalStatusPending, 3220 Wait: wait, 3221 PreviousEval: e.ID, 3222 } 3223 } 3224 3225 // CreateBlockedEval creates a blocked evaluation to followup this eval to place any 3226 // failed allocations. It takes the classes marked explicitly eligible or 3227 // ineligible and whether the job has escaped computed node classes. 3228 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation { 3229 return &Evaluation{ 3230 ID: GenerateUUID(), 3231 Priority: e.Priority, 3232 Type: e.Type, 3233 TriggeredBy: e.TriggeredBy, 3234 JobID: e.JobID, 3235 JobModifyIndex: e.JobModifyIndex, 3236 Status: EvalStatusBlocked, 3237 PreviousEval: e.ID, 3238 ClassEligibility: classEligibility, 3239 EscapedComputedClass: escaped, 3240 } 3241 } 3242 3243 // Plan is used to submit a commit plan for task allocations. 
These 3244 // are submitted to the leader which verifies that resources have 3245 // not been overcommitted before admiting the plan. 3246 type Plan struct { 3247 // EvalID is the evaluation ID this plan is associated with 3248 EvalID string 3249 3250 // EvalToken is used to prevent a split-brain processing of 3251 // an evaluation. There should only be a single scheduler running 3252 // an Eval at a time, but this could be violated after a leadership 3253 // transition. This unique token is used to reject plans that are 3254 // being submitted from a different leader. 3255 EvalToken string 3256 3257 // Priority is the priority of the upstream job 3258 Priority int 3259 3260 // AllAtOnce is used to control if incremental scheduling of task groups 3261 // is allowed or if we must do a gang scheduling of the entire job. 3262 // If this is false, a plan may be partially applied. Otherwise, the 3263 // entire plan must be able to make progress. 3264 AllAtOnce bool 3265 3266 // Job is the parent job of all the allocations in the Plan. 3267 // Since a Plan only involves a single Job, we can reduce the size 3268 // of the plan by only including it once. 3269 Job *Job 3270 3271 // NodeUpdate contains all the allocations for each node. For each node, 3272 // this is a list of the allocations to update to either stop or evict. 3273 NodeUpdate map[string][]*Allocation 3274 3275 // NodeAllocation contains all the allocations for each node. 3276 // The evicts must be considered prior to the allocations. 3277 NodeAllocation map[string][]*Allocation 3278 3279 // Annotations contains annotations by the scheduler to be used by operators 3280 // to understand the decisions made by the scheduler. 3281 Annotations *PlanAnnotations 3282 } 3283 3284 // AppendUpdate marks the allocation for eviction. The clientStatus of the 3285 // allocation may be optionally set by passing in a non-empty value. 
3286 func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) { 3287 newAlloc := new(Allocation) 3288 *newAlloc = *alloc 3289 3290 // If the job is not set in the plan we are deregistering a job so we 3291 // extract the job from the allocation. 3292 if p.Job == nil && newAlloc.Job != nil { 3293 p.Job = newAlloc.Job 3294 } 3295 3296 // Normalize the job 3297 newAlloc.Job = nil 3298 3299 // Strip the resources as it can be rebuilt. 3300 newAlloc.Resources = nil 3301 3302 newAlloc.DesiredStatus = desiredStatus 3303 newAlloc.DesiredDescription = desiredDesc 3304 3305 if clientStatus != "" { 3306 newAlloc.ClientStatus = clientStatus 3307 } 3308 3309 node := alloc.NodeID 3310 existing := p.NodeUpdate[node] 3311 p.NodeUpdate[node] = append(existing, newAlloc) 3312 } 3313 3314 func (p *Plan) PopUpdate(alloc *Allocation) { 3315 existing := p.NodeUpdate[alloc.NodeID] 3316 n := len(existing) 3317 if n > 0 && existing[n-1].ID == alloc.ID { 3318 existing = existing[:n-1] 3319 if len(existing) > 0 { 3320 p.NodeUpdate[alloc.NodeID] = existing 3321 } else { 3322 delete(p.NodeUpdate, alloc.NodeID) 3323 } 3324 } 3325 } 3326 3327 func (p *Plan) AppendAlloc(alloc *Allocation) { 3328 node := alloc.NodeID 3329 existing := p.NodeAllocation[node] 3330 p.NodeAllocation[node] = append(existing, alloc) 3331 } 3332 3333 // IsNoOp checks if this plan would do nothing 3334 func (p *Plan) IsNoOp() bool { 3335 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 3336 } 3337 3338 // PlanResult is the result of a plan submitted to the leader. 3339 type PlanResult struct { 3340 // NodeUpdate contains all the updates that were committed. 3341 NodeUpdate map[string][]*Allocation 3342 3343 // NodeAllocation contains all the allocations that were committed. 3344 NodeAllocation map[string][]*Allocation 3345 3346 // RefreshIndex is the index the worker should refresh state up to. 3347 // This allows all evictions and allocations to be materialized. 
3348 // If any allocations were rejected due to stale data (node state, 3349 // over committed) this can be used to force a worker refresh. 3350 RefreshIndex uint64 3351 3352 // AllocIndex is the Raft index in which the evictions and 3353 // allocations took place. This is used for the write index. 3354 AllocIndex uint64 3355 } 3356 3357 // IsNoOp checks if this plan result would do nothing 3358 func (p *PlanResult) IsNoOp() bool { 3359 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 3360 } 3361 3362 // FullCommit is used to check if all the allocations in a plan 3363 // were committed as part of the result. Returns if there was 3364 // a match, and the number of expected and actual allocations. 3365 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) { 3366 expected := 0 3367 actual := 0 3368 for name, allocList := range plan.NodeAllocation { 3369 didAlloc, _ := p.NodeAllocation[name] 3370 expected += len(allocList) 3371 actual += len(didAlloc) 3372 } 3373 return actual == expected, expected, actual 3374 } 3375 3376 // PlanAnnotations holds annotations made by the scheduler to give further debug 3377 // information to operators. 3378 type PlanAnnotations struct { 3379 // DesiredTGUpdates is the set of desired updates per task group. 3380 DesiredTGUpdates map[string]*DesiredUpdates 3381 } 3382 3383 // DesiredUpdates is the set of changes the scheduler would like to make given 3384 // sufficient resources and cluster capacity. 3385 type DesiredUpdates struct { 3386 Ignore uint64 3387 Place uint64 3388 Migrate uint64 3389 Stop uint64 3390 InPlaceUpdate uint64 3391 DestructiveUpdate uint64 3392 } 3393 3394 // msgpackHandle is a shared handle for encoding/decoding of structs 3395 var MsgpackHandle = func() *codec.MsgpackHandle { 3396 h := &codec.MsgpackHandle{RawToString: true} 3397 3398 // Sets the default type for decoding a map into a nil interface{}. 
3399 // This is necessary in particular because we store the driver configs as a 3400 // nil interface{}. 3401 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 3402 return h 3403 }() 3404 3405 var HashiMsgpackHandle = func() *hcodec.MsgpackHandle { 3406 h := &hcodec.MsgpackHandle{RawToString: true} 3407 3408 // Sets the default type for decoding a map into a nil interface{}. 3409 // This is necessary in particular because we store the driver configs as a 3410 // nil interface{}. 3411 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 3412 return h 3413 }() 3414 3415 // Decode is used to decode a MsgPack encoded object 3416 func Decode(buf []byte, out interface{}) error { 3417 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out) 3418 } 3419 3420 // Encode is used to encode a MsgPack object with type prefix 3421 func Encode(t MessageType, msg interface{}) ([]byte, error) { 3422 var buf bytes.Buffer 3423 buf.WriteByte(uint8(t)) 3424 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg) 3425 return buf.Bytes(), err 3426 }