// Source listing: github.com/maier/nomad@v0.4.1-0.20161110003312-a9e3d0b8549d/nomad/structs/structs.go

package structs

import (
	"bytes"
	"crypto/md5"
	"crypto/sha1"
	"crypto/sha256"
	"crypto/sha512"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"net"
	"os"
	"path/filepath"
	"reflect"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/gorhill/cronexpr"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/go-version"
	"github.com/hashicorp/nomad/helper/args"
	"github.com/mitchellh/copystructure"
	"github.com/ugorji/go/codec"

	hcodec "github.com/hashicorp/go-msgpack/codec"
)

// Sentinel errors shared across the package.
var (
	// ErrNoLeader carries the message "No cluster leader".
	ErrNoLeader = fmt.Errorf("No cluster leader")

	// ErrNoRegionPath carries the message "No path to region".
	ErrNoRegionPath = fmt.Errorf("No path to region")
)

// MessageType is a one-byte tag identifying the kind of a request.
type MessageType uint8

// Known message types. The values are assigned sequentially by iota, so the
// order of this list determines each constant's numeric value.
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	VaultAccessorDegisterRequestType
)

const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	// String names for the individual version components.
	// NOTE(review): presumably used as keys of a versions map — confirm
	// against the callers.
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)

// RPCInfo is used to describe common information about a query: the region
// it targets, whether it is a read, and whether a stale read is acceptable.
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// If set, wait until query exceeds given index. Must be provided
	// with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
	AllowStale bool

	// If set, used as prefix for resource list searches
	Prefix string
}

// RequestRegion returns the region this query targets.
func (q QueryOptions) RequestRegion() string {
	return q.Region
}

// IsRead always returns true: QueryOptions only applies to reads.
func (q QueryOptions) IsRead() bool {
	return true
}

// AllowStaleRead reports the value of the AllowStale flag.
func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}

// WriteRequest carries the options common to every write request.
type WriteRequest struct {
	// The target region for this write
	Region string
}

// RequestRegion returns the region this write targets.
func (w WriteRequest) RequestRegion() string {
	// The target region for this request
	return w.Region
}

// IsRead always returns false: WriteRequest only applies to writes.
func (w WriteRequest) IsRead() bool {
	return false
}

// AllowStaleRead always returns false for a write request.
func (w WriteRequest) AllowStaleRead() bool {
	return false
}

// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}

// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}

// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to
	Datacenter string
}

// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain status
// of the node identified by NodeID.
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	NodeID   string
	SecretID string
	QueryOptions
}

// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current Jobs index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	WriteRequest
}

// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string
	WriteRequest
}

// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}

// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
	JobID string
	QueryOptions
}

// JobListRequest is used to parameterize a list request
type JobListRequest struct {
	QueryOptions
}

// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
	Job  *Job
	Diff bool // Toggles an annotated diff
	WriteRequest
}

// JobSummaryRequest is used when we just need to get a specific job summary
type JobSummaryRequest struct {
	JobID string
	QueryOptions
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
	QueryOptions
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalDeleteRequest is used for deleting an evaluation.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers       []string
	Timeout          time.Duration
	SchedulerVersion uint16
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	WriteRequest
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}

// ServerMembersResponse has the list of servers in a cluster
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
	QueryMeta
}

// ServerMember holds information about a Nomad server agent in a cluster
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}

// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	QueryOptions
}

// VaultAccessorsRequest is used to operate on a set of Vault accessors
type VaultAccessorsRequest struct {
	Accessors []*VaultAccessor
}

// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
404 type VaultAccessor struct { 405 AllocID string 406 Task string 407 NodeID string 408 Accessor string 409 CreationTTL int 410 411 // Raft Indexes 412 CreateIndex uint64 413 } 414 415 // DeriveVaultTokenResponse returns the wrapped tokens for each requested task 416 type DeriveVaultTokenResponse struct { 417 // Tasks is a mapping between the task name and the wrapped token 418 Tasks map[string]string 419 420 // Error stores any error that occured. Errors are stored here so we can 421 // communicate whether it is retriable 422 Error *RecoverableError 423 424 QueryMeta 425 } 426 427 // GenericRequest is used to request where no 428 // specific information is needed. 429 type GenericRequest struct { 430 QueryOptions 431 } 432 433 // GenericResponse is used to respond to a request where no 434 // specific response information is needed. 435 type GenericResponse struct { 436 WriteMeta 437 } 438 439 // VersionResponse is used for the Status.Version reseponse 440 type VersionResponse struct { 441 Build string 442 Versions map[string]int 443 QueryMeta 444 } 445 446 // JobRegisterResponse is used to respond to a job registration 447 type JobRegisterResponse struct { 448 EvalID string 449 EvalCreateIndex uint64 450 JobModifyIndex uint64 451 QueryMeta 452 } 453 454 // JobDeregisterResponse is used to respond to a job deregistration 455 type JobDeregisterResponse struct { 456 EvalID string 457 EvalCreateIndex uint64 458 JobModifyIndex uint64 459 QueryMeta 460 } 461 462 // NodeUpdateResponse is used to respond to a node update 463 type NodeUpdateResponse struct { 464 HeartbeatTTL time.Duration 465 EvalIDs []string 466 EvalCreateIndex uint64 467 NodeModifyIndex uint64 468 469 // LeaderRPCAddr is the RPC address of the current Raft Leader. If 470 // empty, the current Nomad Server is in the minority of a partition. 471 LeaderRPCAddr string 472 473 // NumNodes is the number of Nomad nodes attached to this quorum of 474 // Nomad Servers at the time of the response. 
This value can 475 // fluctuate based on the health of the cluster between heartbeats. 476 NumNodes int32 477 478 // Servers is the full list of known Nomad servers in the local 479 // region. 480 Servers []*NodeServerInfo 481 482 QueryMeta 483 } 484 485 // NodeDrainUpdateResponse is used to respond to a node drain update 486 type NodeDrainUpdateResponse struct { 487 EvalIDs []string 488 EvalCreateIndex uint64 489 NodeModifyIndex uint64 490 QueryMeta 491 } 492 493 // NodeAllocsResponse is used to return allocs for a single node 494 type NodeAllocsResponse struct { 495 Allocs []*Allocation 496 QueryMeta 497 } 498 499 // NodeClientAllocsResponse is used to return allocs meta data for a single node 500 type NodeClientAllocsResponse struct { 501 Allocs map[string]uint64 502 QueryMeta 503 } 504 505 // SingleNodeResponse is used to return a single node 506 type SingleNodeResponse struct { 507 Node *Node 508 QueryMeta 509 } 510 511 // JobListResponse is used for a list request 512 type NodeListResponse struct { 513 Nodes []*NodeListStub 514 QueryMeta 515 } 516 517 // SingleJobResponse is used to return a single job 518 type SingleJobResponse struct { 519 Job *Job 520 QueryMeta 521 } 522 523 // JobSummaryResponse is used to return a single job summary 524 type JobSummaryResponse struct { 525 JobSummary *JobSummary 526 QueryMeta 527 } 528 529 // JobListResponse is used for a list request 530 type JobListResponse struct { 531 Jobs []*JobListStub 532 QueryMeta 533 } 534 535 // JobPlanResponse is used to respond to a job plan request 536 type JobPlanResponse struct { 537 // Annotations stores annotations explaining decisions the scheduler made. 538 Annotations *PlanAnnotations 539 540 // FailedTGAllocs is the placement failures per task group. 541 FailedTGAllocs map[string]*AllocMetric 542 543 // JobModifyIndex is the modification index of the job. The value can be 544 // used when running `nomad run` to ensure that the Job wasn’t modified 545 // since the last plan. 
If the job is being created, the value is zero. 546 JobModifyIndex uint64 547 548 // CreatedEvals is the set of evaluations created by the scheduler. The 549 // reasons for this can be rolling-updates or blocked evals. 550 CreatedEvals []*Evaluation 551 552 // Diff contains the diff of the job and annotations on whether the change 553 // causes an in-place update or create/destroy 554 Diff *JobDiff 555 556 // NextPeriodicLaunch is the time duration till the job would be launched if 557 // submitted. 558 NextPeriodicLaunch time.Time 559 560 WriteMeta 561 } 562 563 // SingleAllocResponse is used to return a single allocation 564 type SingleAllocResponse struct { 565 Alloc *Allocation 566 QueryMeta 567 } 568 569 // AllocsGetResponse is used to return a set of allocations 570 type AllocsGetResponse struct { 571 Allocs []*Allocation 572 QueryMeta 573 } 574 575 // JobAllocationsResponse is used to return the allocations for a job 576 type JobAllocationsResponse struct { 577 Allocations []*AllocListStub 578 QueryMeta 579 } 580 581 // JobEvaluationsResponse is used to return the evaluations for a job 582 type JobEvaluationsResponse struct { 583 Evaluations []*Evaluation 584 QueryMeta 585 } 586 587 // SingleEvalResponse is used to return a single evaluation 588 type SingleEvalResponse struct { 589 Eval *Evaluation 590 QueryMeta 591 } 592 593 // EvalDequeueResponse is used to return from a dequeue 594 type EvalDequeueResponse struct { 595 Eval *Evaluation 596 Token string 597 QueryMeta 598 } 599 600 // PlanResponse is used to return from a PlanRequest 601 type PlanResponse struct { 602 Result *PlanResult 603 WriteMeta 604 } 605 606 // AllocListResponse is used for a list request 607 type AllocListResponse struct { 608 Allocations []*AllocListStub 609 QueryMeta 610 } 611 612 // EvalListResponse is used for a list request 613 type EvalListResponse struct { 614 Evaluations []*Evaluation 615 QueryMeta 616 } 617 618 // EvalAllocationsResponse is used to return the allocations for 
an evaluation 619 type EvalAllocationsResponse struct { 620 Allocations []*AllocListStub 621 QueryMeta 622 } 623 624 // PeriodicForceResponse is used to respond to a periodic job force launch 625 type PeriodicForceResponse struct { 626 EvalID string 627 EvalCreateIndex uint64 628 WriteMeta 629 } 630 631 const ( 632 NodeStatusInit = "initializing" 633 NodeStatusReady = "ready" 634 NodeStatusDown = "down" 635 ) 636 637 // ShouldDrainNode checks if a given node status should trigger an 638 // evaluation. Some states don't require any further action. 639 func ShouldDrainNode(status string) bool { 640 switch status { 641 case NodeStatusInit, NodeStatusReady: 642 return false 643 case NodeStatusDown: 644 return true 645 default: 646 panic(fmt.Sprintf("unhandled node status %s", status)) 647 } 648 } 649 650 // ValidNodeStatus is used to check if a node status is valid 651 func ValidNodeStatus(status string) bool { 652 switch status { 653 case NodeStatusInit, NodeStatusReady, NodeStatusDown: 654 return true 655 default: 656 return false 657 } 658 } 659 660 // Node is a representation of a schedulable client node 661 type Node struct { 662 // ID is a unique identifier for the node. It can be constructed 663 // by doing a concatenation of the Name and Datacenter as a simple 664 // approach. Alternatively a UUID may be used. 665 ID string 666 667 // SecretID is an ID that is only known by the Node and the set of Servers. 668 // It is not accessible via the API and is used to authenticate nodes 669 // conducting priviledged activities. 670 SecretID string 671 672 // Datacenter for this node 673 Datacenter string 674 675 // Node name 676 Name string 677 678 // HTTPAddr is the address on which the Nomad client is listening for http 679 // requests 680 HTTPAddr string 681 682 // TLSEnabled indicates if the Agent has TLS enabled for the HTTP API 683 TLSEnabled bool 684 685 // Attributes is an arbitrary set of key/value 686 // data that can be used for constraints. 
Examples 687 // include "kernel.name=linux", "arch=386", "driver.docker=1", 688 // "docker.runtime=1.8.3" 689 Attributes map[string]string 690 691 // Resources is the available resources on the client. 692 // For example 'cpu=2' 'memory=2048' 693 Resources *Resources 694 695 // Reserved is the set of resources that are reserved, 696 // and should be subtracted from the total resources for 697 // the purposes of scheduling. This may be provide certain 698 // high-watermark tolerances or because of external schedulers 699 // consuming resources. 700 Reserved *Resources 701 702 // Links are used to 'link' this client to external 703 // systems. For example 'consul=foo.dc1' 'aws=i-83212' 704 // 'ami=ami-123' 705 Links map[string]string 706 707 // Meta is used to associate arbitrary metadata with this 708 // client. This is opaque to Nomad. 709 Meta map[string]string 710 711 // NodeClass is an opaque identifier used to group nodes 712 // together for the purpose of determining scheduling pressure. 713 NodeClass string 714 715 // ComputedClass is a unique id that identifies nodes with a common set of 716 // attributes and capabilities. 717 ComputedClass string 718 719 // Drain is controlled by the servers, and not the client. 720 // If true, no jobs will be scheduled to this node, and existing 721 // allocations will be drained. 
722 Drain bool 723 724 // Status of this node 725 Status string 726 727 // StatusDescription is meant to provide more human useful information 728 StatusDescription string 729 730 // StatusUpdatedAt is the time stamp at which the state of the node was 731 // updated 732 StatusUpdatedAt int64 733 734 // Raft Indexes 735 CreateIndex uint64 736 ModifyIndex uint64 737 } 738 739 // Ready returns if the node is ready for running allocations 740 func (n *Node) Ready() bool { 741 return n.Status == NodeStatusReady && !n.Drain 742 } 743 744 func (n *Node) Copy() *Node { 745 if n == nil { 746 return nil 747 } 748 nn := new(Node) 749 *nn = *n 750 nn.Attributes = CopyMapStringString(nn.Attributes) 751 nn.Resources = nn.Resources.Copy() 752 nn.Reserved = nn.Reserved.Copy() 753 nn.Links = CopyMapStringString(nn.Links) 754 nn.Meta = CopyMapStringString(nn.Meta) 755 return nn 756 } 757 758 // TerminalStatus returns if the current status is terminal and 759 // will no longer transition. 760 func (n *Node) TerminalStatus() bool { 761 switch n.Status { 762 case NodeStatusDown: 763 return true 764 default: 765 return false 766 } 767 } 768 769 // Stub returns a summarized version of the node 770 func (n *Node) Stub() *NodeListStub { 771 return &NodeListStub{ 772 ID: n.ID, 773 Datacenter: n.Datacenter, 774 Name: n.Name, 775 NodeClass: n.NodeClass, 776 Drain: n.Drain, 777 Status: n.Status, 778 StatusDescription: n.StatusDescription, 779 CreateIndex: n.CreateIndex, 780 ModifyIndex: n.ModifyIndex, 781 } 782 } 783 784 // NodeListStub is used to return a subset of job information 785 // for the job list 786 type NodeListStub struct { 787 ID string 788 Datacenter string 789 Name string 790 NodeClass string 791 Drain bool 792 Status string 793 StatusDescription string 794 CreateIndex uint64 795 ModifyIndex uint64 796 } 797 798 // Resources is used to define the resources available 799 // on a client 800 type Resources struct { 801 CPU int 802 MemoryMB int `mapstructure:"memory"` 803 DiskMB int 
`mapstructure:"disk"` 804 IOPS int 805 Networks []*NetworkResource 806 } 807 808 const ( 809 BytesInMegabyte = 1024 * 1024 810 ) 811 812 // DefaultResources returns the default resources for a task. 813 func DefaultResources() *Resources { 814 return &Resources{ 815 CPU: 100, 816 MemoryMB: 10, 817 IOPS: 0, 818 } 819 } 820 821 // DiskInBytes returns the amount of disk resources in bytes. 822 func (r *Resources) DiskInBytes() int64 { 823 return int64(r.DiskMB * BytesInMegabyte) 824 } 825 826 // Merge merges this resource with another resource. 827 func (r *Resources) Merge(other *Resources) { 828 if other.CPU != 0 { 829 r.CPU = other.CPU 830 } 831 if other.MemoryMB != 0 { 832 r.MemoryMB = other.MemoryMB 833 } 834 if other.DiskMB != 0 { 835 r.DiskMB = other.DiskMB 836 } 837 if other.IOPS != 0 { 838 r.IOPS = other.IOPS 839 } 840 if len(other.Networks) != 0 { 841 r.Networks = other.Networks 842 } 843 } 844 845 func (r *Resources) Canonicalize() { 846 // Ensure that an empty and nil slices are treated the same to avoid scheduling 847 // problems since we use reflect DeepEquals. 848 if len(r.Networks) == 0 { 849 r.Networks = nil 850 } 851 852 for _, n := range r.Networks { 853 n.Canonicalize() 854 } 855 } 856 857 // MeetsMinResources returns an error if the resources specified are less than 858 // the minimum allowed. 
859 func (r *Resources) MeetsMinResources() error { 860 var mErr multierror.Error 861 if r.CPU < 20 { 862 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is 20; got %d", r.CPU)) 863 } 864 if r.MemoryMB < 10 { 865 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is 10; got %d", r.MemoryMB)) 866 } 867 if r.IOPS < 0 { 868 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is 0; got %d", r.IOPS)) 869 } 870 for i, n := range r.Networks { 871 if err := n.MeetsMinResources(); err != nil { 872 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 873 } 874 } 875 876 return mErr.ErrorOrNil() 877 } 878 879 // Copy returns a deep copy of the resources 880 func (r *Resources) Copy() *Resources { 881 if r == nil { 882 return nil 883 } 884 newR := new(Resources) 885 *newR = *r 886 if r.Networks != nil { 887 n := len(r.Networks) 888 newR.Networks = make([]*NetworkResource, n) 889 for i := 0; i < n; i++ { 890 newR.Networks[i] = r.Networks[i].Copy() 891 } 892 } 893 return newR 894 } 895 896 // NetIndex finds the matching net index using device name 897 func (r *Resources) NetIndex(n *NetworkResource) int { 898 for idx, net := range r.Networks { 899 if net.Device == n.Device { 900 return idx 901 } 902 } 903 return -1 904 } 905 906 // Superset checks if one set of resources is a superset 907 // of another. This ignores network resources, and the NetworkIndex 908 // should be used for that. 909 func (r *Resources) Superset(other *Resources) (bool, string) { 910 if r.CPU < other.CPU { 911 return false, "cpu exhausted" 912 } 913 if r.MemoryMB < other.MemoryMB { 914 return false, "memory exhausted" 915 } 916 if r.DiskMB < other.DiskMB { 917 return false, "disk exhausted" 918 } 919 if r.IOPS < other.IOPS { 920 return false, "iops exhausted" 921 } 922 return true, "" 923 } 924 925 // Add adds the resources of the delta to this, potentially 926 // returning an error if not possible. 
927 func (r *Resources) Add(delta *Resources) error { 928 if delta == nil { 929 return nil 930 } 931 r.CPU += delta.CPU 932 r.MemoryMB += delta.MemoryMB 933 r.DiskMB += delta.DiskMB 934 r.IOPS += delta.IOPS 935 936 for _, n := range delta.Networks { 937 // Find the matching interface by IP or CIDR 938 idx := r.NetIndex(n) 939 if idx == -1 { 940 r.Networks = append(r.Networks, n.Copy()) 941 } else { 942 r.Networks[idx].Add(n) 943 } 944 } 945 return nil 946 } 947 948 func (r *Resources) GoString() string { 949 return fmt.Sprintf("*%#v", *r) 950 } 951 952 type Port struct { 953 Label string 954 Value int `mapstructure:"static"` 955 } 956 957 // NetworkResource is used to represent available network 958 // resources 959 type NetworkResource struct { 960 Device string // Name of the device 961 CIDR string // CIDR block of addresses 962 IP string // IP address 963 MBits int // Throughput 964 ReservedPorts []Port // Reserved ports 965 DynamicPorts []Port // Dynamically assigned ports 966 } 967 968 func (n *NetworkResource) Canonicalize() { 969 // Ensure that an empty and nil slices are treated the same to avoid scheduling 970 // problems since we use reflect DeepEquals. 971 if len(n.ReservedPorts) == 0 { 972 n.ReservedPorts = nil 973 } 974 if len(n.DynamicPorts) == 0 { 975 n.DynamicPorts = nil 976 } 977 } 978 979 // MeetsMinResources returns an error if the resources specified are less than 980 // the minimum allowed. 
981 func (n *NetworkResource) MeetsMinResources() error { 982 var mErr multierror.Error 983 if n.MBits < 1 { 984 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits)) 985 } 986 return mErr.ErrorOrNil() 987 } 988 989 // Copy returns a deep copy of the network resource 990 func (n *NetworkResource) Copy() *NetworkResource { 991 if n == nil { 992 return nil 993 } 994 newR := new(NetworkResource) 995 *newR = *n 996 if n.ReservedPorts != nil { 997 newR.ReservedPorts = make([]Port, len(n.ReservedPorts)) 998 copy(newR.ReservedPorts, n.ReservedPorts) 999 } 1000 if n.DynamicPorts != nil { 1001 newR.DynamicPorts = make([]Port, len(n.DynamicPorts)) 1002 copy(newR.DynamicPorts, n.DynamicPorts) 1003 } 1004 return newR 1005 } 1006 1007 // Add adds the resources of the delta to this, potentially 1008 // returning an error if not possible. 1009 func (n *NetworkResource) Add(delta *NetworkResource) { 1010 if len(delta.ReservedPorts) > 0 { 1011 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...) 1012 } 1013 n.MBits += delta.MBits 1014 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...) 1015 } 1016 1017 func (n *NetworkResource) GoString() string { 1018 return fmt.Sprintf("*%#v", *n) 1019 } 1020 1021 func (n *NetworkResource) MapLabelToValues(port_map map[string]int) map[string]int { 1022 labelValues := make(map[string]int) 1023 ports := append(n.ReservedPorts, n.DynamicPorts...) 1024 for _, port := range ports { 1025 if mapping, ok := port_map[port.Label]; ok { 1026 labelValues[port.Label] = mapping 1027 } else { 1028 labelValues[port.Label] = port.Value 1029 } 1030 } 1031 return labelValues 1032 } 1033 1034 const ( 1035 // JobTypeNomad is reserved for internal system tasks and is 1036 // always handled by the CoreScheduler. 
1037 JobTypeCore = "_core" 1038 JobTypeService = "service" 1039 JobTypeBatch = "batch" 1040 JobTypeSystem = "system" 1041 ) 1042 1043 const ( 1044 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 1045 JobStatusRunning = "running" // Running means the job has non-terminal allocations 1046 JobStatusDead = "dead" // Dead means all evaluation's and allocations are terminal 1047 ) 1048 1049 const ( 1050 // JobMinPriority is the minimum allowed priority 1051 JobMinPriority = 1 1052 1053 // JobDefaultPriority is the default priority if not 1054 // not specified. 1055 JobDefaultPriority = 50 1056 1057 // JobMaxPriority is the maximum allowed priority 1058 JobMaxPriority = 100 1059 1060 // Ensure CoreJobPriority is higher than any user 1061 // specified job so that it gets priority. This is important 1062 // for the system to remain healthy. 1063 CoreJobPriority = JobMaxPriority * 2 1064 ) 1065 1066 // JobSummary summarizes the state of the allocations of a job 1067 type JobSummary struct { 1068 JobID string 1069 Summary map[string]TaskGroupSummary 1070 1071 // Raft Indexes 1072 CreateIndex uint64 1073 ModifyIndex uint64 1074 } 1075 1076 // Copy returns a new copy of JobSummary 1077 func (js *JobSummary) Copy() *JobSummary { 1078 newJobSummary := new(JobSummary) 1079 *newJobSummary = *js 1080 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary)) 1081 for k, v := range js.Summary { 1082 newTGSummary[k] = v 1083 } 1084 newJobSummary.Summary = newTGSummary 1085 return newJobSummary 1086 } 1087 1088 // TaskGroup summarizes the state of all the allocations of a particular 1089 // TaskGroup 1090 type TaskGroupSummary struct { 1091 Queued int 1092 Complete int 1093 Failed int 1094 Running int 1095 Starting int 1096 Lost int 1097 } 1098 1099 // Job is the scope of a scheduling request to Nomad. It is the largest 1100 // scoped object, and is a named collection of task groups. Each task group 1101 // is further composed of tasks. 
A task group (TG) is the unit of scheduling 1102 // however. 1103 type Job struct { 1104 // Region is the Nomad region that handles scheduling this job 1105 Region string 1106 1107 // ID is a unique identifier for the job per region. It can be 1108 // specified hierarchically like LineOfBiz/OrgName/Team/Project 1109 ID string 1110 1111 // ParentID is the unique identifier of the job that spawned this job. 1112 ParentID string 1113 1114 // Name is the logical name of the job used to refer to it. This is unique 1115 // per region, but not unique globally. 1116 Name string 1117 1118 // Type is used to control various behaviors about the job. Most jobs 1119 // are service jobs, meaning they are expected to be long lived. 1120 // Some jobs are batch oriented meaning they run and then terminate. 1121 // This can be extended in the future to support custom schedulers. 1122 Type string 1123 1124 // Priority is used to control scheduling importance and if this job 1125 // can preempt other jobs. 1126 Priority int 1127 1128 // AllAtOnce is used to control if incremental scheduling of task groups 1129 // is allowed or if we must do a gang scheduling of the entire job. This 1130 // can slow down larger jobs if resources are not available. 1131 AllAtOnce bool `mapstructure:"all_at_once"` 1132 1133 // Datacenters contains all the datacenters this job is allowed to span 1134 Datacenters []string 1135 1136 // Constraints can be specified at a job level and apply to 1137 // all the task groups and tasks. 1138 Constraints []*Constraint 1139 1140 // TaskGroups are the collections of task groups that this job needs 1141 // to run. Each task group is an atomic unit of scheduling and placement. 1142 TaskGroups []*TaskGroup 1143 1144 // Update is used to control the update strategy 1145 Update UpdateStrategy 1146 1147 // Periodic is used to define the interval the job is run at. 1148 Periodic *PeriodicConfig 1149 1150 // Meta is used to associate arbitrary metadata with this 1151 // job. 
This is opaque to Nomad. 1152 Meta map[string]string 1153 1154 // VaultToken is the Vault token that proves the submitter of the job has 1155 // access to the specified Vault policies. This field is only used to 1156 // transfer the token and is not stored after Job submission. 1157 VaultToken string `mapstructure:"vault_token"` 1158 1159 // Job status 1160 Status string 1161 1162 // StatusDescription is meant to provide more human useful information 1163 StatusDescription string 1164 1165 // Raft Indexes 1166 CreateIndex uint64 1167 ModifyIndex uint64 1168 JobModifyIndex uint64 1169 } 1170 1171 // Canonicalize is used to canonicalize fields in the Job. This should be called 1172 // when registering a Job. 1173 func (j *Job) Canonicalize() { 1174 // Ensure that an empty and nil map are treated the same to avoid scheduling 1175 // problems since we use reflect DeepEquals. 1176 if len(j.Meta) == 0 { 1177 j.Meta = nil 1178 } 1179 1180 for _, tg := range j.TaskGroups { 1181 tg.Canonicalize(j) 1182 } 1183 } 1184 1185 // Copy returns a deep copy of the Job. It is expected that callers use recover. 1186 // This job can panic if the deep copy failed as it uses reflection. 
1187 func (j *Job) Copy() *Job { 1188 if j == nil { 1189 return nil 1190 } 1191 nj := new(Job) 1192 *nj = *j 1193 nj.Datacenters = CopySliceString(nj.Datacenters) 1194 nj.Constraints = CopySliceConstraints(nj.Constraints) 1195 1196 if j.TaskGroups != nil { 1197 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 1198 for i, tg := range nj.TaskGroups { 1199 tgs[i] = tg.Copy() 1200 } 1201 nj.TaskGroups = tgs 1202 } 1203 1204 nj.Periodic = nj.Periodic.Copy() 1205 nj.Meta = CopyMapStringString(nj.Meta) 1206 return nj 1207 } 1208 1209 // Validate is used to sanity check a job input 1210 func (j *Job) Validate() error { 1211 var mErr multierror.Error 1212 if j.Region == "" { 1213 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 1214 } 1215 if j.ID == "" { 1216 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 1217 } else if strings.Contains(j.ID, " ") { 1218 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 1219 } 1220 if j.Name == "" { 1221 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 1222 } 1223 if j.Type == "" { 1224 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 1225 } 1226 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 1227 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 1228 } 1229 if len(j.Datacenters) == 0 { 1230 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 1231 } 1232 if len(j.TaskGroups) == 0 { 1233 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 1234 } 1235 for idx, constr := range j.Constraints { 1236 if err := constr.Validate(); err != nil { 1237 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1238 mErr.Errors = append(mErr.Errors, outer) 1239 } 1240 } 1241 1242 // Check for duplicate task groups 1243 taskGroups := make(map[string]int) 1244 for idx, tg := range j.TaskGroups { 1245 if tg.Name == "" { 1246 
mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 1247 } else if existing, ok := taskGroups[tg.Name]; ok { 1248 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 1249 } else { 1250 taskGroups[tg.Name] = idx 1251 } 1252 1253 if j.Type == "system" && tg.Count > 1 { 1254 mErr.Errors = append(mErr.Errors, 1255 fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler", 1256 tg.Name, tg.Count)) 1257 } 1258 } 1259 1260 // Validate the task group 1261 for _, tg := range j.TaskGroups { 1262 if err := tg.Validate(); err != nil { 1263 outer := fmt.Errorf("Task group %s validation failed: %s", tg.Name, err) 1264 mErr.Errors = append(mErr.Errors, outer) 1265 } 1266 } 1267 1268 // Validate periodic is only used with batch jobs. 1269 if j.IsPeriodic() && j.Periodic.Enabled { 1270 if j.Type != JobTypeBatch { 1271 mErr.Errors = append(mErr.Errors, 1272 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 1273 } 1274 1275 if err := j.Periodic.Validate(); err != nil { 1276 mErr.Errors = append(mErr.Errors, err) 1277 } 1278 } 1279 1280 return mErr.ErrorOrNil() 1281 } 1282 1283 // LookupTaskGroup finds a task group by name 1284 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 1285 for _, tg := range j.TaskGroups { 1286 if tg.Name == name { 1287 return tg 1288 } 1289 } 1290 return nil 1291 } 1292 1293 // Stub is used to return a summary of the job 1294 func (j *Job) Stub(summary *JobSummary) *JobListStub { 1295 return &JobListStub{ 1296 ID: j.ID, 1297 ParentID: j.ParentID, 1298 Name: j.Name, 1299 Type: j.Type, 1300 Priority: j.Priority, 1301 Status: j.Status, 1302 StatusDescription: j.StatusDescription, 1303 CreateIndex: j.CreateIndex, 1304 ModifyIndex: j.ModifyIndex, 1305 JobModifyIndex: j.JobModifyIndex, 1306 JobSummary: summary, 1307 } 1308 } 1309 1310 // IsPeriodic returns whether a job is periodic. 
1311 func (j *Job) IsPeriodic() bool { 1312 return j.Periodic != nil 1313 } 1314 1315 // VaultPolicies returns the set of Vault policies per task group, per task 1316 func (j *Job) VaultPolicies() map[string]map[string]*Vault { 1317 policies := make(map[string]map[string]*Vault, len(j.TaskGroups)) 1318 1319 for _, tg := range j.TaskGroups { 1320 tgPolicies := make(map[string]*Vault, len(tg.Tasks)) 1321 1322 for _, task := range tg.Tasks { 1323 if task.Vault == nil { 1324 continue 1325 } 1326 1327 tgPolicies[task.Name] = task.Vault 1328 } 1329 1330 if len(tgPolicies) != 0 { 1331 policies[tg.Name] = tgPolicies 1332 } 1333 } 1334 1335 return policies 1336 } 1337 1338 // RequiredSignals returns a mapping of task groups to tasks to their required 1339 // set of signals 1340 func (j *Job) RequiredSignals() map[string]map[string][]string { 1341 signals := make(map[string]map[string][]string) 1342 1343 for _, tg := range j.TaskGroups { 1344 for _, task := range tg.Tasks { 1345 // Use this local one as a set 1346 taskSignals := make(map[string]struct{}) 1347 1348 // Check if the Vault change mode uses signals 1349 if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal { 1350 taskSignals[task.Vault.ChangeSignal] = struct{}{} 1351 } 1352 1353 // Check if any template change mode uses signals 1354 for _, t := range task.Templates { 1355 if t.ChangeMode != TemplateChangeModeSignal { 1356 continue 1357 } 1358 1359 taskSignals[t.ChangeSignal] = struct{}{} 1360 } 1361 1362 // Flatten and sort the signals 1363 l := len(taskSignals) 1364 if l == 0 { 1365 continue 1366 } 1367 1368 flat := make([]string, 0, l) 1369 for sig := range taskSignals { 1370 flat = append(flat, sig) 1371 } 1372 1373 sort.Strings(flat) 1374 tgSignals, ok := signals[tg.Name] 1375 if !ok { 1376 tgSignals = make(map[string][]string) 1377 signals[tg.Name] = tgSignals 1378 } 1379 tgSignals[task.Name] = flat 1380 } 1381 1382 } 1383 1384 return signals 1385 } 1386 1387 // JobListStub is used to return 
a subset of job information 1388 // for the job list 1389 type JobListStub struct { 1390 ID string 1391 ParentID string 1392 Name string 1393 Type string 1394 Priority int 1395 Status string 1396 StatusDescription string 1397 JobSummary *JobSummary 1398 CreateIndex uint64 1399 ModifyIndex uint64 1400 JobModifyIndex uint64 1401 } 1402 1403 // UpdateStrategy is used to modify how updates are done 1404 type UpdateStrategy struct { 1405 // Stagger is the amount of time between the updates 1406 Stagger time.Duration 1407 1408 // MaxParallel is how many updates can be done in parallel 1409 MaxParallel int `mapstructure:"max_parallel"` 1410 } 1411 1412 // Rolling returns if a rolling strategy should be used 1413 func (u *UpdateStrategy) Rolling() bool { 1414 return u.Stagger > 0 && u.MaxParallel > 0 1415 } 1416 1417 const ( 1418 // PeriodicSpecCron is used for a cron spec. 1419 PeriodicSpecCron = "cron" 1420 1421 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 1422 // separated list of unix timestamps at which to launch. 1423 PeriodicSpecTest = "_internal_test" 1424 ) 1425 1426 // Periodic defines the interval a job should be run at. 1427 type PeriodicConfig struct { 1428 // Enabled determines if the job should be run periodically. 1429 Enabled bool 1430 1431 // Spec specifies the interval the job should be run as. It is parsed based 1432 // on the SpecType. 1433 Spec string 1434 1435 // SpecType defines the format of the spec. 1436 SpecType string 1437 1438 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 
1439 ProhibitOverlap bool `mapstructure:"prohibit_overlap"` 1440 } 1441 1442 func (p *PeriodicConfig) Copy() *PeriodicConfig { 1443 if p == nil { 1444 return nil 1445 } 1446 np := new(PeriodicConfig) 1447 *np = *p 1448 return np 1449 } 1450 1451 func (p *PeriodicConfig) Validate() error { 1452 if !p.Enabled { 1453 return nil 1454 } 1455 1456 if p.Spec == "" { 1457 return fmt.Errorf("Must specify a spec") 1458 } 1459 1460 switch p.SpecType { 1461 case PeriodicSpecCron: 1462 // Validate the cron spec 1463 if _, err := cronexpr.Parse(p.Spec); err != nil { 1464 return fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err) 1465 } 1466 case PeriodicSpecTest: 1467 // No-op 1468 default: 1469 return fmt.Errorf("Unknown periodic specification type %q", p.SpecType) 1470 } 1471 1472 return nil 1473 } 1474 1475 // Next returns the closest time instant matching the spec that is after the 1476 // passed time. If no matching instance exists, the zero value of time.Time is 1477 // returned. The `time.Location` of the returned value matches that of the 1478 // passed time. 1479 func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { 1480 switch p.SpecType { 1481 case PeriodicSpecCron: 1482 if e, err := cronexpr.Parse(p.Spec); err == nil { 1483 return e.Next(fromTime) 1484 } 1485 case PeriodicSpecTest: 1486 split := strings.Split(p.Spec, ",") 1487 if len(split) == 1 && split[0] == "" { 1488 return time.Time{} 1489 } 1490 1491 // Parse the times 1492 times := make([]time.Time, len(split)) 1493 for i, s := range split { 1494 unix, err := strconv.Atoi(s) 1495 if err != nil { 1496 return time.Time{} 1497 } 1498 1499 times[i] = time.Unix(int64(unix), 0) 1500 } 1501 1502 // Find the next match 1503 for _, next := range times { 1504 if fromTime.Before(next) { 1505 return next 1506 } 1507 } 1508 } 1509 1510 return time.Time{} 1511 } 1512 1513 const ( 1514 // PeriodicLaunchSuffix is the string appended to the periodic jobs ID 1515 // when launching derived instances of it. 
1516 PeriodicLaunchSuffix = "/periodic-" 1517 ) 1518 1519 // PeriodicLaunch tracks the last launch time of a periodic job. 1520 type PeriodicLaunch struct { 1521 ID string // ID of the periodic job. 1522 Launch time.Time // The last launch time. 1523 1524 // Raft Indexes 1525 CreateIndex uint64 1526 ModifyIndex uint64 1527 } 1528 1529 var ( 1530 defaultServiceJobRestartPolicy = RestartPolicy{ 1531 Delay: 15 * time.Second, 1532 Attempts: 2, 1533 Interval: 1 * time.Minute, 1534 Mode: RestartPolicyModeDelay, 1535 } 1536 defaultBatchJobRestartPolicy = RestartPolicy{ 1537 Delay: 15 * time.Second, 1538 Attempts: 15, 1539 Interval: 7 * 24 * time.Hour, 1540 Mode: RestartPolicyModeDelay, 1541 } 1542 ) 1543 1544 const ( 1545 // RestartPolicyModeDelay causes an artificial delay till the next interval is 1546 // reached when the specified attempts have been reached in the interval. 1547 RestartPolicyModeDelay = "delay" 1548 1549 // RestartPolicyModeFail causes a job to fail if the specified number of 1550 // attempts are reached within an interval. 1551 RestartPolicyModeFail = "fail" 1552 ) 1553 1554 // RestartPolicy configures how Tasks are restarted when they crash or fail. 1555 type RestartPolicy struct { 1556 // Attempts is the number of restart that will occur in an interval. 1557 Attempts int 1558 1559 // Interval is a duration in which we can limit the number of restarts 1560 // within. 1561 Interval time.Duration 1562 1563 // Delay is the time between a failure and a restart. 1564 Delay time.Duration 1565 1566 // Mode controls what happens when the task restarts more than attempt times 1567 // in an interval. 
1568 Mode string 1569 } 1570 1571 func (r *RestartPolicy) Copy() *RestartPolicy { 1572 if r == nil { 1573 return nil 1574 } 1575 nrp := new(RestartPolicy) 1576 *nrp = *r 1577 return nrp 1578 } 1579 1580 func (r *RestartPolicy) Validate() error { 1581 switch r.Mode { 1582 case RestartPolicyModeDelay, RestartPolicyModeFail: 1583 default: 1584 return fmt.Errorf("Unsupported restart mode: %q", r.Mode) 1585 } 1586 1587 // Check for ambiguous/confusing settings 1588 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 1589 return fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts) 1590 } 1591 1592 if r.Interval == 0 { 1593 return nil 1594 } 1595 if time.Duration(r.Attempts)*r.Delay > r.Interval { 1596 return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay) 1597 } 1598 return nil 1599 } 1600 1601 func NewRestartPolicy(jobType string) *RestartPolicy { 1602 switch jobType { 1603 case JobTypeService, JobTypeSystem: 1604 rp := defaultServiceJobRestartPolicy 1605 return &rp 1606 case JobTypeBatch: 1607 rp := defaultBatchJobRestartPolicy 1608 return &rp 1609 } 1610 return nil 1611 } 1612 1613 // TaskGroup is an atomic unit of placement. Each task group belongs to 1614 // a job and may contain any number of tasks. A task group support running 1615 // in many replicas using the same configuration.. 1616 type TaskGroup struct { 1617 // Name of the task group 1618 Name string 1619 1620 // Count is the number of replicas of this task group that should 1621 // be scheduled. 1622 Count int 1623 1624 // Constraints can be specified at a task group level and apply to 1625 // all the tasks contained. 
1626 Constraints []*Constraint 1627 1628 //RestartPolicy of a TaskGroup 1629 RestartPolicy *RestartPolicy 1630 1631 // Tasks are the collection of tasks that this task group needs to run 1632 Tasks []*Task 1633 1634 // EphemeralDisk is the disk resources that the task group requests 1635 EphemeralDisk *EphemeralDisk 1636 1637 // Meta is used to associate arbitrary metadata with this 1638 // task group. This is opaque to Nomad. 1639 Meta map[string]string 1640 } 1641 1642 func (tg *TaskGroup) Copy() *TaskGroup { 1643 if tg == nil { 1644 return nil 1645 } 1646 ntg := new(TaskGroup) 1647 *ntg = *tg 1648 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 1649 1650 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 1651 1652 if tg.Tasks != nil { 1653 tasks := make([]*Task, len(ntg.Tasks)) 1654 for i, t := range ntg.Tasks { 1655 tasks[i] = t.Copy() 1656 } 1657 ntg.Tasks = tasks 1658 } 1659 1660 ntg.Meta = CopyMapStringString(ntg.Meta) 1661 1662 if tg.EphemeralDisk != nil { 1663 ntg.EphemeralDisk = tg.EphemeralDisk.Copy() 1664 } 1665 return ntg 1666 } 1667 1668 // Canonicalize is used to canonicalize fields in the TaskGroup. 1669 func (tg *TaskGroup) Canonicalize(job *Job) { 1670 // Ensure that an empty and nil map are treated the same to avoid scheduling 1671 // problems since we use reflect DeepEquals. 1672 if len(tg.Meta) == 0 { 1673 tg.Meta = nil 1674 } 1675 1676 // Set the default restart policy. 1677 if tg.RestartPolicy == nil { 1678 tg.RestartPolicy = NewRestartPolicy(job.Type) 1679 } 1680 1681 // Set a default ephemeral disk object if the user has not requested for one 1682 if tg.EphemeralDisk == nil { 1683 tg.EphemeralDisk = DefaultEphemeralDisk() 1684 } 1685 1686 for _, task := range tg.Tasks { 1687 task.Canonicalize(job, tg) 1688 } 1689 1690 // Add up the disk resources to EphemeralDisk. 
This is done so that users 1691 // are not required to move their disk attribute from resources to 1692 // EphemeralDisk section of the job spec in Nomad 0.5 1693 // COMPAT 0.4.1 -> 0.5 1694 // Remove in 0.6 1695 var diskMB int 1696 for _, task := range tg.Tasks { 1697 diskMB += task.Resources.DiskMB 1698 } 1699 if diskMB > 0 { 1700 tg.EphemeralDisk.SizeMB = diskMB 1701 } 1702 } 1703 1704 // Validate is used to sanity check a task group 1705 func (tg *TaskGroup) Validate() error { 1706 var mErr multierror.Error 1707 if tg.Name == "" { 1708 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 1709 } 1710 if tg.Count < 0 { 1711 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative")) 1712 } 1713 if len(tg.Tasks) == 0 { 1714 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 1715 } 1716 for idx, constr := range tg.Constraints { 1717 if err := constr.Validate(); err != nil { 1718 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1719 mErr.Errors = append(mErr.Errors, outer) 1720 } 1721 } 1722 1723 if tg.RestartPolicy != nil { 1724 if err := tg.RestartPolicy.Validate(); err != nil { 1725 mErr.Errors = append(mErr.Errors, err) 1726 } 1727 } else { 1728 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 1729 } 1730 1731 if tg.EphemeralDisk != nil { 1732 if err := tg.EphemeralDisk.Validate(); err != nil { 1733 mErr.Errors = append(mErr.Errors, err) 1734 } 1735 } else { 1736 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name)) 1737 } 1738 1739 // Check for duplicate tasks 1740 tasks := make(map[string]int) 1741 for idx, task := range tg.Tasks { 1742 if task.Name == "" { 1743 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 1744 } else if existing, ok := tasks[task.Name]; ok { 1745 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' 
from task %d", idx+1, task.Name, existing+1)) 1746 } else { 1747 tasks[task.Name] = idx 1748 } 1749 } 1750 1751 // Validate the tasks 1752 for _, task := range tg.Tasks { 1753 if err := task.Validate(tg.EphemeralDisk); err != nil { 1754 outer := fmt.Errorf("Task %s validation failed: %s", task.Name, err) 1755 mErr.Errors = append(mErr.Errors, outer) 1756 } 1757 } 1758 return mErr.ErrorOrNil() 1759 } 1760 1761 // LookupTask finds a task by name 1762 func (tg *TaskGroup) LookupTask(name string) *Task { 1763 for _, t := range tg.Tasks { 1764 if t.Name == name { 1765 return t 1766 } 1767 } 1768 return nil 1769 } 1770 1771 func (tg *TaskGroup) GoString() string { 1772 return fmt.Sprintf("*%#v", *tg) 1773 } 1774 1775 const ( 1776 // TODO add Consul TTL check 1777 ServiceCheckHTTP = "http" 1778 ServiceCheckTCP = "tcp" 1779 ServiceCheckScript = "script" 1780 1781 // minCheckInterval is the minimum check interval permitted. Consul 1782 // currently has its MinInterval set to 1s. Mirror that here for 1783 // consistency. 1784 minCheckInterval = 1 * time.Second 1785 1786 // minCheckTimeout is the minimum check timeout permitted for Consul 1787 // script TTL checks. 
1788 minCheckTimeout = 1 * time.Second 1789 ) 1790 1791 // The ServiceCheck data model represents the consul health check that 1792 // Nomad registers for a Task 1793 type ServiceCheck struct { 1794 Name string // Name of the check, defaults to id 1795 Type string // Type of the check - tcp, http, docker and script 1796 Command string // Command is the command to run for script checks 1797 Args []string // Args is a list of argumes for script checks 1798 Path string // path of the health check url for http type check 1799 Protocol string // Protocol to use if check is http, defaults to http 1800 PortLabel string `mapstructure:"port"` // The port to use for tcp/http checks 1801 Interval time.Duration // Interval of the check 1802 Timeout time.Duration // Timeout of the response from the check before consul fails the check 1803 InitialStatus string `mapstructure:"initial_status"` // Initial status of the check 1804 } 1805 1806 func (sc *ServiceCheck) Copy() *ServiceCheck { 1807 if sc == nil { 1808 return nil 1809 } 1810 nsc := new(ServiceCheck) 1811 *nsc = *sc 1812 return nsc 1813 } 1814 1815 func (sc *ServiceCheck) Canonicalize(serviceName string) { 1816 // Ensure empty slices are treated as null to avoid scheduling issues when 1817 // using DeepEquals. 1818 if len(sc.Args) == 0 { 1819 sc.Args = nil 1820 } 1821 1822 if sc.Name == "" { 1823 sc.Name = fmt.Sprintf("service: %q check", serviceName) 1824 } 1825 } 1826 1827 // validate a Service's ServiceCheck 1828 func (sc *ServiceCheck) validate() error { 1829 switch strings.ToLower(sc.Type) { 1830 case ServiceCheckTCP: 1831 if sc.Timeout == 0 { 1832 return fmt.Errorf("missing required value timeout. 
Timeout cannot be less than %v", minCheckInterval) 1833 } else if sc.Timeout < minCheckTimeout { 1834 return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval) 1835 } 1836 case ServiceCheckHTTP: 1837 if sc.Path == "" { 1838 return fmt.Errorf("http type must have a valid http path") 1839 } 1840 1841 if sc.Timeout == 0 { 1842 return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval) 1843 } else if sc.Timeout < minCheckTimeout { 1844 return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval) 1845 } 1846 case ServiceCheckScript: 1847 if sc.Command == "" { 1848 return fmt.Errorf("script type must have a valid script path") 1849 } 1850 1851 // TODO: enforce timeout on the Client side and reenable 1852 // validation. 1853 default: 1854 return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type) 1855 } 1856 1857 if sc.Interval == 0 { 1858 return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval) 1859 } else if sc.Interval < minCheckInterval { 1860 return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval) 1861 } 1862 1863 switch sc.InitialStatus { 1864 case "": 1865 // case api.HealthUnknown: TODO: Add when Consul releases 0.7.1 1866 case api.HealthPassing: 1867 case api.HealthWarning: 1868 case api.HealthCritical: 1869 default: 1870 return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical) 1871 1872 } 1873 1874 return nil 1875 } 1876 1877 // RequiresPort returns whether the service check requires the task has a port. 
1878 func (sc *ServiceCheck) RequiresPort() bool { 1879 switch sc.Type { 1880 case ServiceCheckHTTP, ServiceCheckTCP: 1881 return true 1882 default: 1883 return false 1884 } 1885 } 1886 1887 func (sc *ServiceCheck) Hash(serviceID string) string { 1888 h := sha1.New() 1889 io.WriteString(h, serviceID) 1890 io.WriteString(h, sc.Name) 1891 io.WriteString(h, sc.Type) 1892 io.WriteString(h, sc.Command) 1893 io.WriteString(h, strings.Join(sc.Args, "")) 1894 io.WriteString(h, sc.Path) 1895 io.WriteString(h, sc.Protocol) 1896 io.WriteString(h, sc.PortLabel) 1897 io.WriteString(h, sc.Interval.String()) 1898 io.WriteString(h, sc.Timeout.String()) 1899 return fmt.Sprintf("%x", h.Sum(nil)) 1900 } 1901 1902 // Service represents a Consul service definition in Nomad 1903 type Service struct { 1904 // Name of the service registered with Consul. Consul defaults the 1905 // Name to ServiceID if not specified. The Name if specified is used 1906 // as one of the seed values when generating a Consul ServiceID. 1907 Name string 1908 1909 // PortLabel is either the numeric port number or the `host:port`. 1910 // To specify the port number using the host's Consul Advertise 1911 // address, specify an empty host in the PortLabel (e.g. `:port`). 1912 PortLabel string `mapstructure:"port"` 1913 Tags []string // List of tags for the service 1914 Checks []*ServiceCheck // List of checks associated with the service 1915 } 1916 1917 func (s *Service) Copy() *Service { 1918 if s == nil { 1919 return nil 1920 } 1921 ns := new(Service) 1922 *ns = *s 1923 ns.Tags = CopySliceString(ns.Tags) 1924 1925 if s.Checks != nil { 1926 checks := make([]*ServiceCheck, len(ns.Checks)) 1927 for i, c := range ns.Checks { 1928 checks[i] = c.Copy() 1929 } 1930 ns.Checks = checks 1931 } 1932 1933 return ns 1934 } 1935 1936 // Canonicalize interpolates values of Job, Task Group and Task in the Service 1937 // Name. This also generates check names, service id and check ids. 
1938 func (s *Service) Canonicalize(job string, taskGroup string, task string) { 1939 // Ensure empty lists are treated as null to avoid scheduler issues when 1940 // using DeepEquals 1941 if len(s.Tags) == 0 { 1942 s.Tags = nil 1943 } 1944 if len(s.Checks) == 0 { 1945 s.Checks = nil 1946 } 1947 1948 s.Name = args.ReplaceEnv(s.Name, map[string]string{ 1949 "JOB": job, 1950 "TASKGROUP": taskGroup, 1951 "TASK": task, 1952 "BASE": fmt.Sprintf("%s-%s-%s", job, taskGroup, task), 1953 }, 1954 ) 1955 1956 for _, check := range s.Checks { 1957 check.Canonicalize(s.Name) 1958 } 1959 } 1960 1961 // Validate checks if the Check definition is valid 1962 func (s *Service) Validate() error { 1963 var mErr multierror.Error 1964 1965 // Ensure the service name is valid per the below RFCs but make an exception 1966 // for our interpolation syntax 1967 // RFC-952 §1 (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1 1968 // (https://tools.ietf.org/html/rfc1123), and RFC-2782 1969 // (https://tools.ietf.org/html/rfc2782). 
1970 re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9\$][a-zA-Z0-9\-\$\{\}\_\.]*[a-z0-9\}])$`) 1971 if !re.MatchString(s.Name) { 1972 mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q", s.Name)) 1973 } 1974 1975 for _, c := range s.Checks { 1976 if s.PortLabel == "" && c.RequiresPort() { 1977 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but the service %+q has no port", c.Name, s.Name)) 1978 continue 1979 } 1980 1981 if err := c.validate(); err != nil { 1982 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err)) 1983 } 1984 } 1985 return mErr.ErrorOrNil() 1986 } 1987 1988 // ValidateName checks if the services Name is valid and should be called after 1989 // the name has been interpolated 1990 func (s *Service) ValidateName(name string) error { 1991 // Ensure the service name is valid per RFC-952 §1 1992 // (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1 1993 // (https://tools.ietf.org/html/rfc1123), and RFC-2782 1994 // (https://tools.ietf.org/html/rfc2782). 1995 re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`) 1996 if !re.MatchString(name) { 1997 return fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be less than 63 characters long: %q", name) 1998 } 1999 return nil 2000 } 2001 2002 // Hash calculates the hash of the check based on it's content and the service 2003 // which owns it 2004 func (s *Service) Hash() string { 2005 h := sha1.New() 2006 io.WriteString(h, s.Name) 2007 io.WriteString(h, strings.Join(s.Tags, "")) 2008 io.WriteString(h, s.PortLabel) 2009 return fmt.Sprintf("%x", h.Sum(nil)) 2010 } 2011 2012 const ( 2013 // DefaultKillTimeout is the default timeout between signaling a task it 2014 // will be killed and killing it. 
2015 DefaultKillTimeout = 5 * time.Second 2016 ) 2017 2018 // LogConfig provides configuration for log rotation 2019 type LogConfig struct { 2020 MaxFiles int `mapstructure:"max_files"` 2021 MaxFileSizeMB int `mapstructure:"max_file_size"` 2022 } 2023 2024 // DefaultLogConfig returns the default LogConfig values. 2025 func DefaultLogConfig() *LogConfig { 2026 return &LogConfig{ 2027 MaxFiles: 10, 2028 MaxFileSizeMB: 10, 2029 } 2030 } 2031 2032 // Validate returns an error if the log config specified are less than 2033 // the minimum allowed. 2034 func (l *LogConfig) Validate() error { 2035 var mErr multierror.Error 2036 if l.MaxFiles < 1 { 2037 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles)) 2038 } 2039 if l.MaxFileSizeMB < 1 { 2040 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB)) 2041 } 2042 return mErr.ErrorOrNil() 2043 } 2044 2045 // Task is a single process typically that is executed as part of a task group. 2046 type Task struct { 2047 // Name of the task 2048 Name string 2049 2050 // Driver is used to control which driver is used 2051 Driver string 2052 2053 // User is used to determine which user will run the task. It defaults to 2054 // the same user the Nomad client is being run as. 2055 User string 2056 2057 // Config is provided to the driver to initialize 2058 Config map[string]interface{} 2059 2060 // Map of environment variables to be used by the driver 2061 Env map[string]string 2062 2063 // List of service definitions exposed by the Task 2064 Services []*Service 2065 2066 // Vault is used to define the set of Vault policies that this task should 2067 // have access to. 2068 Vault *Vault 2069 2070 // Templates are the set of templates to be rendered for the task. 2071 Templates []*Template 2072 2073 // Constraints can be specified at a task level and apply only to 2074 // the particular task. 
2075 Constraints []*Constraint 2076 2077 // Resources is the resources needed by this task 2078 Resources *Resources 2079 2080 // Meta is used to associate arbitrary metadata with this 2081 // task. This is opaque to Nomad. 2082 Meta map[string]string 2083 2084 // KillTimeout is the time between signaling a task that it will be 2085 // killed and killing it. 2086 KillTimeout time.Duration `mapstructure:"kill_timeout"` 2087 2088 // LogConfig provides configuration for log rotation 2089 LogConfig *LogConfig `mapstructure:"logs"` 2090 2091 // Artifacts is a list of artifacts to download and extract before running 2092 // the task. 2093 Artifacts []*TaskArtifact 2094 } 2095 2096 func (t *Task) Copy() *Task { 2097 if t == nil { 2098 return nil 2099 } 2100 nt := new(Task) 2101 *nt = *t 2102 nt.Env = CopyMapStringString(nt.Env) 2103 2104 if t.Services != nil { 2105 services := make([]*Service, len(nt.Services)) 2106 for i, s := range nt.Services { 2107 services[i] = s.Copy() 2108 } 2109 nt.Services = services 2110 } 2111 2112 nt.Constraints = CopySliceConstraints(nt.Constraints) 2113 2114 nt.Vault = nt.Vault.Copy() 2115 nt.Resources = nt.Resources.Copy() 2116 nt.Meta = CopyMapStringString(nt.Meta) 2117 2118 if t.Artifacts != nil { 2119 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 2120 for _, a := range nt.Artifacts { 2121 artifacts = append(artifacts, a.Copy()) 2122 } 2123 nt.Artifacts = artifacts 2124 } 2125 2126 if i, err := copystructure.Copy(nt.Config); err != nil { 2127 nt.Config = i.(map[string]interface{}) 2128 } 2129 2130 if t.Templates != nil { 2131 templates := make([]*Template, len(t.Templates)) 2132 for i, tmpl := range nt.Templates { 2133 templates[i] = tmpl.Copy() 2134 } 2135 nt.Templates = templates 2136 } 2137 2138 return nt 2139 } 2140 2141 // Canonicalize canonicalizes fields in the task. 
2142 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 2143 // Ensure that an empty and nil map are treated the same to avoid scheduling 2144 // problems since we use reflect DeepEquals. 2145 if len(t.Meta) == 0 { 2146 t.Meta = nil 2147 } 2148 if len(t.Config) == 0 { 2149 t.Config = nil 2150 } 2151 if len(t.Env) == 0 { 2152 t.Env = nil 2153 } 2154 2155 for _, service := range t.Services { 2156 service.Canonicalize(job.Name, tg.Name, t.Name) 2157 } 2158 2159 // If Resources are nil initialize them to defaults, otherwise canonicalize 2160 if t.Resources == nil { 2161 t.Resources = DefaultResources() 2162 } else { 2163 t.Resources.Canonicalize() 2164 } 2165 2166 // Set the default timeout if it is not specified. 2167 if t.KillTimeout == 0 { 2168 t.KillTimeout = DefaultKillTimeout 2169 } 2170 2171 if t.Vault != nil { 2172 t.Vault.Canonicalize() 2173 } 2174 2175 for _, template := range t.Templates { 2176 template.Canonicalize() 2177 } 2178 } 2179 2180 func (t *Task) GoString() string { 2181 return fmt.Sprintf("*%#v", *t) 2182 } 2183 2184 func (t *Task) FindHostAndPortFor(portLabel string) (string, int) { 2185 for _, network := range t.Resources.Networks { 2186 if p, ok := network.MapLabelToValues(nil)[portLabel]; ok { 2187 return network.IP, p 2188 } 2189 } 2190 return "", 0 2191 } 2192 2193 // Validate is used to sanity check a task 2194 func (t *Task) Validate(ephemeralDisk *EphemeralDisk) error { 2195 var mErr multierror.Error 2196 if t.Name == "" { 2197 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 2198 } 2199 if strings.ContainsAny(t.Name, `/\`) { 2200 // We enforce this so that when creating the directory on disk it will 2201 // not have any slashes. 
2202 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 2203 } 2204 if t.Driver == "" { 2205 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 2206 } 2207 if t.KillTimeout.Nanoseconds() < 0 { 2208 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 2209 } 2210 2211 // Validate the resources. 2212 if t.Resources == nil { 2213 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 2214 } else { 2215 if err := t.Resources.MeetsMinResources(); err != nil { 2216 mErr.Errors = append(mErr.Errors, err) 2217 } 2218 2219 // Ensure the task isn't asking for disk resources 2220 if t.Resources.DiskMB > 0 { 2221 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 2222 } 2223 } 2224 2225 // Validate the log config 2226 if t.LogConfig == nil { 2227 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 2228 } else if err := t.LogConfig.Validate(); err != nil { 2229 mErr.Errors = append(mErr.Errors, err) 2230 } 2231 2232 for idx, constr := range t.Constraints { 2233 if err := constr.Validate(); err != nil { 2234 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 2235 mErr.Errors = append(mErr.Errors, outer) 2236 } 2237 } 2238 2239 // Validate Services 2240 if err := validateServices(t); err != nil { 2241 mErr.Errors = append(mErr.Errors, err) 2242 } 2243 2244 if t.LogConfig != nil && ephemeralDisk != nil { 2245 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 2246 if ephemeralDisk.SizeMB <= logUsage { 2247 mErr.Errors = append(mErr.Errors, 2248 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 2249 logUsage, ephemeralDisk.SizeMB)) 2250 } 2251 } 2252 2253 for idx, artifact := range t.Artifacts { 2254 if err := artifact.Validate(); err != nil { 2255 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 2256 
mErr.Errors = append(mErr.Errors, outer) 2257 } 2258 } 2259 2260 if t.Vault != nil { 2261 if err := t.Vault.Validate(); err != nil { 2262 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 2263 } 2264 } 2265 2266 destinations := make(map[string]int, len(t.Templates)) 2267 for idx, tmpl := range t.Templates { 2268 if err := tmpl.Validate(); err != nil { 2269 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 2270 mErr.Errors = append(mErr.Errors, outer) 2271 } 2272 2273 if other, ok := destinations[tmpl.DestPath]; ok { 2274 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 2275 mErr.Errors = append(mErr.Errors, outer) 2276 } else { 2277 destinations[tmpl.DestPath] = idx + 1 2278 } 2279 } 2280 2281 return mErr.ErrorOrNil() 2282 } 2283 2284 // validateServices takes a task and validates the services within it are valid 2285 // and reference ports that exist. 2286 func validateServices(t *Task) error { 2287 var mErr multierror.Error 2288 2289 // Ensure that services don't ask for non-existent ports and their names are 2290 // unique. 2291 servicePorts := make(map[string][]string) 2292 knownServices := make(map[string]struct{}) 2293 for i, service := range t.Services { 2294 if err := service.Validate(); err != nil { 2295 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 2296 mErr.Errors = append(mErr.Errors, outer) 2297 } 2298 if _, ok := knownServices[service.Name]; ok { 2299 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 2300 } 2301 knownServices[service.Name] = struct{}{} 2302 2303 if service.PortLabel != "" { 2304 servicePorts[service.PortLabel] = append(servicePorts[service.PortLabel], service.Name) 2305 } 2306 2307 // Ensure that check names are unique. 
2308 knownChecks := make(map[string]struct{}) 2309 for _, check := range service.Checks { 2310 if _, ok := knownChecks[check.Name]; ok { 2311 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 2312 } 2313 knownChecks[check.Name] = struct{}{} 2314 } 2315 } 2316 2317 // Get the set of port labels. 2318 portLabels := make(map[string]struct{}) 2319 if t.Resources != nil { 2320 for _, network := range t.Resources.Networks { 2321 ports := network.MapLabelToValues(nil) 2322 for portLabel, _ := range ports { 2323 portLabels[portLabel] = struct{}{} 2324 } 2325 } 2326 } 2327 2328 // Ensure all ports referenced in services exist. 2329 for servicePort, services := range servicePorts { 2330 _, ok := portLabels[servicePort] 2331 if !ok { 2332 joined := strings.Join(services, ", ") 2333 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 2334 mErr.Errors = append(mErr.Errors, err) 2335 } 2336 } 2337 return mErr.ErrorOrNil() 2338 } 2339 2340 const ( 2341 // TemplateChangeModeNoop marks that no action should be taken if the 2342 // template is re-rendered 2343 TemplateChangeModeNoop = "noop" 2344 2345 // TemplateChangeModeSignal marks that the task should be signaled if the 2346 // template is re-rendered 2347 TemplateChangeModeSignal = "signal" 2348 2349 // TemplateChangeModeRestart marks that the task should be restarted if the 2350 // template is re-rendered 2351 TemplateChangeModeRestart = "restart" 2352 ) 2353 2354 var ( 2355 // TemplateChangeModeInvalidError is the error for when an invalid change 2356 // mode is given 2357 TemplateChangeModeInvalidError = errors.New("Invalid change mode. 
Must be one of the following: noop, signal, restart") 2358 ) 2359 2360 // Template represents a template configuration to be rendered for a given task 2361 type Template struct { 2362 // SourcePath is the path to the template to be rendered 2363 SourcePath string `mapstructure:"source"` 2364 2365 // DestPath is the path to where the template should be rendered 2366 DestPath string `mapstructure:"destination"` 2367 2368 // EmbeddedTmpl store the raw template. This is useful for smaller templates 2369 // where they are embedded in the job file rather than sent as an artificat 2370 EmbeddedTmpl string `mapstructure:"data"` 2371 2372 // ChangeMode indicates what should be done if the template is re-rendered 2373 ChangeMode string `mapstructure:"change_mode"` 2374 2375 // ChangeSignal is the signal that should be sent if the change mode 2376 // requires it. 2377 ChangeSignal string `mapstructure:"change_signal"` 2378 2379 // Splay is used to avoid coordinated restarts of processes by applying a 2380 // random wait between 0 and the given splay value before signalling the 2381 // application of a change 2382 Splay time.Duration `mapstructure:"splay"` 2383 } 2384 2385 // DefaultTemplate returns a default template. 
2386 func DefaultTemplate() *Template { 2387 return &Template{ 2388 ChangeMode: TemplateChangeModeRestart, 2389 Splay: 5 * time.Second, 2390 } 2391 } 2392 2393 func (t *Template) Copy() *Template { 2394 if t == nil { 2395 return nil 2396 } 2397 copy := new(Template) 2398 *copy = *t 2399 return copy 2400 } 2401 2402 func (t *Template) Canonicalize() { 2403 if t.ChangeSignal != "" { 2404 t.ChangeSignal = strings.ToUpper(t.ChangeSignal) 2405 } 2406 } 2407 2408 func (t *Template) Validate() error { 2409 var mErr multierror.Error 2410 2411 // Verify we have something to render 2412 if t.SourcePath == "" && t.EmbeddedTmpl == "" { 2413 multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template")) 2414 } 2415 2416 // Verify we can render somewhere 2417 if t.DestPath == "" { 2418 multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template")) 2419 } 2420 2421 // Verify the destination doesn't escape 2422 escaped, err := PathEscapesAllocDir(t.DestPath) 2423 if err != nil { 2424 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 2425 } else if escaped { 2426 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 2427 } 2428 2429 // Verify a proper change mode 2430 switch t.ChangeMode { 2431 case TemplateChangeModeNoop, TemplateChangeModeRestart: 2432 case TemplateChangeModeSignal: 2433 if t.ChangeSignal == "" { 2434 multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal")) 2435 } 2436 default: 2437 multierror.Append(&mErr, TemplateChangeModeInvalidError) 2438 } 2439 2440 // Verify the splay is positive 2441 if t.Splay < 0 { 2442 multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value")) 2443 } 2444 2445 return mErr.ErrorOrNil() 2446 } 2447 2448 // Set of possible states for a task. 2449 const ( 2450 TaskStatePending = "pending" // The task is waiting to be run. 
2451 TaskStateRunning = "running" // The task is currently running. 2452 TaskStateDead = "dead" // Terminal state of task. 2453 ) 2454 2455 // TaskState tracks the current state of a task and events that caused state 2456 // transitions. 2457 type TaskState struct { 2458 // The current state of the task. 2459 State string 2460 2461 // Failed marks a task as having failed 2462 Failed bool 2463 2464 // Series of task events that transition the state of the task. 2465 Events []*TaskEvent 2466 } 2467 2468 func (ts *TaskState) Copy() *TaskState { 2469 if ts == nil { 2470 return nil 2471 } 2472 copy := new(TaskState) 2473 copy.State = ts.State 2474 copy.Failed = ts.Failed 2475 2476 if ts.Events != nil { 2477 copy.Events = make([]*TaskEvent, len(ts.Events)) 2478 for i, e := range ts.Events { 2479 copy.Events[i] = e.Copy() 2480 } 2481 } 2482 return copy 2483 } 2484 2485 // Successful returns whether a task finished successfully. 2486 func (ts *TaskState) Successful() bool { 2487 l := len(ts.Events) 2488 if ts.State != TaskStateDead || l == 0 { 2489 return false 2490 } 2491 2492 e := ts.Events[l-1] 2493 if e.Type != TaskTerminated { 2494 return false 2495 } 2496 2497 return e.ExitCode == 0 2498 } 2499 2500 const ( 2501 // TaskSetupFailure indicates that the task could not be started due to a 2502 // a setup failure. 2503 TaskSetupFailure = "Setup Failure" 2504 2505 // TaskDriveFailure indicates that the task could not be started due to a 2506 // failure in the driver. 2507 TaskDriverFailure = "Driver Failure" 2508 2509 // TaskReceived signals that the task has been pulled by the client at the 2510 // given timestamp. 2511 TaskReceived = "Received" 2512 2513 // TaskFailedValidation indicates the task was invalid and as such was not 2514 // run. 2515 TaskFailedValidation = "Failed Validation" 2516 2517 // TaskStarted signals that the task was started and its timestamp can be 2518 // used to determine the running length of the task. 
2519 TaskStarted = "Started" 2520 2521 // TaskTerminated indicates that the task was started and exited. 2522 TaskTerminated = "Terminated" 2523 2524 // TaskKilling indicates a kill signal has been sent to the task. 2525 TaskKilling = "Killing" 2526 2527 // TaskKilled indicates a user has killed the task. 2528 TaskKilled = "Killed" 2529 2530 // TaskRestarting indicates that task terminated and is being restarted. 2531 TaskRestarting = "Restarting" 2532 2533 // TaskNotRestarting indicates that the task has failed and is not being 2534 // restarted because it has exceeded its restart policy. 2535 TaskNotRestarting = "Not Restarting" 2536 2537 // TaskRestartSignal indicates that the task has been signalled to be 2538 // restarted 2539 TaskRestartSignal = "Restart Signaled" 2540 2541 // TaskSignaling indicates that the task is being signalled. 2542 TaskSignaling = "Signaling" 2543 2544 // TaskDownloadingArtifacts means the task is downloading the artifacts 2545 // specified in the task. 2546 TaskDownloadingArtifacts = "Downloading Artifacts" 2547 2548 // TaskArtifactDownloadFailed indicates that downloading the artifacts 2549 // failed. 2550 TaskArtifactDownloadFailed = "Failed Artifact Download" 2551 2552 // TaskDiskExceeded indicates that one of the tasks in a taskgroup has 2553 // exceeded the requested disk resources. 2554 TaskDiskExceeded = "Disk Resources Exceeded" 2555 2556 // TaskSiblingFailed indicates that a sibling task in the task group has 2557 // failed. 2558 TaskSiblingFailed = "Sibling task failed" 2559 ) 2560 2561 // TaskEvent is an event that effects the state of a task and contains meta-data 2562 // appropriate to the events type. 2563 type TaskEvent struct { 2564 Type string 2565 Time int64 // Unix Nanosecond timestamp 2566 2567 // FailsTask marks whether this event fails the task 2568 FailsTask bool 2569 2570 // Restart fields. 2571 RestartReason string 2572 2573 // Setup Failure fields. 2574 SetupError string 2575 2576 // Driver Failure fields. 
2577 DriverError string // A driver error occurred while starting the task. 2578 2579 // Task Terminated Fields. 2580 ExitCode int // The exit code of the task. 2581 Signal int // The signal that terminated the task. 2582 Message string // A possible message explaining the termination of the task. 2583 2584 // Killing fields 2585 KillTimeout time.Duration 2586 2587 // Task Killed Fields. 2588 KillError string // Error killing the task. 2589 2590 // KillReason is the reason the task was killed 2591 KillReason string 2592 2593 // TaskRestarting fields. 2594 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 2595 2596 // Artifact Download fields 2597 DownloadError string // Error downloading artifacts 2598 2599 // Validation fields 2600 ValidationError string // Validation error 2601 2602 // The maximum allowed task disk size. 2603 DiskLimit int64 2604 2605 // Name of the sibling task that caused termination of the task that 2606 // the TaskEvent refers to. 2607 FailedSibling string 2608 2609 // VaultError is the error from token renewal 2610 VaultError string 2611 2612 // TaskSignalReason indicates the reason the task is being signalled. 
2613 TaskSignalReason string 2614 2615 // TaskSignal is the signal that was sent to the task 2616 TaskSignal string 2617 } 2618 2619 func (te *TaskEvent) GoString() string { 2620 return fmt.Sprintf("%v at %v", te.Type, te.Time) 2621 } 2622 2623 func (te *TaskEvent) Copy() *TaskEvent { 2624 if te == nil { 2625 return nil 2626 } 2627 copy := new(TaskEvent) 2628 *copy = *te 2629 return copy 2630 } 2631 2632 func NewTaskEvent(event string) *TaskEvent { 2633 return &TaskEvent{ 2634 Type: event, 2635 Time: time.Now().UnixNano(), 2636 } 2637 } 2638 2639 // SetSetupError is used to store an error that occured while setting up the 2640 // task 2641 func (e *TaskEvent) SetSetupError(err error) *TaskEvent { 2642 if err != nil { 2643 e.SetupError = err.Error() 2644 } 2645 return e 2646 } 2647 2648 func (e *TaskEvent) SetFailsTask() *TaskEvent { 2649 e.FailsTask = true 2650 return e 2651 } 2652 2653 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 2654 if err != nil { 2655 e.DriverError = err.Error() 2656 } 2657 return e 2658 } 2659 2660 func (e *TaskEvent) SetExitCode(c int) *TaskEvent { 2661 e.ExitCode = c 2662 return e 2663 } 2664 2665 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 2666 e.Signal = s 2667 return e 2668 } 2669 2670 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 2671 if err != nil { 2672 e.Message = err.Error() 2673 } 2674 return e 2675 } 2676 2677 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 2678 if err != nil { 2679 e.KillError = err.Error() 2680 } 2681 return e 2682 } 2683 2684 func (e *TaskEvent) SetKillReason(r string) *TaskEvent { 2685 e.KillReason = r 2686 return e 2687 } 2688 2689 func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent { 2690 e.StartDelay = int64(delay) 2691 return e 2692 } 2693 2694 func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent { 2695 e.RestartReason = reason 2696 return e 2697 } 2698 2699 func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent { 2700 
e.TaskSignalReason = r 2701 return e 2702 } 2703 2704 func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent { 2705 e.TaskSignal = s.String() 2706 return e 2707 } 2708 2709 func (e *TaskEvent) SetDownloadError(err error) *TaskEvent { 2710 if err != nil { 2711 e.DownloadError = err.Error() 2712 } 2713 return e 2714 } 2715 2716 func (e *TaskEvent) SetValidationError(err error) *TaskEvent { 2717 if err != nil { 2718 e.ValidationError = err.Error() 2719 } 2720 return e 2721 } 2722 2723 func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { 2724 e.KillTimeout = timeout 2725 return e 2726 } 2727 2728 func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent { 2729 e.DiskLimit = limit 2730 return e 2731 } 2732 2733 func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent { 2734 e.FailedSibling = sibling 2735 return e 2736 } 2737 2738 func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent { 2739 if err != nil { 2740 e.VaultError = err.Error() 2741 } 2742 return e 2743 } 2744 2745 // TaskArtifact is an artifact to download before running the task. 2746 type TaskArtifact struct { 2747 // GetterSource is the source to download an artifact using go-getter 2748 GetterSource string `mapstructure:"source"` 2749 2750 // GetterOptions are options to use when downloading the artifact using 2751 // go-getter. 2752 GetterOptions map[string]string `mapstructure:"options"` 2753 2754 // RelativeDest is the download destination given relative to the task's 2755 // directory. 
2756 RelativeDest string `mapstructure:"destination"` 2757 } 2758 2759 func (ta *TaskArtifact) Copy() *TaskArtifact { 2760 if ta == nil { 2761 return nil 2762 } 2763 nta := new(TaskArtifact) 2764 *nta = *ta 2765 nta.GetterOptions = CopyMapStringString(ta.GetterOptions) 2766 return nta 2767 } 2768 2769 func (ta *TaskArtifact) GoString() string { 2770 return fmt.Sprintf("%+v", ta) 2771 } 2772 2773 // PathEscapesAllocDir returns if the given path escapes the allocation 2774 // directory 2775 func PathEscapesAllocDir(path string) (bool, error) { 2776 // Verify the destination doesn't escape the tasks directory 2777 alloc, err := filepath.Abs(filepath.Join("/", "foo/", "bar/")) 2778 if err != nil { 2779 return false, err 2780 } 2781 abs, err := filepath.Abs(filepath.Join(alloc, path)) 2782 if err != nil { 2783 return false, err 2784 } 2785 rel, err := filepath.Rel(alloc, abs) 2786 if err != nil { 2787 return false, err 2788 } 2789 2790 return strings.HasPrefix(rel, ".."), nil 2791 } 2792 2793 func (ta *TaskArtifact) Validate() error { 2794 // Verify the source 2795 var mErr multierror.Error 2796 if ta.GetterSource == "" { 2797 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 2798 } 2799 2800 escaped, err := PathEscapesAllocDir(ta.RelativeDest) 2801 if err != nil { 2802 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 2803 } else if escaped { 2804 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes task's directory")) 2805 } 2806 2807 // Verify the checksum 2808 if check, ok := ta.GetterOptions["checksum"]; ok { 2809 check = strings.TrimSpace(check) 2810 if check == "" { 2811 mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value cannot be empty")) 2812 return mErr.ErrorOrNil() 2813 } 2814 2815 parts := strings.Split(check, ":") 2816 if l := len(parts); l != 2 { 2817 mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check)) 2818 return 
mErr.ErrorOrNil() 2819 } 2820 2821 checksumVal := parts[1] 2822 checksumBytes, err := hex.DecodeString(checksumVal) 2823 if err != nil { 2824 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err)) 2825 return mErr.ErrorOrNil() 2826 } 2827 2828 checksumType := parts[0] 2829 expectedLength := 0 2830 switch checksumType { 2831 case "md5": 2832 expectedLength = md5.Size 2833 case "sha1": 2834 expectedLength = sha1.Size 2835 case "sha256": 2836 expectedLength = sha256.Size 2837 case "sha512": 2838 expectedLength = sha512.Size 2839 default: 2840 mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType)) 2841 return mErr.ErrorOrNil() 2842 } 2843 2844 if len(checksumBytes) != expectedLength { 2845 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal)) 2846 return mErr.ErrorOrNil() 2847 } 2848 } 2849 2850 return mErr.ErrorOrNil() 2851 } 2852 2853 const ( 2854 ConstraintDistinctHosts = "distinct_hosts" 2855 ConstraintRegex = "regexp" 2856 ConstraintVersion = "version" 2857 ConstraintSetContains = "set_contains" 2858 ) 2859 2860 // Constraints are used to restrict placement options. 
2861 type Constraint struct { 2862 LTarget string // Left-hand target 2863 RTarget string // Right-hand target 2864 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 2865 str string // Memoized string 2866 } 2867 2868 // Equal checks if two constraints are equal 2869 func (c *Constraint) Equal(o *Constraint) bool { 2870 return c.LTarget == o.LTarget && 2871 c.RTarget == o.RTarget && 2872 c.Operand == o.Operand 2873 } 2874 2875 func (c *Constraint) Copy() *Constraint { 2876 if c == nil { 2877 return nil 2878 } 2879 nc := new(Constraint) 2880 *nc = *c 2881 return nc 2882 } 2883 2884 func (c *Constraint) String() string { 2885 if c.str != "" { 2886 return c.str 2887 } 2888 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 2889 return c.str 2890 } 2891 2892 func (c *Constraint) Validate() error { 2893 var mErr multierror.Error 2894 if c.Operand == "" { 2895 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 2896 } 2897 2898 // Perform additional validation based on operand 2899 switch c.Operand { 2900 case ConstraintRegex: 2901 if _, err := regexp.Compile(c.RTarget); err != nil { 2902 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 2903 } 2904 case ConstraintVersion: 2905 if _, err := version.NewConstraint(c.RTarget); err != nil { 2906 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 2907 } 2908 } 2909 return mErr.ErrorOrNil() 2910 } 2911 2912 // EphemeralDisk is an ephemeral disk object 2913 type EphemeralDisk struct { 2914 // Sticky indicates whether the allocation is sticky to a node 2915 Sticky bool 2916 2917 // SizeMB is the size of the local disk 2918 SizeMB int `mapstructure:"size"` 2919 2920 // Migrate determines if Nomad client should migrate the allocation dir for 2921 // sticky allocations 2922 Migrate bool 2923 } 2924 2925 // DefaultEphemeralDisk returns a EphemeralDisk with default configurations 2926 func 
DefaultEphemeralDisk() *EphemeralDisk { 2927 return &EphemeralDisk{ 2928 SizeMB: 300, 2929 } 2930 } 2931 2932 // Validate validates EphemeralDisk 2933 func (d *EphemeralDisk) Validate() error { 2934 if d.SizeMB < 10 { 2935 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 2936 } 2937 return nil 2938 } 2939 2940 // Copy copies the EphemeralDisk struct and returns a new one 2941 func (d *EphemeralDisk) Copy() *EphemeralDisk { 2942 ld := new(EphemeralDisk) 2943 *ld = *d 2944 return ld 2945 } 2946 2947 const ( 2948 // VaultChangeModeNoop takes no action when a new token is retrieved. 2949 VaultChangeModeNoop = "noop" 2950 2951 // VaultChangeModeSignal signals the task when a new token is retrieved. 2952 VaultChangeModeSignal = "signal" 2953 2954 // VaultChangeModeRestart restarts the task when a new token is retrieved. 2955 VaultChangeModeRestart = "restart" 2956 ) 2957 2958 // Vault stores the set of premissions a task needs access to from Vault. 2959 type Vault struct { 2960 // Policies is the set of policies that the task needs access to 2961 Policies []string 2962 2963 // Env marks whether the Vault Token should be exposed as an environment 2964 // variable 2965 Env bool 2966 2967 // ChangeMode is used to configure the task's behavior when the Vault 2968 // token changes because the original token could not be renewed in time. 2969 ChangeMode string `mapstructure:"change_mode"` 2970 2971 // ChangeSignal is the signal sent to the task when a new token is 2972 // retrieved. This is only valid when using the signal change mode. 2973 ChangeSignal string `mapstructure:"change_signal"` 2974 } 2975 2976 func DefaultVaultBlock() *Vault { 2977 return &Vault{ 2978 Env: true, 2979 ChangeMode: VaultChangeModeRestart, 2980 } 2981 } 2982 2983 // Copy returns a copy of this Vault block. 
2984 func (v *Vault) Copy() *Vault { 2985 if v == nil { 2986 return nil 2987 } 2988 2989 nv := new(Vault) 2990 *nv = *v 2991 return nv 2992 } 2993 2994 func (v *Vault) Canonicalize() { 2995 if v.ChangeSignal != "" { 2996 v.ChangeSignal = strings.ToUpper(v.ChangeSignal) 2997 } 2998 } 2999 3000 // Validate returns if the Vault block is valid. 3001 func (v *Vault) Validate() error { 3002 if v == nil { 3003 return nil 3004 } 3005 3006 if len(v.Policies) == 0 { 3007 return fmt.Errorf("Policy list cannot be empty") 3008 } 3009 3010 switch v.ChangeMode { 3011 case VaultChangeModeSignal: 3012 if v.ChangeSignal == "" { 3013 return fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal) 3014 } 3015 case VaultChangeModeNoop, VaultChangeModeRestart: 3016 default: 3017 return fmt.Errorf("Unknown change mode %q", v.ChangeMode) 3018 } 3019 3020 return nil 3021 } 3022 3023 const ( 3024 AllocDesiredStatusRun = "run" // Allocation should run 3025 AllocDesiredStatusStop = "stop" // Allocation should stop 3026 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 3027 ) 3028 3029 const ( 3030 AllocClientStatusPending = "pending" 3031 AllocClientStatusRunning = "running" 3032 AllocClientStatusComplete = "complete" 3033 AllocClientStatusFailed = "failed" 3034 AllocClientStatusLost = "lost" 3035 ) 3036 3037 // Allocation is used to allocate the placement of a task group to a node. 3038 type Allocation struct { 3039 // ID of the allocation (UUID) 3040 ID string 3041 3042 // ID of the evaluation that generated this allocation 3043 EvalID string 3044 3045 // Name is a logical name of the allocation. 3046 Name string 3047 3048 // NodeID is the node this is being placed on 3049 NodeID string 3050 3051 // Job is the parent job of the task group being allocated. 3052 // This is copied at allocation time to avoid issues if the job 3053 // definition is updated. 
3054 JobID string 3055 Job *Job 3056 3057 // TaskGroup is the name of the task group that should be run 3058 TaskGroup string 3059 3060 // Resources is the total set of resources allocated as part 3061 // of this allocation of the task group. 3062 Resources *Resources 3063 3064 // SharedResources are the resources that are shared by all the tasks in an 3065 // allocation 3066 SharedResources *Resources 3067 3068 // TaskResources is the set of resources allocated to each 3069 // task. These should sum to the total Resources. 3070 TaskResources map[string]*Resources 3071 3072 // Metrics associated with this allocation 3073 Metrics *AllocMetric 3074 3075 // Desired Status of the allocation on the client 3076 DesiredStatus string 3077 3078 // DesiredStatusDescription is meant to provide more human useful information 3079 DesiredDescription string 3080 3081 // Status of the allocation on the client 3082 ClientStatus string 3083 3084 // ClientStatusDescription is meant to provide more human useful information 3085 ClientDescription string 3086 3087 // TaskStates stores the state of each task, 3088 TaskStates map[string]*TaskState 3089 3090 // PreviousAllocation is the allocation that this allocation is replacing 3091 PreviousAllocation string 3092 3093 // Raft Indexes 3094 CreateIndex uint64 3095 ModifyIndex uint64 3096 3097 // AllocModifyIndex is not updated when the client updates allocations. This 3098 // lets the client pull only the allocs updated by the server. 3099 AllocModifyIndex uint64 3100 3101 // CreateTime is the time the allocation has finished scheduling and been 3102 // verified by the plan applier. 
3103 CreateTime int64 3104 } 3105 3106 func (a *Allocation) Copy() *Allocation { 3107 if a == nil { 3108 return nil 3109 } 3110 na := new(Allocation) 3111 *na = *a 3112 3113 na.Job = na.Job.Copy() 3114 na.Resources = na.Resources.Copy() 3115 na.SharedResources = na.SharedResources.Copy() 3116 3117 if a.TaskResources != nil { 3118 tr := make(map[string]*Resources, len(na.TaskResources)) 3119 for task, resource := range na.TaskResources { 3120 tr[task] = resource.Copy() 3121 } 3122 na.TaskResources = tr 3123 } 3124 3125 na.Metrics = na.Metrics.Copy() 3126 3127 if a.TaskStates != nil { 3128 ts := make(map[string]*TaskState, len(na.TaskStates)) 3129 for task, state := range na.TaskStates { 3130 ts[task] = state.Copy() 3131 } 3132 na.TaskStates = ts 3133 } 3134 return na 3135 } 3136 3137 // TerminalStatus returns if the desired or actual status is terminal and 3138 // will no longer transition. 3139 func (a *Allocation) TerminalStatus() bool { 3140 // First check the desired state and if that isn't terminal, check client 3141 // state. 3142 switch a.DesiredStatus { 3143 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 3144 return true 3145 default: 3146 } 3147 3148 switch a.ClientStatus { 3149 case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost: 3150 return true 3151 default: 3152 return false 3153 } 3154 } 3155 3156 // Terminated returns if the allocation is in a terminal state on a client. 3157 func (a *Allocation) Terminated() bool { 3158 if a.ClientStatus == AllocClientStatusFailed || 3159 a.ClientStatus == AllocClientStatusComplete || 3160 a.ClientStatus == AllocClientStatusLost { 3161 return true 3162 } 3163 return false 3164 } 3165 3166 // RanSuccessfully returns whether the client has ran the allocation and all 3167 // tasks finished successfully 3168 func (a *Allocation) RanSuccessfully() bool { 3169 // Handle the case the client hasn't started the allocation. 
3170 if len(a.TaskStates) == 0 { 3171 return false 3172 } 3173 3174 // Check to see if all the tasks finised successfully in the allocation 3175 allSuccess := true 3176 for _, state := range a.TaskStates { 3177 allSuccess = allSuccess && state.Successful() 3178 } 3179 3180 return allSuccess 3181 } 3182 3183 // Stub returns a list stub for the allocation 3184 func (a *Allocation) Stub() *AllocListStub { 3185 return &AllocListStub{ 3186 ID: a.ID, 3187 EvalID: a.EvalID, 3188 Name: a.Name, 3189 NodeID: a.NodeID, 3190 JobID: a.JobID, 3191 TaskGroup: a.TaskGroup, 3192 DesiredStatus: a.DesiredStatus, 3193 DesiredDescription: a.DesiredDescription, 3194 ClientStatus: a.ClientStatus, 3195 ClientDescription: a.ClientDescription, 3196 TaskStates: a.TaskStates, 3197 CreateIndex: a.CreateIndex, 3198 ModifyIndex: a.ModifyIndex, 3199 CreateTime: a.CreateTime, 3200 } 3201 } 3202 3203 // ShouldMigrate returns if the allocation needs data migration 3204 func (a *Allocation) ShouldMigrate() bool { 3205 if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict { 3206 return false 3207 } 3208 3209 tg := a.Job.LookupTaskGroup(a.TaskGroup) 3210 3211 // if the task group is nil or the ephemeral disk block isn't present then 3212 // we won't migrate 3213 if tg == nil || tg.EphemeralDisk == nil { 3214 return false 3215 } 3216 3217 // We won't migrate any data is the user hasn't enabled migration or the 3218 // disk is not marked as sticky 3219 if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky { 3220 return false 3221 } 3222 3223 return true 3224 } 3225 3226 var ( 3227 // AllocationIndexRegex is a regular expression to find the allocation index. 3228 AllocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$") 3229 ) 3230 3231 // Index returns the index of the allocation. If the allocation is from a task 3232 // group with count greater than 1, there will be multiple allocations for it. 
3233 func (a *Allocation) Index() int { 3234 matches := AllocationIndexRegex.FindStringSubmatch(a.Name) 3235 if len(matches) != 2 { 3236 return -1 3237 } 3238 3239 index, err := strconv.Atoi(matches[1]) 3240 if err != nil { 3241 return -1 3242 } 3243 3244 return index 3245 } 3246 3247 // AllocListStub is used to return a subset of alloc information 3248 type AllocListStub struct { 3249 ID string 3250 EvalID string 3251 Name string 3252 NodeID string 3253 JobID string 3254 TaskGroup string 3255 DesiredStatus string 3256 DesiredDescription string 3257 ClientStatus string 3258 ClientDescription string 3259 TaskStates map[string]*TaskState 3260 CreateIndex uint64 3261 ModifyIndex uint64 3262 CreateTime int64 3263 } 3264 3265 // AllocMetric is used to track various metrics while attempting 3266 // to make an allocation. These are used to debug a job, or to better 3267 // understand the pressure within the system. 3268 type AllocMetric struct { 3269 // NodesEvaluated is the number of nodes that were evaluated 3270 NodesEvaluated int 3271 3272 // NodesFiltered is the number of nodes filtered due to a constraint 3273 NodesFiltered int 3274 3275 // NodesAvailable is the number of nodes available for evaluation per DC. 3276 NodesAvailable map[string]int 3277 3278 // ClassFiltered is the number of nodes filtered by class 3279 ClassFiltered map[string]int 3280 3281 // ConstraintFiltered is the number of failures caused by constraint 3282 ConstraintFiltered map[string]int 3283 3284 // NodesExhausted is the number of nodes skipped due to being 3285 // exhausted of at least one resource 3286 NodesExhausted int 3287 3288 // ClassExhausted is the number of nodes exhausted by class 3289 ClassExhausted map[string]int 3290 3291 // DimensionExhausted provides the count by dimension or reason 3292 DimensionExhausted map[string]int 3293 3294 // Scores is the scores of the final few nodes remaining 3295 // for placement. The top score is typically selected. 
3296 Scores map[string]float64 3297 3298 // AllocationTime is a measure of how long the allocation 3299 // attempt took. This can affect performance and SLAs. 3300 AllocationTime time.Duration 3301 3302 // CoalescedFailures indicates the number of other 3303 // allocations that were coalesced into this failed allocation. 3304 // This is to prevent creating many failed allocations for a 3305 // single task group. 3306 CoalescedFailures int 3307 } 3308 3309 func (a *AllocMetric) Copy() *AllocMetric { 3310 if a == nil { 3311 return nil 3312 } 3313 na := new(AllocMetric) 3314 *na = *a 3315 na.NodesAvailable = CopyMapStringInt(na.NodesAvailable) 3316 na.ClassFiltered = CopyMapStringInt(na.ClassFiltered) 3317 na.ConstraintFiltered = CopyMapStringInt(na.ConstraintFiltered) 3318 na.ClassExhausted = CopyMapStringInt(na.ClassExhausted) 3319 na.DimensionExhausted = CopyMapStringInt(na.DimensionExhausted) 3320 na.Scores = CopyMapStringFloat64(na.Scores) 3321 return na 3322 } 3323 3324 func (a *AllocMetric) EvaluateNode() { 3325 a.NodesEvaluated += 1 3326 } 3327 3328 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 3329 a.NodesFiltered += 1 3330 if node != nil && node.NodeClass != "" { 3331 if a.ClassFiltered == nil { 3332 a.ClassFiltered = make(map[string]int) 3333 } 3334 a.ClassFiltered[node.NodeClass] += 1 3335 } 3336 if constraint != "" { 3337 if a.ConstraintFiltered == nil { 3338 a.ConstraintFiltered = make(map[string]int) 3339 } 3340 a.ConstraintFiltered[constraint] += 1 3341 } 3342 } 3343 3344 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 3345 a.NodesExhausted += 1 3346 if node != nil && node.NodeClass != "" { 3347 if a.ClassExhausted == nil { 3348 a.ClassExhausted = make(map[string]int) 3349 } 3350 a.ClassExhausted[node.NodeClass] += 1 3351 } 3352 if dimension != "" { 3353 if a.DimensionExhausted == nil { 3354 a.DimensionExhausted = make(map[string]int) 3355 } 3356 a.DimensionExhausted[dimension] += 1 3357 } 3358 } 3359 3360 func 
(a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 3361 if a.Scores == nil { 3362 a.Scores = make(map[string]float64) 3363 } 3364 key := fmt.Sprintf("%s.%s", node.ID, name) 3365 a.Scores[key] = score 3366 } 3367 3368 const ( 3369 EvalStatusBlocked = "blocked" 3370 EvalStatusPending = "pending" 3371 EvalStatusComplete = "complete" 3372 EvalStatusFailed = "failed" 3373 EvalStatusCancelled = "canceled" 3374 ) 3375 3376 const ( 3377 EvalTriggerJobRegister = "job-register" 3378 EvalTriggerJobDeregister = "job-deregister" 3379 EvalTriggerPeriodicJob = "periodic-job" 3380 EvalTriggerNodeUpdate = "node-update" 3381 EvalTriggerScheduled = "scheduled" 3382 EvalTriggerRollingUpdate = "rolling-update" 3383 EvalTriggerMaxPlans = "max-plan-attempts" 3384 ) 3385 3386 const ( 3387 // CoreJobEvalGC is used for the garbage collection of evaluations 3388 // and allocations. We periodically scan evaluations in a terminal state, 3389 // in which all the corresponding allocations are also terminal. We 3390 // delete these out of the system to bound the state. 3391 CoreJobEvalGC = "eval-gc" 3392 3393 // CoreJobNodeGC is used for the garbage collection of failed nodes. 3394 // We periodically scan nodes in a terminal state, and if they have no 3395 // corresponding allocations we delete these out of the system. 3396 CoreJobNodeGC = "node-gc" 3397 3398 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 3399 // periodically scan garbage collectible jobs and check if both their 3400 // evaluations and allocations are terminal. If so, we delete these out of 3401 // the system. 3402 CoreJobJobGC = "job-gc" 3403 3404 // CoreJobForceGC is used to force garbage collection of all GCable objects. 3405 CoreJobForceGC = "force-gc" 3406 ) 3407 3408 // Evaluation is used anytime we need to apply business logic as a result 3409 // of a change to our desired state (job specification) or the emergent state 3410 // (registered nodes). 
When the inputs change, we need to "evaluate" them, 3411 // potentially taking action (allocation of work) or doing nothing if the state 3412 // of the world does not require it. 3413 type Evaluation struct { 3414 // ID is a randonly generated UUID used for this evaluation. This 3415 // is assigned upon the creation of the evaluation. 3416 ID string 3417 3418 // Priority is used to control scheduling importance and if this job 3419 // can preempt other jobs. 3420 Priority int 3421 3422 // Type is used to control which schedulers are available to handle 3423 // this evaluation. 3424 Type string 3425 3426 // TriggeredBy is used to give some insight into why this Eval 3427 // was created. (Job change, node failure, alloc failure, etc). 3428 TriggeredBy string 3429 3430 // JobID is the job this evaluation is scoped to. Evaluations cannot 3431 // be run in parallel for a given JobID, so we serialize on this. 3432 JobID string 3433 3434 // JobModifyIndex is the modify index of the job at the time 3435 // the evaluation was created 3436 JobModifyIndex uint64 3437 3438 // NodeID is the node that was affected triggering the evaluation. 3439 NodeID string 3440 3441 // NodeModifyIndex is the modify index of the node at the time 3442 // the evaluation was created 3443 NodeModifyIndex uint64 3444 3445 // Status of the evaluation 3446 Status string 3447 3448 // StatusDescription is meant to provide more human useful information 3449 StatusDescription string 3450 3451 // Wait is a minimum wait time for running the eval. This is used to 3452 // support a rolling upgrade. 3453 Wait time.Duration 3454 3455 // NextEval is the evaluation ID for the eval created to do a followup. 3456 // This is used to support rolling upgrades, where we need a chain of evaluations. 3457 NextEval string 3458 3459 // PreviousEval is the evaluation ID for the eval creating this one to do a followup. 3460 // This is used to support rolling upgrades, where we need a chain of evaluations. 
3461 PreviousEval string 3462 3463 // BlockedEval is the evaluation ID for a created blocked eval. A 3464 // blocked eval will be created if all allocations could not be placed due 3465 // to constraints or lacking resources. 3466 BlockedEval string 3467 3468 // FailedTGAllocs are task groups which have allocations that could not be 3469 // made, but the metrics are persisted so that the user can use the feedback 3470 // to determine the cause. 3471 FailedTGAllocs map[string]*AllocMetric 3472 3473 // ClassEligibility tracks computed node classes that have been explicitly 3474 // marked as eligible or ineligible. 3475 ClassEligibility map[string]bool 3476 3477 // EscapedComputedClass marks whether the job has constraints that are not 3478 // captured by computed node classes. 3479 EscapedComputedClass bool 3480 3481 // AnnotatePlan triggers the scheduler to provide additional annotations 3482 // during the evaluation. This should not be set during normal operations. 3483 AnnotatePlan bool 3484 3485 // SnapshotIndex is the Raft index of the snapshot used to process the 3486 // evaluation. As such it will only be set once it has gone through the 3487 // scheduler. 3488 SnapshotIndex uint64 3489 3490 // QueuedAllocations is the number of unplaced allocations at the time the 3491 // evaluation was processed. The map is keyed by Task Group names. 3492 QueuedAllocations map[string]int 3493 3494 // Raft Indexes 3495 CreateIndex uint64 3496 ModifyIndex uint64 3497 } 3498 3499 // TerminalStatus returns if the current status is terminal and 3500 // will no longer transition. 
3501 func (e *Evaluation) TerminalStatus() bool { 3502 switch e.Status { 3503 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 3504 return true 3505 default: 3506 return false 3507 } 3508 } 3509 3510 func (e *Evaluation) GoString() string { 3511 return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID) 3512 } 3513 3514 func (e *Evaluation) Copy() *Evaluation { 3515 if e == nil { 3516 return nil 3517 } 3518 ne := new(Evaluation) 3519 *ne = *e 3520 3521 // Copy ClassEligibility 3522 if e.ClassEligibility != nil { 3523 classes := make(map[string]bool, len(e.ClassEligibility)) 3524 for class, elig := range e.ClassEligibility { 3525 classes[class] = elig 3526 } 3527 ne.ClassEligibility = classes 3528 } 3529 3530 // Copy FailedTGAllocs 3531 if e.FailedTGAllocs != nil { 3532 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 3533 for tg, metric := range e.FailedTGAllocs { 3534 failedTGs[tg] = metric.Copy() 3535 } 3536 ne.FailedTGAllocs = failedTGs 3537 } 3538 3539 // Copy queued allocations 3540 if e.QueuedAllocations != nil { 3541 queuedAllocations := make(map[string]int, len(e.QueuedAllocations)) 3542 for tg, num := range e.QueuedAllocations { 3543 queuedAllocations[tg] = num 3544 } 3545 ne.QueuedAllocations = queuedAllocations 3546 } 3547 3548 return ne 3549 } 3550 3551 // ShouldEnqueue checks if a given evaluation should be enqueued into the 3552 // eval_broker 3553 func (e *Evaluation) ShouldEnqueue() bool { 3554 switch e.Status { 3555 case EvalStatusPending: 3556 return true 3557 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 3558 return false 3559 default: 3560 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 3561 } 3562 } 3563 3564 // ShouldBlock checks if a given evaluation should be entered into the blocked 3565 // eval tracker. 
3566 func (e *Evaluation) ShouldBlock() bool { 3567 switch e.Status { 3568 case EvalStatusBlocked: 3569 return true 3570 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 3571 return false 3572 default: 3573 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 3574 } 3575 } 3576 3577 // MakePlan is used to make a plan from the given evaluation 3578 // for a given Job 3579 func (e *Evaluation) MakePlan(j *Job) *Plan { 3580 p := &Plan{ 3581 EvalID: e.ID, 3582 Priority: e.Priority, 3583 Job: j, 3584 NodeUpdate: make(map[string][]*Allocation), 3585 NodeAllocation: make(map[string][]*Allocation), 3586 } 3587 if j != nil { 3588 p.AllAtOnce = j.AllAtOnce 3589 } 3590 return p 3591 } 3592 3593 // NextRollingEval creates an evaluation to followup this eval for rolling updates 3594 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 3595 return &Evaluation{ 3596 ID: GenerateUUID(), 3597 Priority: e.Priority, 3598 Type: e.Type, 3599 TriggeredBy: EvalTriggerRollingUpdate, 3600 JobID: e.JobID, 3601 JobModifyIndex: e.JobModifyIndex, 3602 Status: EvalStatusPending, 3603 Wait: wait, 3604 PreviousEval: e.ID, 3605 } 3606 } 3607 3608 // CreateBlockedEval creates a blocked evaluation to followup this eval to place any 3609 // failed allocations. It takes the classes marked explicitly eligible or 3610 // ineligible and whether the job has escaped computed node classes. 3611 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation { 3612 return &Evaluation{ 3613 ID: GenerateUUID(), 3614 Priority: e.Priority, 3615 Type: e.Type, 3616 TriggeredBy: e.TriggeredBy, 3617 JobID: e.JobID, 3618 JobModifyIndex: e.JobModifyIndex, 3619 Status: EvalStatusBlocked, 3620 PreviousEval: e.ID, 3621 ClassEligibility: classEligibility, 3622 EscapedComputedClass: escaped, 3623 } 3624 } 3625 3626 // Plan is used to submit a commit plan for task allocations. 
These 3627 // are submitted to the leader which verifies that resources have 3628 // not been overcommitted before admiting the plan. 3629 type Plan struct { 3630 // EvalID is the evaluation ID this plan is associated with 3631 EvalID string 3632 3633 // EvalToken is used to prevent a split-brain processing of 3634 // an evaluation. There should only be a single scheduler running 3635 // an Eval at a time, but this could be violated after a leadership 3636 // transition. This unique token is used to reject plans that are 3637 // being submitted from a different leader. 3638 EvalToken string 3639 3640 // Priority is the priority of the upstream job 3641 Priority int 3642 3643 // AllAtOnce is used to control if incremental scheduling of task groups 3644 // is allowed or if we must do a gang scheduling of the entire job. 3645 // If this is false, a plan may be partially applied. Otherwise, the 3646 // entire plan must be able to make progress. 3647 AllAtOnce bool 3648 3649 // Job is the parent job of all the allocations in the Plan. 3650 // Since a Plan only involves a single Job, we can reduce the size 3651 // of the plan by only including it once. 3652 Job *Job 3653 3654 // NodeUpdate contains all the allocations for each node. For each node, 3655 // this is a list of the allocations to update to either stop or evict. 3656 NodeUpdate map[string][]*Allocation 3657 3658 // NodeAllocation contains all the allocations for each node. 3659 // The evicts must be considered prior to the allocations. 3660 NodeAllocation map[string][]*Allocation 3661 3662 // Annotations contains annotations by the scheduler to be used by operators 3663 // to understand the decisions made by the scheduler. 3664 Annotations *PlanAnnotations 3665 } 3666 3667 // AppendUpdate marks the allocation for eviction. The clientStatus of the 3668 // allocation may be optionally set by passing in a non-empty value. 
3669 func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) { 3670 newAlloc := new(Allocation) 3671 *newAlloc = *alloc 3672 3673 // If the job is not set in the plan we are deregistering a job so we 3674 // extract the job from the allocation. 3675 if p.Job == nil && newAlloc.Job != nil { 3676 p.Job = newAlloc.Job 3677 } 3678 3679 // Normalize the job 3680 newAlloc.Job = nil 3681 3682 // Strip the resources as it can be rebuilt. 3683 newAlloc.Resources = nil 3684 3685 newAlloc.DesiredStatus = desiredStatus 3686 newAlloc.DesiredDescription = desiredDesc 3687 3688 if clientStatus != "" { 3689 newAlloc.ClientStatus = clientStatus 3690 } 3691 3692 node := alloc.NodeID 3693 existing := p.NodeUpdate[node] 3694 p.NodeUpdate[node] = append(existing, newAlloc) 3695 } 3696 3697 func (p *Plan) PopUpdate(alloc *Allocation) { 3698 existing := p.NodeUpdate[alloc.NodeID] 3699 n := len(existing) 3700 if n > 0 && existing[n-1].ID == alloc.ID { 3701 existing = existing[:n-1] 3702 if len(existing) > 0 { 3703 p.NodeUpdate[alloc.NodeID] = existing 3704 } else { 3705 delete(p.NodeUpdate, alloc.NodeID) 3706 } 3707 } 3708 } 3709 3710 func (p *Plan) AppendAlloc(alloc *Allocation) { 3711 node := alloc.NodeID 3712 existing := p.NodeAllocation[node] 3713 p.NodeAllocation[node] = append(existing, alloc) 3714 } 3715 3716 // IsNoOp checks if this plan would do nothing 3717 func (p *Plan) IsNoOp() bool { 3718 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 3719 } 3720 3721 // PlanResult is the result of a plan submitted to the leader. 3722 type PlanResult struct { 3723 // NodeUpdate contains all the updates that were committed. 3724 NodeUpdate map[string][]*Allocation 3725 3726 // NodeAllocation contains all the allocations that were committed. 3727 NodeAllocation map[string][]*Allocation 3728 3729 // RefreshIndex is the index the worker should refresh state up to. 3730 // This allows all evictions and allocations to be materialized. 
3731 // If any allocations were rejected due to stale data (node state, 3732 // over committed) this can be used to force a worker refresh. 3733 RefreshIndex uint64 3734 3735 // AllocIndex is the Raft index in which the evictions and 3736 // allocations took place. This is used for the write index. 3737 AllocIndex uint64 3738 } 3739 3740 // IsNoOp checks if this plan result would do nothing 3741 func (p *PlanResult) IsNoOp() bool { 3742 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 3743 } 3744 3745 // FullCommit is used to check if all the allocations in a plan 3746 // were committed as part of the result. Returns if there was 3747 // a match, and the number of expected and actual allocations. 3748 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) { 3749 expected := 0 3750 actual := 0 3751 for name, allocList := range plan.NodeAllocation { 3752 didAlloc, _ := p.NodeAllocation[name] 3753 expected += len(allocList) 3754 actual += len(didAlloc) 3755 } 3756 return actual == expected, expected, actual 3757 } 3758 3759 // PlanAnnotations holds annotations made by the scheduler to give further debug 3760 // information to operators. 3761 type PlanAnnotations struct { 3762 // DesiredTGUpdates is the set of desired updates per task group. 3763 DesiredTGUpdates map[string]*DesiredUpdates 3764 } 3765 3766 // DesiredUpdates is the set of changes the scheduler would like to make given 3767 // sufficient resources and cluster capacity. 3768 type DesiredUpdates struct { 3769 Ignore uint64 3770 Place uint64 3771 Migrate uint64 3772 Stop uint64 3773 InPlaceUpdate uint64 3774 DestructiveUpdate uint64 3775 } 3776 3777 // msgpackHandle is a shared handle for encoding/decoding of structs 3778 var MsgpackHandle = func() *codec.MsgpackHandle { 3779 h := &codec.MsgpackHandle{RawToString: true} 3780 3781 // Sets the default type for decoding a map into a nil interface{}. 
3782 // This is necessary in particular because we store the driver configs as a 3783 // nil interface{}. 3784 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 3785 return h 3786 }() 3787 3788 var HashiMsgpackHandle = func() *hcodec.MsgpackHandle { 3789 h := &hcodec.MsgpackHandle{RawToString: true} 3790 3791 // Sets the default type for decoding a map into a nil interface{}. 3792 // This is necessary in particular because we store the driver configs as a 3793 // nil interface{}. 3794 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 3795 return h 3796 }() 3797 3798 // Decode is used to decode a MsgPack encoded object 3799 func Decode(buf []byte, out interface{}) error { 3800 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out) 3801 } 3802 3803 // Encode is used to encode a MsgPack object with type prefix 3804 func Encode(t MessageType, msg interface{}) ([]byte, error) { 3805 var buf bytes.Buffer 3806 buf.WriteByte(uint8(t)) 3807 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg) 3808 return buf.Bytes(), err 3809 } 3810 3811 // KeyringResponse is a unified key response and can be used for install, 3812 // remove, use, as well as listing key queries. 3813 type KeyringResponse struct { 3814 Messages map[string]string 3815 Keys map[string]int 3816 NumNodes int 3817 } 3818 3819 // KeyringRequest is request objects for serf key operations. 3820 type KeyringRequest struct { 3821 Key string 3822 } 3823 3824 // RecoverableError wraps an error and marks whether it is recoverable and could 3825 // be retried or it is fatal. 3826 type RecoverableError struct { 3827 Err string 3828 Recoverable bool 3829 } 3830 3831 // NewRecoverableError is used to wrap an error and mark it as recoverable or 3832 // not. 
3833 func NewRecoverableError(e error, recoverable bool) *RecoverableError { 3834 if e == nil { 3835 return nil 3836 } 3837 3838 return &RecoverableError{ 3839 Err: e.Error(), 3840 Recoverable: recoverable, 3841 } 3842 } 3843 3844 func (r *RecoverableError) Error() string { 3845 return r.Err 3846 }