github.com/taylorchu/nomad@v0.5.3-rc1.0.20170407200202-db11e7dd7b55/nomad/structs/structs.go

package structs

import (
	"bytes"
	"crypto/md5"
	"crypto/sha1"
	"crypto/sha256"
	"crypto/sha512"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"net"
	"os"
	"path/filepath"
	"reflect"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/gorhill/cronexpr"
	"github.com/hashicorp/consul/api"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/go-version"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/args"
	"github.com/mitchellh/copystructure"
	"github.com/ugorji/go/codec"

	hcodec "github.com/hashicorp/go-msgpack/codec"
)

var (
	ErrNoLeader     = fmt.Errorf("No cluster leader")
	ErrNoRegionPath = fmt.Errorf("No path to region")
)

type MessageType uint8

const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	VaultAccessorDegisterRequestType
)

const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)

// RPCInfo is used to describe common information about query
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// If set, wait until query exceeds given index. Must be provided
	// with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
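	//
	// As an illustrative sketch only (not part of the original source), a
	// stale blocking read could be expressed as:
	//
	//	opts := QueryOptions{
	//		Region:        "global",        // placeholder region name
	//		MinQueryIndex: lastIndex,       // hypothetical previously observed index
	//		MaxQueryTime:  5 * time.Minute,
	//		AllowStale:    true,
	//	}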
103 AllowStale bool 104 105 // If set, used as prefix for resource list searches 106 Prefix string 107 } 108 109 func (q QueryOptions) RequestRegion() string { 110 return q.Region 111 } 112 113 // QueryOption only applies to reads, so always true 114 func (q QueryOptions) IsRead() bool { 115 return true 116 } 117 118 func (q QueryOptions) AllowStaleRead() bool { 119 return q.AllowStale 120 } 121 122 type WriteRequest struct { 123 // The target region for this write 124 Region string 125 } 126 127 func (w WriteRequest) RequestRegion() string { 128 // The target region for this request 129 return w.Region 130 } 131 132 // WriteRequest only applies to writes, always false 133 func (w WriteRequest) IsRead() bool { 134 return false 135 } 136 137 func (w WriteRequest) AllowStaleRead() bool { 138 return false 139 } 140 141 // QueryMeta allows a query response to include potentially 142 // useful metadata about a query 143 type QueryMeta struct { 144 // This is the index associated with the read 145 Index uint64 146 147 // If AllowStale is used, this is time elapsed since 148 // last contact between the follower and leader. This 149 // can be used to gauge staleness. 150 LastContact time.Duration 151 152 // Used to indicate if there is a known leader node 153 KnownLeader bool 154 } 155 156 // WriteMeta allows a write response to include potentially 157 // useful metadata about the write 158 type WriteMeta struct { 159 // This is the index associated with the write 160 Index uint64 161 } 162 163 // NodeRegisterRequest is used for Node.Register endpoint 164 // to register a node as being a schedulable entity. 165 type NodeRegisterRequest struct { 166 Node *Node 167 WriteRequest 168 } 169 170 // NodeDeregisterRequest is used for Node.Deregister endpoint 171 // to deregister a node as being a schedulable entity. 172 type NodeDeregisterRequest struct { 173 NodeID string 174 WriteRequest 175 } 176 177 // NodeServerInfo is used to in NodeUpdateResponse to return Nomad server 178 // information used in RPC server lists. 179 type NodeServerInfo struct { 180 // RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to 181 // be contacted at for RPCs. 182 RPCAdvertiseAddr string 183 184 // RpcMajorVersion is the major version number the Nomad Server 185 // supports 186 RPCMajorVersion int32 187 188 // RpcMinorVersion is the minor version number the Nomad Server 189 // supports 190 RPCMinorVersion int32 191 192 // Datacenter is the datacenter that a Nomad server belongs to 193 Datacenter string 194 } 195 196 // NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint 197 // to update the status of a node. 198 type NodeUpdateStatusRequest struct { 199 NodeID string 200 Status string 201 WriteRequest 202 } 203 204 // NodeUpdateDrainRequest is used for updatin the drain status 205 type NodeUpdateDrainRequest struct { 206 NodeID string 207 Drain bool 208 WriteRequest 209 } 210 211 // NodeEvaluateRequest is used to re-evaluate the ndoe 212 type NodeEvaluateRequest struct { 213 NodeID string 214 WriteRequest 215 } 216 217 // NodeSpecificRequest is used when we just need to specify a target node 218 type NodeSpecificRequest struct { 219 NodeID string 220 SecretID string 221 QueryOptions 222 } 223 224 // JobRegisterRequest is used for Job.Register endpoint 225 // to register a job as being a schedulable entity. 226 type JobRegisterRequest struct { 227 Job *Job 228 229 // If EnforceIndex is set then the job will only be registered if the passed 230 // JobModifyIndex matches the current Jobs index. 
If the index is zero, the 231 // register only occurs if the job is new. 232 EnforceIndex bool 233 JobModifyIndex uint64 234 235 WriteRequest 236 } 237 238 // JobDeregisterRequest is used for Job.Deregister endpoint 239 // to deregister a job as being a schedulable entity. 240 type JobDeregisterRequest struct { 241 JobID string 242 WriteRequest 243 } 244 245 // JobEvaluateRequest is used when we just need to re-evaluate a target job 246 type JobEvaluateRequest struct { 247 JobID string 248 WriteRequest 249 } 250 251 // JobSpecificRequest is used when we just need to specify a target job 252 type JobSpecificRequest struct { 253 JobID string 254 AllAllocs bool 255 QueryOptions 256 } 257 258 // JobListRequest is used to parameterize a list request 259 type JobListRequest struct { 260 QueryOptions 261 } 262 263 // JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run 264 // evaluation of the Job. 265 type JobPlanRequest struct { 266 Job *Job 267 Diff bool // Toggles an annotated diff 268 WriteRequest 269 } 270 271 // JobSummaryRequest is used when we just need to get a specific job summary 272 type JobSummaryRequest struct { 273 JobID string 274 QueryOptions 275 } 276 277 // JobDispatchRequest is used to dispatch a job based on a parameterized job 278 type JobDispatchRequest struct { 279 JobID string 280 Payload []byte 281 Meta map[string]string 282 WriteRequest 283 } 284 285 // JobValidateRequest is used to validate a job 286 type JobValidateRequest struct { 287 Job *Job 288 WriteRequest 289 } 290 291 // JobValidateResponse is the response from validate request 292 type JobValidateResponse struct { 293 // DriverConfigValidated indicates whether the agent validated the driver 294 // config 295 DriverConfigValidated bool 296 297 // ValidationErrors is a list of validation errors 298 ValidationErrors []string 299 300 // Error is a string version of any error that may have occured 301 Error string 302 } 303 304 // NodeListRequest is used to parameterize a list request 305 type NodeListRequest struct { 306 QueryOptions 307 } 308 309 // EvalUpdateRequest is used for upserting evaluations. 310 type EvalUpdateRequest struct { 311 Evals []*Evaluation 312 EvalToken string 313 WriteRequest 314 } 315 316 // EvalDeleteRequest is used for deleting an evaluation. 317 type EvalDeleteRequest struct { 318 Evals []string 319 Allocs []string 320 WriteRequest 321 } 322 323 // EvalSpecificRequest is used when we just need to specify a target evaluation 324 type EvalSpecificRequest struct { 325 EvalID string 326 QueryOptions 327 } 328 329 // EvalAckRequest is used to Ack/Nack a specific evaluation 330 type EvalAckRequest struct { 331 EvalID string 332 Token string 333 WriteRequest 334 } 335 336 // EvalDequeueRequest is used when we want to dequeue an evaluation 337 type EvalDequeueRequest struct { 338 Schedulers []string 339 Timeout time.Duration 340 SchedulerVersion uint16 341 WriteRequest 342 } 343 344 // EvalListRequest is used to list the evaluations 345 type EvalListRequest struct { 346 QueryOptions 347 } 348 349 // PlanRequest is used to submit an allocation plan to the leader 350 type PlanRequest struct { 351 Plan *Plan 352 WriteRequest 353 } 354 355 // AllocUpdateRequest is used to submit changes to allocations, either 356 // to cause evictions or to assign new allocaitons. 
Both can be done 357 // within a single transaction 358 type AllocUpdateRequest struct { 359 // Alloc is the list of new allocations to assign 360 Alloc []*Allocation 361 362 // Job is the shared parent job of the allocations. 363 // It is pulled out since it is common to reduce payload size. 364 Job *Job 365 366 WriteRequest 367 } 368 369 // AllocListRequest is used to request a list of allocations 370 type AllocListRequest struct { 371 QueryOptions 372 } 373 374 // AllocSpecificRequest is used to query a specific allocation 375 type AllocSpecificRequest struct { 376 AllocID string 377 QueryOptions 378 } 379 380 // AllocsGetRequest is used to query a set of allocations 381 type AllocsGetRequest struct { 382 AllocIDs []string 383 QueryOptions 384 } 385 386 // PeriodicForceReqeuest is used to force a specific periodic job. 387 type PeriodicForceRequest struct { 388 JobID string 389 WriteRequest 390 } 391 392 // ServerMembersResponse has the list of servers in a cluster 393 type ServerMembersResponse struct { 394 ServerName string 395 ServerRegion string 396 ServerDC string 397 Members []*ServerMember 398 } 399 400 // ServerMember holds information about a Nomad server agent in a cluster 401 type ServerMember struct { 402 Name string 403 Addr net.IP 404 Port uint16 405 Tags map[string]string 406 Status string 407 ProtocolMin uint8 408 ProtocolMax uint8 409 ProtocolCur uint8 410 DelegateMin uint8 411 DelegateMax uint8 412 DelegateCur uint8 413 } 414 415 // DeriveVaultTokenRequest is used to request wrapped Vault tokens for the 416 // following tasks in the given allocation 417 type DeriveVaultTokenRequest struct { 418 NodeID string 419 SecretID string 420 AllocID string 421 Tasks []string 422 QueryOptions 423 } 424 425 // VaultAccessorsRequest is used to operate on a set of Vault accessors 426 type VaultAccessorsRequest struct { 427 Accessors []*VaultAccessor 428 } 429 430 // VaultAccessor is a reference to a created Vault token on behalf of 431 // an allocation's task. 432 type VaultAccessor struct { 433 AllocID string 434 Task string 435 NodeID string 436 Accessor string 437 CreationTTL int 438 439 // Raft Indexes 440 CreateIndex uint64 441 } 442 443 // DeriveVaultTokenResponse returns the wrapped tokens for each requested task 444 type DeriveVaultTokenResponse struct { 445 // Tasks is a mapping between the task name and the wrapped token 446 Tasks map[string]string 447 448 // Error stores any error that occured. Errors are stored here so we can 449 // communicate whether it is retriable 450 Error *RecoverableError 451 452 QueryMeta 453 } 454 455 // GenericRequest is used to request where no 456 // specific information is needed. 457 type GenericRequest struct { 458 QueryOptions 459 } 460 461 // GenericResponse is used to respond to a request where no 462 // specific response information is needed. 
type GenericResponse struct {
	WriteMeta
}

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader. If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response. This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
	Allocs map[string]uint64
	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
	JobSummary *JobSummary
	QueryMeta
}

type JobDispatchResponse struct {
	DispatchedJobID string
	EvalID          string
	EvalCreateIndex uint64
	JobCreateIndex  uint64
	WriteMeta
}

// JobListResponse is used for a list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
586 CreatedEvals []*Evaluation 587 588 // Diff contains the diff of the job and annotations on whether the change 589 // causes an in-place update or create/destroy 590 Diff *JobDiff 591 592 // NextPeriodicLaunch is the time duration till the job would be launched if 593 // submitted. 594 NextPeriodicLaunch time.Time 595 596 WriteMeta 597 } 598 599 // SingleAllocResponse is used to return a single allocation 600 type SingleAllocResponse struct { 601 Alloc *Allocation 602 QueryMeta 603 } 604 605 // AllocsGetResponse is used to return a set of allocations 606 type AllocsGetResponse struct { 607 Allocs []*Allocation 608 QueryMeta 609 } 610 611 // JobAllocationsResponse is used to return the allocations for a job 612 type JobAllocationsResponse struct { 613 Allocations []*AllocListStub 614 QueryMeta 615 } 616 617 // JobEvaluationsResponse is used to return the evaluations for a job 618 type JobEvaluationsResponse struct { 619 Evaluations []*Evaluation 620 QueryMeta 621 } 622 623 // SingleEvalResponse is used to return a single evaluation 624 type SingleEvalResponse struct { 625 Eval *Evaluation 626 QueryMeta 627 } 628 629 // EvalDequeueResponse is used to return from a dequeue 630 type EvalDequeueResponse struct { 631 Eval *Evaluation 632 Token string 633 QueryMeta 634 } 635 636 // PlanResponse is used to return from a PlanRequest 637 type PlanResponse struct { 638 Result *PlanResult 639 WriteMeta 640 } 641 642 // AllocListResponse is used for a list request 643 type AllocListResponse struct { 644 Allocations []*AllocListStub 645 QueryMeta 646 } 647 648 // EvalListResponse is used for a list request 649 type EvalListResponse struct { 650 Evaluations []*Evaluation 651 QueryMeta 652 } 653 654 // EvalAllocationsResponse is used to return the allocations for an evaluation 655 type EvalAllocationsResponse struct { 656 Allocations []*AllocListStub 657 QueryMeta 658 } 659 660 // PeriodicForceResponse is used to respond to a periodic job force launch 661 type PeriodicForceResponse struct { 662 EvalID string 663 EvalCreateIndex uint64 664 WriteMeta 665 } 666 667 const ( 668 NodeStatusInit = "initializing" 669 NodeStatusReady = "ready" 670 NodeStatusDown = "down" 671 ) 672 673 // ShouldDrainNode checks if a given node status should trigger an 674 // evaluation. Some states don't require any further action. 675 func ShouldDrainNode(status string) bool { 676 switch status { 677 case NodeStatusInit, NodeStatusReady: 678 return false 679 case NodeStatusDown: 680 return true 681 default: 682 panic(fmt.Sprintf("unhandled node status %s", status)) 683 } 684 } 685 686 // ValidNodeStatus is used to check if a node status is valid 687 func ValidNodeStatus(status string) bool { 688 switch status { 689 case NodeStatusInit, NodeStatusReady, NodeStatusDown: 690 return true 691 default: 692 return false 693 } 694 } 695 696 // Node is a representation of a schedulable client node 697 type Node struct { 698 // ID is a unique identifier for the node. It can be constructed 699 // by doing a concatenation of the Name and Datacenter as a simple 700 // approach. Alternatively a UUID may be used. 701 ID string 702 703 // SecretID is an ID that is only known by the Node and the set of Servers. 704 // It is not accessible via the API and is used to authenticate nodes 705 // conducting priviledged activities. 
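	// (For example, this value is what a client presents in the SecretID
	// fields of NodeSpecificRequest and DeriveVaultTokenRequest, presumably
	// so the servers can verify the calling node before serving those RPCs.)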
706 SecretID string 707 708 // Datacenter for this node 709 Datacenter string 710 711 // Node name 712 Name string 713 714 // HTTPAddr is the address on which the Nomad client is listening for http 715 // requests 716 HTTPAddr string 717 718 // TLSEnabled indicates if the Agent has TLS enabled for the HTTP API 719 TLSEnabled bool 720 721 // Attributes is an arbitrary set of key/value 722 // data that can be used for constraints. Examples 723 // include "kernel.name=linux", "arch=386", "driver.docker=1", 724 // "docker.runtime=1.8.3" 725 Attributes map[string]string 726 727 // Resources is the available resources on the client. 728 // For example 'cpu=2' 'memory=2048' 729 Resources *Resources 730 731 // Reserved is the set of resources that are reserved, 732 // and should be subtracted from the total resources for 733 // the purposes of scheduling. This may be provide certain 734 // high-watermark tolerances or because of external schedulers 735 // consuming resources. 736 Reserved *Resources 737 738 // Links are used to 'link' this client to external 739 // systems. For example 'consul=foo.dc1' 'aws=i-83212' 740 // 'ami=ami-123' 741 Links map[string]string 742 743 // Meta is used to associate arbitrary metadata with this 744 // client. This is opaque to Nomad. 745 Meta map[string]string 746 747 // NodeClass is an opaque identifier used to group nodes 748 // together for the purpose of determining scheduling pressure. 749 NodeClass string 750 751 // ComputedClass is a unique id that identifies nodes with a common set of 752 // attributes and capabilities. 753 ComputedClass string 754 755 // Drain is controlled by the servers, and not the client. 756 // If true, no jobs will be scheduled to this node, and existing 757 // allocations will be drained. 758 Drain bool 759 760 // Status of this node 761 Status string 762 763 // StatusDescription is meant to provide more human useful information 764 StatusDescription string 765 766 // StatusUpdatedAt is the time stamp at which the state of the node was 767 // updated 768 StatusUpdatedAt int64 769 770 // Raft Indexes 771 CreateIndex uint64 772 ModifyIndex uint64 773 } 774 775 // Ready returns if the node is ready for running allocations 776 func (n *Node) Ready() bool { 777 return n.Status == NodeStatusReady && !n.Drain 778 } 779 780 func (n *Node) Copy() *Node { 781 if n == nil { 782 return nil 783 } 784 nn := new(Node) 785 *nn = *n 786 nn.Attributes = helper.CopyMapStringString(nn.Attributes) 787 nn.Resources = nn.Resources.Copy() 788 nn.Reserved = nn.Reserved.Copy() 789 nn.Links = helper.CopyMapStringString(nn.Links) 790 nn.Meta = helper.CopyMapStringString(nn.Meta) 791 return nn 792 } 793 794 // TerminalStatus returns if the current status is terminal and 795 // will no longer transition. 
796 func (n *Node) TerminalStatus() bool { 797 switch n.Status { 798 case NodeStatusDown: 799 return true 800 default: 801 return false 802 } 803 } 804 805 // Stub returns a summarized version of the node 806 func (n *Node) Stub() *NodeListStub { 807 return &NodeListStub{ 808 ID: n.ID, 809 Datacenter: n.Datacenter, 810 Name: n.Name, 811 NodeClass: n.NodeClass, 812 Drain: n.Drain, 813 Status: n.Status, 814 StatusDescription: n.StatusDescription, 815 CreateIndex: n.CreateIndex, 816 ModifyIndex: n.ModifyIndex, 817 } 818 } 819 820 // NodeListStub is used to return a subset of job information 821 // for the job list 822 type NodeListStub struct { 823 ID string 824 Datacenter string 825 Name string 826 NodeClass string 827 Drain bool 828 Status string 829 StatusDescription string 830 CreateIndex uint64 831 ModifyIndex uint64 832 } 833 834 // Resources is used to define the resources available 835 // on a client 836 type Resources struct { 837 CPU int 838 MemoryMB int 839 DiskMB int 840 IOPS int 841 Networks []*NetworkResource 842 } 843 844 const ( 845 BytesInMegabyte = 1024 * 1024 846 ) 847 848 // DefaultResources returns the default resources for a task. 849 func DefaultResources() *Resources { 850 return &Resources{ 851 CPU: 100, 852 MemoryMB: 10, 853 IOPS: 0, 854 } 855 } 856 857 // DiskInBytes returns the amount of disk resources in bytes. 858 func (r *Resources) DiskInBytes() int64 { 859 return int64(r.DiskMB * BytesInMegabyte) 860 } 861 862 // Merge merges this resource with another resource. 863 func (r *Resources) Merge(other *Resources) { 864 if other.CPU != 0 { 865 r.CPU = other.CPU 866 } 867 if other.MemoryMB != 0 { 868 r.MemoryMB = other.MemoryMB 869 } 870 if other.DiskMB != 0 { 871 r.DiskMB = other.DiskMB 872 } 873 if other.IOPS != 0 { 874 r.IOPS = other.IOPS 875 } 876 if len(other.Networks) != 0 { 877 r.Networks = other.Networks 878 } 879 } 880 881 func (r *Resources) Canonicalize() { 882 // Ensure that an empty and nil slices are treated the same to avoid scheduling 883 // problems since we use reflect DeepEquals. 884 if len(r.Networks) == 0 { 885 r.Networks = nil 886 } 887 888 for _, n := range r.Networks { 889 n.Canonicalize() 890 } 891 } 892 893 // MeetsMinResources returns an error if the resources specified are less than 894 // the minimum allowed. 
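// As implemented below, the floor is currently CPU >= 20, MemoryMB >= 10,
// IOPS >= 0, and at least 1 MBit for every declared network resource.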
895 func (r *Resources) MeetsMinResources() error { 896 var mErr multierror.Error 897 if r.CPU < 20 { 898 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is 20; got %d", r.CPU)) 899 } 900 if r.MemoryMB < 10 { 901 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is 10; got %d", r.MemoryMB)) 902 } 903 if r.IOPS < 0 { 904 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is 0; got %d", r.IOPS)) 905 } 906 for i, n := range r.Networks { 907 if err := n.MeetsMinResources(); err != nil { 908 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 909 } 910 } 911 912 return mErr.ErrorOrNil() 913 } 914 915 // Copy returns a deep copy of the resources 916 func (r *Resources) Copy() *Resources { 917 if r == nil { 918 return nil 919 } 920 newR := new(Resources) 921 *newR = *r 922 if r.Networks != nil { 923 n := len(r.Networks) 924 newR.Networks = make([]*NetworkResource, n) 925 for i := 0; i < n; i++ { 926 newR.Networks[i] = r.Networks[i].Copy() 927 } 928 } 929 return newR 930 } 931 932 // NetIndex finds the matching net index using device name 933 func (r *Resources) NetIndex(n *NetworkResource) int { 934 for idx, net := range r.Networks { 935 if net.Device == n.Device { 936 return idx 937 } 938 } 939 return -1 940 } 941 942 // Superset checks if one set of resources is a superset 943 // of another. This ignores network resources, and the NetworkIndex 944 // should be used for that. 945 func (r *Resources) Superset(other *Resources) (bool, string) { 946 if r.CPU < other.CPU { 947 return false, "cpu exhausted" 948 } 949 if r.MemoryMB < other.MemoryMB { 950 return false, "memory exhausted" 951 } 952 if r.DiskMB < other.DiskMB { 953 return false, "disk exhausted" 954 } 955 if r.IOPS < other.IOPS { 956 return false, "iops exhausted" 957 } 958 return true, "" 959 } 960 961 // Add adds the resources of the delta to this, potentially 962 // returning an error if not possible. 963 func (r *Resources) Add(delta *Resources) error { 964 if delta == nil { 965 return nil 966 } 967 r.CPU += delta.CPU 968 r.MemoryMB += delta.MemoryMB 969 r.DiskMB += delta.DiskMB 970 r.IOPS += delta.IOPS 971 972 for _, n := range delta.Networks { 973 // Find the matching interface by IP or CIDR 974 idx := r.NetIndex(n) 975 if idx == -1 { 976 r.Networks = append(r.Networks, n.Copy()) 977 } else { 978 r.Networks[idx].Add(n) 979 } 980 } 981 return nil 982 } 983 984 func (r *Resources) GoString() string { 985 return fmt.Sprintf("*%#v", *r) 986 } 987 988 type Port struct { 989 Label string 990 Value int 991 } 992 993 // NetworkResource is used to represent available network 994 // resources 995 type NetworkResource struct { 996 Device string // Name of the device 997 CIDR string // CIDR block of addresses 998 IP string // IP address 999 MBits int // Throughput 1000 ReservedPorts []Port // Reserved ports 1001 DynamicPorts []Port // Dynamically assigned ports 1002 } 1003 1004 func (n *NetworkResource) Canonicalize() { 1005 // Ensure that an empty and nil slices are treated the same to avoid scheduling 1006 // problems since we use reflect DeepEquals. 1007 if len(n.ReservedPorts) == 0 { 1008 n.ReservedPorts = nil 1009 } 1010 if len(n.DynamicPorts) == 0 { 1011 n.DynamicPorts = nil 1012 } 1013 } 1014 1015 // MeetsMinResources returns an error if the resources specified are less than 1016 // the minimum allowed. 
func (n *NetworkResource) MeetsMinResources() error {
	var mErr multierror.Error
	if n.MBits < 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits))
	}
	return mErr.ErrorOrNil()
}

// Copy returns a deep copy of the network resource
func (n *NetworkResource) Copy() *NetworkResource {
	if n == nil {
		return nil
	}
	newR := new(NetworkResource)
	*newR = *n
	if n.ReservedPorts != nil {
		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
		copy(newR.ReservedPorts, n.ReservedPorts)
	}
	if n.DynamicPorts != nil {
		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
		copy(newR.DynamicPorts, n.DynamicPorts)
	}
	return newR
}

// Add adds the resources of the delta to this, potentially
// returning an error if not possible.
func (n *NetworkResource) Add(delta *NetworkResource) {
	if len(delta.ReservedPorts) > 0 {
		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
	}
	n.MBits += delta.MBits
	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
}

func (n *NetworkResource) GoString() string {
	return fmt.Sprintf("*%#v", *n)
}

func (n *NetworkResource) MapLabelToValues(portMap map[string]int) map[string]int {
	labelValues := make(map[string]int)
	ports := append(n.ReservedPorts, n.DynamicPorts...)
	for _, port := range ports {
		if mapping, ok := portMap[port.Label]; ok {
			labelValues[port.Label] = mapping
		} else {
			labelValues[port.Label] = port.Value
		}
	}
	return labelValues
}

const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)

const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluations and allocations are terminal
)

const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if not
	// specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// Ensure CoreJobPriority is higher than any user
	// specified job so that it gets priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2
)

// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is the unit of scheduling
// however.
type Job struct {
	// Region is the Nomad region that handles scheduling this job
	Region string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project
	ID string

	// ParentID is the unique identifier of the job that spawned this job.
	ParentID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
1119 Name string 1120 1121 // Type is used to control various behaviors about the job. Most jobs 1122 // are service jobs, meaning they are expected to be long lived. 1123 // Some jobs are batch oriented meaning they run and then terminate. 1124 // This can be extended in the future to support custom schedulers. 1125 Type string 1126 1127 // Priority is used to control scheduling importance and if this job 1128 // can preempt other jobs. 1129 Priority int 1130 1131 // AllAtOnce is used to control if incremental scheduling of task groups 1132 // is allowed or if we must do a gang scheduling of the entire job. This 1133 // can slow down larger jobs if resources are not available. 1134 AllAtOnce bool 1135 1136 // Datacenters contains all the datacenters this job is allowed to span 1137 Datacenters []string 1138 1139 // Constraints can be specified at a job level and apply to 1140 // all the task groups and tasks. 1141 Constraints []*Constraint 1142 1143 // TaskGroups are the collections of task groups that this job needs 1144 // to run. Each task group is an atomic unit of scheduling and placement. 1145 TaskGroups []*TaskGroup 1146 1147 // Update is used to control the update strategy 1148 Update UpdateStrategy 1149 1150 // Periodic is used to define the interval the job is run at. 1151 Periodic *PeriodicConfig 1152 1153 // ParameterizedJob is used to specify the job as a parameterized job 1154 // for dispatching. 1155 ParameterizedJob *ParameterizedJobConfig 1156 1157 // Payload is the payload supplied when the job was dispatched. 1158 Payload []byte 1159 1160 // Meta is used to associate arbitrary metadata with this 1161 // job. This is opaque to Nomad. 1162 Meta map[string]string 1163 1164 // VaultToken is the Vault token that proves the submitter of the job has 1165 // access to the specified Vault policies. This field is only used to 1166 // transfer the token and is not stored after Job submission. 1167 VaultToken string 1168 1169 // Job status 1170 Status string 1171 1172 // StatusDescription is meant to provide more human useful information 1173 StatusDescription string 1174 1175 // Raft Indexes 1176 CreateIndex uint64 1177 ModifyIndex uint64 1178 JobModifyIndex uint64 1179 } 1180 1181 // Canonicalize is used to canonicalize fields in the Job. This should be called 1182 // when registering a Job. 1183 func (j *Job) Canonicalize() { 1184 // Ensure that an empty and nil map are treated the same to avoid scheduling 1185 // problems since we use reflect DeepEquals. 1186 if len(j.Meta) == 0 { 1187 j.Meta = nil 1188 } 1189 1190 for _, tg := range j.TaskGroups { 1191 tg.Canonicalize(j) 1192 } 1193 1194 if j.ParameterizedJob != nil { 1195 j.ParameterizedJob.Canonicalize() 1196 } 1197 1198 if j.Periodic != nil { 1199 j.Periodic.Canonicalize() 1200 } 1201 } 1202 1203 // Copy returns a deep copy of the Job. It is expected that callers use recover. 1204 // This job can panic if the deep copy failed as it uses reflection. 
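//
// A minimal caller-side sketch of that recover pattern (illustrative only;
// safeCopy is not a helper defined in this package):
//
//	func safeCopy(j *Job) (out *Job, err error) {
//		defer func() {
//			if r := recover(); r != nil {
//				err = fmt.Errorf("job copy panicked: %v", r)
//			}
//		}()
//		return j.Copy(), nil
//	}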
1205 func (j *Job) Copy() *Job { 1206 if j == nil { 1207 return nil 1208 } 1209 nj := new(Job) 1210 *nj = *j 1211 nj.Datacenters = helper.CopySliceString(nj.Datacenters) 1212 nj.Constraints = CopySliceConstraints(nj.Constraints) 1213 1214 if j.TaskGroups != nil { 1215 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 1216 for i, tg := range nj.TaskGroups { 1217 tgs[i] = tg.Copy() 1218 } 1219 nj.TaskGroups = tgs 1220 } 1221 1222 nj.Periodic = nj.Periodic.Copy() 1223 nj.Meta = helper.CopyMapStringString(nj.Meta) 1224 nj.ParameterizedJob = nj.ParameterizedJob.Copy() 1225 return nj 1226 } 1227 1228 // Validate is used to sanity check a job input 1229 func (j *Job) Validate() error { 1230 var mErr multierror.Error 1231 1232 if j.Region == "" { 1233 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 1234 } 1235 if j.ID == "" { 1236 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 1237 } else if strings.Contains(j.ID, " ") { 1238 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 1239 } 1240 if j.Name == "" { 1241 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 1242 } 1243 if j.Type == "" { 1244 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 1245 } 1246 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 1247 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 1248 } 1249 if len(j.Datacenters) == 0 { 1250 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 1251 } 1252 if len(j.TaskGroups) == 0 { 1253 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 1254 } 1255 for idx, constr := range j.Constraints { 1256 if err := constr.Validate(); err != nil { 1257 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1258 mErr.Errors = append(mErr.Errors, outer) 1259 } 1260 } 1261 1262 // Check for duplicate task groups 1263 taskGroups := make(map[string]int) 1264 for idx, tg := range j.TaskGroups { 1265 if tg.Name == "" { 1266 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 1267 } else if existing, ok := taskGroups[tg.Name]; ok { 1268 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 1269 } else { 1270 taskGroups[tg.Name] = idx 1271 } 1272 1273 if j.Type == "system" && tg.Count > 1 { 1274 mErr.Errors = append(mErr.Errors, 1275 fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler", 1276 tg.Name, tg.Count)) 1277 } 1278 } 1279 1280 // Validate the task group 1281 for _, tg := range j.TaskGroups { 1282 if err := tg.Validate(); err != nil { 1283 outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err) 1284 mErr.Errors = append(mErr.Errors, outer) 1285 } 1286 } 1287 1288 // Validate periodic is only used with batch jobs. 
1289 if j.IsPeriodic() && j.Periodic.Enabled { 1290 if j.Type != JobTypeBatch { 1291 mErr.Errors = append(mErr.Errors, 1292 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 1293 } 1294 1295 if err := j.Periodic.Validate(); err != nil { 1296 mErr.Errors = append(mErr.Errors, err) 1297 } 1298 } 1299 1300 if j.IsParameterized() { 1301 if j.Type != JobTypeBatch { 1302 mErr.Errors = append(mErr.Errors, 1303 fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch)) 1304 } 1305 1306 if err := j.ParameterizedJob.Validate(); err != nil { 1307 mErr.Errors = append(mErr.Errors, err) 1308 } 1309 } 1310 1311 return mErr.ErrorOrNil() 1312 } 1313 1314 // LookupTaskGroup finds a task group by name 1315 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 1316 for _, tg := range j.TaskGroups { 1317 if tg.Name == name { 1318 return tg 1319 } 1320 } 1321 return nil 1322 } 1323 1324 // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined 1325 // meta data for the task. When joining Job, Group and Task Meta, the precedence 1326 // is by deepest scope (Task > Group > Job). 1327 func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string { 1328 group := j.LookupTaskGroup(groupName) 1329 if group == nil { 1330 return nil 1331 } 1332 1333 task := group.LookupTask(taskName) 1334 if task == nil { 1335 return nil 1336 } 1337 1338 meta := helper.CopyMapStringString(task.Meta) 1339 if meta == nil { 1340 meta = make(map[string]string, len(group.Meta)+len(j.Meta)) 1341 } 1342 1343 // Add the group specific meta 1344 for k, v := range group.Meta { 1345 if _, ok := meta[k]; !ok { 1346 meta[k] = v 1347 } 1348 } 1349 1350 // Add the job specific meta 1351 for k, v := range j.Meta { 1352 if _, ok := meta[k]; !ok { 1353 meta[k] = v 1354 } 1355 } 1356 1357 return meta 1358 } 1359 1360 // Stub is used to return a summary of the job 1361 func (j *Job) Stub(summary *JobSummary) *JobListStub { 1362 return &JobListStub{ 1363 ID: j.ID, 1364 ParentID: j.ParentID, 1365 Name: j.Name, 1366 Type: j.Type, 1367 Priority: j.Priority, 1368 Status: j.Status, 1369 StatusDescription: j.StatusDescription, 1370 CreateIndex: j.CreateIndex, 1371 ModifyIndex: j.ModifyIndex, 1372 JobModifyIndex: j.JobModifyIndex, 1373 JobSummary: summary, 1374 } 1375 } 1376 1377 // IsPeriodic returns whether a job is periodic. 1378 func (j *Job) IsPeriodic() bool { 1379 return j.Periodic != nil 1380 } 1381 1382 // IsParameterized returns whether a job is parameterized job. 
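// (Such jobs are dispatched through JobDispatchRequest, and each dispatched
// instance receives an ID built by DispatchedID further below.)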
1383 func (j *Job) IsParameterized() bool { 1384 return j.ParameterizedJob != nil 1385 } 1386 1387 // VaultPolicies returns the set of Vault policies per task group, per task 1388 func (j *Job) VaultPolicies() map[string]map[string]*Vault { 1389 policies := make(map[string]map[string]*Vault, len(j.TaskGroups)) 1390 1391 for _, tg := range j.TaskGroups { 1392 tgPolicies := make(map[string]*Vault, len(tg.Tasks)) 1393 1394 for _, task := range tg.Tasks { 1395 if task.Vault == nil { 1396 continue 1397 } 1398 1399 tgPolicies[task.Name] = task.Vault 1400 } 1401 1402 if len(tgPolicies) != 0 { 1403 policies[tg.Name] = tgPolicies 1404 } 1405 } 1406 1407 return policies 1408 } 1409 1410 // RequiredSignals returns a mapping of task groups to tasks to their required 1411 // set of signals 1412 func (j *Job) RequiredSignals() map[string]map[string][]string { 1413 signals := make(map[string]map[string][]string) 1414 1415 for _, tg := range j.TaskGroups { 1416 for _, task := range tg.Tasks { 1417 // Use this local one as a set 1418 taskSignals := make(map[string]struct{}) 1419 1420 // Check if the Vault change mode uses signals 1421 if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal { 1422 taskSignals[task.Vault.ChangeSignal] = struct{}{} 1423 } 1424 1425 // Check if any template change mode uses signals 1426 for _, t := range task.Templates { 1427 if t.ChangeMode != TemplateChangeModeSignal { 1428 continue 1429 } 1430 1431 taskSignals[t.ChangeSignal] = struct{}{} 1432 } 1433 1434 // Flatten and sort the signals 1435 l := len(taskSignals) 1436 if l == 0 { 1437 continue 1438 } 1439 1440 flat := make([]string, 0, l) 1441 for sig := range taskSignals { 1442 flat = append(flat, sig) 1443 } 1444 1445 sort.Strings(flat) 1446 tgSignals, ok := signals[tg.Name] 1447 if !ok { 1448 tgSignals = make(map[string][]string) 1449 signals[tg.Name] = tgSignals 1450 } 1451 tgSignals[task.Name] = flat 1452 } 1453 1454 } 1455 1456 return signals 1457 } 1458 1459 // JobListStub is used to return a subset of job information 1460 // for the job list 1461 type JobListStub struct { 1462 ID string 1463 ParentID string 1464 Name string 1465 Type string 1466 Priority int 1467 Status string 1468 StatusDescription string 1469 JobSummary *JobSummary 1470 CreateIndex uint64 1471 ModifyIndex uint64 1472 JobModifyIndex uint64 1473 } 1474 1475 // JobSummary summarizes the state of the allocations of a job 1476 type JobSummary struct { 1477 JobID string 1478 1479 // Summmary contains the summary per task group for the Job 1480 Summary map[string]TaskGroupSummary 1481 1482 // Children contains a summary for the children of this job. 
1483 Children *JobChildrenSummary 1484 1485 // Raft Indexes 1486 CreateIndex uint64 1487 ModifyIndex uint64 1488 } 1489 1490 // Copy returns a new copy of JobSummary 1491 func (js *JobSummary) Copy() *JobSummary { 1492 newJobSummary := new(JobSummary) 1493 *newJobSummary = *js 1494 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary)) 1495 for k, v := range js.Summary { 1496 newTGSummary[k] = v 1497 } 1498 newJobSummary.Summary = newTGSummary 1499 newJobSummary.Children = newJobSummary.Children.Copy() 1500 return newJobSummary 1501 } 1502 1503 // JobChildrenSummary contains the summary of children job statuses 1504 type JobChildrenSummary struct { 1505 Pending int64 1506 Running int64 1507 Dead int64 1508 } 1509 1510 // Copy returns a new copy of a JobChildrenSummary 1511 func (jc *JobChildrenSummary) Copy() *JobChildrenSummary { 1512 if jc == nil { 1513 return nil 1514 } 1515 1516 njc := new(JobChildrenSummary) 1517 *njc = *jc 1518 return njc 1519 } 1520 1521 // TaskGroup summarizes the state of all the allocations of a particular 1522 // TaskGroup 1523 type TaskGroupSummary struct { 1524 Queued int 1525 Complete int 1526 Failed int 1527 Running int 1528 Starting int 1529 Lost int 1530 } 1531 1532 // UpdateStrategy is used to modify how updates are done 1533 type UpdateStrategy struct { 1534 // Stagger is the amount of time between the updates 1535 Stagger time.Duration 1536 1537 // MaxParallel is how many updates can be done in parallel 1538 MaxParallel int 1539 } 1540 1541 // Rolling returns if a rolling strategy should be used 1542 func (u *UpdateStrategy) Rolling() bool { 1543 return u.Stagger > 0 && u.MaxParallel > 0 1544 } 1545 1546 const ( 1547 // PeriodicSpecCron is used for a cron spec. 1548 PeriodicSpecCron = "cron" 1549 1550 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 1551 // separated list of unix timestamps at which to launch. 1552 PeriodicSpecTest = "_internal_test" 1553 ) 1554 1555 // Periodic defines the interval a job should be run at. 1556 type PeriodicConfig struct { 1557 // Enabled determines if the job should be run periodically. 1558 Enabled bool 1559 1560 // Spec specifies the interval the job should be run as. It is parsed based 1561 // on the SpecType. 1562 Spec string 1563 1564 // SpecType defines the format of the spec. 1565 SpecType string 1566 1567 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 1568 ProhibitOverlap bool 1569 1570 // TimeZone is the user specified string that determines the time zone to 1571 // launch against. The time zones must be specified from IANA Time Zone 1572 // database, such as "America/New_York". 
	// Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
	// Reference: https://www.iana.org/time-zones
	TimeZone string

	// location is the time zone to evaluate the launch time against
	location *time.Location
}

func (p *PeriodicConfig) Copy() *PeriodicConfig {
	if p == nil {
		return nil
	}
	np := new(PeriodicConfig)
	*np = *p
	return np
}

func (p *PeriodicConfig) Validate() error {
	if !p.Enabled {
		return nil
	}

	var mErr multierror.Error
	if p.Spec == "" {
		multierror.Append(&mErr, fmt.Errorf("Must specify a spec"))
	}

	// Check if we got a valid time zone
	if p.TimeZone != "" {
		if _, err := time.LoadLocation(p.TimeZone); err != nil {
			multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err))
		}
	}

	switch p.SpecType {
	case PeriodicSpecCron:
		// Validate the cron spec
		if _, err := cronexpr.Parse(p.Spec); err != nil {
			multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err))
		}
	case PeriodicSpecTest:
		// No-op
	default:
		multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType))
	}

	return mErr.ErrorOrNil()
}

func (p *PeriodicConfig) Canonicalize() {
	// Load the location; fall back to UTC if the time zone cannot be loaded.
	l, err := time.LoadLocation(p.TimeZone)
	if err != nil {
		p.location = time.UTC
		return
	}

	p.location = l
}

// Next returns the closest time instant matching the spec that is after the
// passed time. If no matching instance exists, the zero value of time.Time is
// returned. The `time.Location` of the returned value matches that of the
// passed time.
func (p *PeriodicConfig) Next(fromTime time.Time) time.Time {
	switch p.SpecType {
	case PeriodicSpecCron:
		if e, err := cronexpr.Parse(p.Spec); err == nil {
			return e.Next(fromTime)
		}
	case PeriodicSpecTest:
		split := strings.Split(p.Spec, ",")
		if len(split) == 1 && split[0] == "" {
			return time.Time{}
		}

		// Parse the times
		times := make([]time.Time, len(split))
		for i, s := range split {
			unix, err := strconv.Atoi(s)
			if err != nil {
				return time.Time{}
			}

			times[i] = time.Unix(int64(unix), 0)
		}

		// Find the next match
		for _, next := range times {
			if fromTime.Before(next) {
				return next
			}
		}
	}

	return time.Time{}
}

// GetLocation returns the location to use for determining the time zone to run
// the periodic job against.
func (p *PeriodicConfig) GetLocation() *time.Location {
	// Jobs pre 0.5.5 will not have this
	if p.location != nil {
		return p.location
	}

	return time.UTC
}

const (
	// PeriodicLaunchSuffix is the string appended to the periodic job's ID
	// when launching derived instances of it.
	PeriodicLaunchSuffix = "/periodic-"
)

// PeriodicLaunch tracks the last launch time of a periodic job.
type PeriodicLaunch struct {
	ID     string    // ID of the periodic job.
	Launch time.Time // The last launch time.
1691 1692 // Raft Indexes 1693 CreateIndex uint64 1694 ModifyIndex uint64 1695 } 1696 1697 const ( 1698 DispatchPayloadForbidden = "forbidden" 1699 DispatchPayloadOptional = "optional" 1700 DispatchPayloadRequired = "required" 1701 1702 // DispatchLaunchSuffix is the string appended to the parameterized job's ID 1703 // when dispatching instances of it. 1704 DispatchLaunchSuffix = "/dispatch-" 1705 ) 1706 1707 // ParameterizedJobConfig is used to configure the parameterized job 1708 type ParameterizedJobConfig struct { 1709 // Payload configure the payload requirements 1710 Payload string 1711 1712 // MetaRequired is metadata keys that must be specified by the dispatcher 1713 MetaRequired []string 1714 1715 // MetaOptional is metadata keys that may be specified by the dispatcher 1716 MetaOptional []string 1717 } 1718 1719 func (d *ParameterizedJobConfig) Validate() error { 1720 var mErr multierror.Error 1721 switch d.Payload { 1722 case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden: 1723 default: 1724 multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload)) 1725 } 1726 1727 // Check that the meta configurations are disjoint sets 1728 disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional) 1729 if !disjoint { 1730 multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending)) 1731 } 1732 1733 return mErr.ErrorOrNil() 1734 } 1735 1736 func (d *ParameterizedJobConfig) Canonicalize() { 1737 if d.Payload == "" { 1738 d.Payload = DispatchPayloadOptional 1739 } 1740 } 1741 1742 func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig { 1743 if d == nil { 1744 return nil 1745 } 1746 nd := new(ParameterizedJobConfig) 1747 *nd = *d 1748 nd.MetaOptional = helper.CopySliceString(nd.MetaOptional) 1749 nd.MetaRequired = helper.CopySliceString(nd.MetaRequired) 1750 return nd 1751 } 1752 1753 // DispatchedID returns an ID appropriate for a job dispatched against a 1754 // particular parameterized job 1755 func DispatchedID(templateID string, t time.Time) string { 1756 u := GenerateUUID()[:8] 1757 return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u) 1758 } 1759 1760 // DispatchPayloadConfig configures how a task gets its input from a job dispatch 1761 type DispatchPayloadConfig struct { 1762 // File specifies a relative path to where the input data should be written 1763 File string 1764 } 1765 1766 func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig { 1767 if d == nil { 1768 return nil 1769 } 1770 nd := new(DispatchPayloadConfig) 1771 *nd = *d 1772 return nd 1773 } 1774 1775 func (d *DispatchPayloadConfig) Validate() error { 1776 // Verify the destination doesn't escape 1777 escaped, err := PathEscapesAllocDir("task/local/", d.File) 1778 if err != nil { 1779 return fmt.Errorf("invalid destination path: %v", err) 1780 } else if escaped { 1781 return fmt.Errorf("destination escapes allocation directory") 1782 } 1783 1784 return nil 1785 } 1786 1787 var ( 1788 defaultServiceJobRestartPolicy = RestartPolicy{ 1789 Delay: 15 * time.Second, 1790 Attempts: 2, 1791 Interval: 1 * time.Minute, 1792 Mode: RestartPolicyModeDelay, 1793 } 1794 defaultBatchJobRestartPolicy = RestartPolicy{ 1795 Delay: 15 * time.Second, 1796 Attempts: 15, 1797 Interval: 7 * 24 * time.Hour, 1798 Mode: RestartPolicyModeDelay, 1799 } 1800 ) 1801 1802 const ( 1803 // RestartPolicyModeDelay causes an artificial delay till the next interval is 1804 // 
	// reached when the specified attempts have been reached in the interval.
	RestartPolicyModeDelay = "delay"

	// RestartPolicyModeFail causes a job to fail if the specified number of
	// attempts are reached within an interval.
	RestartPolicyModeFail = "fail"

	// RestartPolicyMinInterval is the minimum interval that is accepted for a
	// restart policy.
	RestartPolicyMinInterval = 5 * time.Second
)

// RestartPolicy configures how Tasks are restarted when they crash or fail.
type RestartPolicy struct {
	// Attempts is the number of restarts that will occur in an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of restarts
	// within.
	Interval time.Duration

	// Delay is the time between a failure and a restart.
	Delay time.Duration

	// Mode controls what happens when the task restarts more than Attempts
	// times in an interval.
	Mode string
}

func (r *RestartPolicy) Copy() *RestartPolicy {
	if r == nil {
		return nil
	}
	nrp := new(RestartPolicy)
	*nrp = *r
	return nrp
}

func (r *RestartPolicy) Validate() error {
	var mErr multierror.Error
	switch r.Mode {
	case RestartPolicyModeDelay, RestartPolicyModeFail:
	default:
		multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode))
	}

	// Check for ambiguous/confusing settings
	if r.Attempts == 0 && r.Mode != RestartPolicyModeFail {
		multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts))
	}

	if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() {
		multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval))
	}
	if time.Duration(r.Attempts)*r.Delay > r.Interval {
		multierror.Append(&mErr,
			fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay))
	}
	return mErr.ErrorOrNil()
}

func NewRestartPolicy(jobType string) *RestartPolicy {
	switch jobType {
	case JobTypeService, JobTypeSystem:
		rp := defaultServiceJobRestartPolicy
		return &rp
	case JobTypeBatch:
		rp := defaultBatchJobRestartPolicy
		return &rp
	}
	return nil
}

// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// multiple replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled.
	Count int

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	// RestartPolicy of a TaskGroup
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// EphemeralDisk is the disk resources that the task group requests
	EphemeralDisk *EphemeralDisk

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
1903 Meta map[string]string 1904 } 1905 1906 func (tg *TaskGroup) Copy() *TaskGroup { 1907 if tg == nil { 1908 return nil 1909 } 1910 ntg := new(TaskGroup) 1911 *ntg = *tg 1912 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 1913 1914 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 1915 1916 if tg.Tasks != nil { 1917 tasks := make([]*Task, len(ntg.Tasks)) 1918 for i, t := range ntg.Tasks { 1919 tasks[i] = t.Copy() 1920 } 1921 ntg.Tasks = tasks 1922 } 1923 1924 ntg.Meta = helper.CopyMapStringString(ntg.Meta) 1925 1926 if tg.EphemeralDisk != nil { 1927 ntg.EphemeralDisk = tg.EphemeralDisk.Copy() 1928 } 1929 return ntg 1930 } 1931 1932 // Canonicalize is used to canonicalize fields in the TaskGroup. 1933 func (tg *TaskGroup) Canonicalize(job *Job) { 1934 // Ensure that an empty and nil map are treated the same to avoid scheduling 1935 // problems since we use reflect DeepEquals. 1936 if len(tg.Meta) == 0 { 1937 tg.Meta = nil 1938 } 1939 1940 // Set the default restart policy. 1941 if tg.RestartPolicy == nil { 1942 tg.RestartPolicy = NewRestartPolicy(job.Type) 1943 } 1944 1945 // Set a default ephemeral disk object if the user has not requested for one 1946 if tg.EphemeralDisk == nil { 1947 tg.EphemeralDisk = DefaultEphemeralDisk() 1948 } 1949 1950 for _, task := range tg.Tasks { 1951 task.Canonicalize(job, tg) 1952 } 1953 1954 // Add up the disk resources to EphemeralDisk. This is done so that users 1955 // are not required to move their disk attribute from resources to 1956 // EphemeralDisk section of the job spec in Nomad 0.5 1957 // COMPAT 0.4.1 -> 0.5 1958 // Remove in 0.6 1959 var diskMB int 1960 for _, task := range tg.Tasks { 1961 diskMB += task.Resources.DiskMB 1962 } 1963 if diskMB > 0 { 1964 tg.EphemeralDisk.SizeMB = diskMB 1965 } 1966 } 1967 1968 // Validate is used to sanity check a task group 1969 func (tg *TaskGroup) Validate() error { 1970 var mErr multierror.Error 1971 if tg.Name == "" { 1972 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 1973 } 1974 if tg.Count < 0 { 1975 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative")) 1976 } 1977 if len(tg.Tasks) == 0 { 1978 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 1979 } 1980 for idx, constr := range tg.Constraints { 1981 if err := constr.Validate(); err != nil { 1982 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1983 mErr.Errors = append(mErr.Errors, outer) 1984 } 1985 } 1986 1987 if tg.RestartPolicy != nil { 1988 if err := tg.RestartPolicy.Validate(); err != nil { 1989 mErr.Errors = append(mErr.Errors, err) 1990 } 1991 } else { 1992 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 1993 } 1994 1995 if tg.EphemeralDisk != nil { 1996 if err := tg.EphemeralDisk.Validate(); err != nil { 1997 mErr.Errors = append(mErr.Errors, err) 1998 } 1999 } else { 2000 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name)) 2001 } 2002 2003 // Check for duplicate tasks and that there is only leader task if any 2004 tasks := make(map[string]int) 2005 leaderTasks := 0 2006 for idx, task := range tg.Tasks { 2007 if task.Name == "" { 2008 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 2009 } else if existing, ok := tasks[task.Name]; ok { 2010 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 2011 } else { 2012 tasks[task.Name] = idx 2013 
} 2014 2015 if task.Leader { 2016 leaderTasks++ 2017 } 2018 } 2019 2020 if leaderTasks > 1 { 2021 mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader")) 2022 } 2023 2024 // Validate the tasks 2025 for _, task := range tg.Tasks { 2026 if err := task.Validate(tg.EphemeralDisk); err != nil { 2027 outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err) 2028 mErr.Errors = append(mErr.Errors, outer) 2029 } 2030 } 2031 return mErr.ErrorOrNil() 2032 } 2033 2034 // LookupTask finds a task by name 2035 func (tg *TaskGroup) LookupTask(name string) *Task { 2036 for _, t := range tg.Tasks { 2037 if t.Name == name { 2038 return t 2039 } 2040 } 2041 return nil 2042 } 2043 2044 func (tg *TaskGroup) GoString() string { 2045 return fmt.Sprintf("*%#v", *tg) 2046 } 2047 2048 const ( 2049 // TODO add Consul TTL check 2050 ServiceCheckHTTP = "http" 2051 ServiceCheckTCP = "tcp" 2052 ServiceCheckScript = "script" 2053 2054 // minCheckInterval is the minimum check interval permitted. Consul 2055 // currently has its MinInterval set to 1s. Mirror that here for 2056 // consistency. 2057 minCheckInterval = 1 * time.Second 2058 2059 // minCheckTimeout is the minimum check timeout permitted for Consul 2060 // script TTL checks. 2061 minCheckTimeout = 1 * time.Second 2062 ) 2063 2064 // The ServiceCheck data model represents the consul health check that 2065 // Nomad registers for a Task 2066 type ServiceCheck struct { 2067 Name string // Name of the check, defaults to id 2068 Type string // Type of the check - tcp, http and script 2069 Command string // Command is the command to run for script checks 2070 Args []string // Args is a list of arguments for script checks 2071 Path string // path of the health check url for http type check 2072 Protocol string // Protocol to use if check is http, defaults to http 2073 PortLabel string // The port to use for tcp/http checks 2074 Interval time.Duration // Interval of the check 2075 Timeout time.Duration // Timeout of the response from the check before consul fails the check 2076 InitialStatus string // Initial status of the check 2077 } 2078 2079 func (sc *ServiceCheck) Copy() *ServiceCheck { 2080 if sc == nil { 2081 return nil 2082 } 2083 nsc := new(ServiceCheck) 2084 *nsc = *sc 2085 return nsc 2086 } 2087 2088 func (sc *ServiceCheck) Canonicalize(serviceName string) { 2089 // Ensure empty slices are treated as null to avoid scheduling issues when 2090 // using DeepEquals. 2091 if len(sc.Args) == 0 { 2092 sc.Args = nil 2093 } 2094 2095 if sc.Name == "" { 2096 sc.Name = fmt.Sprintf("service: %q check", serviceName) 2097 } 2098 } 2099 2100 // validate a Service's ServiceCheck 2101 func (sc *ServiceCheck) validate() error { 2102 switch strings.ToLower(sc.Type) { 2103 case ServiceCheckTCP: 2104 if sc.Timeout == 0 { 2105 return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckTimeout) 2106 } else if sc.Timeout < minCheckTimeout { 2107 return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckTimeout) 2108 } 2109 case ServiceCheckHTTP: 2110 if sc.Path == "" { 2111 return fmt.Errorf("http type must have a valid http path") 2112 } 2113 2114 if sc.Timeout == 0 { 2115 return fmt.Errorf("missing required value timeout.
Timeout cannot be less than %v", minCheckTimeout) 2116 } else if sc.Timeout < minCheckTimeout { 2117 return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckTimeout) 2118 } 2119 case ServiceCheckScript: 2120 if sc.Command == "" { 2121 return fmt.Errorf("script type must have a valid script path") 2122 } 2123 2124 // TODO: enforce timeout on the Client side and reenable 2125 // validation. 2126 default: 2127 return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type) 2128 } 2129 2130 if sc.Interval == 0 { 2131 return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval) 2132 } else if sc.Interval < minCheckInterval { 2133 return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval) 2134 } 2135 2136 switch sc.InitialStatus { 2137 case "": 2138 // case api.HealthUnknown: TODO: Add when Consul releases 0.7.1 2139 case api.HealthPassing: 2140 case api.HealthWarning: 2141 case api.HealthCritical: 2142 default: 2143 return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical) 2144 2145 } 2146 2147 return nil 2148 } 2149 2150 // RequiresPort returns whether the service check requires the task to have a port. 2151 func (sc *ServiceCheck) RequiresPort() bool { 2152 switch sc.Type { 2153 case ServiceCheckHTTP, ServiceCheckTCP: 2154 return true 2155 default: 2156 return false 2157 } 2158 } 2159 2160 func (sc *ServiceCheck) Hash(serviceID string) string { 2161 h := sha1.New() 2162 io.WriteString(h, serviceID) 2163 io.WriteString(h, sc.Name) 2164 io.WriteString(h, sc.Type) 2165 io.WriteString(h, sc.Command) 2166 io.WriteString(h, strings.Join(sc.Args, "")) 2167 io.WriteString(h, sc.Path) 2168 io.WriteString(h, sc.Protocol) 2169 io.WriteString(h, sc.PortLabel) 2170 io.WriteString(h, sc.Interval.String()) 2171 io.WriteString(h, sc.Timeout.String()) 2172 return fmt.Sprintf("%x", h.Sum(nil)) 2173 } 2174 2175 // Service represents a Consul service definition in Nomad 2176 type Service struct { 2177 // Name of the service registered with Consul. Consul defaults the 2178 // Name to ServiceID if not specified. The Name if specified is used 2179 // as one of the seed values when generating a Consul ServiceID. 2180 Name string 2181 2182 // PortLabel is either the numeric port number or the `host:port`. 2183 // To specify the port number using the host's Consul Advertise 2184 // address, specify an empty host in the PortLabel (e.g. `:port`). 2185 PortLabel string 2186 Tags []string // List of tags for the service 2187 Checks []*ServiceCheck // List of checks associated with the service 2188 } 2189 2190 func (s *Service) Copy() *Service { 2191 if s == nil { 2192 return nil 2193 } 2194 ns := new(Service) 2195 *ns = *s 2196 ns.Tags = helper.CopySliceString(ns.Tags) 2197 2198 if s.Checks != nil { 2199 checks := make([]*ServiceCheck, len(ns.Checks)) 2200 for i, c := range ns.Checks { 2201 checks[i] = c.Copy() 2202 } 2203 ns.Checks = checks 2204 } 2205 2206 return ns 2207 } 2208 2209 // Canonicalize interpolates values of Job, Task Group and Task in the Service 2210 // Name. This also generates default check names.
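//
// As an illustrative sketch (assuming the usual ${...} interpolation handled
// by args.ReplaceEnv), a service defined for job "example", group "cache" and
// task "redis" as:
//
//    s := &Service{Name: "web-${TASKGROUP}"}
//    s.Canonicalize("example", "cache", "redis")
//
// ends up with the Name "web-cache", while "${BASE}" would expand to
// "example-cache-redis".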
2211 func (s *Service) Canonicalize(job string, taskGroup string, task string) { 2212 // Ensure empty lists are treated as null to avoid scheduler issues when 2213 // using DeepEquals 2214 if len(s.Tags) == 0 { 2215 s.Tags = nil 2216 } 2217 if len(s.Checks) == 0 { 2218 s.Checks = nil 2219 } 2220 2221 s.Name = args.ReplaceEnv(s.Name, map[string]string{ 2222 "JOB": job, 2223 "TASKGROUP": taskGroup, 2224 "TASK": task, 2225 "BASE": fmt.Sprintf("%s-%s-%s", job, taskGroup, task), 2226 }, 2227 ) 2228 2229 for _, check := range s.Checks { 2230 check.Canonicalize(s.Name) 2231 } 2232 } 2233 2234 // Validate checks if the Service definition is valid 2235 func (s *Service) Validate() error { 2236 var mErr multierror.Error 2237 2238 // Ensure the service name is valid per the below RFCs but make an exception 2239 // for our interpolation syntax 2240 // RFC-952 §1 (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1 2241 // (https://tools.ietf.org/html/rfc1123), and RFC-2782 2242 // (https://tools.ietf.org/html/rfc2782). 2243 re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9\$][a-zA-Z0-9\-\$\{\}\_\.]*[a-z0-9\}])$`) 2244 if !re.MatchString(s.Name) { 2245 mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q", s.Name)) 2246 } 2247 2248 for _, c := range s.Checks { 2249 if s.PortLabel == "" && c.RequiresPort() { 2250 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but the service %+q has no port", c.Name, s.Name)) 2251 continue 2252 } 2253 2254 if err := c.validate(); err != nil { 2255 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err)) 2256 } 2257 } 2258 return mErr.ErrorOrNil() 2259 } 2260 2261 // ValidateName checks if the service's Name is valid and should be called after 2262 // the name has been interpolated 2263 func (s *Service) ValidateName(name string) error { 2264 // Ensure the service name is valid per RFC-952 §1 2265 // (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1 2266 // (https://tools.ietf.org/html/rfc1123), and RFC-2782 2267 // (https://tools.ietf.org/html/rfc2782). 2268 re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`) 2269 if !re.MatchString(name) { 2270 return fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be no longer than 63 characters: %q", name) 2271 } 2272 return nil 2273 } 2274 2275 // Hash calculates the hash of the service based on its content (name, tags 2276 // and port label) 2277 func (s *Service) Hash() string { 2278 h := sha1.New() 2279 io.WriteString(h, s.Name) 2280 io.WriteString(h, strings.Join(s.Tags, "")) 2281 io.WriteString(h, s.PortLabel) 2282 return fmt.Sprintf("%x", h.Sum(nil)) 2283 } 2284 2285 const ( 2286 // DefaultKillTimeout is the default timeout between signaling a task it 2287 // will be killed and killing it. 2288 DefaultKillTimeout = 5 * time.Second 2289 ) 2290 2291 // LogConfig provides configuration for log rotation 2292 type LogConfig struct { 2293 MaxFiles int 2294 MaxFileSizeMB int 2295 } 2296 2297 // DefaultLogConfig returns the default LogConfig values. 2298 func DefaultLogConfig() *LogConfig { 2299 return &LogConfig{ 2300 MaxFiles: 10, 2301 MaxFileSizeMB: 10, 2302 } 2303 } 2304 2305 // Validate returns an error if the log config values specified are less than 2306 // the minimum allowed.
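//
// For example, the following config is rejected because MaxFiles is below the
// minimum of 1, while DefaultLogConfig (10 files of 10MB each) passes:
//
//    err := (&LogConfig{MaxFiles: 0, MaxFileSizeMB: 10}).Validate()
//    // err reports: minimum number of files is 1; got 0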
2307 func (l *LogConfig) Validate() error { 2308 var mErr multierror.Error 2309 if l.MaxFiles < 1 { 2310 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles)) 2311 } 2312 if l.MaxFileSizeMB < 1 { 2313 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB)) 2314 } 2315 return mErr.ErrorOrNil() 2316 } 2317 2318 // Task is a single process that is typically executed as part of a task group. 2319 type Task struct { 2320 // Name of the task 2321 Name string 2322 2323 // Driver is used to control which driver is used 2324 Driver string 2325 2326 // User is used to determine which user will run the task. It defaults to 2327 // the same user the Nomad client is being run as. 2328 User string 2329 2330 // Config is provided to the driver to initialize 2331 Config map[string]interface{} 2332 2333 // Map of environment variables to be used by the driver 2334 Env map[string]string 2335 2336 // List of service definitions exposed by the Task 2337 Services []*Service 2338 2339 // Vault is used to define the set of Vault policies that this task should 2340 // have access to. 2341 Vault *Vault 2342 2343 // Templates are the set of templates to be rendered for the task. 2344 Templates []*Template 2345 2346 // Constraints can be specified at a task level and apply only to 2347 // the particular task. 2348 Constraints []*Constraint 2349 2350 // Resources is the resources needed by this task 2351 Resources *Resources 2352 2353 // DispatchPayload configures how the task retrieves its input from a dispatch 2354 DispatchPayload *DispatchPayloadConfig 2355 2356 // Meta is used to associate arbitrary metadata with this 2357 // task. This is opaque to Nomad. 2358 Meta map[string]string 2359 2360 // KillTimeout is the time between signaling a task that it will be 2361 // killed and killing it. 2362 KillTimeout time.Duration 2363 2364 // LogConfig provides configuration for log rotation 2365 LogConfig *LogConfig 2366 2367 // Artifacts is a list of artifacts to download and extract before running 2368 // the task. 2369 Artifacts []*TaskArtifact 2370 2371 // Leader marks the task as the leader within the group. When the leader 2372 // task exits, other tasks will be gracefully terminated. 2373 Leader bool 2374 } 2375 2376 func (t *Task) Copy() *Task { 2377 if t == nil { 2378 return nil 2379 } 2380 nt := new(Task) 2381 *nt = *t 2382 nt.Env = helper.CopyMapStringString(nt.Env) 2383 2384 if t.Services != nil { 2385 services := make([]*Service, len(nt.Services)) 2386 for i, s := range nt.Services { 2387 services[i] = s.Copy() 2388 } 2389 nt.Services = services 2390 } 2391 2392 nt.Constraints = CopySliceConstraints(nt.Constraints) 2393 2394 nt.Vault = nt.Vault.Copy() 2395 nt.Resources = nt.Resources.Copy() 2396 nt.Meta = helper.CopyMapStringString(nt.Meta) 2397 nt.DispatchPayload = nt.DispatchPayload.Copy() 2398 2399 if t.Artifacts != nil { 2400 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 2401 for _, a := range nt.Artifacts { 2402 artifacts = append(artifacts, a.Copy()) 2403 } 2404 nt.Artifacts = artifacts 2405 } 2406 2407 if i, err := copystructure.Copy(nt.Config); err == nil { 2408 nt.Config = i.(map[string]interface{}) 2409 } 2410 2411 if t.Templates != nil { 2412 templates := make([]*Template, len(t.Templates)) 2413 for i, tmpl := range nt.Templates { 2414 templates[i] = tmpl.Copy() 2415 } 2416 nt.Templates = templates 2417 } 2418 2419 return nt 2420 } 2421 2422 // Canonicalize canonicalizes fields in the task.
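// Empty Meta, Config and Env maps are normalized to nil, Resources and
// KillTimeout receive defaults when unset, and nested Services, Vault and
// Templates are canonicalized in turn.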
2423 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 2424 // Ensure that an empty and nil map are treated the same to avoid scheduling 2425 // problems since we use reflect DeepEquals. 2426 if len(t.Meta) == 0 { 2427 t.Meta = nil 2428 } 2429 if len(t.Config) == 0 { 2430 t.Config = nil 2431 } 2432 if len(t.Env) == 0 { 2433 t.Env = nil 2434 } 2435 2436 for _, service := range t.Services { 2437 service.Canonicalize(job.Name, tg.Name, t.Name) 2438 } 2439 2440 // If Resources are nil initialize them to defaults, otherwise canonicalize 2441 if t.Resources == nil { 2442 t.Resources = DefaultResources() 2443 } else { 2444 t.Resources.Canonicalize() 2445 } 2446 2447 // Set the default timeout if it is not specified. 2448 if t.KillTimeout == 0 { 2449 t.KillTimeout = DefaultKillTimeout 2450 } 2451 2452 if t.Vault != nil { 2453 t.Vault.Canonicalize() 2454 } 2455 2456 for _, template := range t.Templates { 2457 template.Canonicalize() 2458 } 2459 } 2460 2461 func (t *Task) GoString() string { 2462 return fmt.Sprintf("*%#v", *t) 2463 } 2464 2465 func (t *Task) FindHostAndPortFor(portLabel string) (string, int) { 2466 for _, network := range t.Resources.Networks { 2467 if p, ok := network.MapLabelToValues(nil)[portLabel]; ok { 2468 return network.IP, p 2469 } 2470 } 2471 return "", 0 2472 } 2473 2474 // Validate is used to sanity check a task 2475 func (t *Task) Validate(ephemeralDisk *EphemeralDisk) error { 2476 var mErr multierror.Error 2477 if t.Name == "" { 2478 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 2479 } 2480 if strings.ContainsAny(t.Name, `/\`) { 2481 // We enforce this so that when creating the directory on disk it will 2482 // not have any slashes. 2483 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 2484 } 2485 if t.Driver == "" { 2486 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 2487 } 2488 if t.KillTimeout.Nanoseconds() < 0 { 2489 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 2490 } 2491 2492 // Validate the resources. 
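// A nil Resources block is an error; otherwise the task must meet the minimum
// resources, and DiskMB must not be set because disk is now requested at the
// task group level via EphemeralDisk.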
2493 if t.Resources == nil { 2494 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 2495 } else { 2496 if err := t.Resources.MeetsMinResources(); err != nil { 2497 mErr.Errors = append(mErr.Errors, err) 2498 } 2499 2500 // Ensure the task isn't asking for disk resources 2501 if t.Resources.DiskMB > 0 { 2502 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 2503 } 2504 } 2505 2506 // Validate the log config 2507 if t.LogConfig == nil { 2508 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 2509 } else if err := t.LogConfig.Validate(); err != nil { 2510 mErr.Errors = append(mErr.Errors, err) 2511 } 2512 2513 for idx, constr := range t.Constraints { 2514 if err := constr.Validate(); err != nil { 2515 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 2516 mErr.Errors = append(mErr.Errors, outer) 2517 } 2518 2519 switch constr.Operand { 2520 case ConstraintDistinctHosts, ConstraintDistinctProperty: 2521 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand) 2522 mErr.Errors = append(mErr.Errors, outer) 2523 } 2524 } 2525 2526 // Validate Services 2527 if err := validateServices(t); err != nil { 2528 mErr.Errors = append(mErr.Errors, err) 2529 } 2530 2531 if t.LogConfig != nil && ephemeralDisk != nil { 2532 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 2533 if ephemeralDisk.SizeMB <= logUsage { 2534 mErr.Errors = append(mErr.Errors, 2535 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 2536 logUsage, ephemeralDisk.SizeMB)) 2537 } 2538 } 2539 2540 for idx, artifact := range t.Artifacts { 2541 if err := artifact.Validate(); err != nil { 2542 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 2543 mErr.Errors = append(mErr.Errors, outer) 2544 } 2545 } 2546 2547 if t.Vault != nil { 2548 if err := t.Vault.Validate(); err != nil { 2549 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 2550 } 2551 } 2552 2553 destinations := make(map[string]int, len(t.Templates)) 2554 for idx, tmpl := range t.Templates { 2555 if err := tmpl.Validate(); err != nil { 2556 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 2557 mErr.Errors = append(mErr.Errors, outer) 2558 } 2559 2560 if other, ok := destinations[tmpl.DestPath]; ok { 2561 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 2562 mErr.Errors = append(mErr.Errors, outer) 2563 } else { 2564 destinations[tmpl.DestPath] = idx + 1 2565 } 2566 } 2567 2568 // Validate the dispatch payload block if there 2569 if t.DispatchPayload != nil { 2570 if err := t.DispatchPayload.Validate(); err != nil { 2571 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err)) 2572 } 2573 } 2574 2575 return mErr.ErrorOrNil() 2576 } 2577 2578 // validateServices takes a task and validates the services within it are valid 2579 // and reference ports that exist. 2580 func validateServices(t *Task) error { 2581 var mErr multierror.Error 2582 2583 // Ensure that services don't ask for non-existent ports and their names are 2584 // unique. 
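// servicePorts maps a port label to the services that reference it so that a
// missing label can be reported together with every offending service, and
// knownServices tracks name+port pairs to flag duplicate registrations.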
2585 servicePorts := make(map[string][]string) 2586 knownServices := make(map[string]struct{}) 2587 for i, service := range t.Services { 2588 if err := service.Validate(); err != nil { 2589 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 2590 mErr.Errors = append(mErr.Errors, outer) 2591 } 2592 2593 // Ensure that services with the same name are not being registered for 2594 // the same port 2595 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 2596 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 2597 } 2598 knownServices[service.Name+service.PortLabel] = struct{}{} 2599 2600 if service.PortLabel != "" { 2601 servicePorts[service.PortLabel] = append(servicePorts[service.PortLabel], service.Name) 2602 } 2603 2604 // Ensure that check names are unique. 2605 knownChecks := make(map[string]struct{}) 2606 for _, check := range service.Checks { 2607 if _, ok := knownChecks[check.Name]; ok { 2608 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 2609 } 2610 knownChecks[check.Name] = struct{}{} 2611 } 2612 } 2613 2614 // Get the set of port labels. 2615 portLabels := make(map[string]struct{}) 2616 if t.Resources != nil { 2617 for _, network := range t.Resources.Networks { 2618 ports := network.MapLabelToValues(nil) 2619 for portLabel := range ports { 2620 portLabels[portLabel] = struct{}{} 2621 } 2622 } 2623 } 2624 2625 // Ensure all ports referenced in services exist. 2626 for servicePort, services := range servicePorts { 2627 _, ok := portLabels[servicePort] 2628 if !ok { 2629 joined := strings.Join(services, ", ") 2630 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 2631 mErr.Errors = append(mErr.Errors, err) 2632 } 2633 } 2634 return mErr.ErrorOrNil() 2635 } 2636 2637 const ( 2638 // TemplateChangeModeNoop marks that no action should be taken if the 2639 // template is re-rendered 2640 TemplateChangeModeNoop = "noop" 2641 2642 // TemplateChangeModeSignal marks that the task should be signaled if the 2643 // template is re-rendered 2644 TemplateChangeModeSignal = "signal" 2645 2646 // TemplateChangeModeRestart marks that the task should be restarted if the 2647 // template is re-rendered 2648 TemplateChangeModeRestart = "restart" 2649 ) 2650 2651 var ( 2652 // TemplateChangeModeInvalidError is the error for when an invalid change 2653 // mode is given 2654 TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart") 2655 ) 2656 2657 // Template represents a template configuration to be rendered for a given task 2658 type Template struct { 2659 // SourcePath is the path to the template to be rendered 2660 SourcePath string 2661 2662 // DestPath is the path to where the template should be rendered 2663 DestPath string 2664 2665 // EmbeddedTmpl stores the raw template. This is useful for smaller templates 2666 // where they are embedded in the job file rather than sent as an artifact 2667 EmbeddedTmpl string 2668 2669 // ChangeMode indicates what should be done if the template is re-rendered 2670 ChangeMode string 2671 2672 // ChangeSignal is the signal that should be sent if the change mode 2673 // requires it.
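// The value is normalized to upper case by Canonicalize (e.g. "sighup"
// becomes "SIGHUP").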
ChangeSignal string 2675 2676 // Splay is used to avoid coordinated restarts of processes by applying a 2677 // random wait between 0 and the given splay value before signalling the 2678 // application of a change 2679 Splay time.Duration 2680 2681 // Perms is the permission the file should be written out with. 2682 Perms string 2683 2684 // LeftDelim and RightDelim are optional configurations to control what 2685 // delimiter is utilized when parsing the template. 2686 LeftDelim string 2687 RightDelim string 2688 } 2689 2690 // DefaultTemplate returns a default template. 2691 func DefaultTemplate() *Template { 2692 return &Template{ 2693 ChangeMode: TemplateChangeModeRestart, 2694 Splay: 5 * time.Second, 2695 Perms: "0644", 2696 } 2697 } 2698 2699 func (t *Template) Copy() *Template { 2700 if t == nil { 2701 return nil 2702 } 2703 copy := new(Template) 2704 *copy = *t 2705 return copy 2706 } 2707 2708 func (t *Template) Canonicalize() { 2709 if t.ChangeSignal != "" { 2710 t.ChangeSignal = strings.ToUpper(t.ChangeSignal) 2711 } 2712 } 2713 2714 func (t *Template) Validate() error { 2715 var mErr multierror.Error 2716 2717 // Verify we have something to render 2718 if t.SourcePath == "" && t.EmbeddedTmpl == "" { 2719 multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template")) 2720 } 2721 2722 // Verify we can render somewhere 2723 if t.DestPath == "" { 2724 multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template")) 2725 } 2726 2727 // Verify the destination doesn't escape 2728 escaped, err := PathEscapesAllocDir("task", t.DestPath) 2729 if err != nil { 2730 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 2731 } else if escaped { 2732 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 2733 } 2734 2735 // Verify a proper change mode 2736 switch t.ChangeMode { 2737 case TemplateChangeModeNoop, TemplateChangeModeRestart: 2738 case TemplateChangeModeSignal: 2739 if t.ChangeSignal == "" { 2740 multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal")) 2741 } 2742 default: 2743 multierror.Append(&mErr, TemplateChangeModeInvalidError) 2744 } 2745 2746 // Verify the splay is positive 2747 if t.Splay < 0 { 2748 multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value")) 2749 } 2750 2751 // Verify the permissions 2752 if t.Perms != "" { 2753 if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil { 2754 multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err)) 2755 } 2756 } 2757 2758 return mErr.ErrorOrNil() 2759 } 2760 2761 // Set of possible states for a task. 2762 const ( 2763 TaskStatePending = "pending" // The task is waiting to be run. 2764 TaskStateRunning = "running" // The task is currently running. 2765 TaskStateDead = "dead" // Terminal state of task. 2766 ) 2767 2768 // TaskState tracks the current state of a task and events that caused state 2769 // transitions. 2770 type TaskState struct { 2771 // The current state of the task. 2772 State string 2773 2774 // Failed marks a task as having failed 2775 Failed bool 2776 2777 // StartedAt is the time the task is started. It is updated each time the 2778 // task starts 2779 StartedAt time.Time 2780 2781 // FinishedAt is the time at which the task transitioned to dead and will 2782 // not be started again. 2783 FinishedAt time.Time 2784 2785 // Series of task events that transition the state of the task.
2786 Events []*TaskEvent 2787 } 2788 2789 func (ts *TaskState) Copy() *TaskState { 2790 if ts == nil { 2791 return nil 2792 } 2793 copy := new(TaskState) 2794 copy.State = ts.State 2795 copy.Failed = ts.Failed 2796 copy.StartedAt = ts.StartedAt 2797 copy.FinishedAt = ts.FinishedAt 2798 2799 if ts.Events != nil { 2800 copy.Events = make([]*TaskEvent, len(ts.Events)) 2801 for i, e := range ts.Events { 2802 copy.Events[i] = e.Copy() 2803 } 2804 } 2805 return copy 2806 } 2807 2808 // Successful returns whether a task finished successfully. 2809 func (ts *TaskState) Successful() bool { 2810 l := len(ts.Events) 2811 if ts.State != TaskStateDead || l == 0 { 2812 return false 2813 } 2814 2815 e := ts.Events[l-1] 2816 if e.Type != TaskTerminated { 2817 return false 2818 } 2819 2820 return e.ExitCode == 0 2821 } 2822 2823 const ( 2824 // TaskSetupFailure indicates that the task could not be started due to a 2825 // setup failure. 2826 TaskSetupFailure = "Setup Failure" 2827 2828 // TaskDriverFailure indicates that the task could not be started due to a 2829 // failure in the driver. 2830 TaskDriverFailure = "Driver Failure" 2831 2832 // TaskReceived signals that the task has been pulled by the client at the 2833 // given timestamp. 2834 TaskReceived = "Received" 2835 2836 // TaskFailedValidation indicates the task was invalid and as such was not 2837 // run. 2838 TaskFailedValidation = "Failed Validation" 2839 2840 // TaskStarted signals that the task was started and its timestamp can be 2841 // used to determine the running length of the task. 2842 TaskStarted = "Started" 2843 2844 // TaskTerminated indicates that the task was started and exited. 2845 TaskTerminated = "Terminated" 2846 2847 // TaskKilling indicates a kill signal has been sent to the task. 2848 TaskKilling = "Killing" 2849 2850 // TaskKilled indicates a user has killed the task. 2851 TaskKilled = "Killed" 2852 2853 // TaskRestarting indicates that the task terminated and is being restarted. 2854 TaskRestarting = "Restarting" 2855 2856 // TaskNotRestarting indicates that the task has failed and is not being 2857 // restarted because it has exceeded its restart policy. 2858 TaskNotRestarting = "Not Restarting" 2859 2860 // TaskRestartSignal indicates that the task has been signalled to be 2861 // restarted 2862 TaskRestartSignal = "Restart Signaled" 2863 2864 // TaskSignaling indicates that the task is being signalled. 2865 TaskSignaling = "Signaling" 2866 2867 // TaskDownloadingArtifacts means the task is downloading the artifacts 2868 // specified in the task. 2869 TaskDownloadingArtifacts = "Downloading Artifacts" 2870 2871 // TaskArtifactDownloadFailed indicates that downloading the artifacts 2872 // failed. 2873 TaskArtifactDownloadFailed = "Failed Artifact Download" 2874 2875 // TaskBuildingTaskDir indicates that the task directory/chroot is being 2876 // built. 2877 TaskBuildingTaskDir = "Building Task Directory" 2878 2879 // TaskSetup indicates the task runner is setting up the task environment 2880 TaskSetup = "Task Setup" 2881 2882 // TaskDiskExceeded indicates that one of the tasks in a taskgroup has 2883 // exceeded the requested disk resources. 2884 TaskDiskExceeded = "Disk Resources Exceeded" 2885 2886 // TaskSiblingFailed indicates that a sibling task in the task group has 2887 // failed. 2888 TaskSiblingFailed = "Sibling Task Failed" 2889 2890 // TaskDriverMessage is an informational event message emitted by 2891 // drivers such as when they're performing a long running action like 2892 // downloading an image.
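// The text of the message is carried in the TaskEvent's DriverMessage field
// (see SetDriverMessage).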
2893 TaskDriverMessage = "Driver" 2894 2895 // TaskLeaderDead indicates that the leader task within the task group has finished. 2896 TaskLeaderDead = "Leader Task Dead" 2897 ) 2898 2899 // TaskEvent is an event that affects the state of a task and contains meta-data 2900 // appropriate to the event's type. 2901 type TaskEvent struct { 2902 Type string 2903 Time int64 // Unix Nanosecond timestamp 2904 2905 // FailsTask marks whether this event fails the task 2906 FailsTask bool 2907 2908 // Restart fields. 2909 RestartReason string 2910 2911 // Setup Failure fields. 2912 SetupError string 2913 2914 // Driver Failure fields. 2915 DriverError string // A driver error occurred while starting the task. 2916 2917 // Task Terminated Fields. 2918 ExitCode int // The exit code of the task. 2919 Signal int // The signal that terminated the task. 2920 Message string // A possible message explaining the termination of the task. 2921 2922 // Killing fields 2923 KillTimeout time.Duration 2924 2925 // Task Killed Fields. 2926 KillError string // Error killing the task. 2927 2928 // KillReason is the reason the task was killed 2929 KillReason string 2930 2931 // TaskRestarting fields. 2932 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 2933 2934 // Artifact Download fields 2935 DownloadError string // Error downloading artifacts 2936 2937 // Validation fields 2938 ValidationError string // Validation error 2939 2940 // The maximum allowed task disk size. 2941 DiskLimit int64 2942 2943 // Name of the sibling task that caused termination of the task that 2944 // the TaskEvent refers to. 2945 FailedSibling string 2946 2947 // VaultError is the error from token renewal 2948 VaultError string 2949 2950 // TaskSignalReason indicates the reason the task is being signalled. 2951 TaskSignalReason string 2952 2953 // TaskSignal is the signal that was sent to the task 2954 TaskSignal string 2955 2956 // DriverMessage indicates a driver action being taken.
2957 DriverMessage string 2958 } 2959 2960 func (te *TaskEvent) GoString() string { 2961 return fmt.Sprintf("%v at %v", te.Type, te.Time) 2962 } 2963 2964 // SetMessage sets the message of TaskEvent 2965 func (te *TaskEvent) SetMessage(msg string) *TaskEvent { 2966 te.Message = msg 2967 return te 2968 } 2969 2970 func (te *TaskEvent) Copy() *TaskEvent { 2971 if te == nil { 2972 return nil 2973 } 2974 copy := new(TaskEvent) 2975 *copy = *te 2976 return copy 2977 } 2978 2979 func NewTaskEvent(event string) *TaskEvent { 2980 return &TaskEvent{ 2981 Type: event, 2982 Time: time.Now().UnixNano(), 2983 } 2984 } 2985 2986 // SetSetupError is used to store an error that occurred while setting up the 2987 // task 2988 func (e *TaskEvent) SetSetupError(err error) *TaskEvent { 2989 if err != nil { 2990 e.SetupError = err.Error() 2991 } 2992 return e 2993 } 2994 2995 func (e *TaskEvent) SetFailsTask() *TaskEvent { 2996 e.FailsTask = true 2997 return e 2998 } 2999 3000 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 3001 if err != nil { 3002 e.DriverError = err.Error() 3003 } 3004 return e 3005 } 3006 3007 func (e *TaskEvent) SetExitCode(c int) *TaskEvent { 3008 e.ExitCode = c 3009 return e 3010 } 3011 3012 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 3013 e.Signal = s 3014 return e 3015 } 3016 3017 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 3018 if err != nil { 3019 e.Message = err.Error() 3020 } 3021 return e 3022 } 3023 3024 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 3025 if err != nil { 3026 e.KillError = err.Error() 3027 } 3028 return e 3029 } 3030 3031 func (e *TaskEvent) SetKillReason(r string) *TaskEvent { 3032 e.KillReason = r 3033 return e 3034 } 3035 3036 func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent { 3037 e.StartDelay = int64(delay) 3038 return e 3039 } 3040 3041 func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent { 3042 e.RestartReason = reason 3043 return e 3044 } 3045 3046 func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent { 3047 e.TaskSignalReason = r 3048 return e 3049 } 3050 3051 func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent { 3052 e.TaskSignal = s.String() 3053 return e 3054 } 3055 3056 func (e *TaskEvent) SetDownloadError(err error) *TaskEvent { 3057 if err != nil { 3058 e.DownloadError = err.Error() 3059 } 3060 return e 3061 } 3062 3063 func (e *TaskEvent) SetValidationError(err error) *TaskEvent { 3064 if err != nil { 3065 e.ValidationError = err.Error() 3066 } 3067 return e 3068 } 3069 3070 func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { 3071 e.KillTimeout = timeout 3072 return e 3073 } 3074 3075 func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent { 3076 e.DiskLimit = limit 3077 return e 3078 } 3079 3080 func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent { 3081 e.FailedSibling = sibling 3082 return e 3083 } 3084 3085 func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent { 3086 if err != nil { 3087 e.VaultError = err.Error() 3088 } 3089 return e 3090 } 3091 3092 func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent { 3093 e.DriverMessage = m 3094 return e 3095 } 3096 3097 // TaskArtifact is an artifact to download before running the task. 3098 type TaskArtifact struct { 3099 // GetterSource is the source to download an artifact using go-getter 3100 GetterSource string 3101 3102 // GetterOptions are options to use when downloading the artifact using 3103 // go-getter.
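// A commonly used option is "checksum", which Validate expects in the form
// "type:value" (e.g. "sha256:<hex digest>"), where type is one of md5, sha1,
// sha256 or sha512.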
3104 GetterOptions map[string]string 3105 3106 // RelativeDest is the download destination given relative to the task's 3107 // directory. 3108 RelativeDest string 3109 } 3110 3111 func (ta *TaskArtifact) Copy() *TaskArtifact { 3112 if ta == nil { 3113 return nil 3114 } 3115 nta := new(TaskArtifact) 3116 *nta = *ta 3117 nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions) 3118 return nta 3119 } 3120 3121 func (ta *TaskArtifact) GoString() string { 3122 return fmt.Sprintf("%+v", ta) 3123 } 3124 3125 // PathEscapesAllocDir returns if the given path escapes the allocation 3126 // directory. The prefix allows adding a prefix if the path will be joined, for 3127 // example a "task/local" prefix may be provided if the path will be joined 3128 // against that prefix. 3129 func PathEscapesAllocDir(prefix, path string) (bool, error) { 3130 // Verify the destination doesn't escape the tasks directory 3131 alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/")) 3132 if err != nil { 3133 return false, err 3134 } 3135 abs, err := filepath.Abs(filepath.Join(alloc, prefix, path)) 3136 if err != nil { 3137 return false, err 3138 } 3139 rel, err := filepath.Rel(alloc, abs) 3140 if err != nil { 3141 return false, err 3142 } 3143 3144 return strings.HasPrefix(rel, ".."), nil 3145 } 3146 3147 func (ta *TaskArtifact) Validate() error { 3148 // Verify the source 3149 var mErr multierror.Error 3150 if ta.GetterSource == "" { 3151 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 3152 } 3153 3154 escaped, err := PathEscapesAllocDir("task", ta.RelativeDest) 3155 if err != nil { 3156 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 3157 } else if escaped { 3158 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 3159 } 3160 3161 // Verify the checksum 3162 if check, ok := ta.GetterOptions["checksum"]; ok { 3163 check = strings.TrimSpace(check) 3164 if check == "" { 3165 mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value cannot be empty")) 3166 return mErr.ErrorOrNil() 3167 } 3168 3169 parts := strings.Split(check, ":") 3170 if l := len(parts); l != 2 { 3171 mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check)) 3172 return mErr.ErrorOrNil() 3173 } 3174 3175 checksumVal := parts[1] 3176 checksumBytes, err := hex.DecodeString(checksumVal) 3177 if err != nil { 3178 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err)) 3179 return mErr.ErrorOrNil() 3180 } 3181 3182 checksumType := parts[0] 3183 expectedLength := 0 3184 switch checksumType { 3185 case "md5": 3186 expectedLength = md5.Size 3187 case "sha1": 3188 expectedLength = sha1.Size 3189 case "sha256": 3190 expectedLength = sha256.Size 3191 case "sha512": 3192 expectedLength = sha512.Size 3193 default: 3194 mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType)) 3195 return mErr.ErrorOrNil() 3196 } 3197 3198 if len(checksumBytes) != expectedLength { 3199 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal)) 3200 return mErr.ErrorOrNil() 3201 } 3202 } 3203 3204 return mErr.ErrorOrNil() 3205 } 3206 3207 const ( 3208 ConstraintDistinctProperty = "distinct_property" 3209 ConstraintDistinctHosts = "distinct_hosts" 3210 ConstraintRegex = "regexp" 3211 ConstraintVersion = "version" 3212 ConstraintSetContains = "set_contains" 3213 ) 3214 3215 // Constraints are used to restrict placement 
options. 3216 type Constraint struct { 3217 LTarget string // Left-hand target 3218 RTarget string // Right-hand target 3219 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 3220 str string // Memoized string 3221 } 3222 3223 // Equal checks if two constraints are equal 3224 func (c *Constraint) Equal(o *Constraint) bool { 3225 return c.LTarget == o.LTarget && 3226 c.RTarget == o.RTarget && 3227 c.Operand == o.Operand 3228 } 3229 3230 func (c *Constraint) Copy() *Constraint { 3231 if c == nil { 3232 return nil 3233 } 3234 nc := new(Constraint) 3235 *nc = *c 3236 return nc 3237 } 3238 3239 func (c *Constraint) String() string { 3240 if c.str != "" { 3241 return c.str 3242 } 3243 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 3244 return c.str 3245 } 3246 3247 func (c *Constraint) Validate() error { 3248 var mErr multierror.Error 3249 if c.Operand == "" { 3250 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 3251 } 3252 3253 // Perform additional validation based on operand 3254 switch c.Operand { 3255 case ConstraintRegex: 3256 if _, err := regexp.Compile(c.RTarget); err != nil { 3257 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 3258 } 3259 case ConstraintVersion: 3260 if _, err := version.NewConstraint(c.RTarget); err != nil { 3261 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 3262 } 3263 } 3264 return mErr.ErrorOrNil() 3265 } 3266 3267 // EphemeralDisk is an ephemeral disk object 3268 type EphemeralDisk struct { 3269 // Sticky indicates whether the allocation is sticky to a node 3270 Sticky bool 3271 3272 // SizeMB is the size of the local disk 3273 SizeMB int 3274 3275 // Migrate determines if Nomad client should migrate the allocation dir for 3276 // sticky allocations 3277 Migrate bool 3278 } 3279 3280 // DefaultEphemeralDisk returns an EphemeralDisk with default configurations 3281 func DefaultEphemeralDisk() *EphemeralDisk { 3282 return &EphemeralDisk{ 3283 SizeMB: 300, 3284 } 3285 } 3286 3287 // Validate validates EphemeralDisk 3288 func (d *EphemeralDisk) Validate() error { 3289 if d.SizeMB < 10 { 3290 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 3291 } 3292 return nil 3293 } 3294 3295 // Copy copies the EphemeralDisk struct and returns a new one 3296 func (d *EphemeralDisk) Copy() *EphemeralDisk { 3297 ld := new(EphemeralDisk) 3298 *ld = *d 3299 return ld 3300 } 3301 3302 const ( 3303 // VaultChangeModeNoop takes no action when a new token is retrieved. 3304 VaultChangeModeNoop = "noop" 3305 3306 // VaultChangeModeSignal signals the task when a new token is retrieved. 3307 VaultChangeModeSignal = "signal" 3308 3309 // VaultChangeModeRestart restarts the task when a new token is retrieved. 3310 VaultChangeModeRestart = "restart" 3311 ) 3312 3313 // Vault stores the set of permissions a task needs access to from Vault. 3314 type Vault struct { 3315 // Policies is the set of policies that the task needs access to 3316 Policies []string 3317 3318 // Env marks whether the Vault Token should be exposed as an environment 3319 // variable 3320 Env bool 3321 3322 // ChangeMode is used to configure the task's behavior when the Vault 3323 // token changes because the original token could not be renewed in time. 3324 ChangeMode string 3325 3326 // ChangeSignal is the signal sent to the task when a new token is 3327 // retrieved. This is only valid when using the signal change mode.
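// As with templates, Canonicalize normalizes the value to upper case.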
3328 ChangeSignal string 3329 } 3330 3331 func DefaultVaultBlock() *Vault { 3332 return &Vault{ 3333 Env: true, 3334 ChangeMode: VaultChangeModeRestart, 3335 } 3336 } 3337 3338 // Copy returns a copy of this Vault block. 3339 func (v *Vault) Copy() *Vault { 3340 if v == nil { 3341 return nil 3342 } 3343 3344 nv := new(Vault) 3345 *nv = *v 3346 return nv 3347 } 3348 3349 func (v *Vault) Canonicalize() { 3350 if v.ChangeSignal != "" { 3351 v.ChangeSignal = strings.ToUpper(v.ChangeSignal) 3352 } 3353 } 3354 3355 // Validate returns an error if the Vault block is invalid. 3356 func (v *Vault) Validate() error { 3357 if v == nil { 3358 return nil 3359 } 3360 3361 var mErr multierror.Error 3362 if len(v.Policies) == 0 { 3363 multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty")) 3364 } 3365 3366 for _, p := range v.Policies { 3367 if p == "root" { 3368 multierror.Append(&mErr, fmt.Errorf("Cannot specify \"root\" policy")) 3369 } 3370 } 3371 3372 switch v.ChangeMode { 3373 case VaultChangeModeSignal: 3374 if v.ChangeSignal == "" { 3375 multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal)) 3376 } 3377 case VaultChangeModeNoop, VaultChangeModeRestart: 3378 default: 3379 multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode)) 3380 } 3381 3382 return mErr.ErrorOrNil() 3383 } 3384 3385 const ( 3386 AllocDesiredStatusRun = "run" // Allocation should run 3387 AllocDesiredStatusStop = "stop" // Allocation should stop 3388 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 3389 ) 3390 3391 const ( 3392 AllocClientStatusPending = "pending" 3393 AllocClientStatusRunning = "running" 3394 AllocClientStatusComplete = "complete" 3395 AllocClientStatusFailed = "failed" 3396 AllocClientStatusLost = "lost" 3397 ) 3398 3399 // Allocation is used to allocate the placement of a task group to a node. 3400 type Allocation struct { 3401 // ID of the allocation (UUID) 3402 ID string 3403 3404 // ID of the evaluation that generated this allocation 3405 EvalID string 3406 3407 // Name is a logical name of the allocation. 3408 Name string 3409 3410 // NodeID is the node this is being placed on 3411 NodeID string 3412 3413 // Job is the parent job of the task group being allocated. 3414 // This is copied at allocation time to avoid issues if the job 3415 // definition is updated. 3416 JobID string 3417 Job *Job 3418 3419 // TaskGroup is the name of the task group that should be run 3420 TaskGroup string 3421 3422 // Resources is the total set of resources allocated as part 3423 // of this allocation of the task group. 3424 Resources *Resources 3425 3426 // SharedResources are the resources that are shared by all the tasks in an 3427 // allocation 3428 SharedResources *Resources 3429 3430 // TaskResources is the set of resources allocated to each 3431 // task. These should sum to the total Resources.
3432 TaskResources map[string]*Resources 3433 3434 // Metrics associated with this allocation 3435 Metrics *AllocMetric 3436 3437 // Desired Status of the allocation on the client 3438 DesiredStatus string 3439 3440 // DesiredDescription is meant to provide more human-friendly information 3441 DesiredDescription string 3442 3443 // Status of the allocation on the client 3444 ClientStatus string 3445 3446 // ClientDescription is meant to provide more human-friendly information 3447 ClientDescription string 3448 3449 // TaskStates stores the state of each task. 3450 TaskStates map[string]*TaskState 3451 3452 // PreviousAllocation is the allocation that this allocation is replacing 3453 PreviousAllocation string 3454 3455 // Raft Indexes 3456 CreateIndex uint64 3457 ModifyIndex uint64 3458 3459 // AllocModifyIndex is not updated when the client updates allocations. This 3460 // lets the client pull only the allocs updated by the server. 3461 AllocModifyIndex uint64 3462 3463 // CreateTime is the time the allocation has finished scheduling and been 3464 // verified by the plan applier. 3465 CreateTime int64 3466 } 3467 3468 func (a *Allocation) Copy() *Allocation { 3469 if a == nil { 3470 return nil 3471 } 3472 na := new(Allocation) 3473 *na = *a 3474 3475 na.Job = na.Job.Copy() 3476 na.Resources = na.Resources.Copy() 3477 na.SharedResources = na.SharedResources.Copy() 3478 3479 if a.TaskResources != nil { 3480 tr := make(map[string]*Resources, len(na.TaskResources)) 3481 for task, resource := range na.TaskResources { 3482 tr[task] = resource.Copy() 3483 } 3484 na.TaskResources = tr 3485 } 3486 3487 na.Metrics = na.Metrics.Copy() 3488 3489 if a.TaskStates != nil { 3490 ts := make(map[string]*TaskState, len(na.TaskStates)) 3491 for task, state := range na.TaskStates { 3492 ts[task] = state.Copy() 3493 } 3494 na.TaskStates = ts 3495 } 3496 return na 3497 } 3498 3499 // TerminalStatus returns if the desired or actual status is terminal and 3500 // will no longer transition. 3501 func (a *Allocation) TerminalStatus() bool { 3502 // First check the desired state and if that isn't terminal, check client 3503 // state. 3504 switch a.DesiredStatus { 3505 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 3506 return true 3507 default: 3508 } 3509 3510 switch a.ClientStatus { 3511 case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost: 3512 return true 3513 default: 3514 return false 3515 } 3516 } 3517 3518 // Terminated returns if the allocation is in a terminal state on a client. 3519 func (a *Allocation) Terminated() bool { 3520 if a.ClientStatus == AllocClientStatusFailed || 3521 a.ClientStatus == AllocClientStatusComplete || 3522 a.ClientStatus == AllocClientStatusLost { 3523 return true 3524 } 3525 return false 3526 } 3527 3528 // RanSuccessfully returns whether the client has run the allocation and all 3529 // tasks finished successfully 3530 func (a *Allocation) RanSuccessfully() bool { 3531 // Handle the case the client hasn't started the allocation.
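// An allocation with no recorded task states has not run anything yet, so it
// cannot have run successfully.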
3532 if len(a.TaskStates) == 0 { 3533 return false 3534 } 3535 3536 // Check to see if all the tasks finished successfully in the allocation 3537 allSuccess := true 3538 for _, state := range a.TaskStates { 3539 allSuccess = allSuccess && state.Successful() 3540 } 3541 3542 return allSuccess 3543 } 3544 3545 // Stub returns a list stub for the allocation 3546 func (a *Allocation) Stub() *AllocListStub { 3547 return &AllocListStub{ 3548 ID: a.ID, 3549 EvalID: a.EvalID, 3550 Name: a.Name, 3551 NodeID: a.NodeID, 3552 JobID: a.JobID, 3553 TaskGroup: a.TaskGroup, 3554 DesiredStatus: a.DesiredStatus, 3555 DesiredDescription: a.DesiredDescription, 3556 ClientStatus: a.ClientStatus, 3557 ClientDescription: a.ClientDescription, 3558 TaskStates: a.TaskStates, 3559 CreateIndex: a.CreateIndex, 3560 ModifyIndex: a.ModifyIndex, 3561 CreateTime: a.CreateTime, 3562 } 3563 } 3564 3565 // ShouldMigrate returns if the allocation needs data migration 3566 func (a *Allocation) ShouldMigrate() bool { 3567 if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict { 3568 return false 3569 } 3570 3571 tg := a.Job.LookupTaskGroup(a.TaskGroup) 3572 3573 // if the task group is nil or the ephemeral disk block isn't present then 3574 // we won't migrate 3575 if tg == nil || tg.EphemeralDisk == nil { 3576 return false 3577 } 3578 3579 // We won't migrate any data if the user hasn't enabled migration or the 3580 // disk is not marked as sticky 3581 if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky { 3582 return false 3583 } 3584 3585 return true 3586 } 3587 3588 var ( 3589 // AllocationIndexRegex is a regular expression to find the allocation index. 3590 AllocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$") 3591 ) 3592 3593 // Index returns the index of the allocation. If the allocation is from a task 3594 // group with count greater than 1, there will be multiple allocations for it. 3595 func (a *Allocation) Index() int { 3596 matches := AllocationIndexRegex.FindStringSubmatch(a.Name) 3597 if len(matches) != 2 { 3598 return -1 3599 } 3600 3601 index, err := strconv.Atoi(matches[1]) 3602 if err != nil { 3603 return -1 3604 } 3605 3606 return index 3607 } 3608 3609 // AllocListStub is used to return a subset of alloc information 3610 type AllocListStub struct { 3611 ID string 3612 EvalID string 3613 Name string 3614 NodeID string 3615 JobID string 3616 TaskGroup string 3617 DesiredStatus string 3618 DesiredDescription string 3619 ClientStatus string 3620 ClientDescription string 3621 TaskStates map[string]*TaskState 3622 CreateIndex uint64 3623 ModifyIndex uint64 3624 CreateTime int64 3625 } 3626 3627 // AllocMetric is used to track various metrics while attempting 3628 // to make an allocation. These are used to debug a job, or to better 3629 // understand the pressure within the system. 3630 type AllocMetric struct { 3631 // NodesEvaluated is the number of nodes that were evaluated 3632 NodesEvaluated int 3633 3634 // NodesFiltered is the number of nodes filtered due to a constraint 3635 NodesFiltered int 3636 3637 // NodesAvailable is the number of nodes available for evaluation per DC.
3638 NodesAvailable map[string]int 3639 3640 // ClassFiltered is the number of nodes filtered by class 3641 ClassFiltered map[string]int 3642 3643 // ConstraintFiltered is the number of failures caused by constraint 3644 ConstraintFiltered map[string]int 3645 3646 // NodesExhausted is the number of nodes skipped due to being 3647 // exhausted of at least one resource 3648 NodesExhausted int 3649 3650 // ClassExhausted is the number of nodes exhausted by class 3651 ClassExhausted map[string]int 3652 3653 // DimensionExhausted provides the count by dimension or reason 3654 DimensionExhausted map[string]int 3655 3656 // Scores is the scores of the final few nodes remaining 3657 // for placement. The top score is typically selected. 3658 Scores map[string]float64 3659 3660 // AllocationTime is a measure of how long the allocation 3661 // attempt took. This can affect performance and SLAs. 3662 AllocationTime time.Duration 3663 3664 // CoalescedFailures indicates the number of other 3665 // allocations that were coalesced into this failed allocation. 3666 // This is to prevent creating many failed allocations for a 3667 // single task group. 3668 CoalescedFailures int 3669 } 3670 3671 func (a *AllocMetric) Copy() *AllocMetric { 3672 if a == nil { 3673 return nil 3674 } 3675 na := new(AllocMetric) 3676 *na = *a 3677 na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable) 3678 na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered) 3679 na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered) 3680 na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted) 3681 na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted) 3682 na.Scores = helper.CopyMapStringFloat64(na.Scores) 3683 return na 3684 } 3685 3686 func (a *AllocMetric) EvaluateNode() { 3687 a.NodesEvaluated += 1 3688 } 3689 3690 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 3691 a.NodesFiltered += 1 3692 if node != nil && node.NodeClass != "" { 3693 if a.ClassFiltered == nil { 3694 a.ClassFiltered = make(map[string]int) 3695 } 3696 a.ClassFiltered[node.NodeClass] += 1 3697 } 3698 if constraint != "" { 3699 if a.ConstraintFiltered == nil { 3700 a.ConstraintFiltered = make(map[string]int) 3701 } 3702 a.ConstraintFiltered[constraint] += 1 3703 } 3704 } 3705 3706 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 3707 a.NodesExhausted += 1 3708 if node != nil && node.NodeClass != "" { 3709 if a.ClassExhausted == nil { 3710 a.ClassExhausted = make(map[string]int) 3711 } 3712 a.ClassExhausted[node.NodeClass] += 1 3713 } 3714 if dimension != "" { 3715 if a.DimensionExhausted == nil { 3716 a.DimensionExhausted = make(map[string]int) 3717 } 3718 a.DimensionExhausted[dimension] += 1 3719 } 3720 } 3721 3722 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 3723 if a.Scores == nil { 3724 a.Scores = make(map[string]float64) 3725 } 3726 key := fmt.Sprintf("%s.%s", node.ID, name) 3727 a.Scores[key] = score 3728 } 3729 3730 const ( 3731 EvalStatusBlocked = "blocked" 3732 EvalStatusPending = "pending" 3733 EvalStatusComplete = "complete" 3734 EvalStatusFailed = "failed" 3735 EvalStatusCancelled = "canceled" 3736 ) 3737 3738 const ( 3739 EvalTriggerJobRegister = "job-register" 3740 EvalTriggerJobDeregister = "job-deregister" 3741 EvalTriggerPeriodicJob = "periodic-job" 3742 EvalTriggerNodeUpdate = "node-update" 3743 EvalTriggerScheduled = "scheduled" 3744 EvalTriggerRollingUpdate = "rolling-update" 3745 EvalTriggerMaxPlans = "max-plan-attempts" 3746 ) 3747 
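// As an illustrative sketch only (candidateNodes and matchesConstraints below
// are hypothetical, not part of this package), a scheduler would typically
// fill in an AllocMetric while walking candidate nodes:
//
//    m := new(AllocMetric)
//    for _, node := range candidateNodes {
//        m.EvaluateNode()
//        if !matchesConstraints(node) {
//            m.FilterNode(node, "${attr.kernel.name} = linux")
//            continue
//        }
//        m.ScoreNode(node, "binpack", 0.75)
//    }
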
3748 const ( 3749 // CoreJobEvalGC is used for the garbage collection of evaluations 3750 // and allocations. We periodically scan evaluations in a terminal state, 3751 // in which all the corresponding allocations are also terminal. We 3752 // delete these out of the system to bound the state. 3753 CoreJobEvalGC = "eval-gc" 3754 3755 // CoreJobNodeGC is used for the garbage collection of failed nodes. 3756 // We periodically scan nodes in a terminal state, and if they have no 3757 // corresponding allocations we delete these out of the system. 3758 CoreJobNodeGC = "node-gc" 3759 3760 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 3761 // periodically scan garbage collectible jobs and check if both their 3762 // evaluations and allocations are terminal. If so, we delete these out of 3763 // the system. 3764 CoreJobJobGC = "job-gc" 3765 3766 // CoreJobForceGC is used to force garbage collection of all GCable objects. 3767 CoreJobForceGC = "force-gc" 3768 ) 3769 3770 // Evaluation is used anytime we need to apply business logic as a result 3771 // of a change to our desired state (job specification) or the emergent state 3772 // (registered nodes). When the inputs change, we need to "evaluate" them, 3773 // potentially taking action (allocation of work) or doing nothing if the state 3774 // of the world does not require it. 3775 type Evaluation struct { 3776 // ID is a randomly generated UUID used for this evaluation. This 3777 // is assigned upon the creation of the evaluation. 3778 ID string 3779 3780 // Priority is used to control scheduling importance and if this job 3781 // can preempt other jobs. 3782 Priority int 3783 3784 // Type is used to control which schedulers are available to handle 3785 // this evaluation. 3786 Type string 3787 3788 // TriggeredBy is used to give some insight into why this Eval 3789 // was created. (Job change, node failure, alloc failure, etc). 3790 TriggeredBy string 3791 3792 // JobID is the job this evaluation is scoped to. Evaluations cannot 3793 // be run in parallel for a given JobID, so we serialize on this. 3794 JobID string 3795 3796 // JobModifyIndex is the modify index of the job at the time 3797 // the evaluation was created 3798 JobModifyIndex uint64 3799 3800 // NodeID is the node that was affected triggering the evaluation. 3801 NodeID string 3802 3803 // NodeModifyIndex is the modify index of the node at the time 3804 // the evaluation was created 3805 NodeModifyIndex uint64 3806 3807 // Status of the evaluation 3808 Status string 3809 3810 // StatusDescription is meant to provide more human-friendly information 3811 StatusDescription string 3812 3813 // Wait is a minimum wait time for running the eval. This is used to 3814 // support a rolling upgrade. 3815 Wait time.Duration 3816 3817 // NextEval is the evaluation ID for the eval created to do a followup. 3818 // This is used to support rolling upgrades, where we need a chain of evaluations. 3819 NextEval string 3820 3821 // PreviousEval is the evaluation ID for the eval creating this one to do a followup. 3822 // This is used to support rolling upgrades, where we need a chain of evaluations. 3823 PreviousEval string 3824 3825 // BlockedEval is the evaluation ID for a created blocked eval. A 3826 // blocked eval will be created if all allocations could not be placed due 3827 // to constraints or lacking resources.
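// See CreateBlockedEval, which builds such an eval and links it back to this
// one through PreviousEval.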
3828 	BlockedEval string
3829 
3830 	// FailedTGAllocs are task groups which have allocations that could not be
3831 	// made, but the metrics are persisted so that the user can use the feedback
3832 	// to determine the cause.
3833 	FailedTGAllocs map[string]*AllocMetric
3834 
3835 	// ClassEligibility tracks computed node classes that have been explicitly
3836 	// marked as eligible or ineligible.
3837 	ClassEligibility map[string]bool
3838 
3839 	// EscapedComputedClass marks whether the job has constraints that are not
3840 	// captured by computed node classes.
3841 	EscapedComputedClass bool
3842 
3843 	// AnnotatePlan triggers the scheduler to provide additional annotations
3844 	// during the evaluation. This should not be set during normal operations.
3845 	AnnotatePlan bool
3846 
3847 	// QueuedAllocations is the number of unplaced allocations at the time the
3848 	// evaluation was processed. The map is keyed by Task Group names.
3849 	QueuedAllocations map[string]int
3850 
3851 	// SnapshotIndex is the Raft index of the snapshot used to process the
3852 	// evaluation. As such it will only be set once it has gone through the
3853 	// scheduler.
3854 	SnapshotIndex uint64
3855 
3856 	// Raft Indexes
3857 	CreateIndex uint64
3858 	ModifyIndex uint64
3859 }
3860 
3861 // TerminalStatus returns if the current status is terminal and
3862 // will no longer transition.
3863 func (e *Evaluation) TerminalStatus() bool {
3864 	switch e.Status {
3865 	case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled:
3866 		return true
3867 	default:
3868 		return false
3869 	}
3870 }
3871 
3872 func (e *Evaluation) GoString() string {
3873 	return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID)
3874 }
3875 
3876 func (e *Evaluation) Copy() *Evaluation {
3877 	if e == nil {
3878 		return nil
3879 	}
3880 	ne := new(Evaluation)
3881 	*ne = *e
3882 
3883 	// Copy ClassEligibility
3884 	if e.ClassEligibility != nil {
3885 		classes := make(map[string]bool, len(e.ClassEligibility))
3886 		for class, elig := range e.ClassEligibility {
3887 			classes[class] = elig
3888 		}
3889 		ne.ClassEligibility = classes
3890 	}
3891 
3892 	// Copy FailedTGAllocs
3893 	if e.FailedTGAllocs != nil {
3894 		failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs))
3895 		for tg, metric := range e.FailedTGAllocs {
3896 			failedTGs[tg] = metric.Copy()
3897 		}
3898 		ne.FailedTGAllocs = failedTGs
3899 	}
3900 
3901 	// Copy queued allocations
3902 	if e.QueuedAllocations != nil {
3903 		queuedAllocations := make(map[string]int, len(e.QueuedAllocations))
3904 		for tg, num := range e.QueuedAllocations {
3905 			queuedAllocations[tg] = num
3906 		}
3907 		ne.QueuedAllocations = queuedAllocations
3908 	}
3909 
3910 	return ne
3911 }
3912 
3913 // ShouldEnqueue checks if a given evaluation should be enqueued into the
3914 // eval_broker
3915 func (e *Evaluation) ShouldEnqueue() bool {
3916 	switch e.Status {
3917 	case EvalStatusPending:
3918 		return true
3919 	case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled:
3920 		return false
3921 	default:
3922 		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
3923 	}
3924 }
3925 
3926 // ShouldBlock checks if a given evaluation should be entered into the blocked
3927 // eval tracker.
3928 func (e *Evaluation) ShouldBlock() bool {
3929 	switch e.Status {
3930 	case EvalStatusBlocked:
3931 		return true
3932 	case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled:
3933 		return false
3934 	default:
3935 		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
3936 	}
3937 }
3938 
3939 // MakePlan is used to make a plan from the given evaluation
3940 // for a given Job
3941 func (e *Evaluation) MakePlan(j *Job) *Plan {
3942 	p := &Plan{
3943 		EvalID: e.ID,
3944 		Priority: e.Priority,
3945 		Job: j,
3946 		NodeUpdate: make(map[string][]*Allocation),
3947 		NodeAllocation: make(map[string][]*Allocation),
3948 	}
3949 	if j != nil {
3950 		p.AllAtOnce = j.AllAtOnce
3951 	}
3952 	return p
3953 }
3954 
3955 // NextRollingEval creates an evaluation to follow up this eval for rolling updates
3956 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
3957 	return &Evaluation{
3958 		ID: GenerateUUID(),
3959 		Priority: e.Priority,
3960 		Type: e.Type,
3961 		TriggeredBy: EvalTriggerRollingUpdate,
3962 		JobID: e.JobID,
3963 		JobModifyIndex: e.JobModifyIndex,
3964 		Status: EvalStatusPending,
3965 		Wait: wait,
3966 		PreviousEval: e.ID,
3967 	}
3968 }
3969 
3970 // CreateBlockedEval creates a blocked evaluation to follow up this eval and place any
3971 // failed allocations. It takes the classes marked explicitly eligible or
3972 // ineligible and whether the job has escaped computed node classes.
3973 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation {
3974 	return &Evaluation{
3975 		ID: GenerateUUID(),
3976 		Priority: e.Priority,
3977 		Type: e.Type,
3978 		TriggeredBy: e.TriggeredBy,
3979 		JobID: e.JobID,
3980 		JobModifyIndex: e.JobModifyIndex,
3981 		Status: EvalStatusBlocked,
3982 		PreviousEval: e.ID,
3983 		ClassEligibility: classEligibility,
3984 		EscapedComputedClass: escaped,
3985 	}
3986 }
3987 
3988 // Plan is used to submit a commit plan for task allocations. These
3989 // are submitted to the leader which verifies that resources have
3990 // not been overcommitted before admitting the plan.
3991 type Plan struct {
3992 	// EvalID is the evaluation ID this plan is associated with
3993 	EvalID string
3994 
3995 	// EvalToken is used to prevent a split-brain processing of
3996 	// an evaluation. There should only be a single scheduler running
3997 	// an Eval at a time, but this could be violated after a leadership
3998 	// transition. This unique token is used to reject plans that are
3999 	// being submitted from a different leader.
4000 	EvalToken string
4001 
4002 	// Priority is the priority of the upstream job
4003 	Priority int
4004 
4005 	// AllAtOnce is used to control if incremental scheduling of task groups
4006 	// is allowed or if we must do a gang scheduling of the entire job.
4007 	// If this is false, a plan may be partially applied. Otherwise, the
4008 	// entire plan must be able to make progress.
4009 	AllAtOnce bool
4010 
4011 	// Job is the parent job of all the allocations in the Plan.
4012 	// Since a Plan only involves a single Job, we can reduce the size
4013 	// of the plan by only including it once.
4014 	Job *Job
4015 
4016 	// NodeUpdate contains all the allocations for each node. For each node,
4017 	// this is a list of the allocations to update to either stop or evict.
4018 	NodeUpdate map[string][]*Allocation
4019 
4020 	// NodeAllocation contains all the allocations for each node.
4021 	// The evicts must be considered prior to the allocations.
4022 	NodeAllocation map[string][]*Allocation
4023 
4024 	// Annotations contains annotations by the scheduler to be used by operators
4025 	// to understand the decisions made by the scheduler.
4026 	Annotations *PlanAnnotations
4027 }
4028 
4029 // AppendUpdate marks the allocation for eviction. The clientStatus of the
4030 // allocation may be optionally set by passing in a non-empty value.
4031 func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) {
4032 	newAlloc := new(Allocation)
4033 	*newAlloc = *alloc
4034 
4035 	// If the job is not set in the plan we are deregistering a job so we
4036 	// extract the job from the allocation.
4037 	if p.Job == nil && newAlloc.Job != nil {
4038 		p.Job = newAlloc.Job
4039 	}
4040 
4041 	// Normalize the job
4042 	newAlloc.Job = nil
4043 
4044 	// Strip the resources as it can be rebuilt.
4045 	newAlloc.Resources = nil
4046 
4047 	newAlloc.DesiredStatus = desiredStatus
4048 	newAlloc.DesiredDescription = desiredDesc
4049 
4050 	if clientStatus != "" {
4051 		newAlloc.ClientStatus = clientStatus
4052 	}
4053 
4054 	node := alloc.NodeID
4055 	existing := p.NodeUpdate[node]
4056 	p.NodeUpdate[node] = append(existing, newAlloc)
4057 }
4058 
4059 func (p *Plan) PopUpdate(alloc *Allocation) {
4060 	existing := p.NodeUpdate[alloc.NodeID]
4061 	n := len(existing)
4062 	if n > 0 && existing[n-1].ID == alloc.ID {
4063 		existing = existing[:n-1]
4064 		if len(existing) > 0 {
4065 			p.NodeUpdate[alloc.NodeID] = existing
4066 		} else {
4067 			delete(p.NodeUpdate, alloc.NodeID)
4068 		}
4069 	}
4070 }
4071 
4072 func (p *Plan) AppendAlloc(alloc *Allocation) {
4073 	node := alloc.NodeID
4074 	existing := p.NodeAllocation[node]
4075 	p.NodeAllocation[node] = append(existing, alloc)
4076 }
4077 
4078 // IsNoOp checks if this plan would do nothing
4079 func (p *Plan) IsNoOp() bool {
4080 	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0
4081 }
4082 
4083 // PlanResult is the result of a plan submitted to the leader.
4084 type PlanResult struct {
4085 	// NodeUpdate contains all the updates that were committed.
4086 	NodeUpdate map[string][]*Allocation
4087 
4088 	// NodeAllocation contains all the allocations that were committed.
4089 	NodeAllocation map[string][]*Allocation
4090 
4091 	// RefreshIndex is the index the worker should refresh state up to.
4092 	// This allows all evictions and allocations to be materialized.
4093 	// If any allocations were rejected due to stale data (node state,
4094 	// overcommitted), this can be used to force a worker refresh.
4095 	RefreshIndex uint64
4096 
4097 	// AllocIndex is the Raft index in which the evictions and
4098 	// allocations took place. This is used for the write index.
4099 	AllocIndex uint64
4100 }
4101 
4102 // IsNoOp checks if this plan result would do nothing
4103 func (p *PlanResult) IsNoOp() bool {
4104 	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0
4105 }
4106 
4107 // FullCommit is used to check if all the allocations in a plan
4108 // were committed as part of the result. Returns if there was
4109 // a match, and the number of expected and actual allocations.
4110 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
4111 	expected := 0
4112 	actual := 0
4113 	for name, allocList := range plan.NodeAllocation {
4114 		didAlloc, _ := p.NodeAllocation[name]
4115 		expected += len(allocList)
4116 		actual += len(didAlloc)
4117 	}
4118 	return actual == expected, expected, actual
4119 }
4120 
4121 // PlanAnnotations holds annotations made by the scheduler to give further debug
4122 // information to operators.
4123 type PlanAnnotations struct {
4124 	// DesiredTGUpdates is the set of desired updates per task group.
4125 	DesiredTGUpdates map[string]*DesiredUpdates
4126 }
4127 
4128 // DesiredUpdates is the set of changes the scheduler would like to make given
4129 // sufficient resources and cluster capacity.
4130 type DesiredUpdates struct {
4131 	Ignore uint64
4132 	Place uint64
4133 	Migrate uint64
4134 	Stop uint64
4135 	InPlaceUpdate uint64
4136 	DestructiveUpdate uint64
4137 }
4138 
4139 // MsgpackHandle is a shared handle for encoding/decoding of structs
4140 var MsgpackHandle = func() *codec.MsgpackHandle {
4141 	h := &codec.MsgpackHandle{RawToString: true}
4142 
4143 	// Sets the default type for decoding a map into a nil interface{}.
4144 	// This is necessary in particular because we store the driver configs as a
4145 	// nil interface{}.
4146 	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
4147 	return h
4148 }()
4149 
4150 var HashiMsgpackHandle = func() *hcodec.MsgpackHandle {
4151 	h := &hcodec.MsgpackHandle{RawToString: true}
4152 
4153 	// Sets the default type for decoding a map into a nil interface{}.
4154 	// This is necessary in particular because we store the driver configs as a
4155 	// nil interface{}.
4156 	h.MapType = reflect.TypeOf(map[string]interface{}(nil))
4157 	return h
4158 }()
4159 
4160 // Decode is used to decode a MsgPack encoded object
4161 func Decode(buf []byte, out interface{}) error {
4162 	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
4163 }
4164 
4165 // Encode is used to encode a MsgPack object with type prefix
4166 func Encode(t MessageType, msg interface{}) ([]byte, error) {
4167 	var buf bytes.Buffer
4168 	buf.WriteByte(uint8(t))
4169 	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
4170 	return buf.Bytes(), err
4171 }
4172 
4173 // KeyringResponse is a unified key response and can be used for install,
4174 // remove, use, and list key queries.
4175 type KeyringResponse struct {
4176 	Messages map[string]string
4177 	Keys map[string]int
4178 	NumNodes int
4179 }
4180 
4181 // KeyringRequest is the request object for Serf key operations.
4182 type KeyringRequest struct {
4183 	Key string
4184 }
4185 
4186 // RecoverableError wraps an error and marks whether it is recoverable (and
4187 // could be retried) or fatal.
4188 type RecoverableError struct {
4189 	Err string
4190 	Recoverable bool
4191 }
4192 
4193 // NewRecoverableError is used to wrap an error and mark it as recoverable or
4194 // not.
4195 func NewRecoverableError(e error, recoverable bool) error {
4196 	if e == nil {
4197 		return nil
4198 	}
4199 
4200 	return &RecoverableError{
4201 		Err: e.Error(),
4202 		Recoverable: recoverable,
4203 	}
4204 }
4205 
4206 // WrapRecoverable wraps an existing error in a new RecoverableError with a new
4207 // message. If the error was recoverable before, the returned error is as well;
4208 // otherwise it is unrecoverable.
4209 func WrapRecoverable(msg string, err error) error {
4210 	return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)}
4211 }
4212 
4213 func (r *RecoverableError) Error() string {
4214 	return r.Err
4215 }
4216 
4217 func (r *RecoverableError) IsRecoverable() bool {
4218 	return r.Recoverable
4219 }
4220 
4221 // Recoverable is an interface for errors to implement to indicate whether or
4222 // not they are fatal or recoverable.
4223 type Recoverable interface {
4224 	error
4225 	IsRecoverable() bool
4226 }
4227 
4228 // IsRecoverable returns true if error is a RecoverableError with
4229 // Recoverable=true. Otherwise false is returned.
4230 func IsRecoverable(e error) bool {
4231 	if re, ok := e.(Recoverable); ok {
4232 		return re.IsRecoverable()
4233 	}
4234 	return false
4235 }
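To illustrate how the RecoverableError helpers compose, here is a minimal sketch, assuming the upstream import path github.com/hashicorp/nomad/nomad/structs; fetchArtifact and its error text are hypothetical.

package main

import (
	"errors"
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

// fetchArtifact is a hypothetical operation whose failures may be transient.
func fetchArtifact() error {
	// A timeout is worth retrying, so it is wrapped as recoverable.
	return structs.NewRecoverableError(errors.New("connection timed out"), true)
}

func main() {
	err := fetchArtifact()

	// IsRecoverable only reports true for errors implementing the
	// Recoverable interface with Recoverable=true; plain errors are fatal.
	fmt.Println(structs.IsRecoverable(err))                      // true
	fmt.Println(structs.IsRecoverable(errors.New("bad config"))) // false

	// WrapRecoverable adds context while preserving recoverability.
	wrapped := structs.WrapRecoverable(fmt.Sprintf("fetch failed: %v", err), err)
	fmt.Println(wrapped, structs.IsRecoverable(wrapped)) // fetch failed: connection timed out true
}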
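The Encode/Decode pair earlier in this file defines a convention for type-prefixed msgpack payloads: Encode prepends the MessageType byte so a consumer can dispatch on it, while Decode expects the payload without that prefix, so the consumer strips the first byte before decoding. A minimal round-trip sketch under that assumption; the pairing of EvalUpdateRequestType with a bare Evaluation payload is purely illustrative, and the import path is assumed to be the upstream github.com/hashicorp/nomad/nomad/structs.

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	eval := &structs.Evaluation{
		ID:     structs.GenerateUUID(),
		JobID:  "example",
		Status: structs.EvalStatusPending,
	}

	// Encode writes the message type byte followed by the msgpack body.
	buf, err := structs.Encode(structs.EvalUpdateRequestType, eval)
	if err != nil {
		panic(err)
	}

	// The first byte is the type tag used for dispatch.
	fmt.Println(structs.MessageType(buf[0]) == structs.EvalUpdateRequestType) // true

	// Decode understands plain msgpack, so skip the one-byte prefix.
	var out structs.Evaluation
	if err := structs.Decode(buf[1:], &out); err != nil {
		panic(err)
	}
	fmt.Println(out.JobID, out.Status) // example pending
}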