github.com/manicqin/nomad@v0.9.5/nomad/structs/structs.go

package structs

import (
    "bytes"
    "container/heap"
    "crypto/md5"
    "crypto/sha1"
    "crypto/sha256"
    "crypto/sha512"
    "encoding/base32"
    "encoding/base64"
    "encoding/hex"
    "errors"
    "fmt"
    "math"
    "net"
    "os"
    "path/filepath"
    "reflect"
    "regexp"
    "sort"
    "strconv"
    "strings"
    "time"

    "github.com/gorhill/cronexpr"
    hcodec "github.com/hashicorp/go-msgpack/codec"
    "github.com/hashicorp/go-multierror"
    "github.com/hashicorp/go-version"
    "github.com/hashicorp/nomad/acl"
    "github.com/hashicorp/nomad/command/agent/pprof"
    "github.com/hashicorp/nomad/helper"
    "github.com/hashicorp/nomad/helper/args"
    "github.com/hashicorp/nomad/helper/constraints/semver"
    "github.com/hashicorp/nomad/helper/uuid"
    "github.com/hashicorp/nomad/lib/kheap"
    psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    "github.com/mitchellh/copystructure"
    "github.com/ugorji/go/codec"
    "golang.org/x/crypto/blake2b"
)

var (
    // validPolicyName is used to validate a policy name
    validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$")

    // validLogExtension is used to validate a log extension
    validLogExtension = regexp.MustCompile(`^[A-Za-z0-9_-]*$`)

    // b32 is a lowercase base32 encoding for use in URL-friendly service hashes
    b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567"))
)
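
// exampleServiceHash is an editorial sketch, not part of the original
// file: it shows how the b32 encoding above turns a digest into the
// lowercase, URL-friendly form used for service hashes. The choice of
// sha256 here is an assumption for illustration only.
func exampleServiceHash(payload []byte) string {
    sum := sha256.Sum256(payload)
    return b32.EncodeToString(sum[:])
}
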
type MessageType uint8

const (
    NodeRegisterRequestType MessageType = iota
    NodeDeregisterRequestType
    NodeUpdateStatusRequestType
    NodeUpdateDrainRequestType
    JobRegisterRequestType
    JobDeregisterRequestType
    EvalUpdateRequestType
    EvalDeleteRequestType
    AllocUpdateRequestType
    AllocClientUpdateRequestType
    ReconcileJobSummariesRequestType
    VaultAccessorRegisterRequestType
    VaultAccessorDeregisterRequestType
    ApplyPlanResultsRequestType
    DeploymentStatusUpdateRequestType
    DeploymentPromoteRequestType
    DeploymentAllocHealthRequestType
    DeploymentDeleteRequestType
    JobStabilityRequestType
    ACLPolicyUpsertRequestType
    ACLPolicyDeleteRequestType
    ACLTokenUpsertRequestType
    ACLTokenDeleteRequestType
    ACLTokenBootstrapRequestType
    AutopilotRequestType
    UpsertNodeEventsType
    JobBatchDeregisterRequestType
    AllocUpdateDesiredTransitionRequestType
    NodeUpdateEligibilityRequestType
    BatchNodeUpdateDrainRequestType
    SchedulerConfigRequestType
    NodeBatchDeregisterRequestType
)

const (
    // IgnoreUnknownTypeFlag is set along with a MessageType
    // to indicate that the message type can be safely ignored
    // if it is not recognized. This is for future proofing, so
    // that new commands can be added in a way that won't cause
    // old servers to crash when the FSM attempts to process them.
    IgnoreUnknownTypeFlag MessageType = 128

    // ApiMajorVersion is returned as part of the Status.Version request.
    // It should be incremented anytime the APIs are changed in a way
    // that would break clients for sane client versioning.
    ApiMajorVersion = 1

    // ApiMinorVersion is returned as part of the Status.Version request.
    // It should be incremented anytime the APIs are changed to allow
    // for sane client versioning. Minor changes should be compatible
    // within the major version.
    ApiMinorVersion = 1

    ProtocolVersion = "protocol"
    APIMajorVersion = "api.major"
    APIMinorVersion = "api.minor"

    GetterModeAny  = "any"
    GetterModeFile = "file"
    GetterModeDir  = "dir"

    // maxPolicyDescriptionLength limits a policy description length
    maxPolicyDescriptionLength = 256

    // maxTokenNameLength limits an ACL token name length
    maxTokenNameLength = 256

    // ACLClientToken and ACLManagementToken are the only types of tokens
    ACLClientToken     = "client"
    ACLManagementToken = "management"

    // DefaultNamespace is the default namespace.
    DefaultNamespace            = "default"
    DefaultNamespaceDescription = "Default shared namespace"

    // JitterFraction is the limit to the amount of jitter we apply
    // to a user specified MaxQueryTime. We divide the specified time by
    // the fraction. So 16 == 6.25% limit of jitter. This jitter is also
    // applied to RPCHoldTimeout.
    JitterFraction = 16

    // MaxRetainedNodeEvents is the maximum number of node events that will be
    // retained for a single node
    MaxRetainedNodeEvents = 10

    // MaxRetainedNodeScores is the number of top scoring nodes for which we
    // retain scoring metadata
    MaxRetainedNodeScores = 5

    // Normalized scorer name
    NormScorerName = "normalized-score"
)
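
// exampleMessageTypeFlag is an editorial sketch, not part of the
// original file: a consumer of the Raft log can mask
// IgnoreUnknownTypeFlag off a MessageType to recover the concrete
// type, and skip rather than crash on flagged types it does not
// recognize.
func exampleMessageTypeFlag(msgType MessageType) (MessageType, bool) {
    ignorable := msgType&IgnoreUnknownTypeFlag == IgnoreUnknownTypeFlag
    return msgType &^ IgnoreUnknownTypeFlag, ignorable
}

// exampleJitter is an editorial sketch, not part of the original file,
// of the JitterFraction arithmetic: the jitter ceiling is the query
// time divided by the fraction, so 16 caps jitter at 1/16 = 6.25% of
// MaxQueryTime.
func exampleJitter(maxQueryTime time.Duration) time.Duration {
    return maxQueryTime / JitterFraction
}
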
// Context defines the scope in which a search for a Nomad object operates,
// and is also used to query the matching index value for this context.
type Context string

const (
    Allocs      Context = "allocs"
    Deployments Context = "deployment"
    Evals       Context = "evals"
    Jobs        Context = "jobs"
    Nodes       Context = "nodes"
    Namespaces  Context = "namespaces"
    Quotas      Context = "quotas"
    All         Context = "all"
)

// NamespacedID is a tuple of an ID and a namespace
type NamespacedID struct {
    ID        string
    Namespace string
}

// NewNamespacedID returns a new namespaced ID given the ID and namespace
func NewNamespacedID(id, ns string) NamespacedID {
    return NamespacedID{
        ID:        id,
        Namespace: ns,
    }
}

func (n NamespacedID) String() string {
    return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID)
}

// RPCInfo is used to describe common information about a query
type RPCInfo interface {
    RequestRegion() string
    IsRead() bool
    AllowStaleRead() bool
    IsForwarded() bool
    SetForwarded()
}

// InternalRpcInfo allows adding internal RPC metadata to an RPC. This struct
// should NOT be replicated in the API package as it is internal only.
type InternalRpcInfo struct {
    // Forwarded marks whether the RPC has been forwarded.
    Forwarded bool
}

// IsForwarded returns whether the RPC is forwarded from another server.
func (i *InternalRpcInfo) IsForwarded() bool {
    return i.Forwarded
}

// SetForwarded marks that the RPC is being forwarded from another server.
func (i *InternalRpcInfo) SetForwarded() {
    i.Forwarded = true
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
    // The target region for this query
    Region string

    // Namespace is the target namespace for the query.
    //
    // Since handlers do not have a default value set they should access
    // the Namespace via the RequestNamespace method.
    //
    // Requests accessing specific namespaced objects must check ACLs
    // against the namespace of the object, not the namespace in the
    // request.
    Namespace string

    // If set, wait until the query exceeds the given index. Must be
    // provided with MaxQueryTime.
    MinQueryIndex uint64

    // Provided with MinQueryIndex to wait for change.
    MaxQueryTime time.Duration

    // If set, any follower can service the request. Results
    // may be arbitrarily stale.
    AllowStale bool

    // If set, used as the prefix for resource list searches
    Prefix string

    // AuthToken is the secret portion of the ACL token used for the request
    AuthToken string

    InternalRpcInfo
}

func (q QueryOptions) RequestRegion() string {
    return q.Region
}

// RequestNamespace returns the request's namespace or the default namespace if
// no explicit namespace was sent.
//
// Requests accessing specific namespaced objects must check ACLs against the
// namespace of the object, not the namespace in the request.
func (q QueryOptions) RequestNamespace() string {
    if q.Namespace == "" {
        return DefaultNamespace
    }
    return q.Namespace
}

// QueryOptions only apply to reads, so this is always true.
func (q QueryOptions) IsRead() bool {
    return true
}

func (q QueryOptions) AllowStaleRead() bool {
    return q.AllowStale
}
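
// exampleBlockingQuery is an editorial sketch, not part of the
// original file: MinQueryIndex and MaxQueryTime travel together to
// form a blocking query, and an empty Namespace resolves to
// DefaultNamespace via RequestNamespace. The "global" region and the
// five minute wait are assumptions for illustration.
func exampleBlockingQuery(lastIndex uint64) QueryOptions {
    opts := QueryOptions{
        Region:        "global",
        MinQueryIndex: lastIndex,       // block until state passes this index...
        MaxQueryTime:  5 * time.Minute, // ...or this much time has elapsed
        AllowStale:    true,            // any follower may answer
    }
    _ = opts.RequestNamespace() // "" resolves to "default"
    return opts
}
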
// AgentPprofRequest is used to request a pprof report for a given node.
type AgentPprofRequest struct {
    // ReqType specifies the profile to use
    ReqType pprof.ReqType

    // Profile specifies the runtime/pprof profile to lookup and generate.
    Profile string

    // Seconds is the number of seconds to capture a profile
    Seconds int

    // Debug specifies if the pprof profile should include debug output
    Debug int

    // GC specifies if the profile should call runtime.GC() before
    // running its profile. This is only used for "heap" profiles
    GC int

    // NodeID is the node we want to track the logs of
    NodeID string

    // ServerID is the server we want to track the logs of
    ServerID string

    QueryOptions
}

// AgentPprofResponse is used to return a generated pprof profile
type AgentPprofResponse struct {
    // ID of the agent that fulfilled the request
    AgentID string

    // Payload is the generated pprof profile
    Payload []byte

    // HTTPHeaders are a set of key value pairs to be applied as
    // HTTP headers for a specific runtime profile
    HTTPHeaders map[string]string
}

type WriteRequest struct {
    // The target region for this write
    Region string

    // Namespace is the target namespace for the write.
    //
    // Since RPC handlers do not have a default value set they should
    // access the Namespace via the RequestNamespace method.
    //
    // Requests accessing specific namespaced objects must check ACLs
    // against the namespace of the object, not the namespace in the
    // request.
    Namespace string

    // AuthToken is the secret portion of the ACL token used for the request
    AuthToken string

    InternalRpcInfo
}

func (w WriteRequest) RequestRegion() string {
    // The target region for this request
    return w.Region
}

// RequestNamespace returns the request's namespace or the default namespace if
// no explicit namespace was sent.
//
// Requests accessing specific namespaced objects must check ACLs against the
// namespace of the object, not the namespace in the request.
func (w WriteRequest) RequestNamespace() string {
    if w.Namespace == "" {
        return DefaultNamespace
    }
    return w.Namespace
}

// WriteRequest only applies to writes, so this is always false.
func (w WriteRequest) IsRead() bool {
    return false
}

func (w WriteRequest) AllowStaleRead() bool {
    return false
}

// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
    // This is the index associated with the read
    Index uint64

    // If AllowStale is used, this is the time elapsed since the
    // last contact between the follower and leader. This
    // can be used to gauge staleness.
    LastContact time.Duration

    // Used to indicate if there is a known leader node
    KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
    // This is the index associated with the write
    Index uint64
}

// NodeRegisterRequest is used for the Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
    Node      *Node
    NodeEvent *NodeEvent
    WriteRequest
}

// NodeDeregisterRequest is used for the Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
    NodeID string
    WriteRequest
}

// NodeBatchDeregisterRequest is used for the Node.BatchDeregister endpoint
// to deregister a batch of nodes from being schedulable entities.
type NodeBatchDeregisterRequest struct {
    NodeIDs []string
    WriteRequest
}

// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
    // RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
    // be contacted at for RPCs.
    RPCAdvertiseAddr string

    // RPCMajorVersion is the major version number the Nomad Server
    // supports
    RPCMajorVersion int32

    // RPCMinorVersion is the minor version number the Nomad Server
    // supports
    RPCMinorVersion int32

    // Datacenter is the datacenter that a Nomad server belongs to
    Datacenter string
}

// NodeUpdateStatusRequest is used for the Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
    NodeID    string
    Status    string
    NodeEvent *NodeEvent
    UpdatedAt int64
    WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain strategy
type NodeUpdateDrainRequest struct {
    NodeID        string
    DrainStrategy *DrainStrategy

    // COMPAT Remove in version 0.10
    // As part of Nomad 0.8 we have deprecated the drain boolean in favor of a
    // drain strategy but we need to handle the upgrade path where the Raft log
    // contains drain updates with just the drain boolean being manipulated.
    Drain bool

    // MarkEligible marks the node as eligible if removing the drain strategy.
    MarkEligible bool

    // NodeEvent is the event added to the node
    NodeEvent *NodeEvent

    // UpdatedAt represents the server time of receiving the request
    UpdatedAt int64

    WriteRequest
}

// BatchNodeUpdateDrainRequest is used for updating the drain strategy for a
// batch of nodes
type BatchNodeUpdateDrainRequest struct {
    // Updates is a mapping of nodes to their updated drain strategy
    Updates map[string]*DrainUpdate

    // NodeEvents is a mapping of the node to the event to add to the node
    NodeEvents map[string]*NodeEvent

    // UpdatedAt represents the server time of receiving the request
    UpdatedAt int64

    WriteRequest
}

// DrainUpdate is used to update the drain of a node
type DrainUpdate struct {
    // DrainStrategy is the new strategy for the node
    DrainStrategy *DrainStrategy

    // MarkEligible marks the node as eligible if removing the drain strategy.
    MarkEligible bool
}

// NodeUpdateEligibilityRequest is used for updating the scheduling eligibility
type NodeUpdateEligibilityRequest struct {
    NodeID      string
    Eligibility string

    // NodeEvent is the event added to the node
    NodeEvent *NodeEvent

    // UpdatedAt represents the server time of receiving the request
    UpdatedAt int64

    WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
    NodeID string
    WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
    NodeID   string
    SecretID string
    QueryOptions
}

// SearchResponse is used to return matches and information about whether
// the match list is truncated specific to each type of context.
type SearchResponse struct {
    // Map of context types to ids which match a specified prefix
    Matches map[Context][]string

    // Truncations indicates whether the matches for a particular context have
    // been truncated
    Truncations map[Context]bool

    QueryMeta
}

// SearchRequest is used to parameterize a request, and returns a
// list of matches made up of jobs, allocations, evaluations, and/or nodes,
// along with whether or not the information returned is truncated.
type SearchRequest struct {
    // Prefix is what ids are matched to. I.e., if the given prefix were
    // "a", potential matches might be "abcd" or "aabb"
    Prefix string

    // Context is the type that can be matched against. A context can be a job,
    // node, evaluation, allocation, or empty (indicating every context should
    // be matched)
    Context Context

    QueryOptions
}

// JobRegisterRequest is used for the Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
    Job *Job

    // If EnforceIndex is set then the job will only be registered if the passed
    // JobModifyIndex matches the current job's index. If the index is zero, the
    // register only occurs if the job is new.
    EnforceIndex   bool
    JobModifyIndex uint64

    // PolicyOverride is set when the user is attempting to override any policies
    PolicyOverride bool

    WriteRequest
}
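
// exampleEnforcedRegister is an editorial sketch, not part of the
// original file, of check-and-set registration: with EnforceIndex set,
// the register succeeds only while the job's modify index still equals
// the index observed earlier (for example from a plan), so concurrent
// modifications are detected instead of silently overwritten.
func exampleEnforcedRegister(job *Job, observedIndex uint64) *JobRegisterRequest {
    return &JobRegisterRequest{
        Job:            job,
        EnforceIndex:   true,
        JobModifyIndex: observedIndex, // zero would mean "register only if new"
    }
}
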
// JobDeregisterRequest is used for the Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
    JobID string

    // Purge controls whether the deregister purges the job from the system or
    // whether the job is just marked as stopped and will be removed by the
    // garbage collector
    Purge bool

    WriteRequest
}

// JobBatchDeregisterRequest is used to batch deregister jobs and upsert
// evaluations.
type JobBatchDeregisterRequest struct {
    // Jobs is the set of jobs to deregister
    Jobs map[NamespacedID]*JobDeregisterOptions

    // Evals is the set of evaluations to create.
    Evals []*Evaluation

    WriteRequest
}

// JobDeregisterOptions configures how a job is deregistered.
type JobDeregisterOptions struct {
    // Purge controls whether the deregister purges the job from the system or
    // whether the job is just marked as stopped and will be removed by the
    // garbage collector
    Purge bool
}

// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
    JobID       string
    EvalOptions EvalOptions
    WriteRequest
}

// EvalOptions is used to encapsulate options when forcing a job evaluation
type EvalOptions struct {
    ForceReschedule bool
}

// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
    JobID string
    All   bool
    QueryOptions
}

// JobListRequest is used to parameterize a list request
type JobListRequest struct {
    QueryOptions
}

// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
    Job  *Job
    Diff bool // Toggles an annotated diff
    // PolicyOverride is set when the user is attempting to override any policies
    PolicyOverride bool
    WriteRequest
}

// JobSummaryRequest is used when we just need to get a specific job summary
type JobSummaryRequest struct {
    JobID string
    QueryOptions
}

// JobDispatchRequest is used to dispatch a job based on a parameterized job
type JobDispatchRequest struct {
    JobID   string
    Payload []byte
    Meta    map[string]string
    WriteRequest
}

// JobValidateRequest is used to validate a job
type JobValidateRequest struct {
    Job *Job
    WriteRequest
}

// JobRevertRequest is used to revert a job to a prior version.
type JobRevertRequest struct {
    // JobID is the ID of the job being reverted
    JobID string

    // JobVersion is the version to revert to.
    JobVersion uint64

    // EnforcePriorVersion if set will enforce that the job is at the given
    // version before reverting.
    EnforcePriorVersion *uint64

    // VaultToken is the Vault token that proves the submitter of the job revert
    // has access to any Vault policies specified in the targeted job version. This
    // field is only used to transfer the token and is not stored after the job
    // revert.
    VaultToken string

    WriteRequest
}

// JobStabilityRequest is used to mark a job as stable.
type JobStabilityRequest struct {
    // Job to set the stability on
    JobID      string
    JobVersion uint64

    // Set the stability
    Stable bool
    WriteRequest
}

// JobStabilityResponse is the response when marking a job as stable.
type JobStabilityResponse struct {
    WriteMeta
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
    QueryOptions
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
    Evals     []*Evaluation
    EvalToken string
    WriteRequest
}

// EvalDeleteRequest is used for deleting an evaluation.
type EvalDeleteRequest struct {
    Evals  []string
    Allocs []string
    WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
    EvalID string
    QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
    EvalID string
    Token  string
    WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
    Schedulers       []string
    Timeout          time.Duration
    SchedulerVersion uint16
    WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
    QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
    Plan *Plan
    WriteRequest
}

// ApplyPlanResultsRequest is used by the planner to apply a Raft transaction
// committing the result of a plan.
type ApplyPlanResultsRequest struct {
    // AllocUpdateRequest holds the allocation updates to be made by the
    // scheduler.
    AllocUpdateRequest

    // Deployment is the deployment created or updated as a result of a
    // scheduling event.
    Deployment *Deployment

    // DeploymentUpdates is a set of status updates to apply to the given
    // deployments. This allows the scheduler to cancel any unneeded deployments
    // because the job is stopped or the update block is removed.
    DeploymentUpdates []*DeploymentStatusUpdate

    // EvalID is the eval ID of the plan being applied. The modify index of the
    // evaluation is updated as part of applying the plan to ensure that subsequent
    // scheduling events for the same job will wait for the index that last produced
    // state changes. This is necessary for blocked evaluations since they can be
    // processed many times, potentially making state updates, without the state of
    // the evaluation itself being updated.
    EvalID string

    // COMPAT 0.11
    // NodePreemptions is a slice of allocations from other lower priority jobs
    // that are preempted. Preempted allocations are marked as evicted.
    // Deprecated: Replaced with AllocsPreempted which contains only the diff
    NodePreemptions []*Allocation

    // AllocsPreempted is a slice of allocation diffs from other lower priority jobs
    // that are preempted. Preempted allocations are marked as evicted.
    AllocsPreempted []*AllocationDiff

    // PreemptionEvals is a slice of follow up evals for jobs whose allocations
    // have been preempted to place allocs in this plan
    PreemptionEvals []*Evaluation
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction.
type AllocUpdateRequest struct {
    // COMPAT 0.11
    // Alloc is the list of new allocations to assign
    // Deprecated: Replaced with two separate slices, one containing stopped allocations
    // and another containing updated allocations
    Alloc []*Allocation

    // Allocations to stop. Contains only the diff, not the entire allocation
    AllocsStopped []*AllocationDiff

    // New or updated allocations
    AllocsUpdated []*Allocation

    // Evals is the list of new evaluations to create
    // Evals are valid only when used in the Raft RPC
    Evals []*Evaluation

    // Job is the shared parent job of the allocations.
    // It is pulled out since it is common to all of them, which reduces
    // the payload size.
    Job *Job

    WriteRequest
}

// AllocUpdateDesiredTransitionRequest is used to submit changes to allocations'
// desired transition state.
type AllocUpdateDesiredTransitionRequest struct {
    // Allocs is the mapping of allocation ids to their desired state
    // transition
    Allocs map[string]*DesiredTransition

    // Evals is the set of evaluations to create
    Evals []*Evaluation

    WriteRequest
}

// AllocStopRequest is used to stop and reschedule a running Allocation.
type AllocStopRequest struct {
    AllocID string

    WriteRequest
}

// AllocStopResponse is the response to an `AllocStopRequest`
type AllocStopResponse struct {
    // EvalID is the id of the follow up evaluation for the rescheduled alloc.
    EvalID string

    WriteMeta
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
    QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
    AllocID string
    QueryOptions
}

// AllocSignalRequest is used to signal a specific allocation
type AllocSignalRequest struct {
    AllocID string
    Task    string
    Signal  string
    QueryOptions
}

// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
    AllocIDs []string
    QueryOptions
}

// AllocRestartRequest is used to restart a specific allocation's tasks.
type AllocRestartRequest struct {
    AllocID  string
    TaskName string

    QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
    JobID string
    WriteRequest
}

// ServerMembersResponse has the list of servers in a cluster
type ServerMembersResponse struct {
    ServerName   string
    ServerRegion string
    ServerDC     string
    Members      []*ServerMember
}

// ServerMember holds information about a Nomad server agent in a cluster
type ServerMember struct {
    Name        string
    Addr        net.IP
    Port        uint16
    Tags        map[string]string
    Status      string
    ProtocolMin uint8
    ProtocolMax uint8
    ProtocolCur uint8
    DelegateMin uint8
    DelegateMax uint8
    DelegateCur uint8
}

// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// listed tasks in the given allocation.
type DeriveVaultTokenRequest struct {
    NodeID   string
    SecretID string
    AllocID  string
    Tasks    []string
    QueryOptions
}

// VaultAccessorsRequest is used to operate on a set of Vault accessors
type VaultAccessorsRequest struct {
    Accessors []*VaultAccessor
}

// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
    AllocID     string
    Task        string
    NodeID      string
    Accessor    string
    CreationTTL int

    // Raft Indexes
    CreateIndex uint64
}

// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
    // Tasks is a mapping between the task name and the wrapped token
    Tasks map[string]string

    // Error stores any error that occurred. Errors are stored here so we can
    // communicate whether it is retriable
    Error *RecoverableError

    QueryMeta
}
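
// exampleDeriveRetry is an editorial sketch, not part of the original
// file: DeriveVaultTokenResponse carries a *RecoverableError so callers
// can distinguish retriable failures from permanent ones.
// RecoverableError and its IsRecoverable method are defined elsewhere
// in this package.
func exampleDeriveRetry(resp *DeriveVaultTokenResponse) (retry bool, err error) {
    if resp.Error != nil {
        return resp.Error.IsRecoverable(), resp.Error
    }
    return false, nil
}
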
// GenericRequest is used for requests where no
// specific information is needed.
type GenericRequest struct {
    QueryOptions
}

// DeploymentListRequest is used to list the deployments
type DeploymentListRequest struct {
    QueryOptions
}

// DeploymentDeleteRequest is used for deleting deployments.
type DeploymentDeleteRequest struct {
    Deployments []string
    WriteRequest
}

// DeploymentStatusUpdateRequest is used to update the status of a deployment as
// well as optionally creating an evaluation atomically.
type DeploymentStatusUpdateRequest struct {
    // Eval, if set, is used to create an evaluation at the same time as
    // updating the status of a deployment.
    Eval *Evaluation

    // DeploymentUpdate is a status update to apply to the given
    // deployment.
    DeploymentUpdate *DeploymentStatusUpdate

    // Job is used to optionally upsert a job. This is used when setting the
    // allocation health results in a deployment failure and the deployment
    // auto-reverts to the latest stable job.
    Job *Job
}

// DeploymentAllocHealthRequest is used to set the health of a set of
// allocations as part of a deployment.
type DeploymentAllocHealthRequest struct {
    DeploymentID string

    // Marks these allocations as healthy, allowing further allocations
    // to be rolled.
    HealthyAllocationIDs []string

    // Any unhealthy allocations fail the deployment
    UnhealthyAllocationIDs []string

    WriteRequest
}

// ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft
type ApplyDeploymentAllocHealthRequest struct {
    DeploymentAllocHealthRequest

    // Timestamp is the timestamp to use when setting the allocations' health.
    Timestamp time.Time

    // An optional field to update the status of a deployment
    DeploymentUpdate *DeploymentStatusUpdate

    // Job is used to optionally upsert a job. This is used when setting the
    // allocation health results in a deployment failure and the deployment
    // auto-reverts to the latest stable job.
    Job *Job

    // An optional evaluation to create after promoting the canaries
    Eval *Evaluation
}

// DeploymentPromoteRequest is used to promote task groups in a deployment
type DeploymentPromoteRequest struct {
    DeploymentID string

    // All is to promote all task groups
    All bool

    // Groups is used to set the promotion status per task group
    Groups []string

    WriteRequest
}

// ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft
type ApplyDeploymentPromoteRequest struct {
    DeploymentPromoteRequest

    // An optional evaluation to create after promoting the canaries
    Eval *Evaluation
}

// DeploymentPauseRequest is used to pause a deployment
type DeploymentPauseRequest struct {
    DeploymentID string

    // Pause sets the pause status
    Pause bool

    WriteRequest
}

// DeploymentSpecificRequest is used to make a request specific to a particular
// deployment
type DeploymentSpecificRequest struct {
    DeploymentID string
    QueryOptions
}

// DeploymentFailRequest is used to fail a particular deployment
type DeploymentFailRequest struct {
    DeploymentID string
    WriteRequest
}

// SingleDeploymentResponse is used to respond with a single deployment
type SingleDeploymentResponse struct {
    Deployment *Deployment
    QueryMeta
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
    WriteMeta
}

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
    Build    string
    Versions map[string]int
    QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
    EvalID          string
    EvalCreateIndex uint64
    JobModifyIndex  uint64

    // Warnings contains any warnings about the given job. These may include
    // deprecation warnings.
    Warnings string

    QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
    EvalID          string
    EvalCreateIndex uint64
    JobModifyIndex  uint64
    QueryMeta
}

// JobBatchDeregisterResponse is used to respond to a batch job deregistration
type JobBatchDeregisterResponse struct {
    // JobEvals maps the job to its created evaluation
    JobEvals map[NamespacedID]string
    QueryMeta
}

// JobValidateResponse is the response from a validate request
type JobValidateResponse struct {
    // DriverConfigValidated indicates whether the agent validated the driver
    // config
    DriverConfigValidated bool

    // ValidationErrors is a list of validation errors
    ValidationErrors []string

    // Error is a string version of any error that may have occurred
    Error string

    // Warnings contains any warnings about the given job. These may include
    // deprecation warnings.
    Warnings string
}

// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
    HeartbeatTTL    time.Duration
    EvalIDs         []string
    EvalCreateIndex uint64
    NodeModifyIndex uint64

    // LeaderRPCAddr is the RPC address of the current Raft leader. If
    // empty, the current Nomad Server is in the minority of a partition.
    LeaderRPCAddr string

    // NumNodes is the number of Nomad nodes attached to this quorum of
    // Nomad Servers at the time of the response. This value can
    // fluctuate based on the health of the cluster between heartbeats.
    NumNodes int32

    // Servers is the full list of known Nomad servers in the local
    // region.
    Servers []*NodeServerInfo

    QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
    NodeModifyIndex uint64
    EvalIDs         []string
    EvalCreateIndex uint64
    WriteMeta
}

// NodeEligibilityUpdateResponse is used to respond to a node eligibility update
type NodeEligibilityUpdateResponse struct {
    NodeModifyIndex uint64
    EvalIDs         []string
    EvalCreateIndex uint64
    WriteMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
    Allocs []*Allocation
    QueryMeta
}

// NodeClientAllocsResponse is used to return allocs metadata for a single node
type NodeClientAllocsResponse struct {
    Allocs map[string]uint64

    // MigrateTokens are used when ACLs are enabled to allow cross node,
    // authenticated access to sticky volumes
    MigrateTokens map[string]string

    QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
    Node *Node
    QueryMeta
}

// NodeListResponse is used for a list request
type NodeListResponse struct {
    Nodes []*NodeListStub
    QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
    Job *Job
    QueryMeta
}

// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
    JobSummary *JobSummary
    QueryMeta
}

type JobDispatchResponse struct {
    DispatchedJobID string
    EvalID          string
    EvalCreateIndex uint64
    JobCreateIndex  uint64
    WriteMeta
}

// JobListResponse is used for a list request
type JobListResponse struct {
    Jobs []*JobListStub
    QueryMeta
}

// JobVersionsRequest is used to get a job's versions
type JobVersionsRequest struct {
    JobID string
    Diffs bool
    QueryOptions
}

// JobVersionsResponse is used for a job get versions request
type JobVersionsResponse struct {
    Versions []*Job
    Diffs    []*JobDiff
    QueryMeta
}

// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
    // Annotations stores annotations explaining decisions the scheduler made.
    Annotations *PlanAnnotations

    // FailedTGAllocs is the placement failures per task group.
    FailedTGAllocs map[string]*AllocMetric

    // JobModifyIndex is the modification index of the job. The value can be
    // used when running `nomad run` to ensure that the Job wasn't modified
    // since the last plan. If the job is being created, the value is zero.
    JobModifyIndex uint64

    // CreatedEvals is the set of evaluations created by the scheduler. The
    // reasons for this can be rolling-updates or blocked evals.
    CreatedEvals []*Evaluation

    // Diff contains the diff of the job and annotations on whether the change
    // causes an in-place update or create/destroy
    Diff *JobDiff

    // NextPeriodicLaunch is the time at which the job would next be launched if
    // submitted.
    NextPeriodicLaunch time.Time

    // Warnings contains any warnings about the given job. These may include
    // deprecation warnings.
    Warnings string

    WriteMeta
}

// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
    Alloc *Allocation
    QueryMeta
}

// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
    Allocs []*Allocation
    QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
    Allocations []*AllocListStub
    QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
    Evaluations []*Evaluation
    QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
    Eval *Evaluation
    QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
    Eval  *Evaluation
    Token string

    // WaitIndex is the Raft index the worker should wait for before invoking
    // the scheduler.
    WaitIndex uint64

    QueryMeta
}

// GetWaitIndex is used to retrieve the Raft index that state should be at
// or beyond before invoking the scheduler.
func (e *EvalDequeueResponse) GetWaitIndex() uint64 {
    // Prefer the wait index sent. This will be populated on all responses from
    // 0.7.0 and above
    if e.WaitIndex != 0 {
        return e.WaitIndex
    } else if e.Eval != nil {
        return e.Eval.ModifyIndex
    }

    // This should never happen
    return 1
}

// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
    Result *PlanResult
    WriteMeta
}

// AllocListResponse is used for a list request
type AllocListResponse struct {
    Allocations []*AllocListStub
    QueryMeta
}

// DeploymentListResponse is used for a list request
type DeploymentListResponse struct {
    Deployments []*Deployment
    QueryMeta
}

// EvalListResponse is used for a list request
type EvalListResponse struct {
    Evaluations []*Evaluation
    QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
    Allocations []*AllocListStub
    QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
    EvalID          string
    EvalCreateIndex uint64
    WriteMeta
}

// DeploymentUpdateResponse is used to respond to a deployment change. The
// response will include the modify index of the deployment as well as details
// of any triggered evaluation.
type DeploymentUpdateResponse struct {
    EvalID                string
    EvalCreateIndex       uint64
    DeploymentModifyIndex uint64

    // RevertedJobVersion is the version the job was reverted to. If unset, the
    // job wasn't reverted.
    RevertedJobVersion *uint64

    WriteMeta
}

// NodeConnQueryResponse is used to respond to a query of whether a server has
// a connection to a specific Node
type NodeConnQueryResponse struct {
    // Connected indicates whether a connection to the Client exists
    Connected bool

    // Established marks the time at which the connection was established
    Established time.Time

    QueryMeta
}

// EmitNodeEventsRequest is a request to update the node events source
// with a new client-side event
type EmitNodeEventsRequest struct {
    // NodeEvents is a map where the key is a node id, and the value is a list
    // of events for that node
    NodeEvents map[string][]*NodeEvent

    WriteRequest
}

// EmitNodeEventsResponse is a response to the client about the status of
// the node event source update.
type EmitNodeEventsResponse struct {
    WriteMeta
}

const (
    NodeEventSubsystemDrain     = "Drain"
    NodeEventSubsystemDriver    = "Driver"
    NodeEventSubsystemHeartbeat = "Heartbeat"
    NodeEventSubsystemCluster   = "Cluster"
)

// NodeEvent is a single unit representing a node's state change
type NodeEvent struct {
    Message     string
    Subsystem   string
    Details     map[string]string
    Timestamp   time.Time
    CreateIndex uint64
}

func (ne *NodeEvent) String() string {
    var details []string
    for k, v := range ne.Details {
        details = append(details, fmt.Sprintf("%s: %s", k, v))
    }

    return fmt.Sprintf("Message: %s, Subsystem: %s, Details: %s, Timestamp: %s", ne.Message, ne.Subsystem, strings.Join(details, ","), ne.Timestamp.String())
}

func (ne *NodeEvent) Copy() *NodeEvent {
    c := new(NodeEvent)
    *c = *ne
    c.Details = helper.CopyMapStringString(ne.Details)
    return c
}

// NewNodeEvent generates a new node event storing the current time as the
// timestamp
func NewNodeEvent() *NodeEvent {
    return &NodeEvent{Timestamp: time.Now()}
}

// SetMessage is used to set the message on the node event
func (ne *NodeEvent) SetMessage(msg string) *NodeEvent {
    ne.Message = msg
    return ne
}

// SetSubsystem is used to set the subsystem on the node event
func (ne *NodeEvent) SetSubsystem(sys string) *NodeEvent {
    ne.Subsystem = sys
    return ne
}

// SetTimestamp is used to set the timestamp on the node event
func (ne *NodeEvent) SetTimestamp(ts time.Time) *NodeEvent {
    ne.Timestamp = ts
    return ne
}

// AddDetail is used to add a detail to the node event
func (ne *NodeEvent) AddDetail(k, v string) *NodeEvent {
    if ne.Details == nil {
        ne.Details = make(map[string]string, 1)
    }
    ne.Details[k] = v
    return ne
}
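
// exampleNodeEvent is an editorial sketch, not part of the original
// file, of the fluent builder above: each setter returns the event, so
// construction chains naturally. The message and detail values are
// made up for illustration.
func exampleNodeEvent() *NodeEvent {
    return NewNodeEvent().
        SetSubsystem(NodeEventSubsystemDrain).
        SetMessage("Node drain strategy set").
        AddDetail("deadline", "1h")
}
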
const (
    NodeStatusInit  = "initializing"
    NodeStatusReady = "ready"
    NodeStatusDown  = "down"
)

// ShouldDrainNode checks if a given node status should trigger an
// evaluation. Some states don't require any further action.
func ShouldDrainNode(status string) bool {
    switch status {
    case NodeStatusInit, NodeStatusReady:
        return false
    case NodeStatusDown:
        return true
    default:
        panic(fmt.Sprintf("unhandled node status %s", status))
    }
}

// ValidNodeStatus is used to check if a node status is valid
func ValidNodeStatus(status string) bool {
    switch status {
    case NodeStatusInit, NodeStatusReady, NodeStatusDown:
        return true
    default:
        return false
    }
}

const (
    // NodeSchedulingEligible and Ineligible mark the node as eligible or not,
    // respectively, for receiving allocations. This is orthogonal to the node
    // status being ready.
    NodeSchedulingEligible   = "eligible"
    NodeSchedulingIneligible = "ineligible"
)

// DrainSpec describes a Node's desired drain behavior.
type DrainSpec struct {
    // Deadline is the duration after StartTime when the remaining
    // allocations on a draining Node should be told to stop.
    Deadline time.Duration

    // IgnoreSystemJobs allows system jobs to remain on the node even though it
    // has been marked for draining.
    IgnoreSystemJobs bool
}

// DrainStrategy describes a Node's drain behavior.
type DrainStrategy struct {
    // DrainSpec is the user declared drain specification
    DrainSpec

    // ForceDeadline is the deadline time for the drain after which drains will
    // be forced
    ForceDeadline time.Time

    // StartedAt is the time the drain process started
    StartedAt time.Time
}

func (d *DrainStrategy) Copy() *DrainStrategy {
    if d == nil {
        return nil
    }

    nd := new(DrainStrategy)
    *nd = *d
    return nd
}

// DeadlineTime returns whether the drain strategy allows an infinite
// duration, or otherwise the deadline time. A force drain is captured by the
// deadline time being in the past.
func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) {
    // Treat the nil case as a force drain so during an upgrade where a node may
    // not have a drain strategy but has Drain set to true, it is treated as a
    // force to mimic old behavior.
    if d == nil {
        return false, time.Time{}
    }

    ns := d.Deadline.Nanoseconds()
    switch {
    case ns < 0: // Force
        return false, time.Time{}
    case ns == 0: // Infinite
        return true, time.Time{}
    default:
        return false, d.ForceDeadline
    }
}

func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
    if d == nil && o == nil {
        return true
    } else if o != nil && d == nil {
        return false
    } else if d != nil && o == nil {
        return false
    }

    // Compare values
    if d.ForceDeadline != o.ForceDeadline {
        return false
    } else if d.Deadline != o.Deadline {
        return false
    } else if d.IgnoreSystemJobs != o.IgnoreSystemJobs {
        return false
    }

    return true
}
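
// exampleDeadline is an editorial sketch, not part of the original
// file, of interpreting DeadlineTime above: a negative Deadline (or a
// nil strategy) reads as a forced drain, a zero Deadline as a drain
// with no deadline, and a positive Deadline yields the concrete
// ForceDeadline.
func exampleDeadline(d *DrainStrategy) string {
    infinite, deadline := d.DeadlineTime()
    switch {
    case infinite:
        return "drain with no deadline"
    case deadline.IsZero():
        return "forced drain"
    default:
        return "drain until " + deadline.String()
    }
}
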
// Node is a representation of a schedulable client node
type Node struct {
    // ID is a unique identifier for the node. It can be constructed
    // by doing a concatenation of the Name and Datacenter as a simple
    // approach. Alternatively a UUID may be used.
    ID string

    // SecretID is an ID that is only known by the Node and the set of Servers.
    // It is not accessible via the API and is used to authenticate nodes
    // conducting privileged activities.
    SecretID string

    // Datacenter for this node
    Datacenter string

    // Node name
    Name string

    // HTTPAddr is the address on which the Nomad client is listening for http
    // requests
    HTTPAddr string

    // TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
    TLSEnabled bool

    // Attributes is an arbitrary set of key/value
    // data that can be used for constraints. Examples
    // include "kernel.name=linux", "arch=386", "driver.docker=1",
    // "docker.runtime=1.8.3"
    Attributes map[string]string

    // NodeResources captures the available resources on the client.
    NodeResources *NodeResources

    // ReservedResources captures the set of resources on the client that are
    // reserved from scheduling.
    ReservedResources *NodeReservedResources

    // Resources is the available resources on the client.
    // For example 'cpu=2' 'memory=2048'
    // COMPAT(0.10): Remove in 0.10
    Resources *Resources

    // Reserved is the set of resources that are reserved,
    // and should be subtracted from the total resources for
    // the purposes of scheduling. This may provide certain
    // high-watermark tolerances or account for external schedulers
    // consuming resources.
    Reserved *Resources

    // Links are used to 'link' this client to external
    // systems. For example 'consul=foo.dc1' 'aws=i-83212'
    // 'ami=ami-123'
    Links map[string]string

    // Meta is used to associate arbitrary metadata with this
    // client. This is opaque to Nomad.
    Meta map[string]string

    // NodeClass is an opaque identifier used to group nodes
    // together for the purpose of determining scheduling pressure.
    NodeClass string

    // Token is the SecretID of an ACL token to use to authenticate RPC requests
    Token string

    // ComputedClass is a unique id that identifies nodes with a common set of
    // attributes and capabilities.
    ComputedClass string

    // COMPAT: Remove in Nomad 0.9
    // Drain is controlled by the servers, and not the client.
    // If true, no jobs will be scheduled to this node, and existing
    // allocations will be drained. Superseded by DrainStrategy in Nomad
    // 0.8 but kept for backward compat.
    Drain bool

    // DrainStrategy determines the node's draining behavior. Will be nil
    // when Drain=false.
    DrainStrategy *DrainStrategy

    // SchedulingEligibility determines whether this node will receive new
    // placements.
    SchedulingEligibility string

    // Status of this node
    Status string

    // StatusDescription is meant to provide more human-readable information
    StatusDescription string

    // StatusUpdatedAt is the timestamp at which the state of the node was
    // updated
    StatusUpdatedAt int64

    // Events is the most recent set of events generated for the node,
    // retaining at most MaxRetainedNodeEvents entries at a time
    Events []*NodeEvent

    // Drivers is a map of driver names to current driver information
    Drivers map[string]*DriverInfo

    // HostVolumes is a map of host volume names to their configuration
    HostVolumes map[string]*ClientHostVolumeConfig

    // Raft Indexes
    CreateIndex uint64
    ModifyIndex uint64
}

// Ready returns true if the node is ready for running allocations
func (n *Node) Ready() bool {
    // Drain is checked directly to support pre-0.8 Node data
    return n.Status == NodeStatusReady && !n.Drain && n.SchedulingEligibility == NodeSchedulingEligible
}

func (n *Node) Canonicalize() {
    if n == nil {
        return
    }

    // COMPAT Remove in 0.10
    // In v0.8.0 we introduced scheduling eligibility, so we need to set it for
    // upgrading nodes
    if n.SchedulingEligibility == "" {
        if n.Drain {
            n.SchedulingEligibility = NodeSchedulingIneligible
        } else {
            n.SchedulingEligibility = NodeSchedulingEligible
        }
    }
}
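
// exampleCanonicalizeUpgrade is an editorial sketch, not part of the
// original file, of the upgrade path above: a pre-0.8 node snapshot
// that only carries the legacy Drain flag is mapped onto the 0.8+
// eligibility field when canonicalized.
func exampleCanonicalizeUpgrade() bool {
    n := &Node{Drain: true} // legacy data: only the boolean is set
    n.Canonicalize()
    return n.SchedulingEligibility == NodeSchedulingIneligible // true
}
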
func (n *Node) Copy() *Node {
    if n == nil {
        return nil
    }
    nn := new(Node)
    *nn = *n
    nn.Attributes = helper.CopyMapStringString(nn.Attributes)
    nn.Resources = nn.Resources.Copy()
    nn.Reserved = nn.Reserved.Copy()
    nn.NodeResources = nn.NodeResources.Copy()
    nn.ReservedResources = nn.ReservedResources.Copy()
    nn.Links = helper.CopyMapStringString(nn.Links)
    nn.Meta = helper.CopyMapStringString(nn.Meta)
    nn.Events = copyNodeEvents(n.Events)
    nn.DrainStrategy = nn.DrainStrategy.Copy()
    nn.Drivers = copyNodeDrivers(n.Drivers)
    nn.HostVolumes = copyNodeHostVolumes(n.HostVolumes)
    return nn
}

// copyNodeEvents is a helper to copy a list of NodeEvents
func copyNodeEvents(events []*NodeEvent) []*NodeEvent {
    l := len(events)
    if l == 0 {
        return nil
    }

    c := make([]*NodeEvent, l)
    for i, event := range events {
        c[i] = event.Copy()
    }
    return c
}

// copyNodeDrivers is a helper to copy a map of DriverInfo
func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo {
    l := len(drivers)
    if l == 0 {
        return nil
    }

    c := make(map[string]*DriverInfo, l)
    for driver, info := range drivers {
        c[driver] = info.Copy()
    }
    return c
}

// copyNodeHostVolumes is a helper to copy a map of string to Volume
func copyNodeHostVolumes(volumes map[string]*ClientHostVolumeConfig) map[string]*ClientHostVolumeConfig {
    l := len(volumes)
    if l == 0 {
        return nil
    }

    c := make(map[string]*ClientHostVolumeConfig, l)
    for volume, v := range volumes {
        c[volume] = v.Copy()
    }

    return c
}

// TerminalStatus returns whether the current status is terminal and
// will no longer transition.
func (n *Node) TerminalStatus() bool {
    switch n.Status {
    case NodeStatusDown:
        return true
    default:
        return false
    }
}

// COMPAT(0.11): Remove in 0.11
// ComparableReservedResources returns the reserved resources on the node
// handling upgrade paths. Reserved networks must be handled separately. After
// 0.11 calls to this should be replaced with:
// node.ReservedResources.Comparable()
func (n *Node) ComparableReservedResources() *ComparableResources {
    // See if we can no-op
    if n.Reserved == nil && n.ReservedResources == nil {
        return nil
    }

    // Node already has 0.9+ behavior
    if n.ReservedResources != nil {
        return n.ReservedResources.Comparable()
    }

    // Upgrade path
    return &ComparableResources{
        Flattened: AllocatedTaskResources{
            Cpu: AllocatedCpuResources{
                CpuShares: int64(n.Reserved.CPU),
            },
            Memory: AllocatedMemoryResources{
                MemoryMB: int64(n.Reserved.MemoryMB),
            },
        },
        Shared: AllocatedSharedResources{
            DiskMB: int64(n.Reserved.DiskMB),
        },
    }
}

// COMPAT(0.11): Remove in 0.11
// ComparableResources returns the resources on the node
// handling upgrade paths. Networking must be handled separately. After 0.11
// calls to this should be replaced with: node.NodeResources.Comparable()
func (n *Node) ComparableResources() *ComparableResources {
    // Node already has 0.9+ behavior
    if n.NodeResources != nil {
        return n.NodeResources.Comparable()
    }

    // Upgrade path
    return &ComparableResources{
        Flattened: AllocatedTaskResources{
            Cpu: AllocatedCpuResources{
                CpuShares: int64(n.Resources.CPU),
            },
            Memory: AllocatedMemoryResources{
                MemoryMB: int64(n.Resources.MemoryMB),
            },
        },
        Shared: AllocatedSharedResources{
            DiskMB: int64(n.Resources.DiskMB),
        },
    }
}

// Stub returns a summarized version of the node
func (n *Node) Stub() *NodeListStub {

    addr, _, _ := net.SplitHostPort(n.HTTPAddr)

    return &NodeListStub{
        Address:               addr,
        ID:                    n.ID,
        Datacenter:            n.Datacenter,
        Name:                  n.Name,
        NodeClass:             n.NodeClass,
        Version:               n.Attributes["nomad.version"],
        Drain:                 n.Drain,
        SchedulingEligibility: n.SchedulingEligibility,
        Status:                n.Status,
        StatusDescription:     n.StatusDescription,
        Drivers:               n.Drivers,
        CreateIndex:           n.CreateIndex,
        ModifyIndex:           n.ModifyIndex,
    }
}

// NodeListStub is used to return a subset of node information
// for the node list
type NodeListStub struct {
    Address               string
    ID                    string
    Datacenter            string
    Name                  string
    NodeClass             string
    Version               string
    Drain                 bool
    SchedulingEligibility string
    Status                string
    StatusDescription     string
    Drivers               map[string]*DriverInfo
    CreateIndex           uint64
    ModifyIndex           uint64
}

// Resources is used to define the resources available
// on a client
type Resources struct {
    CPU      int
    MemoryMB int
    DiskMB   int
    IOPS     int // COMPAT(0.10): Only being used to issue warnings
    Networks Networks
    Devices  ResourceDevices
}

const (
    BytesInMegabyte = 1024 * 1024
)

// DefaultResources is a small resources object that contains the
// default resource requests that we will provide to an object.
// --- THIS FUNCTION IS REPLICATED IN api/resources.go and should
// be kept in sync.
func DefaultResources() *Resources {
    return &Resources{
        CPU:      100,
        MemoryMB: 300,
    }
}

// MinResources is a small resources object that contains the
// absolute minimum resources that we will provide to an object.
// This should not be confused with the defaults which are
// provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN
// api/resources.go and should be kept in sync.
func MinResources() *Resources {
    return &Resources{
        CPU:      20,
        MemoryMB: 10,
    }
}

// DiskInBytes returns the amount of disk resources in bytes.
func (r *Resources) DiskInBytes() int64 {
    return int64(r.DiskMB * BytesInMegabyte)
}

func (r *Resources) Validate() error {
    var mErr multierror.Error
    if err := r.MeetsMinResources(); err != nil {
        mErr.Errors = append(mErr.Errors, err)
    }

    // Ensure the task isn't asking for disk resources
    if r.DiskMB > 0 {
        mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level."))
    }

    for i, d := range r.Devices {
        if err := d.Validate(); err != nil {
            mErr.Errors = append(mErr.Errors, fmt.Errorf("device %d failed validation: %v", i+1, err))
        }
    }

    return mErr.ErrorOrNil()
}

// Merge merges this resource with another resource: any non-zero (or
// non-empty) field of the other resource overrides the receiver's value.
// COMPAT(0.10): Remove in 0.10
func (r *Resources) Merge(other *Resources) {
    if other.CPU != 0 {
        r.CPU = other.CPU
    }
    if other.MemoryMB != 0 {
        r.MemoryMB = other.MemoryMB
    }
    if other.DiskMB != 0 {
        r.DiskMB = other.DiskMB
    }
    if len(other.Networks) != 0 {
        r.Networks = other.Networks
    }
    if len(other.Devices) != 0 {
        r.Devices = other.Devices
    }
}
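
// exampleMerge is an editorial sketch, not part of the original file,
// of Merge's override semantics shown above: zero-valued fields of the
// other resource leave the receiver untouched.
func exampleMerge() *Resources {
    base := &Resources{CPU: 100, MemoryMB: 300}
    base.Merge(&Resources{MemoryMB: 512}) // CPU keeps 100, MemoryMB becomes 512
    return base
}
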
// COMPAT(0.10): Remove in 0.10
func (r *Resources) Canonicalize() {
	// Ensure that empty and nil slices are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
	if len(r.Networks) == 0 {
		r.Networks = nil
	}
	if len(r.Devices) == 0 {
		r.Devices = nil
	}

	for _, n := range r.Networks {
		n.Canonicalize()
	}
}

// MeetsMinResources returns an error if the resources specified are less than
// the minimum allowed.
// This is based on the minimums defined in the Resources type.
// COMPAT(0.10): Remove in 0.10
func (r *Resources) MeetsMinResources() error {
	var mErr multierror.Error
	minResources := MinResources()
	if r.CPU < minResources.CPU {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU))
	}
	if r.MemoryMB < minResources.MemoryMB {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB))
	}
	for i, n := range r.Networks {
		if err := n.MeetsMinResources(); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err))
		}
	}

	return mErr.ErrorOrNil()
}

// Copy returns a deep copy of the resources
func (r *Resources) Copy() *Resources {
	if r == nil {
		return nil
	}
	newR := new(Resources)
	*newR = *r

	// Copy the network objects
	newR.Networks = r.Networks.Copy()

	// Copy the devices
	if r.Devices != nil {
		n := len(r.Devices)
		newR.Devices = make([]*RequestedDevice, n)
		for i := 0; i < n; i++ {
			newR.Devices[i] = r.Devices[i].Copy()
		}
	}

	return newR
}

// NetIndex finds the matching net index using device name
// COMPAT(0.10): Remove in 0.10
func (r *Resources) NetIndex(n *NetworkResource) int {
	return r.Networks.NetIndex(n)
}

// Superset checks if one set of resources is a superset
// of another. This ignores network resources, and the NetworkIndex
// should be used for that.
// COMPAT(0.10): Remove in 0.10
func (r *Resources) Superset(other *Resources) (bool, string) {
	if r.CPU < other.CPU {
		return false, "cpu"
	}
	if r.MemoryMB < other.MemoryMB {
		return false, "memory"
	}
	if r.DiskMB < other.DiskMB {
		return false, "disk"
	}
	return true, ""
}
// Add adds the resources of the delta to this, potentially
// returning an error if not possible.
// COMPAT(0.10): Remove in 0.10
func (r *Resources) Add(delta *Resources) error {
	if delta == nil {
		return nil
	}
	r.CPU += delta.CPU
	r.MemoryMB += delta.MemoryMB
	r.DiskMB += delta.DiskMB

	for _, n := range delta.Networks {
		// Find the matching interface by IP or CIDR
		idx := r.NetIndex(n)
		if idx == -1 {
			r.Networks = append(r.Networks, n.Copy())
		} else {
			r.Networks[idx].Add(n)
		}
	}
	return nil
}

// COMPAT(0.10): Remove in 0.10
func (r *Resources) GoString() string {
	return fmt.Sprintf("*%#v", *r)
}

type Port struct {
	Label string
	Value int
	To    int
}

// NetworkResource is used to represent available network
// resources
type NetworkResource struct {
	Mode          string // Mode of the network
	Device        string // Name of the device
	CIDR          string // CIDR block of addresses
	IP            string // Host IP address
	MBits         int    // Throughput
	ReservedPorts []Port // Host Reserved ports
	DynamicPorts  []Port // Host Dynamically assigned ports
}

func (nr *NetworkResource) Equals(other *NetworkResource) bool {
	if nr.Mode != other.Mode {
		return false
	}

	if nr.Device != other.Device {
		return false
	}

	if nr.CIDR != other.CIDR {
		return false
	}

	if nr.IP != other.IP {
		return false
	}

	if nr.MBits != other.MBits {
		return false
	}

	if len(nr.ReservedPorts) != len(other.ReservedPorts) {
		return false
	}

	for i, port := range nr.ReservedPorts {
		if len(other.ReservedPorts) <= i {
			return false
		}
		if port != other.ReservedPorts[i] {
			return false
		}
	}

	if len(nr.DynamicPorts) != len(other.DynamicPorts) {
		return false
	}
	for i, port := range nr.DynamicPorts {
		if len(other.DynamicPorts) <= i {
			return false
		}
		if port != other.DynamicPorts[i] {
			return false
		}
	}

	return true
}

func (n *NetworkResource) Canonicalize() {
	// Ensure that empty and nil slices are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
	if len(n.ReservedPorts) == 0 {
		n.ReservedPorts = nil
	}
	if len(n.DynamicPorts) == 0 {
		n.DynamicPorts = nil
	}
}

// MeetsMinResources returns an error if the resources specified are less than
// the minimum allowed.
func (n *NetworkResource) MeetsMinResources() error {
	var mErr multierror.Error
	if n.MBits < 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits))
	}
	return mErr.ErrorOrNil()
}

// Copy returns a deep copy of the network resource
func (n *NetworkResource) Copy() *NetworkResource {
	if n == nil {
		return nil
	}
	newR := new(NetworkResource)
	*newR = *n
	if n.ReservedPorts != nil {
		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
		copy(newR.ReservedPorts, n.ReservedPorts)
	}
	if n.DynamicPorts != nil {
		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
		copy(newR.DynamicPorts, n.DynamicPorts)
	}
	return newR
}
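// Example (illustrative sketch, not part of the upstream source): Add sums
// CPU, memory, and disk, and merges networks in place when NetIndex finds a
// matching device.
//
//	r := &Resources{CPU: 100, Networks: Networks{{Device: "eth0", MBits: 10}}}
//	r.Add(&Resources{CPU: 50, Networks: Networks{{Device: "eth0", MBits: 20}}})
//	// r.CPU == 150; r.Networks[0].MBits == 30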
// Add adds the resources of the delta to this, potentially
// returning an error if not possible.
func (n *NetworkResource) Add(delta *NetworkResource) {
	if len(delta.ReservedPorts) > 0 {
		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
	}
	n.MBits += delta.MBits
	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
}

func (n *NetworkResource) GoString() string {
	return fmt.Sprintf("*%#v", *n)
}

// PortLabels returns a map of port labels to their assigned host ports.
func (n *NetworkResource) PortLabels() map[string]int {
	num := len(n.ReservedPorts) + len(n.DynamicPorts)
	labelValues := make(map[string]int, num)
	for _, port := range n.ReservedPorts {
		labelValues[port.Label] = port.Value
	}
	for _, port := range n.DynamicPorts {
		labelValues[port.Label] = port.Value
	}
	return labelValues
}

// PortForService returns the Connect port for the given service. Returns false
// if no port was found for a service with that name.
func (n *NetworkResource) PortForService(serviceName string) (Port, bool) {
	label := fmt.Sprintf("%s-%s", ConnectProxyPrefix, serviceName)
	for _, port := range n.ReservedPorts {
		if port.Label == label {
			return port, true
		}
	}
	for _, port := range n.DynamicPorts {
		if port.Label == label {
			return port, true
		}
	}

	return Port{}, false
}

// Networks defined for a task on the Resources struct.
type Networks []*NetworkResource

func (ns Networks) Copy() Networks {
	if len(ns) == 0 {
		return nil
	}

	out := make([]*NetworkResource, len(ns))
	for i := range ns {
		out[i] = ns[i].Copy()
	}
	return out
}

// Port returns the port assignment and IP for the given label, or empty
// values if no port with that label exists.
func (ns Networks) Port(label string) (string, int) {
	for _, n := range ns {
		for _, p := range n.ReservedPorts {
			if p.Label == label {
				return n.IP, p.Value
			}
		}
		for _, p := range n.DynamicPorts {
			if p.Label == label {
				return n.IP, p.Value
			}
		}
	}
	return "", 0
}

func (ns Networks) NetIndex(n *NetworkResource) int {
	for idx, net := range ns {
		if net.Device == n.Device {
			return idx
		}
	}
	return -1
}
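// Example (illustrative sketch, not part of the upstream source): Port scans
// reserved and dynamic ports across all networks for a label.
//
//	ns := Networks{{
//		IP:           "10.0.0.1",
//		DynamicPorts: []Port{{Label: "http", Value: 22000}},
//	}}
//	ip, port := ns.Port("http")
//	// ip == "10.0.0.1", port == 22000; unknown labels yield ("", 0)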
// RequestedDevice is used to request a device for a task.
type RequestedDevice struct {
	// Name is the request name. The possible values are as follows:
	// * <type>: A single value only specifies the type of request.
	// * <vendor>/<type>: A single slash delimiter assumes the vendor and type of device is specified.
	// * <vendor>/<type>/<name>: Two slash delimiters assume vendor, type and specific model are specified.
	//
	// Examples are as follows:
	// * "gpu"
	// * "nvidia/gpu"
	// * "nvidia/gpu/GTX2080Ti"
	Name string

	// Count is the number of requested devices
	Count uint64

	// Constraints are a set of constraints to apply when selecting the device
	// to use.
	Constraints Constraints

	// Affinities are a set of affinities to apply when selecting the device
	// to use.
	Affinities Affinities
}

func (r *RequestedDevice) Equals(o *RequestedDevice) bool {
	if r == o {
		return true
	}
	if r == nil || o == nil {
		return false
	}
	return r.Name == o.Name &&
		r.Count == o.Count &&
		r.Constraints.Equals(&o.Constraints) &&
		r.Affinities.Equals(&o.Affinities)
}

func (r *RequestedDevice) Copy() *RequestedDevice {
	if r == nil {
		return nil
	}

	nr := *r
	nr.Constraints = CopySliceConstraints(nr.Constraints)
	nr.Affinities = CopySliceAffinities(nr.Affinities)

	return &nr
}

func (r *RequestedDevice) ID() *DeviceIdTuple {
	if r == nil || r.Name == "" {
		return nil
	}

	parts := strings.SplitN(r.Name, "/", 3)
	switch len(parts) {
	case 1:
		return &DeviceIdTuple{
			Type: parts[0],
		}
	case 2:
		return &DeviceIdTuple{
			Vendor: parts[0],
			Type:   parts[1],
		}
	default:
		return &DeviceIdTuple{
			Vendor: parts[0],
			Type:   parts[1],
			Name:   parts[2],
		}
	}
}

func (r *RequestedDevice) Validate() error {
	if r == nil {
		return nil
	}

	var mErr multierror.Error
	if r.Name == "" {
		multierror.Append(&mErr, errors.New("device name must be given as one of the following: type, vendor/type, or vendor/type/name"))
	}

	for idx, constr := range r.Constraints {
		// Ensure that the constraint doesn't use an operand we do not allow
		switch constr.Operand {
		case ConstraintDistinctHosts, ConstraintDistinctProperty:
			outer := fmt.Errorf("Constraint %d validation failed: using unsupported operand %q", idx+1, constr.Operand)
			multierror.Append(&mErr, outer)
		default:
			if err := constr.Validate(); err != nil {
				outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
				multierror.Append(&mErr, outer)
			}
		}
	}
	for idx, affinity := range r.Affinities {
		if err := affinity.Validate(); err != nil {
			outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
			multierror.Append(&mErr, outer)
		}
	}

	return mErr.ErrorOrNil()
}

// NodeResources is used to define the resources available on a client node.
type NodeResources struct {
	Cpu      NodeCpuResources
	Memory   NodeMemoryResources
	Disk     NodeDiskResources
	Networks Networks
	Devices  []*NodeDeviceResource
}

func (n *NodeResources) Copy() *NodeResources {
	if n == nil {
		return nil
	}

	newN := new(NodeResources)
	*newN = *n

	// Copy the networks
	newN.Networks = n.Networks.Copy()

	// Copy the devices
	if n.Devices != nil {
		devices := len(n.Devices)
		newN.Devices = make([]*NodeDeviceResource, devices)
		for i := 0; i < devices; i++ {
			newN.Devices[i] = n.Devices[i].Copy()
		}
	}

	return newN
}
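// Example (illustrative sketch, not part of the upstream source): ID splits
// the request name on "/" into the vendor/type/name tuple.
//
//	(&RequestedDevice{Name: "nvidia/gpu/GTX2080Ti"}).ID()
//	// => &DeviceIdTuple{Vendor: "nvidia", Type: "gpu", Name: "GTX2080Ti"}
//	(&RequestedDevice{Name: "gpu"}).ID()
//	// => &DeviceIdTuple{Type: "gpu"}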
// Comparable returns a comparable version of the node's resources. This
// conversion can be lossy so care must be taken when using it.
func (n *NodeResources) Comparable() *ComparableResources {
	if n == nil {
		return nil
	}

	c := &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares: n.Cpu.CpuShares,
			},
			Memory: AllocatedMemoryResources{
				MemoryMB: n.Memory.MemoryMB,
			},
			Networks: n.Networks,
		},
		Shared: AllocatedSharedResources{
			DiskMB: n.Disk.DiskMB,
		},
	}
	return c
}

func (n *NodeResources) Merge(o *NodeResources) {
	if o == nil {
		return
	}

	n.Cpu.Merge(&o.Cpu)
	n.Memory.Merge(&o.Memory)
	n.Disk.Merge(&o.Disk)

	if len(o.Networks) != 0 {
		n.Networks = o.Networks
	}

	if len(o.Devices) != 0 {
		n.Devices = o.Devices
	}
}

func (n *NodeResources) Equals(o *NodeResources) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if !n.Cpu.Equals(&o.Cpu) {
		return false
	}
	if !n.Memory.Equals(&o.Memory) {
		return false
	}
	if !n.Disk.Equals(&o.Disk) {
		return false
	}
	if !n.Networks.Equals(&o.Networks) {
		return false
	}

	// Check the devices
	if !DevicesEquals(n.Devices, o.Devices) {
		return false
	}

	return true
}

// Equals compares Networks as a set
func (n *Networks) Equals(o *Networks) bool {
	if n == o {
		return true
	}
	if n == nil || o == nil {
		return false
	}
	if len(*n) != len(*o) {
		return false
	}
SETEQUALS:
	for _, ne := range *n {
		for _, oe := range *o {
			if ne.Equals(oe) {
				continue SETEQUALS
			}
		}
		return false
	}
	return true
}

// DevicesEquals returns true if the two device arrays are set equal
func DevicesEquals(d1, d2 []*NodeDeviceResource) bool {
	if len(d1) != len(d2) {
		return false
	}
	idMap := make(map[DeviceIdTuple]*NodeDeviceResource, len(d1))
	for _, d := range d1 {
		idMap[*d.ID()] = d
	}
	for _, otherD := range d2 {
		if d, ok := idMap[*otherD.ID()]; !ok || !d.Equals(otherD) {
			return false
		}
	}

	return true
}
// NodeCpuResources captures the CPU resources of the node.
type NodeCpuResources struct {
	// CpuShares is the CPU shares available. This is calculated by number of
	// cores multiplied by the core frequency.
	CpuShares int64
}

func (n *NodeCpuResources) Merge(o *NodeCpuResources) {
	if o == nil {
		return
	}

	if o.CpuShares != 0 {
		n.CpuShares = o.CpuShares
	}
}

func (n *NodeCpuResources) Equals(o *NodeCpuResources) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if n.CpuShares != o.CpuShares {
		return false
	}

	return true
}

// NodeMemoryResources captures the memory resources of the node
type NodeMemoryResources struct {
	// MemoryMB is the total available memory on the node
	MemoryMB int64
}

func (n *NodeMemoryResources) Merge(o *NodeMemoryResources) {
	if o == nil {
		return
	}

	if o.MemoryMB != 0 {
		n.MemoryMB = o.MemoryMB
	}
}

func (n *NodeMemoryResources) Equals(o *NodeMemoryResources) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if n.MemoryMB != o.MemoryMB {
		return false
	}

	return true
}

// NodeDiskResources captures the disk resources of the node
type NodeDiskResources struct {
	// DiskMB is the total available disk space on the node
	DiskMB int64
}

func (n *NodeDiskResources) Merge(o *NodeDiskResources) {
	if o == nil {
		return
	}
	if o.DiskMB != 0 {
		n.DiskMB = o.DiskMB
	}
}

func (n *NodeDiskResources) Equals(o *NodeDiskResources) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if n.DiskMB != o.DiskMB {
		return false
	}

	return true
}

// DeviceIdTuple is the tuple that identifies a device
type DeviceIdTuple struct {
	Vendor string
	Type   string
	Name   string
}

func (d *DeviceIdTuple) String() string {
	if d == nil {
		return ""
	}

	return fmt.Sprintf("%s/%s/%s", d.Vendor, d.Type, d.Name)
}

// Matches returns if this Device ID is a superset of the passed ID.
func (id *DeviceIdTuple) Matches(other *DeviceIdTuple) bool {
	if other == nil {
		return false
	}

	if other.Name != "" && other.Name != id.Name {
		return false
	}

	if other.Vendor != "" && other.Vendor != id.Vendor {
		return false
	}

	if other.Type != "" && other.Type != id.Type {
		return false
	}

	return true
}

// Equals returns if this Device ID is the same as the passed ID.
func (id *DeviceIdTuple) Equals(o *DeviceIdTuple) bool {
	if id == nil && o == nil {
		return true
	} else if id == nil || o == nil {
		return false
	}

	return o.Vendor == id.Vendor && o.Type == id.Type && o.Name == id.Name
}
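// Example (illustrative sketch, not part of the upstream source): Matches
// treats empty fields on the passed ID as wildcards, so a bare type matches
// any vendor and model of that type.
//
//	id := &DeviceIdTuple{Vendor: "nvidia", Type: "gpu", Name: "GTX2080Ti"}
//	id.Matches(&DeviceIdTuple{Type: "gpu"})                // true
//	id.Matches(&DeviceIdTuple{Vendor: "amd", Type: "gpu"}) // false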
// NodeDeviceResource captures a set of devices sharing a common
// vendor/type/device_name tuple.
type NodeDeviceResource struct {
	Vendor     string
	Type       string
	Name       string
	Instances  []*NodeDevice
	Attributes map[string]*psstructs.Attribute
}

func (n *NodeDeviceResource) ID() *DeviceIdTuple {
	if n == nil {
		return nil
	}

	return &DeviceIdTuple{
		Vendor: n.Vendor,
		Type:   n.Type,
		Name:   n.Name,
	}
}

func (n *NodeDeviceResource) Copy() *NodeDeviceResource {
	if n == nil {
		return nil
	}

	// Copy the primitives
	nn := *n

	// Copy the device instances
	if l := len(nn.Instances); l != 0 {
		nn.Instances = make([]*NodeDevice, 0, l)
		for _, d := range n.Instances {
			nn.Instances = append(nn.Instances, d.Copy())
		}
	}

	// Copy the Attributes
	nn.Attributes = psstructs.CopyMapStringAttribute(nn.Attributes)

	return &nn
}

func (n *NodeDeviceResource) Equals(o *NodeDeviceResource) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if n.Vendor != o.Vendor {
		return false
	} else if n.Type != o.Type {
		return false
	} else if n.Name != o.Name {
		return false
	}

	// Check the attributes
	if len(n.Attributes) != len(o.Attributes) {
		return false
	}
	for k, v := range n.Attributes {
		if otherV, ok := o.Attributes[k]; !ok || v != otherV {
			return false
		}
	}

	// Check the instances
	if len(n.Instances) != len(o.Instances) {
		return false
	}
	idMap := make(map[string]*NodeDevice, len(n.Instances))
	for _, d := range n.Instances {
		idMap[d.ID] = d
	}
	for _, otherD := range o.Instances {
		if d, ok := idMap[otherD.ID]; !ok || !d.Equals(otherD) {
			return false
		}
	}

	return true
}

// NodeDevice is an instance of a particular device.
type NodeDevice struct {
	// ID is the ID of the device.
	ID string

	// Healthy captures whether the device is healthy.
	Healthy bool

	// HealthDescription is used to provide a human readable description of why
	// the device may be unhealthy.
	HealthDescription string

	// Locality stores HW locality information for the node to optionally be
	// used when making placement decisions.
	Locality *NodeDeviceLocality
}

func (n *NodeDevice) Equals(o *NodeDevice) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if n.ID != o.ID {
		return false
	} else if n.Healthy != o.Healthy {
		return false
	} else if n.HealthDescription != o.HealthDescription {
		return false
	} else if !n.Locality.Equals(o.Locality) {
		return false
	}

	return true
}

func (n *NodeDevice) Copy() *NodeDevice {
	if n == nil {
		return nil
	}

	// Copy the primitives
	nn := *n

	// Copy the locality
	nn.Locality = nn.Locality.Copy()

	return &nn
}
// NodeDeviceLocality stores information about the devices hardware locality on
// the node.
type NodeDeviceLocality struct {
	// PciBusID is the PCI Bus ID for the device.
	PciBusID string
}

func (n *NodeDeviceLocality) Equals(o *NodeDeviceLocality) bool {
	if o == nil && n == nil {
		return true
	} else if o == nil {
		return false
	} else if n == nil {
		return false
	}

	if n.PciBusID != o.PciBusID {
		return false
	}

	return true
}

func (n *NodeDeviceLocality) Copy() *NodeDeviceLocality {
	if n == nil {
		return nil
	}

	// Copy the primitives
	nn := *n
	return &nn
}

// NodeReservedResources is used to capture the resources on a client node that
// should be reserved and not made available to jobs.
type NodeReservedResources struct {
	Cpu      NodeReservedCpuResources
	Memory   NodeReservedMemoryResources
	Disk     NodeReservedDiskResources
	Networks NodeReservedNetworkResources
}

func (n *NodeReservedResources) Copy() *NodeReservedResources {
	if n == nil {
		return nil
	}
	newN := new(NodeReservedResources)
	*newN = *n
	return newN
}

// Comparable returns a comparable version of the node's reserved resources. The
// returned resources do not contain any network information. This conversion
// can be lossy so care must be taken when using it.
func (n *NodeReservedResources) Comparable() *ComparableResources {
	if n == nil {
		return nil
	}

	c := &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares: n.Cpu.CpuShares,
			},
			Memory: AllocatedMemoryResources{
				MemoryMB: n.Memory.MemoryMB,
			},
		},
		Shared: AllocatedSharedResources{
			DiskMB: n.Disk.DiskMB,
		},
	}
	return c
}

// NodeReservedCpuResources captures the reserved CPU resources of the node.
type NodeReservedCpuResources struct {
	CpuShares int64
}

// NodeReservedMemoryResources captures the reserved memory resources of the node.
type NodeReservedMemoryResources struct {
	MemoryMB int64
}

// NodeReservedDiskResources captures the reserved disk resources of the node.
type NodeReservedDiskResources struct {
	DiskMB int64
}

// NodeReservedNetworkResources captures the reserved network resources of the node.
type NodeReservedNetworkResources struct {
	// ReservedHostPorts is the set of ports reserved on all host network
	// interfaces. Its format is a comma-separated list of integers or integer
	// ranges. (80,443,1000-2000,2005)
	ReservedHostPorts string
}

// ParseReservedHostPorts returns the reserved host ports.
func (n *NodeReservedNetworkResources) ParseReservedHostPorts() ([]uint64, error) {
	return ParsePortRanges(n.ReservedHostPorts)
}
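// Example (illustrative sketch, not part of the upstream source; assumes
// ParsePortRanges expands ranges into the individual ports):
//
//	n := &NodeReservedNetworkResources{ReservedHostPorts: "80,443,1000-1002"}
//	ports, err := n.ParseReservedHostPorts()
//	// err == nil; ports == []uint64{80, 443, 1000, 1001, 1002}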
// AllocatedResources is the set of resources to be used by an allocation.
type AllocatedResources struct {
	// Tasks is a mapping of task name to the resources for the task.
	Tasks map[string]*AllocatedTaskResources

	// Shared is the set of resources that are shared by all tasks in the group.
	Shared AllocatedSharedResources
}

func (a *AllocatedResources) Copy() *AllocatedResources {
	if a == nil {
		return nil
	}

	out := AllocatedResources{
		Shared: a.Shared.Copy(),
	}

	if a.Tasks != nil {
		out.Tasks = make(map[string]*AllocatedTaskResources, len(a.Tasks))
		for task, resource := range a.Tasks {
			out.Tasks[task] = resource.Copy()
		}
	}

	return &out
}

// Comparable returns a comparable version of the allocation's allocated
// resources. This conversion can be lossy so care must be taken when using it.
func (a *AllocatedResources) Comparable() *ComparableResources {
	if a == nil {
		return nil
	}

	c := &ComparableResources{
		Shared: a.Shared,
	}
	for _, r := range a.Tasks {
		c.Flattened.Add(r)
	}
	// Add network resources that are at the task group level
	for _, network := range a.Shared.Networks {
		c.Flattened.Add(&AllocatedTaskResources{
			Networks: []*NetworkResource{network},
		})
	}

	return c
}

// OldTaskResources returns the pre-0.9.0 map of task resources
func (a *AllocatedResources) OldTaskResources() map[string]*Resources {
	m := make(map[string]*Resources, len(a.Tasks))
	for name, res := range a.Tasks {
		m[name] = &Resources{
			CPU:      int(res.Cpu.CpuShares),
			MemoryMB: int(res.Memory.MemoryMB),
			Networks: res.Networks,
		}
	}

	return m
}

// AllocatedTaskResources are the set of resources allocated to a task.
type AllocatedTaskResources struct {
	Cpu      AllocatedCpuResources
	Memory   AllocatedMemoryResources
	Networks Networks
	Devices  []*AllocatedDeviceResource
}

func (a *AllocatedTaskResources) Copy() *AllocatedTaskResources {
	if a == nil {
		return nil
	}
	newA := new(AllocatedTaskResources)
	*newA = *a

	// Copy the networks
	newA.Networks = a.Networks.Copy()

	// Copy the devices
	if newA.Devices != nil {
		n := len(a.Devices)
		newA.Devices = make([]*AllocatedDeviceResource, n)
		for i := 0; i < n; i++ {
			newA.Devices[i] = a.Devices[i].Copy()
		}
	}

	return newA
}

// NetIndex finds the matching net index using device name
func (a *AllocatedTaskResources) NetIndex(n *NetworkResource) int {
	return a.Networks.NetIndex(n)
}

func (a *AllocatedTaskResources) Add(delta *AllocatedTaskResources) {
	if delta == nil {
		return
	}

	a.Cpu.Add(&delta.Cpu)
	a.Memory.Add(&delta.Memory)

	for _, n := range delta.Networks {
		// Find the matching interface by IP or CIDR
		idx := a.NetIndex(n)
		if idx == -1 {
			a.Networks = append(a.Networks, n.Copy())
		} else {
			a.Networks[idx].Add(n)
		}
	}

	for _, d := range delta.Devices {
		// Find the matching device
		idx := AllocatedDevices(a.Devices).Index(d)
		if idx == -1 {
			a.Devices = append(a.Devices, d.Copy())
		} else {
			a.Devices[idx].Add(d)
		}
	}
}
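// Example (illustrative sketch, not part of the upstream source): Comparable
// folds every task's CPU and memory into a single flattened view so two
// allocations can be compared dimension by dimension.
//
//	ar := &AllocatedResources{Tasks: map[string]*AllocatedTaskResources{
//		"web": {Cpu: AllocatedCpuResources{CpuShares: 250}},
//		"log": {Cpu: AllocatedCpuResources{CpuShares: 100}},
//	}}
//	ar.Comparable().Flattened.Cpu.CpuShares // == 350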
// Comparable turns AllocatedTaskResources into ComparableResources
// as a helper step in preemption
func (a *AllocatedTaskResources) Comparable() *ComparableResources {
	ret := &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares: a.Cpu.CpuShares,
			},
			Memory: AllocatedMemoryResources{
				MemoryMB: a.Memory.MemoryMB,
			},
		},
	}
	if len(a.Networks) > 0 {
		for _, net := range a.Networks {
			ret.Flattened.Networks = append(ret.Flattened.Networks, net)
		}
	}
	return ret
}

// Subtract only subtracts CPU and Memory resources. Network utilization
// is managed separately in NetworkIndex
func (a *AllocatedTaskResources) Subtract(delta *AllocatedTaskResources) {
	if delta == nil {
		return
	}

	a.Cpu.Subtract(&delta.Cpu)
	a.Memory.Subtract(&delta.Memory)
}

// AllocatedSharedResources are the set of resources allocated to a task group.
type AllocatedSharedResources struct {
	Networks Networks
	DiskMB   int64
}

func (a AllocatedSharedResources) Copy() AllocatedSharedResources {
	return AllocatedSharedResources{
		Networks: a.Networks.Copy(),
		DiskMB:   a.DiskMB,
	}
}

func (a *AllocatedSharedResources) Add(delta *AllocatedSharedResources) {
	if delta == nil {
		return
	}
	a.Networks = append(a.Networks, delta.Networks...)
	a.DiskMB += delta.DiskMB
}

func (a *AllocatedSharedResources) Subtract(delta *AllocatedSharedResources) {
	if delta == nil {
		return
	}

	diff := map[*NetworkResource]bool{}
	for _, n := range delta.Networks {
		diff[n] = true
	}
	var nets Networks
	for _, n := range a.Networks {
		if _, ok := diff[n]; !ok {
			nets = append(nets, n)
		}
	}
	a.Networks = nets
	a.DiskMB -= delta.DiskMB
}

// AllocatedCpuResources captures the allocated CPU resources.
type AllocatedCpuResources struct {
	CpuShares int64
}

func (a *AllocatedCpuResources) Add(delta *AllocatedCpuResources) {
	if delta == nil {
		return
	}

	a.CpuShares += delta.CpuShares
}

func (a *AllocatedCpuResources) Subtract(delta *AllocatedCpuResources) {
	if delta == nil {
		return
	}

	a.CpuShares -= delta.CpuShares
}

// AllocatedMemoryResources captures the allocated memory resources.
type AllocatedMemoryResources struct {
	MemoryMB int64
}

func (a *AllocatedMemoryResources) Add(delta *AllocatedMemoryResources) {
	if delta == nil {
		return
	}

	a.MemoryMB += delta.MemoryMB
}

func (a *AllocatedMemoryResources) Subtract(delta *AllocatedMemoryResources) {
	if delta == nil {
		return
	}

	a.MemoryMB -= delta.MemoryMB
}

type AllocatedDevices []*AllocatedDeviceResource

// Index finds the matching index using the passed device. If not found, -1 is
// returned.
func (a AllocatedDevices) Index(d *AllocatedDeviceResource) int {
	if d == nil {
		return -1
	}

	for i, o := range a {
		if o.ID().Equals(d.ID()) {
			return i
		}
	}

	return -1
}
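// Note (illustrative sketch, not part of the upstream source): the shared
// Subtract above removes networks by pointer identity rather than by value,
// so only the exact *NetworkResource values previously added are filtered out.
//
//	net := &NetworkResource{Device: "eth0"}
//	a := AllocatedSharedResources{Networks: Networks{net}, DiskMB: 300}
//	a.Subtract(&AllocatedSharedResources{Networks: Networks{net}, DiskMB: 100})
//	// a.Networks is empty only because the same pointer was passed; a.DiskMB == 200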
// AllocatedDeviceResource captures a set of allocated devices.
type AllocatedDeviceResource struct {
	// Vendor, Type, and Name are used to select the plugin to request the
	// device IDs from.
	Vendor string
	Type   string
	Name   string

	// DeviceIDs is the set of allocated devices
	DeviceIDs []string
}

func (a *AllocatedDeviceResource) ID() *DeviceIdTuple {
	if a == nil {
		return nil
	}

	return &DeviceIdTuple{
		Vendor: a.Vendor,
		Type:   a.Type,
		Name:   a.Name,
	}
}

func (a *AllocatedDeviceResource) Add(delta *AllocatedDeviceResource) {
	if delta == nil {
		return
	}

	a.DeviceIDs = append(a.DeviceIDs, delta.DeviceIDs...)
}

func (a *AllocatedDeviceResource) Copy() *AllocatedDeviceResource {
	if a == nil {
		return a
	}

	na := *a

	// Copy the devices
	na.DeviceIDs = make([]string, len(a.DeviceIDs))
	for i, id := range a.DeviceIDs {
		na.DeviceIDs[i] = id
	}

	return &na
}

// ComparableResources is the set of resources allocated to a task group but
// not keyed by Task, making it easier to compare.
type ComparableResources struct {
	Flattened AllocatedTaskResources
	Shared    AllocatedSharedResources
}

func (c *ComparableResources) Add(delta *ComparableResources) {
	if delta == nil {
		return
	}

	c.Flattened.Add(&delta.Flattened)
	c.Shared.Add(&delta.Shared)
}

func (c *ComparableResources) Subtract(delta *ComparableResources) {
	if delta == nil {
		return
	}

	c.Flattened.Subtract(&delta.Flattened)
	c.Shared.Subtract(&delta.Shared)
}

func (c *ComparableResources) Copy() *ComparableResources {
	if c == nil {
		return nil
	}
	newR := new(ComparableResources)
	*newR = *c
	return newR
}

// Superset checks if one set of resources is a superset of another. This
// ignores network resources, and the NetworkIndex should be used for that.
func (c *ComparableResources) Superset(other *ComparableResources) (bool, string) {
	if c.Flattened.Cpu.CpuShares < other.Flattened.Cpu.CpuShares {
		return false, "cpu"
	}
	if c.Flattened.Memory.MemoryMB < other.Flattened.Memory.MemoryMB {
		return false, "memory"
	}
	if c.Shared.DiskMB < other.Shared.DiskMB {
		return false, "disk"
	}
	return true, ""
}

// NetIndex finds the matching net index using device name
func (c *ComparableResources) NetIndex(n *NetworkResource) int {
	return c.Flattened.Networks.NetIndex(n)
}

const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)

const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluations and allocations are terminal
)
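// Example (illustrative sketch, not part of the upstream source): Superset
// reports the first dimension on which the receiver is too small, which can
// be surfaced as the exhausted resource.
//
//	node := &ComparableResources{Flattened: AllocatedTaskResources{
//		Cpu: AllocatedCpuResources{CpuShares: 1000}}}
//	ask := &ComparableResources{Flattened: AllocatedTaskResources{
//		Cpu: AllocatedCpuResources{CpuShares: 2000}}}
//	fits, dim := node.Superset(ask)
//	// fits == false, dim == "cpu"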
const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if not
	// specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// Ensure CoreJobPriority is higher than any user
	// specified job so that it gets priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2

	// JobTrackedVersions is the number of historic job versions that are
	// kept.
	JobTrackedVersions = 6
)

// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. However, the task group (TG) is the unit of
// scheduling.
type Job struct {
	// Stop marks whether the user has stopped the job. A stopped job will
	// have all created allocations stopped and acts as a way to stop a job
	// without purging it from the system. This allows existing allocs to be
	// queried and the job to be inspected as it is being killed.
	Stop bool

	// Region is the Nomad region that handles scheduling this job
	Region string

	// Namespace is the namespace the job is submitted into.
	Namespace string

	// ID is a unique identifier for the job per region. It can be
	// specified hierarchically like LineOfBiz/OrgName/Team/Project
	ID string

	// ParentID is the unique identifier of the job that spawned this job.
	ParentID string

	// Name is the logical name of the job used to refer to it. This is unique
	// per region, but not unique globally.
	Name string

	// Type is used to control various behaviors about the job. Most jobs
	// are service jobs, meaning they are expected to be long lived.
	// Some jobs are batch oriented meaning they run and then terminate.
	// This can be extended in the future to support custom schedulers.
	Type string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job. This
	// can slow down larger jobs if resources are not available.
	AllAtOnce bool

	// Datacenters contains all the datacenters this job is allowed to span
	Datacenters []string

	// Constraints can be specified at a job level and apply to
	// all the task groups and tasks.
	Constraints []*Constraint

	// Affinities can be specified at the job level to express
	// scheduling preferences that apply to all groups and tasks
	Affinities []*Affinity

	// Spread can be specified at the job level to express spreading
	// allocations across a desired attribute, such as datacenter
	Spreads []*Spread

	// TaskGroups are the collections of task groups that this job needs
	// to run. Each task group is an atomic unit of scheduling and placement.
	TaskGroups []*TaskGroup

	// See agent.ApiJobToStructJob
	// Update provides defaults for the TaskGroup Update stanzas
	Update UpdateStrategy

	// Periodic is used to define the interval the job is run at.
	Periodic *PeriodicConfig

	// ParameterizedJob is used to specify the job as a parameterized job
	// for dispatching.
	ParameterizedJob *ParameterizedJobConfig

	// Dispatched is used to identify if the Job has been dispatched from a
	// parameterized job.
	Dispatched bool

	// Payload is the payload supplied when the job was dispatched.
	Payload []byte
	// Meta is used to associate arbitrary metadata with this
	// job. This is opaque to Nomad.
	Meta map[string]string

	// VaultToken is the Vault token that proves the submitter of the job has
	// access to the specified Vault policies. This field is only used to
	// transfer the token and is not stored after Job submission.
	VaultToken string

	// Job status
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Stable marks a job as stable. Stability is only defined on "service" and
	// "system" jobs. The stability of a job will be set automatically as part
	// of a deployment and can be manually set via APIs. This field is updated
	// when the status of a corresponding deployment transitions to Failed
	// or Successful. This field is not meaningful for jobs that don't have an
	// update stanza.
	Stable bool

	// Version is a monotonically increasing version number that is incremented
	// on each job register.
	Version uint64

	// SubmitTime is the time at which the job was submitted as a UnixNano in
	// UTC
	SubmitTime int64

	// Raft Indexes
	CreateIndex    uint64
	ModifyIndex    uint64
	JobModifyIndex uint64
}

// NamespacedID returns the namespaced id useful for logging
func (j *Job) NamespacedID() *NamespacedID {
	return &NamespacedID{
		ID:        j.ID,
		Namespace: j.Namespace,
	}
}

// Canonicalize is used to canonicalize fields in the Job. This should be
// called when registering a Job. A set of warnings is returned if the job was
// changed in any way that the user should be made aware of.
func (j *Job) Canonicalize() (warnings error) {
	if j == nil {
		return nil
	}

	var mErr multierror.Error
	// Ensure that an empty and nil map are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
	if len(j.Meta) == 0 {
		j.Meta = nil
	}

	// Ensure the job is in a namespace.
	if j.Namespace == "" {
		j.Namespace = DefaultNamespace
	}

	for _, tg := range j.TaskGroups {
		tg.Canonicalize(j)
	}

	if j.ParameterizedJob != nil {
		j.ParameterizedJob.Canonicalize()
	}

	if j.Periodic != nil {
		j.Periodic.Canonicalize()
	}

	return mErr.ErrorOrNil()
}
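// Example (illustrative sketch, not part of the upstream source): Canonicalize
// nils out empty maps and fills in the default namespace before registration.
//
//	j := &Job{Meta: map[string]string{}}
//	_ = j.Canonicalize()
//	// j.Meta == nil, j.Namespace == DefaultNamespace ("default")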
// Copy returns a deep copy of the Job. It is expected that callers use recover.
// This method can panic if the deep copy fails, as it uses reflection.
func (j *Job) Copy() *Job {
	if j == nil {
		return nil
	}
	nj := new(Job)
	*nj = *j
	nj.Datacenters = helper.CopySliceString(nj.Datacenters)
	nj.Constraints = CopySliceConstraints(nj.Constraints)
	nj.Affinities = CopySliceAffinities(nj.Affinities)

	if j.TaskGroups != nil {
		tgs := make([]*TaskGroup, len(nj.TaskGroups))
		for i, tg := range nj.TaskGroups {
			tgs[i] = tg.Copy()
		}
		nj.TaskGroups = tgs
	}

	nj.Periodic = nj.Periodic.Copy()
	nj.Meta = helper.CopyMapStringString(nj.Meta)
	nj.ParameterizedJob = nj.ParameterizedJob.Copy()
	return nj
}

// Validate is used to sanity check a job input
func (j *Job) Validate() error {
	var mErr multierror.Error

	if j.Region == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
	}
	if j.ID == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
	} else if strings.Contains(j.ID, " ") {
		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
	}
	if j.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
	}
	if j.Namespace == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace"))
	}
	switch j.Type {
	case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem:
	case "":
		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
	default:
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type))
	}
	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
	}
	if len(j.Datacenters) == 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
	} else {
		for _, v := range j.Datacenters {
			if v == "" {
				mErr.Errors = append(mErr.Errors, errors.New("Job datacenter must be non-empty string"))
			}
		}
	}
	if len(j.TaskGroups) == 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
	}
	for idx, constr := range j.Constraints {
		if err := constr.Validate(); err != nil {
			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}
	if j.Type == JobTypeSystem {
		if j.Affinities != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza"))
		}
	} else {
		for idx, affinity := range j.Affinities {
			if err := affinity.Validate(); err != nil {
				outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}

	if j.Type == JobTypeSystem {
		if j.Spreads != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza"))
		}
	} else {
		for idx, spread := range j.Spreads {
			if err := spread.Validate(); err != nil {
				outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}
	// Check for duplicate task groups
	taskGroups := make(map[string]int)
	for idx, tg := range j.TaskGroups {
		if tg.Name == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
		} else if existing, ok := taskGroups[tg.Name]; ok {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
		} else {
			taskGroups[tg.Name] = idx
		}

		if tg.ShutdownDelay != nil && *tg.ShutdownDelay < 0 {
			mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a non-negative value"))
		}

		if j.Type == JobTypeSystem && tg.Count > 1 {
			mErr.Errors = append(mErr.Errors,
				fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler",
					tg.Name, tg.Count))
		}
	}

	// Validate the task group
	for _, tg := range j.TaskGroups {
		if err := tg.Validate(j); err != nil {
			outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	// Validate periodic is only used with batch jobs.
	if j.IsPeriodic() && j.Periodic.Enabled {
		if j.Type != JobTypeBatch {
			mErr.Errors = append(mErr.Errors,
				fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch))
		}

		if err := j.Periodic.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	if j.IsParameterized() {
		if j.Type != JobTypeBatch {
			mErr.Errors = append(mErr.Errors,
				fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch))
		}

		if err := j.ParameterizedJob.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	return mErr.ErrorOrNil()
}

// Warnings returns a list of warnings that may be from dubious settings or
// deprecation warnings.
func (j *Job) Warnings() error {
	var mErr multierror.Error

	// Check the groups
	ap := 0
	for _, tg := range j.TaskGroups {
		if err := tg.Warnings(j); err != nil {
			outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
		if tg.Update != nil && tg.Update.AutoPromote {
			ap += 1
		}
	}

	// Check AutoPromote, should be all or none
	if ap > 0 && ap < len(j.TaskGroups) {
		err := fmt.Errorf("auto_promote must be true for all groups to enable automatic promotion")
		mErr.Errors = append(mErr.Errors, err)
	}

	return mErr.ErrorOrNil()
}

// LookupTaskGroup finds a task group by name
func (j *Job) LookupTaskGroup(name string) *TaskGroup {
	for _, tg := range j.TaskGroups {
		if tg.Name == name {
			return tg
		}
	}
	return nil
}
// CombinedTaskMeta takes a TaskGroup and Task name and returns the combined
// meta data for the task. When joining Job, Group and Task Meta, the precedence
// is by deepest scope (Task > Group > Job).
func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string {
	group := j.LookupTaskGroup(groupName)
	if group == nil {
		return j.Meta
	}

	var meta map[string]string

	task := group.LookupTask(taskName)
	if task != nil {
		meta = helper.CopyMapStringString(task.Meta)
	}

	if meta == nil {
		meta = make(map[string]string, len(group.Meta)+len(j.Meta))
	}

	// Add the group specific meta
	for k, v := range group.Meta {
		if _, ok := meta[k]; !ok {
			meta[k] = v
		}
	}

	// Add the job specific meta
	for k, v := range j.Meta {
		if _, ok := meta[k]; !ok {
			meta[k] = v
		}
	}

	return meta
}

// Stopped returns if a job is stopped.
func (j *Job) Stopped() bool {
	return j == nil || j.Stop
}

// HasUpdateStrategy returns if any task group in the job has an update strategy
func (j *Job) HasUpdateStrategy() bool {
	for _, tg := range j.TaskGroups {
		if !tg.Update.IsEmpty() {
			return true
		}
	}

	return false
}

// Stub is used to return a summary of the job
func (j *Job) Stub(summary *JobSummary) *JobListStub {
	return &JobListStub{
		ID:                j.ID,
		ParentID:          j.ParentID,
		Name:              j.Name,
		Datacenters:       j.Datacenters,
		Type:              j.Type,
		Priority:          j.Priority,
		Periodic:          j.IsPeriodic(),
		ParameterizedJob:  j.IsParameterized(),
		Stop:              j.Stop,
		Status:            j.Status,
		StatusDescription: j.StatusDescription,
		CreateIndex:       j.CreateIndex,
		ModifyIndex:       j.ModifyIndex,
		JobModifyIndex:    j.JobModifyIndex,
		SubmitTime:        j.SubmitTime,
		JobSummary:        summary,
	}
}

// IsPeriodic returns whether a job is periodic.
func (j *Job) IsPeriodic() bool {
	return j.Periodic != nil
}

// IsPeriodicActive returns whether the job is an active periodic job that will
// create child jobs
func (j *Job) IsPeriodicActive() bool {
	return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized()
}
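// Example (illustrative sketch, not part of the upstream source): when meta
// keys collide, the deepest scope wins (Task > Group > Job).
//
//	// job meta:   {"env": "prod", "team": "infra"}
//	// group meta: {"env": "staging"}
//	// task meta:  {"env": "dev"}
//	j.CombinedTaskMeta("group", "task")
//	// => {"env": "dev", "team": "infra"}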
// IsParameterized returns whether a job is a parameterized job.
func (j *Job) IsParameterized() bool {
	return j.ParameterizedJob != nil && !j.Dispatched
}

// VaultPolicies returns the set of Vault policies per task group, per task
func (j *Job) VaultPolicies() map[string]map[string]*Vault {
	policies := make(map[string]map[string]*Vault, len(j.TaskGroups))

	for _, tg := range j.TaskGroups {
		tgPolicies := make(map[string]*Vault, len(tg.Tasks))

		for _, task := range tg.Tasks {
			if task.Vault == nil {
				continue
			}

			tgPolicies[task.Name] = task.Vault
		}

		if len(tgPolicies) != 0 {
			policies[tg.Name] = tgPolicies
		}
	}

	return policies
}

// RequiredSignals returns a mapping of task groups to tasks to their required
// set of signals
func (j *Job) RequiredSignals() map[string]map[string][]string {
	signals := make(map[string]map[string][]string)

	for _, tg := range j.TaskGroups {
		for _, task := range tg.Tasks {
			// Use this local one as a set
			taskSignals := make(map[string]struct{})

			// Check if the Vault change mode uses signals
			if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal {
				taskSignals[task.Vault.ChangeSignal] = struct{}{}
			}

			// If a user has specified a KillSignal, add it to required signals
			if task.KillSignal != "" {
				taskSignals[task.KillSignal] = struct{}{}
			}

			// Check if any template change mode uses signals
			for _, t := range task.Templates {
				if t.ChangeMode != TemplateChangeModeSignal {
					continue
				}

				taskSignals[t.ChangeSignal] = struct{}{}
			}

			// Flatten and sort the signals
			l := len(taskSignals)
			if l == 0 {
				continue
			}

			flat := make([]string, 0, l)
			for sig := range taskSignals {
				flat = append(flat, sig)
			}

			sort.Strings(flat)
			tgSignals, ok := signals[tg.Name]
			if !ok {
				tgSignals = make(map[string][]string)
				signals[tg.Name] = tgSignals
			}
			tgSignals[task.Name] = flat
		}

	}

	return signals
}
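// Example (illustrative sketch, not part of the upstream source): the result
// maps group name -> task name -> sorted signals, collected from the Vault
// change_signal, kill_signal, and template change_signal settings.
//
//	j.RequiredSignals()
//	// => map[string]map[string][]string{
//	//	"web": {"server": {"SIGHUP", "SIGUSR1"}},
//	// }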
// SpecChanged determines if the functional specification has changed between
// two job versions.
func (j *Job) SpecChanged(new *Job) bool {
	if j == nil {
		return new != nil
	}

	// Create a copy of the new job
	c := new.Copy()

	// Update the new job so we can do a reflect
	c.Status = j.Status
	c.StatusDescription = j.StatusDescription
	c.Stable = j.Stable
	c.Version = j.Version
	c.CreateIndex = j.CreateIndex
	c.ModifyIndex = j.ModifyIndex
	c.JobModifyIndex = j.JobModifyIndex
	c.SubmitTime = j.SubmitTime

	// Deep equals the jobs
	return !reflect.DeepEqual(j, c)
}

func (j *Job) SetSubmitTime() {
	j.SubmitTime = time.Now().UTC().UnixNano()
}

// JobListStub is used to return a subset of job information
// for the job list
type JobListStub struct {
	ID                string
	ParentID          string
	Name              string
	Datacenters       []string
	Type              string
	Priority          int
	Periodic          bool
	ParameterizedJob  bool
	Stop              bool
	Status            string
	StatusDescription string
	JobSummary        *JobSummary
	CreateIndex       uint64
	ModifyIndex       uint64
	JobModifyIndex    uint64
	SubmitTime        int64
}

// JobSummary summarizes the state of the allocations of a job
type JobSummary struct {
	// JobID is the ID of the job the summary is for
	JobID string

	// Namespace is the namespace of the job and its summary
	Namespace string

	// Summary contains the summary per task group for the Job
	Summary map[string]TaskGroupSummary

	// Children contains a summary for the children of this job.
	Children *JobChildrenSummary

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// Copy returns a new copy of JobSummary
func (js *JobSummary) Copy() *JobSummary {
	newJobSummary := new(JobSummary)
	*newJobSummary = *js
	newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary))
	for k, v := range js.Summary {
		newTGSummary[k] = v
	}
	newJobSummary.Summary = newTGSummary
	newJobSummary.Children = newJobSummary.Children.Copy()
	return newJobSummary
}

// JobChildrenSummary contains the summary of children job statuses
type JobChildrenSummary struct {
	Pending int64
	Running int64
	Dead    int64
}

// Copy returns a new copy of a JobChildrenSummary
func (jc *JobChildrenSummary) Copy() *JobChildrenSummary {
	if jc == nil {
		return nil
	}

	njc := new(JobChildrenSummary)
	*njc = *jc
	return njc
}

// TaskGroupSummary summarizes the state of all the allocations of a particular
// TaskGroup
type TaskGroupSummary struct {
	Queued   int
	Complete int
	Failed   int
	Running  int
	Starting int
	Lost     int
}
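// Example (illustrative sketch, not part of the upstream source): SpecChanged
// copies server-managed fields (status, version, indexes, submit time) from
// the old job onto the new one, so only functional differences register.
//
//	next := current.Copy()
//	next.Version++            // server-managed; ignored
//	current.SpecChanged(next) // false
//	next.Priority = 75        // functional change
//	current.SpecChanged(next) // true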
const (
	// Checks uses any registered health check state in combination with task
	// states to determine if an allocation is healthy.
	UpdateStrategyHealthCheck_Checks = "checks"

	// TaskStates uses the task states of an allocation to determine if the
	// allocation is healthy.
	UpdateStrategyHealthCheck_TaskStates = "task_states"

	// Manual allows the operator to manually signal to Nomad when an
	// allocation is healthy. This allows more advanced health checking that is
	// outside of the scope of Nomad.
	UpdateStrategyHealthCheck_Manual = "manual"
)

var (
	// DefaultUpdateStrategy provides a baseline that can be used to upgrade
	// jobs with the old policy or for populating field defaults.
	DefaultUpdateStrategy = &UpdateStrategy{
		Stagger:          30 * time.Second,
		MaxParallel:      1,
		HealthCheck:      UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:   10 * time.Second,
		HealthyDeadline:  5 * time.Minute,
		ProgressDeadline: 10 * time.Minute,
		AutoRevert:       false,
		AutoPromote:      false,
		Canary:           0,
	}
)

// UpdateStrategy is used to modify how updates are done
type UpdateStrategy struct {
	// Stagger is used to determine the rate at which allocations are migrated
	// due to down or draining nodes.
	Stagger time.Duration

	// MaxParallel is how many updates can be done in parallel
	MaxParallel int

	// HealthCheck specifies the mechanism in which allocations are marked
	// healthy or unhealthy as part of a deployment.
	HealthCheck string

	// MinHealthyTime is the minimum time an allocation must be in the healthy
	// state before it is marked as healthy, unblocking more allocations to be
	// rolled.
	MinHealthyTime time.Duration

	// HealthyDeadline is the time in which an allocation must be marked as
	// healthy before it is automatically transitioned to unhealthy. This time
	// period doesn't count against the MinHealthyTime.
	HealthyDeadline time.Duration

	// ProgressDeadline is the time in which an allocation as part of the
	// deployment must transition to healthy. If no allocation becomes healthy
	// after the deadline, the deployment is marked as failed. If the deadline
	// is zero, the first failure causes the deployment to fail.
	ProgressDeadline time.Duration

	// AutoRevert declares that if a deployment fails because of unhealthy
	// allocations, there should be an attempt to auto-revert the job to a
	// stable version.
	AutoRevert bool

	// AutoPromote declares that the deployment should be promoted when all canaries are
	// healthy
	AutoPromote bool

	// Canary is the number of canaries to deploy when a change to the task
	// group is detected.
	Canary int
}
4027     Canary int
4028 }
4029
4030 func (u *UpdateStrategy) Copy() *UpdateStrategy {
4031     if u == nil {
4032         return nil
4033     }
4034
4035     copy := new(UpdateStrategy)
4036     *copy = *u
4037     return copy
4038 }
4039
4040 func (u *UpdateStrategy) Validate() error {
4041     if u == nil {
4042         return nil
4043     }
4044
4045     var mErr multierror.Error
4046     switch u.HealthCheck {
4047     case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual:
4048     default:
4049         multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck))
4050     }
4051
4052     if u.MaxParallel < 0 {
4053         multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than zero: %d < 0", u.MaxParallel))
4054     }
4055     if u.Canary < 0 {
4056         multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary))
4057     }
4058     if u.Canary == 0 && u.AutoPromote {
4059         multierror.Append(&mErr, fmt.Errorf("Auto Promote requires a Canary count greater than zero"))
4060     }
4061     if u.MinHealthyTime < 0 {
4062         multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime))
4063     }
4064     if u.HealthyDeadline <= 0 {
4065         multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline))
4066     }
4067     if u.ProgressDeadline < 0 {
4068         multierror.Append(&mErr, fmt.Errorf("Progress deadline must be zero or greater: %v", u.ProgressDeadline))
4069     }
4070     if u.MinHealthyTime >= u.HealthyDeadline {
4071         multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v >= %v", u.MinHealthyTime, u.HealthyDeadline))
4072     }
4073     if u.ProgressDeadline != 0 && u.HealthyDeadline >= u.ProgressDeadline {
4074         multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be less than progress deadline: %v >= %v", u.HealthyDeadline, u.ProgressDeadline))
4075     }
4076     if u.Stagger <= 0 {
4077         multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger))
4078     }
4079
4080     return mErr.ErrorOrNil()
4081 }
4082
4083 func (u *UpdateStrategy) IsEmpty() bool {
4084     if u == nil {
4085         return true
4086     }
4087
4088     return u.MaxParallel == 0
4089 }
4090
4091 // TODO(alexdadgar): Remove once no longer used by the scheduler.
4092 // Rolling returns whether a rolling strategy should be used
4093 func (u *UpdateStrategy) Rolling() bool {
4094     return u.Stagger > 0 && u.MaxParallel > 0
4095 }
4096
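As a usage sketch of the validation rules above (assuming the canonical github.com/hashicorp/nomad/nomad/structs import path for this package): enabling auto-promotion without canaries is rejected.

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// Start from the package default and enable auto-promotion without
	// canaries, which Validate rejects.
	u := structs.DefaultUpdateStrategy.Copy()
	u.AutoPromote = true
	u.Canary = 0

	if err := u.Validate(); err != nil {
		fmt.Println(err) // includes: Auto Promote requires a Canary count greater than zero
	}
}

4097 const (
4098     // PeriodicSpecCron is used for a cron spec.
4099     PeriodicSpecCron = "cron"
4100
4101     // PeriodicSpecTest is only used by unit tests. It is a sorted, comma
4102     // separated list of unix timestamps at which to launch.
4103     PeriodicSpecTest = "_internal_test"
4104 )
4105
4106 // PeriodicConfig defines the interval a job should be run at.
4107 type PeriodicConfig struct {
4108     // Enabled determines if the job should be run periodically.
4109     Enabled bool
4110
4111     // Spec specifies the interval the job should be run at. It is parsed based
4112     // on the SpecType.
4113     Spec string
4114
4115     // SpecType defines the format of the spec.
4116     SpecType string
4117
4118     // ProhibitOverlap enforces that spawned jobs do not run in parallel.
4119     ProhibitOverlap bool
4120
4121     // TimeZone is the user specified string that determines the time zone to
4122     // launch against. The time zones must come from the IANA Time Zone
4123     // database, such as "America/New_York".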
4124     // Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
4125     // Reference: https://www.iana.org/time-zones
4126     TimeZone string
4127
4128     // location is the time zone to evaluate the launch time against
4129     location *time.Location
4130 }
4131
4132 func (p *PeriodicConfig) Copy() *PeriodicConfig {
4133     if p == nil {
4134         return nil
4135     }
4136     np := new(PeriodicConfig)
4137     *np = *p
4138     return np
4139 }
4140
4141 func (p *PeriodicConfig) Validate() error {
4142     if !p.Enabled {
4143         return nil
4144     }
4145
4146     var mErr multierror.Error
4147     if p.Spec == "" {
4148         multierror.Append(&mErr, fmt.Errorf("Must specify a spec"))
4149     }
4150
4151     // Check if we got a valid time zone
4152     if p.TimeZone != "" {
4153         if _, err := time.LoadLocation(p.TimeZone); err != nil {
4154             multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err))
4155         }
4156     }
4157
4158     switch p.SpecType {
4159     case PeriodicSpecCron:
4160         // Validate the cron spec
4161         if _, err := cronexpr.Parse(p.Spec); err != nil {
4162             multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err))
4163         }
4164     case PeriodicSpecTest:
4165         // No-op
4166     default:
4167         multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType))
4168     }
4169
4170     return mErr.ErrorOrNil()
4171 }
4172
4173 func (p *PeriodicConfig) Canonicalize() {
4174     // Load the location, falling back to UTC so location is never left nil
4175     l, err := time.LoadLocation(p.TimeZone)
4176     if err != nil {
4177         p.location = time.UTC
4178         return
4179     }
4180     p.location = l
4181 }
4182
4183 // CronParseNext is a helper that parses the next time for the given expression
4184 // but captures any panic that may occur in the underlying library.
4185 func CronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) {
4186     defer func() {
4187         if recover() != nil {
4188             t = time.Time{}
4189             err = fmt.Errorf("failed parsing cron expression: %q", spec)
4190         }
4191     }()
4192
4193     return e.Next(fromTime), nil
4194 }
4195
4196 // Next returns the closest time instant matching the spec that is after the
4197 // passed time. If no matching instance exists, the zero value of time.Time is
4198 // returned. The `time.Location` of the returned value matches that of the
4199 // passed time.
4200 func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) {
4201     switch p.SpecType {
4202     case PeriodicSpecCron:
4203         if e, err := cronexpr.Parse(p.Spec); err == nil {
4204             return CronParseNext(e, fromTime, p.Spec)
4205         }
4206     case PeriodicSpecTest:
4207         split := strings.Split(p.Spec, ",")
4208         if len(split) == 1 && split[0] == "" {
4209             return time.Time{}, nil
4210         }
4211
4212         // Parse the times
4213         times := make([]time.Time, len(split))
4214         for i, s := range split {
4215             unix, err := strconv.Atoi(s)
4216             if err != nil {
4217                 return time.Time{}, nil
4218             }
4219
4220             times[i] = time.Unix(int64(unix), 0)
4221         }
4222
4223         // Find the next match
4224         for _, next := range times {
4225             if fromTime.Before(next) {
4226                 return next, nil
4227             }
4228         }
4229     }
4230
4231     return time.Time{}, nil
4232 }
4233
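A short usage sketch tying Canonicalize, GetLocation (below), and Next together, again assuming the canonical import path; the cron spec and time zone are illustrative values:

package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	p := &structs.PeriodicConfig{
		Enabled:  true,
		SpecType: structs.PeriodicSpecCron,
		Spec:     "0 */6 * * *", // every six hours, on the hour
		TimeZone: "America/New_York",
	}
	p.Canonicalize() // resolves TimeZone into the internal location

	now := time.Now().In(p.GetLocation())
	next, err := p.Next(now)
	if err != nil {
		fmt.Println("bad cron expression:", err)
		return
	}
	fmt.Println("next launch:", next)
}

4234 // GetLocation returns the location to use for determining the time zone to run
4235 // the periodic job against.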
4236 func (p *PeriodicConfig) GetLocation() *time.Location {
4237     // Jobs pre 0.5.5 will not have this
4238     if p.location != nil {
4239         return p.location
4240     }
4241
4242     return time.UTC
4243 }
4244
4245 const (
4246     // PeriodicLaunchSuffix is the string appended to the periodic job's ID
4247     // when launching derived instances of it.
4248     PeriodicLaunchSuffix = "/periodic-"
4249 )
4250
4251 // PeriodicLaunch tracks the last launch time of a periodic job.
4252 type PeriodicLaunch struct {
4253     ID        string    // ID of the periodic job.
4254     Namespace string    // Namespace of the periodic job
4255     Launch    time.Time // The last launch time.
4256
4257     // Raft Indexes
4258     CreateIndex uint64
4259     ModifyIndex uint64
4260 }
4261
4262 const (
4263     DispatchPayloadForbidden = "forbidden"
4264     DispatchPayloadOptional  = "optional"
4265     DispatchPayloadRequired  = "required"
4266
4267     // DispatchLaunchSuffix is the string appended to the parameterized job's ID
4268     // when dispatching instances of it.
4269     DispatchLaunchSuffix = "/dispatch-"
4270 )
4271
4272 // ParameterizedJobConfig is used to configure the parameterized job
4273 type ParameterizedJobConfig struct {
4274     // Payload configures the payload requirements
4275     Payload string
4276
4277     // MetaRequired is the set of metadata keys that must be specified by the dispatcher
4278     MetaRequired []string
4279
4280     // MetaOptional is the set of metadata keys that may be specified by the dispatcher
4281     MetaOptional []string
4282 }
4283
4284 func (d *ParameterizedJobConfig) Validate() error {
4285     var mErr multierror.Error
4286     switch d.Payload {
4287     case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden:
4288     default:
4289         multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload))
4290     }
4291
4292     // Check that the meta configurations are disjoint sets
4293     disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional)
4294     if !disjoint {
4295         multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending))
4296     }
4297
4298     return mErr.ErrorOrNil()
4299 }
4300
4301 func (d *ParameterizedJobConfig) Canonicalize() {
4302     if d.Payload == "" {
4303         d.Payload = DispatchPayloadOptional
4304     }
4305 }
4306
4307 func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig {
4308     if d == nil {
4309         return nil
4310     }
4311     nd := new(ParameterizedJobConfig)
4312     *nd = *d
4313     nd.MetaOptional = helper.CopySliceString(nd.MetaOptional)
4314     nd.MetaRequired = helper.CopySliceString(nd.MetaRequired)
4315     return nd
4316 }
4317
4318 // DispatchedID returns an ID appropriate for a job dispatched against a
4319 // particular parameterized job
4320 func DispatchedID(templateID string, t time.Time) string {
4321     u := uuid.Generate()[:8]
4322     return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u)
4323 }
4324
4325 // DispatchPayloadConfig configures how a task gets its input from a job dispatch
4326 type DispatchPayloadConfig struct {
4327     // File specifies a relative path to where the input data should be written
4328     File string
4329 }
4330
4331 func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig {
4332     if d == nil {
4333         return nil
4334     }
4335     nd := new(DispatchPayloadConfig)
4336     *nd = *d
4337     return nd
4338 }
4339
4340 func (d *DispatchPayloadConfig) Validate() error {
4341     // Verify the destination doesn't escape
4342     escaped, err := PathEscapesAllocDir("task/local/", d.File)
4343     if err != nil {
4344         return fmt.Errorf("invalid destination path: %v", err)
4345     } else if escaped {
4346         return fmt.Errorf("destination escapes allocation directory")
4347     }
4348
4349     return nil
4350 }
4351
4352 var (
4353     // These default restart policies need to be in sync with
4354     // Canonicalize in api/tasks.go
4355
4356     DefaultServiceJobRestartPolicy = RestartPolicy{
4357         Delay:    15 * time.Second,
4358         Attempts: 2,
4359         Interval: 30 * time.Minute,
4360         Mode:     RestartPolicyModeFail,
4361     }
4362     DefaultBatchJobRestartPolicy = RestartPolicy{
4363         Delay:    15 * time.Second,
4364         Attempts: 3,
4365         Interval: 24 * time.Hour,
4366         Mode:     RestartPolicyModeFail,
4367     }
4368 )
4369
4370 var (
4371     // These default reschedule policies need to be in sync with
4372     // NewDefaultReschedulePolicy in api/tasks.go
4373
4374     DefaultServiceJobReschedulePolicy = ReschedulePolicy{
4375         Delay:         30 * time.Second,
4376         DelayFunction: "exponential",
4377         MaxDelay:      1 * time.Hour,
4378         Unlimited:     true,
4379     }
4380     DefaultBatchJobReschedulePolicy = ReschedulePolicy{
4381         Attempts:      1,
4382         Interval:      24 * time.Hour,
4383         Delay:         5 * time.Second,
4384         DelayFunction: "constant",
4385     }
4386 )
4387
4388 const (
4389     // RestartPolicyModeDelay causes an artificial delay until the next interval
4390     // is reached when the specified attempts have been reached in the interval.
4391     RestartPolicyModeDelay = "delay"
4392
4393     // RestartPolicyModeFail causes a job to fail if the specified number of
4394     // attempts are reached within an interval.
4395     RestartPolicyModeFail = "fail"
4396
4397     // RestartPolicyMinInterval is the minimum interval that is accepted for a
4398     // restart policy.
4399     RestartPolicyMinInterval = 5 * time.Second
4400
4401     // ReasonWithinPolicy describes restart events that are within policy
4402     ReasonWithinPolicy = "Restart within policy"
4403 )
4404
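The interaction between Attempts, Interval, and Delay in RestartPolicy.Validate (below) is easiest to see with a policy that cannot fit its own restart budget; a sketch, assuming the canonical import path, with illustrative numbers:

package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// Start from the service default (fail mode, 2 attempts per 30 minutes).
	rp := structs.NewRestartPolicy(structs.JobTypeService)

	// Ask for more restart time than the interval allows:
	// 10 attempts * 5m delay = 50m, which cannot fit in a 30m interval.
	rp.Attempts = 10
	rp.Delay = 5 * time.Minute

	if err := rp.Validate(); err != nil {
		fmt.Println(err) // Nomad can't restart the TaskGroup 10 times in an interval of 30m0s ...
	}
}

4405 // RestartPolicy configures how Tasks are restarted when they crash or fail.
4406 type RestartPolicy struct {
4407     // Attempts is the number of restarts that may occur within an interval.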
4408 Attempts int 4409 4410 // Interval is a duration in which we can limit the number of restarts 4411 // within. 4412 Interval time.Duration 4413 4414 // Delay is the time between a failure and a restart. 4415 Delay time.Duration 4416 4417 // Mode controls what happens when the task restarts more than attempt times 4418 // in an interval. 4419 Mode string 4420 } 4421 4422 func (r *RestartPolicy) Copy() *RestartPolicy { 4423 if r == nil { 4424 return nil 4425 } 4426 nrp := new(RestartPolicy) 4427 *nrp = *r 4428 return nrp 4429 } 4430 4431 func (r *RestartPolicy) Validate() error { 4432 var mErr multierror.Error 4433 switch r.Mode { 4434 case RestartPolicyModeDelay, RestartPolicyModeFail: 4435 default: 4436 multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode)) 4437 } 4438 4439 // Check for ambiguous/confusing settings 4440 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 4441 multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts)) 4442 } 4443 4444 if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() { 4445 multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval)) 4446 } 4447 if time.Duration(r.Attempts)*r.Delay > r.Interval { 4448 multierror.Append(&mErr, 4449 fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)) 4450 } 4451 return mErr.ErrorOrNil() 4452 } 4453 4454 func NewRestartPolicy(jobType string) *RestartPolicy { 4455 switch jobType { 4456 case JobTypeService, JobTypeSystem: 4457 rp := DefaultServiceJobRestartPolicy 4458 return &rp 4459 case JobTypeBatch: 4460 rp := DefaultBatchJobRestartPolicy 4461 return &rp 4462 } 4463 return nil 4464 } 4465 4466 const ReschedulePolicyMinInterval = 15 * time.Second 4467 const ReschedulePolicyMinDelay = 5 * time.Second 4468 4469 var RescheduleDelayFunctions = [...]string{"constant", "exponential", "fibonacci"} 4470 4471 // ReschedulePolicy configures how Tasks are rescheduled when they crash or fail. 4472 type ReschedulePolicy struct { 4473 // Attempts limits the number of rescheduling attempts that can occur in an interval. 4474 Attempts int 4475 4476 // Interval is a duration in which we can limit the number of reschedule attempts. 4477 Interval time.Duration 4478 4479 // Delay is a minimum duration to wait between reschedule attempts. 4480 // The delay function determines how much subsequent reschedule attempts are delayed by. 4481 Delay time.Duration 4482 4483 // DelayFunction determines how the delay progressively changes on subsequent reschedule 4484 // attempts. Valid values are "exponential", "constant", and "fibonacci". 4485 DelayFunction string 4486 4487 // MaxDelay is an upper bound on the delay. 4488 MaxDelay time.Duration 4489 4490 // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set 4491 // between reschedule attempts. 
4492     Unlimited bool
4493 }
4494
4495 func (r *ReschedulePolicy) Copy() *ReschedulePolicy {
4496     if r == nil {
4497         return nil
4498     }
4499     nrp := new(ReschedulePolicy)
4500     *nrp = *r
4501     return nrp
4502 }
4503
4504 func (r *ReschedulePolicy) Enabled() bool {
4505     enabled := r != nil && (r.Attempts > 0 || r.Unlimited)
4506     return enabled
4507 }
4508
4509 // Validate uses different criteria to validate the reschedule policy:
4510 // Delay must be a minimum of 5 seconds;
4511 // Delay Ceiling is ignored if Delay Function is "constant";
4512 // the number of possible attempts is validated, given the interval, delay and delay function.
4513 func (r *ReschedulePolicy) Validate() error {
4514     if !r.Enabled() {
4515         return nil
4516     }
4517     var mErr multierror.Error
4518     // Check for ambiguous/confusing settings
4519     if r.Attempts > 0 {
4520         if r.Interval <= 0 {
4521             multierror.Append(&mErr, fmt.Errorf("Interval must be a non-zero value if Attempts > 0"))
4522         }
4523         if r.Unlimited {
4524             multierror.Append(&mErr, fmt.Errorf("Reschedule Policy with Attempts = %v, Interval = %v, "+
4525                 "and Unlimited = %v is ambiguous", r.Attempts, r.Interval, r.Unlimited))
4526             multierror.Append(&mErr, errors.New("If Attempts > 0, Unlimited cannot also be set to true"))
4527         }
4528     }
4529
4530     delayPreCheck := true
4531     // Delay should be at least the minimum
4532     if r.Delay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() {
4533         multierror.Append(&mErr, fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay))
4534         delayPreCheck = false
4535     }
4536
4537     // Must use a valid delay function
4538     if !isValidDelayFunction(r.DelayFunction) {
4539         multierror.Append(&mErr, fmt.Errorf("Invalid delay function %q, must be one of %q", r.DelayFunction, RescheduleDelayFunctions))
4540         delayPreCheck = false
4541     }
4542
4543     // Validate MaxDelay if not using constant delay progression
4544     if r.DelayFunction != "constant" {
4545         if r.MaxDelay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() {
4546             multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.MaxDelay))
4547             delayPreCheck = false
4548         }
4549         if r.MaxDelay < r.Delay {
4550             multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than Delay %v (got %v)", r.Delay, r.MaxDelay))
4551             delayPreCheck = false
4552         }
4553
4554     }
4555
4556     // Validate Interval and other delay parameters if attempts are limited
4557     if !r.Unlimited {
4558         if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() {
4559             multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval))
4560         }
4561         if !delayPreCheck {
4562             // We can't cross validate the rest of the delay params if delayPreCheck fails, so return early
4563             return mErr.ErrorOrNil()
4564         }
4565         crossValidationErr := r.validateDelayParams()
4566         if crossValidationErr != nil {
4567             multierror.Append(&mErr, crossValidationErr)
4568         }
4569     }
4570     return mErr.ErrorOrNil()
4571 }
4572
4573 func isValidDelayFunction(delayFunc string) bool {
4574     for _, value := range RescheduleDelayFunctions {
4575         if value == delayFunc {
4576             return true
4577         }
4578     }
4579     return false
4580 }
4581
4582 func (r *ReschedulePolicy) validateDelayParams() error {
4583     ok, possibleAttempts, recommendedInterval := r.viableAttempts()
4584     if ok {
4585         return nil
4586     }
4587     var mErr multierror.Error
4588     if r.DelayFunction == "constant" {
4589         multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and "+
4590             "delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction))
4591     } else {
4592         multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+
4593             "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay))
4594     }
4595     multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts))
4596     return mErr.ErrorOrNil()
4597 }
4598
4599 func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) {
4600     var possibleAttempts int
4601     var recommendedInterval time.Duration
4602     valid := true
4603     switch r.DelayFunction {
4604     case "constant":
4605         recommendedInterval = time.Duration(r.Attempts) * r.Delay
4606         if r.Interval < recommendedInterval {
4607             possibleAttempts = int(r.Interval / r.Delay)
4608             valid = false
4609         }
4610     case "exponential":
4611         for i := 0; i < r.Attempts; i++ {
4612             nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay
4613             if nextDelay > r.MaxDelay {
4614                 nextDelay = r.MaxDelay
4615                 recommendedInterval += nextDelay
4616             } else {
4617                 recommendedInterval = nextDelay
4618             }
4619             if recommendedInterval < r.Interval {
4620                 possibleAttempts++
4621             }
4622         }
4623         if possibleAttempts < r.Attempts {
4624             valid = false
4625         }
4626     case "fibonacci":
4627         var slots []time.Duration
4628         slots = append(slots, r.Delay)
4629         slots = append(slots, r.Delay)
4630         reachedCeiling := false
4631         for i := 2; i < r.Attempts; i++ {
4632             var nextDelay time.Duration
4633             if reachedCeiling {
4634                 // switch to linear
4635                 nextDelay = slots[i-1] + r.MaxDelay
4636             } else {
4637                 nextDelay = slots[i-1] + slots[i-2]
4638                 if nextDelay > r.MaxDelay {
4639                     nextDelay = r.MaxDelay
4640                     reachedCeiling = true
4641                 }
4642             }
4643             slots = append(slots, nextDelay)
4644         }
4645         recommendedInterval = slots[len(slots)-1]
4646         if r.Interval < recommendedInterval {
4647             valid = false
4648             // calculate possible attempts
4649             for i := 0; i < len(slots); i++ {
4650                 if slots[i] > r.Interval {
4651                     possibleAttempts = i
4652                     break
4653                 }
4654             }
4655         }
4656     default:
4657         return false, 0, 0
4658     }
4659     if possibleAttempts < 0 { // can happen if delay is bigger than interval
4660         possibleAttempts = 0
4661     }
4662     return valid, possibleAttempts, recommendedInterval
4663 }
4664
4665 func NewReschedulePolicy(jobType string) *ReschedulePolicy {
4666     switch jobType {
4667     case JobTypeService:
4668         rp := DefaultServiceJobReschedulePolicy
4669         return &rp
4670     case JobTypeBatch:
4671         rp := DefaultBatchJobReschedulePolicy
4672         return &rp
4673     }
4674     return nil
4675 }
4676
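To make the delay progressions concrete, here is a small standalone sketch of the exponential schedule that viableAttempts models: each attempt doubles the previous delay until MaxDelay caps it. The values are illustrative, matching the service-job defaults above (30s initial delay, 1h ceiling):

package main

import (
	"fmt"
	"math"
	"time"
)

func main() {
	delay := 30 * time.Second // initial Delay
	maxDelay := time.Hour     // MaxDelay ceiling

	for i := 0; i < 8; i++ {
		d := time.Duration(math.Pow(2, float64(i))) * delay
		if d > maxDelay {
			d = maxDelay // capped once the ceiling is reached
		}
		fmt.Printf("reschedule attempt %d: wait %v\n", i+1, d)
	}
	// Prints 30s, 1m, 2m, 4m, 8m, 16m, 32m, then caps at 1h0m0s.
}

4677 const (
4678     MigrateStrategyHealthChecks = "checks"
4679     MigrateStrategyHealthStates = "task_states"
4680 )
4681
4682 type MigrateStrategy struct {
4683     MaxParallel     int
4684     HealthCheck     string
4685     MinHealthyTime  time.Duration
4686     HealthyDeadline time.Duration
4687 }
4688
4689 // DefaultMigrateStrategy is used for backwards compat with pre-0.8 Allocations
4690 // that lack an update strategy.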
4691 //
4692 // This function should match its counterpart in api/tasks.go
4693 func DefaultMigrateStrategy() *MigrateStrategy {
4694     return &MigrateStrategy{
4695         MaxParallel:     1,
4696         HealthCheck:     MigrateStrategyHealthChecks,
4697         MinHealthyTime:  10 * time.Second,
4698         HealthyDeadline: 5 * time.Minute,
4699     }
4700 }
4701
4702 func (m *MigrateStrategy) Validate() error {
4703     var mErr multierror.Error
4704
4705     if m.MaxParallel < 0 {
4706         multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel))
4707     }
4708
4709     switch m.HealthCheck {
4710     case MigrateStrategyHealthChecks, MigrateStrategyHealthStates:
4711         // ok
4712     case "":
4713         if m.MaxParallel > 0 {
4714             multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck"))
4715         }
4716     default:
4717         multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck))
4718     }
4719
4720     if m.MinHealthyTime < 0 {
4721         multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime))
4722     }
4723
4724     if m.HealthyDeadline < 0 {
4725         multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline))
4726     }
4727
4728     if m.MinHealthyTime > m.HealthyDeadline {
4729         multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline"))
4730     }
4731
4732     return mErr.ErrorOrNil()
4733 }
4734
4735 // TaskGroup is an atomic unit of placement. Each task group belongs to
4736 // a job and may contain any number of tasks. A task group supports running
4737 // in many replicas using the same configuration.
4738 type TaskGroup struct {
4739     // Name of the task group
4740     Name string
4741
4742     // Count is the number of replicas of this task group that should
4743     // be scheduled.
4744     Count int
4745
4746     // Update is used to control the update strategy for this task group
4747     Update *UpdateStrategy
4748
4749     // Migrate is used to control the migration strategy for this task group
4750     Migrate *MigrateStrategy
4751
4752     // Constraints can be specified at a task group level and apply to
4753     // all the tasks contained.
4754     Constraints []*Constraint
4755
4756     // RestartPolicy of a TaskGroup
4757     RestartPolicy *RestartPolicy
4758
4759     // Tasks are the collection of tasks that this task group needs to run
4760     Tasks []*Task
4761
4762     // EphemeralDisk is the disk resources that the task group requests
4763     EphemeralDisk *EphemeralDisk
4764
4765     // Meta is used to associate arbitrary metadata with this
4766     // task group. This is opaque to Nomad.
4767     Meta map[string]string
4768
4769     // ReschedulePolicy is used to configure how the scheduler should
4770     // retry failed allocations.
4771     ReschedulePolicy *ReschedulePolicy
4772
4773     // Affinities can be specified at the task group level to express
4774     // scheduling preferences.
4775     Affinities []*Affinity
4776
4777     // Spreads can be specified at the task group level to express spreading
4778     // allocations across a desired attribute, such as datacenter
4779     Spreads []*Spread
4780
4781     // Networks are the network configuration for the task group. This can be
4782     // overridden in the task.
4783     Networks Networks
4784
4785     // Services this group provides
4786     Services []*Service
4787
4788     // Volumes is a map of volumes that have been requested by the task group.
4789     Volumes map[string]*VolumeRequest
4790
4791     // ShutdownDelay is the amount of time to wait between deregistering
4792     // group services in Consul and stopping tasks.
4793 ShutdownDelay *time.Duration 4794 } 4795 4796 func (tg *TaskGroup) Copy() *TaskGroup { 4797 if tg == nil { 4798 return nil 4799 } 4800 ntg := new(TaskGroup) 4801 *ntg = *tg 4802 ntg.Update = ntg.Update.Copy() 4803 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 4804 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 4805 ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy() 4806 ntg.Affinities = CopySliceAffinities(ntg.Affinities) 4807 ntg.Spreads = CopySliceSpreads(ntg.Spreads) 4808 ntg.Volumes = CopyMapVolumeRequest(ntg.Volumes) 4809 4810 // Copy the network objects 4811 if tg.Networks != nil { 4812 n := len(tg.Networks) 4813 ntg.Networks = make([]*NetworkResource, n) 4814 for i := 0; i < n; i++ { 4815 ntg.Networks[i] = tg.Networks[i].Copy() 4816 } 4817 } 4818 4819 if tg.Tasks != nil { 4820 tasks := make([]*Task, len(ntg.Tasks)) 4821 for i, t := range ntg.Tasks { 4822 tasks[i] = t.Copy() 4823 } 4824 ntg.Tasks = tasks 4825 } 4826 4827 ntg.Meta = helper.CopyMapStringString(ntg.Meta) 4828 4829 if tg.EphemeralDisk != nil { 4830 ntg.EphemeralDisk = tg.EphemeralDisk.Copy() 4831 } 4832 4833 if tg.Services != nil { 4834 ntg.Services = make([]*Service, len(tg.Services)) 4835 for i, s := range tg.Services { 4836 ntg.Services[i] = s.Copy() 4837 } 4838 } 4839 4840 if tg.ShutdownDelay != nil { 4841 ntg.ShutdownDelay = tg.ShutdownDelay 4842 } 4843 4844 return ntg 4845 } 4846 4847 // Canonicalize is used to canonicalize fields in the TaskGroup. 4848 func (tg *TaskGroup) Canonicalize(job *Job) { 4849 // Ensure that an empty and nil map are treated the same to avoid scheduling 4850 // problems since we use reflect DeepEquals. 4851 if len(tg.Meta) == 0 { 4852 tg.Meta = nil 4853 } 4854 4855 // Set the default restart policy. 4856 if tg.RestartPolicy == nil { 4857 tg.RestartPolicy = NewRestartPolicy(job.Type) 4858 } 4859 4860 if tg.ReschedulePolicy == nil { 4861 tg.ReschedulePolicy = NewReschedulePolicy(job.Type) 4862 } 4863 4864 // Canonicalize Migrate for service jobs 4865 if job.Type == JobTypeService && tg.Migrate == nil { 4866 tg.Migrate = DefaultMigrateStrategy() 4867 } 4868 4869 // Set a default ephemeral disk object if the user has not requested for one 4870 if tg.EphemeralDisk == nil { 4871 tg.EphemeralDisk = DefaultEphemeralDisk() 4872 } 4873 4874 for _, service := range tg.Services { 4875 service.Canonicalize(job.Name, tg.Name, "group") 4876 } 4877 4878 for _, network := range tg.Networks { 4879 network.Canonicalize() 4880 } 4881 4882 for _, task := range tg.Tasks { 4883 task.Canonicalize(job, tg) 4884 } 4885 } 4886 4887 // Validate is used to sanity check a task group 4888 func (tg *TaskGroup) Validate(j *Job) error { 4889 var mErr multierror.Error 4890 if tg.Name == "" { 4891 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 4892 } 4893 if tg.Count < 0 { 4894 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative")) 4895 } 4896 if len(tg.Tasks) == 0 { 4897 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 4898 } 4899 for idx, constr := range tg.Constraints { 4900 if err := constr.Validate(); err != nil { 4901 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 4902 mErr.Errors = append(mErr.Errors, outer) 4903 } 4904 } 4905 if j.Type == JobTypeSystem { 4906 if tg.Affinities != nil { 4907 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 4908 } 4909 } else { 4910 for idx, affinity := range tg.Affinities { 4911 if err := affinity.Validate(); err != nil { 
4912 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 4913 mErr.Errors = append(mErr.Errors, outer) 4914 } 4915 } 4916 } 4917 4918 if tg.RestartPolicy != nil { 4919 if err := tg.RestartPolicy.Validate(); err != nil { 4920 mErr.Errors = append(mErr.Errors, err) 4921 } 4922 } else { 4923 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 4924 } 4925 4926 if j.Type == JobTypeSystem { 4927 if tg.Spreads != nil { 4928 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza")) 4929 } 4930 } else { 4931 for idx, spread := range tg.Spreads { 4932 if err := spread.Validate(); err != nil { 4933 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err) 4934 mErr.Errors = append(mErr.Errors, outer) 4935 } 4936 } 4937 } 4938 4939 if j.Type == JobTypeSystem { 4940 if tg.ReschedulePolicy != nil { 4941 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy")) 4942 } 4943 } else { 4944 if tg.ReschedulePolicy != nil { 4945 if err := tg.ReschedulePolicy.Validate(); err != nil { 4946 mErr.Errors = append(mErr.Errors, err) 4947 } 4948 } else { 4949 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name)) 4950 } 4951 } 4952 4953 if tg.EphemeralDisk != nil { 4954 if err := tg.EphemeralDisk.Validate(); err != nil { 4955 mErr.Errors = append(mErr.Errors, err) 4956 } 4957 } else { 4958 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name)) 4959 } 4960 4961 // Validate the update strategy 4962 if u := tg.Update; u != nil { 4963 switch j.Type { 4964 case JobTypeService, JobTypeSystem: 4965 default: 4966 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type)) 4967 } 4968 if err := u.Validate(); err != nil { 4969 mErr.Errors = append(mErr.Errors, err) 4970 } 4971 } 4972 4973 // Validate the migration strategy 4974 switch j.Type { 4975 case JobTypeService: 4976 if tg.Migrate != nil { 4977 if err := tg.Migrate.Validate(); err != nil { 4978 mErr.Errors = append(mErr.Errors, err) 4979 } 4980 } 4981 default: 4982 if tg.Migrate != nil { 4983 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow migrate block", j.Type)) 4984 } 4985 } 4986 4987 // Check that there is only one leader task if any 4988 tasks := make(map[string]int) 4989 leaderTasks := 0 4990 for idx, task := range tg.Tasks { 4991 if task.Name == "" { 4992 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 4993 } else if existing, ok := tasks[task.Name]; ok { 4994 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 4995 } else { 4996 tasks[task.Name] = idx 4997 } 4998 4999 if task.Leader { 5000 leaderTasks++ 5001 } 5002 } 5003 5004 if leaderTasks > 1 { 5005 mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader")) 5006 } 5007 5008 // Validate the Host Volumes 5009 for name, decl := range tg.Volumes { 5010 if decl.Type != VolumeTypeHost { 5011 // TODO: Remove this error when adding new volume types 5012 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has unrecognised type %s", name, decl.Type)) 5013 continue 5014 } 5015 5016 if decl.Source == "" { 5017 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has an empty source", name)) 5018 } 5019 } 5020 5021 // Validate task group and task network resources 5022 if err := 
tg.validateNetworks(); err != nil { 5023 outer := fmt.Errorf("Task group network validation failed: %v", err) 5024 mErr.Errors = append(mErr.Errors, outer) 5025 } 5026 5027 // Validate task group and task services 5028 if err := tg.validateServices(); err != nil { 5029 outer := fmt.Errorf("Task group service validation failed: %v", err) 5030 mErr.Errors = append(mErr.Errors, outer) 5031 } 5032 5033 // Validate the tasks 5034 for _, task := range tg.Tasks { 5035 // Validate the task does not reference undefined volume mounts 5036 for i, mnt := range task.VolumeMounts { 5037 if mnt.Volume == "" { 5038 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing an empty volume", task.Name, i)) 5039 continue 5040 } 5041 5042 if _, ok := tg.Volumes[mnt.Volume]; !ok { 5043 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing undefined volume %s", task.Name, i, mnt.Volume)) 5044 continue 5045 } 5046 } 5047 5048 if err := task.Validate(tg.EphemeralDisk, j.Type, tg.Services); err != nil { 5049 outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err) 5050 mErr.Errors = append(mErr.Errors, outer) 5051 } 5052 } 5053 return mErr.ErrorOrNil() 5054 } 5055 5056 func (tg *TaskGroup) validateNetworks() error { 5057 var mErr multierror.Error 5058 portLabels := make(map[string]string) 5059 staticPorts := make(map[int]string) 5060 mappedPorts := make(map[int]string) 5061 5062 for _, net := range tg.Networks { 5063 for _, port := range append(net.ReservedPorts, net.DynamicPorts...) { 5064 if other, ok := portLabels[port.Label]; ok { 5065 mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other)) 5066 } else { 5067 portLabels[port.Label] = "taskgroup network" 5068 } 5069 5070 if port.Value != 0 { 5071 // static port 5072 if other, ok := staticPorts[port.Value]; ok { 5073 err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other) 5074 mErr.Errors = append(mErr.Errors, err) 5075 } else { 5076 staticPorts[port.Value] = fmt.Sprintf("taskgroup network:%s", port.Label) 5077 } 5078 } 5079 5080 if port.To > 0 { 5081 if other, ok := mappedPorts[port.To]; ok { 5082 err := fmt.Errorf("Port mapped to %d already in use by %s", port.To, other) 5083 mErr.Errors = append(mErr.Errors, err) 5084 } else { 5085 mappedPorts[port.To] = fmt.Sprintf("taskgroup network:%s", port.Label) 5086 } 5087 } else if port.To < -1 { 5088 err := fmt.Errorf("Port %q cannot be mapped to negative value %d", port.Label, port.To) 5089 mErr.Errors = append(mErr.Errors, err) 5090 } 5091 } 5092 } 5093 // Check for duplicate tasks or port labels, and no duplicated static or mapped ports 5094 for _, task := range tg.Tasks { 5095 if task.Resources == nil { 5096 continue 5097 } 5098 5099 for _, net := range task.Resources.Networks { 5100 for _, port := range append(net.ReservedPorts, net.DynamicPorts...) 
{
5101                 if other, ok := portLabels[port.Label]; ok {
5102                     mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other))
5103                 }
5104
5105                 if port.Value != 0 {
5106                     if other, ok := staticPorts[port.Value]; ok {
5107                         err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
5108                         mErr.Errors = append(mErr.Errors, err)
5109                     } else {
5110                         staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label)
5111                     }
5112                 }
5113
5114                 if port.To != 0 {
5115                     if other, ok := mappedPorts[port.To]; ok {
5116                         err := fmt.Errorf("Port mapped to %d already in use by %s", port.To, other)
5117                         mErr.Errors = append(mErr.Errors, err)
5118                     } else {
5119                         mappedPorts[port.To] = fmt.Sprintf("%s:%s", task.Name, port.Label)
5120                     }
5121                 }
5122             }
5123         }
5124     }
5125     return mErr.ErrorOrNil()
5126 }
5127
5128 // validateServices runs Service.Validate() on group-level services,
5129 // checks that group services do not conflict with task services and that
5130 // group service checks that refer to tasks only refer to tasks that exist.
5131 func (tg *TaskGroup) validateServices() error {
5132     var mErr multierror.Error
5133     knownTasks := make(map[string]struct{})
5134     knownServices := make(map[string]struct{})
5135
5136     // Create a map of known tasks and their services so we can compare
5137     // vs the group-level services and checks
5138     for _, task := range tg.Tasks {
5139         knownTasks[task.Name] = struct{}{}
5140         if task.Services == nil {
5141             continue
5142         }
5143         for _, service := range task.Services {
5144             if _, ok := knownServices[service.Name+service.PortLabel]; ok {
5145                 mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name))
5146             }
5147             for _, check := range service.Checks {
5148                 if check.TaskName != "" {
5149                     mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %s is invalid: only task group service checks can be assigned tasks", check.Name))
5150                 }
5151             }
5152             knownServices[service.Name+service.PortLabel] = struct{}{}
5153         }
5154     }
5155     for i, service := range tg.Services {
5156         if err := service.Validate(); err != nil {
5157             outer := fmt.Errorf("Service[%d] %s validation failed: %s", i, service.Name, err)
5158             mErr.Errors = append(mErr.Errors, outer)
5159             // we continue to the next service here to avoid the risk of
5160             // crashing on null-pointer access in a later step, accepting that
5161             // we might miss out on error messages to provide the user.
5162             continue
5163         }
5164         if _, ok := knownServices[service.Name+service.PortLabel]; ok {
5165             mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name))
5166         }
5167         knownServices[service.Name+service.PortLabel] = struct{}{}
5168         for _, check := range service.Checks {
5169             if check.TaskName != "" {
5170                 if check.Type != ServiceCheckScript && check.Type != ServiceCheckGRPC {
5171                     mErr.Errors = append(mErr.Errors,
5172                         fmt.Errorf("Check %s invalid: only script and gRPC checks should have tasks", check.Name))
5173                 }
5174                 if _, ok := knownTasks[check.TaskName]; !ok {
5175                     mErr.Errors = append(mErr.Errors,
5176                         fmt.Errorf("Check %s invalid: refers to non-existent task %s", check.Name, check.TaskName))
5177                 }
5178             }
5179         }
5180     }
5181     return mErr.ErrorOrNil()
5182 }
5183
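The network and service validators above share one bookkeeping pattern: claim each label or port in a map whose value names the claimant, and report the prior owner on collision. A distilled standalone sketch of that pattern (names and ports are illustrative, mirroring the owner-string convention used by validateNetworks):

package main

import "fmt"

func main() {
	// Owner strings mirror the "taskgroup network:<label>" / "<task>:<label>"
	// convention used above.
	staticPorts := map[int]string{}
	claims := []struct {
		owner string
		port  int
	}{
		{"taskgroup network:http", 8080},
		{"server:http", 8080}, // second claim on 8080 collides
	}

	for _, c := range claims {
		if other, ok := staticPorts[c.port]; ok {
			fmt.Printf("Static port %d already reserved by %s\n", c.port, other)
			continue
		}
		staticPorts[c.port] = c.owner
	}
}

5184 // Warnings returns a list of warnings that may be from dubious settings or
5185 // deprecation warnings.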
5186 func (tg *TaskGroup) Warnings(j *Job) error { 5187 var mErr multierror.Error 5188 5189 // Validate the update strategy 5190 if u := tg.Update; u != nil { 5191 // Check the counts are appropriate 5192 if u.MaxParallel > tg.Count { 5193 mErr.Errors = append(mErr.Errors, 5194 fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+ 5195 "A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count)) 5196 } 5197 } 5198 5199 for _, t := range tg.Tasks { 5200 if err := t.Warnings(); err != nil { 5201 err = multierror.Prefix(err, fmt.Sprintf("Task %q:", t.Name)) 5202 mErr.Errors = append(mErr.Errors, err) 5203 } 5204 } 5205 5206 return mErr.ErrorOrNil() 5207 } 5208 5209 // LookupTask finds a task by name 5210 func (tg *TaskGroup) LookupTask(name string) *Task { 5211 for _, t := range tg.Tasks { 5212 if t.Name == name { 5213 return t 5214 } 5215 } 5216 return nil 5217 } 5218 5219 func (tg *TaskGroup) GoString() string { 5220 return fmt.Sprintf("*%#v", *tg) 5221 } 5222 5223 // CheckRestart describes if and when a task should be restarted based on 5224 // failing health checks. 5225 type CheckRestart struct { 5226 Limit int // Restart task after this many unhealthy intervals 5227 Grace time.Duration // Grace time to give tasks after starting to get healthy 5228 IgnoreWarnings bool // If true treat checks in `warning` as passing 5229 } 5230 5231 func (c *CheckRestart) Copy() *CheckRestart { 5232 if c == nil { 5233 return nil 5234 } 5235 5236 nc := new(CheckRestart) 5237 *nc = *c 5238 return nc 5239 } 5240 5241 func (c *CheckRestart) Equals(o *CheckRestart) bool { 5242 if c == nil || o == nil { 5243 return c == o 5244 } 5245 5246 if c.Limit != o.Limit { 5247 return false 5248 } 5249 5250 if c.Grace != o.Grace { 5251 return false 5252 } 5253 5254 if c.IgnoreWarnings != o.IgnoreWarnings { 5255 return false 5256 } 5257 5258 return true 5259 } 5260 5261 func (c *CheckRestart) Validate() error { 5262 if c == nil { 5263 return nil 5264 } 5265 5266 var mErr multierror.Error 5267 if c.Limit < 0 { 5268 mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit)) 5269 } 5270 5271 if c.Grace < 0 { 5272 mErr.Errors = append(mErr.Errors, fmt.Errorf("grace period must be greater than or equal to 0 but found %d", c.Grace)) 5273 } 5274 5275 return mErr.ErrorOrNil() 5276 } 5277 5278 const ( 5279 // DefaultKillTimeout is the default timeout between signaling a task it 5280 // will be killed and killing it. 5281 DefaultKillTimeout = 5 * time.Second 5282 ) 5283 5284 // LogConfig provides configuration for log rotation 5285 type LogConfig struct { 5286 MaxFiles int 5287 MaxFileSizeMB int 5288 FileExtension string 5289 } 5290 5291 func (l *LogConfig) Copy() *LogConfig { 5292 if l == nil { 5293 return nil 5294 } 5295 return &LogConfig{ 5296 MaxFiles: l.MaxFiles, 5297 MaxFileSizeMB: l.MaxFileSizeMB, 5298 FileExtension: l.FileExtension, 5299 } 5300 } 5301 5302 // DefaultLogConfig returns the default LogConfig values. 5303 func DefaultLogConfig() *LogConfig { 5304 return &LogConfig{ 5305 MaxFiles: 10, 5306 MaxFileSizeMB: 10, 5307 FileExtension: "", 5308 } 5309 } 5310 5311 // Validate returns an error if the log config specified are less than 5312 // the minimum allowed. 
5313 func (l *LogConfig) Validate() error {
5314     var mErr multierror.Error
5315     if l.MaxFiles < 1 {
5316         mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
5317     }
5318     if l.MaxFileSizeMB < 1 {
5319         mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
5320     }
5321     if !validLogExtension.MatchString(l.FileExtension) {
5322         mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid log extension %s", l.FileExtension))
5323     }
5324
5325     return mErr.ErrorOrNil()
5326 }
5327
5328 // Task is a single process that is typically executed as part of a task group.
5329 type Task struct {
5330     // Name of the task
5331     Name string
5332
5333     // Driver is used to control which driver is used
5334     Driver string
5335
5336     // User is used to determine which user will run the task. It defaults to
5337     // the same user the Nomad client is being run as.
5338     User string
5339
5340     // Config is provided to the driver to initialize
5341     Config map[string]interface{}
5342
5343     // Map of environment variables to be used by the driver
5344     Env map[string]string
5345
5346     // List of service definitions exposed by the Task
5347     Services []*Service
5348
5349     // Vault is used to define the set of Vault policies that this task should
5350     // have access to.
5351     Vault *Vault
5352
5353     // Templates are the set of templates to be rendered for the task.
5354     Templates []*Template
5355
5356     // Constraints can be specified at a task level and apply only to
5357     // the particular task.
5358     Constraints []*Constraint
5359
5360     // Affinities can be specified at the task level to express
5361     // scheduling preferences
5362     Affinities []*Affinity
5363
5364     // Resources is the resources needed by this task
5365     Resources *Resources
5366
5367     // DispatchPayload configures how the task retrieves its input from a dispatch
5368     DispatchPayload *DispatchPayloadConfig
5369
5370     // Meta is used to associate arbitrary metadata with this
5371     // task. This is opaque to Nomad.
5372     Meta map[string]string
5373
5374     // KillTimeout is the time between signaling a task that it will be
5375     // killed and killing it.
5376     KillTimeout time.Duration
5377
5378     // LogConfig provides configuration for log rotation
5379     LogConfig *LogConfig
5380
5381     // Artifacts is a list of artifacts to download and extract before running
5382     // the task.
5383     Artifacts []*TaskArtifact
5384
5385     // Leader marks the task as the leader within the group. When the leader
5386     // task exits, other tasks will be gracefully terminated.
5387     Leader bool
5388
5389     // ShutdownDelay is the duration of the delay between deregistering a
5390     // task from Consul and sending it a signal to shutdown. See #2441
5391     ShutdownDelay time.Duration
5392
5393     // VolumeMounts is a list of Volume name <-> mount configurations that will be
5394     // attached to this task.
5395     VolumeMounts []*VolumeMount
5396
5397
5398
5399     // KillSignal is the kill signal to use for the task. This is an optional
5400     // specification and defaults to SIGINT
5401     KillSignal string
5402
5403     // Used internally to manage tasks according to their TaskKind. 
Initial use case 5404 // is for Consul Connect 5405 Kind TaskKind 5406 5407 // Timeout is the max time the task execution will run 5408 Timeout time.Duration 5409 } 5410 5411 func (t *Task) Copy() *Task { 5412 if t == nil { 5413 return nil 5414 } 5415 nt := new(Task) 5416 *nt = *t 5417 nt.Env = helper.CopyMapStringString(nt.Env) 5418 5419 if t.Services != nil { 5420 services := make([]*Service, len(nt.Services)) 5421 for i, s := range nt.Services { 5422 services[i] = s.Copy() 5423 } 5424 nt.Services = services 5425 } 5426 5427 nt.Constraints = CopySliceConstraints(nt.Constraints) 5428 nt.Affinities = CopySliceAffinities(nt.Affinities) 5429 nt.VolumeMounts = CopySliceVolumeMount(nt.VolumeMounts) 5430 5431 nt.Vault = nt.Vault.Copy() 5432 nt.Resources = nt.Resources.Copy() 5433 nt.LogConfig = nt.LogConfig.Copy() 5434 nt.Meta = helper.CopyMapStringString(nt.Meta) 5435 nt.DispatchPayload = nt.DispatchPayload.Copy() 5436 5437 if t.Artifacts != nil { 5438 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 5439 for _, a := range nt.Artifacts { 5440 artifacts = append(artifacts, a.Copy()) 5441 } 5442 nt.Artifacts = artifacts 5443 } 5444 5445 if i, err := copystructure.Copy(nt.Config); err != nil { 5446 panic(err.Error()) 5447 } else { 5448 nt.Config = i.(map[string]interface{}) 5449 } 5450 5451 if t.Templates != nil { 5452 templates := make([]*Template, len(t.Templates)) 5453 for i, tmpl := range nt.Templates { 5454 templates[i] = tmpl.Copy() 5455 } 5456 nt.Templates = templates 5457 } 5458 5459 return nt 5460 } 5461 5462 // Canonicalize canonicalizes fields in the task. 5463 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 5464 // Ensure that an empty and nil map are treated the same to avoid scheduling 5465 // problems since we use reflect DeepEquals. 5466 if len(t.Meta) == 0 { 5467 t.Meta = nil 5468 } 5469 if len(t.Config) == 0 { 5470 t.Config = nil 5471 } 5472 if len(t.Env) == 0 { 5473 t.Env = nil 5474 } 5475 5476 for _, service := range t.Services { 5477 service.Canonicalize(job.Name, tg.Name, t.Name) 5478 } 5479 5480 // If Resources are nil initialize them to defaults, otherwise canonicalize 5481 if t.Resources == nil { 5482 t.Resources = DefaultResources() 5483 } else { 5484 t.Resources.Canonicalize() 5485 } 5486 5487 // Set the default timeout if it is not specified. 5488 if t.KillTimeout == 0 { 5489 t.KillTimeout = DefaultKillTimeout 5490 } 5491 5492 if t.Vault != nil { 5493 t.Vault.Canonicalize() 5494 } 5495 5496 for _, template := range t.Templates { 5497 template.Canonicalize() 5498 } 5499 } 5500 5501 func (t *Task) GoString() string { 5502 return fmt.Sprintf("*%#v", *t) 5503 } 5504 5505 // Validate is used to sanity check a task 5506 func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string, tgServices []*Service) error { 5507 var mErr multierror.Error 5508 if t.Name == "" { 5509 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 5510 } 5511 if strings.ContainsAny(t.Name, `/\`) { 5512 // We enforce this so that when creating the directory on disk it will 5513 // not have any slashes. 
5514 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 5515 } 5516 if t.Driver == "" { 5517 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 5518 } 5519 if t.KillTimeout < 0 { 5520 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 5521 } 5522 if t.Timeout < 0 { 5523 mErr.Errors = append(mErr.Errors, errors.New("Timeout must be a positive value")) 5524 } 5525 if t.ShutdownDelay < 0 { 5526 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 5527 } 5528 5529 // Validate the resources. 5530 if t.Resources == nil { 5531 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 5532 } else if err := t.Resources.Validate(); err != nil { 5533 mErr.Errors = append(mErr.Errors, err) 5534 } 5535 5536 // Validate the log config 5537 if t.LogConfig == nil { 5538 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 5539 } else if err := t.LogConfig.Validate(); err != nil { 5540 mErr.Errors = append(mErr.Errors, err) 5541 } 5542 5543 for idx, constr := range t.Constraints { 5544 if err := constr.Validate(); err != nil { 5545 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 5546 mErr.Errors = append(mErr.Errors, outer) 5547 } 5548 5549 switch constr.Operand { 5550 case ConstraintDistinctHosts, ConstraintDistinctProperty: 5551 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand) 5552 mErr.Errors = append(mErr.Errors, outer) 5553 } 5554 } 5555 5556 if jobType == JobTypeSystem { 5557 if t.Affinities != nil { 5558 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 5559 } 5560 } else { 5561 for idx, affinity := range t.Affinities { 5562 if err := affinity.Validate(); err != nil { 5563 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 5564 mErr.Errors = append(mErr.Errors, outer) 5565 } 5566 } 5567 } 5568 5569 // Validate Services 5570 if err := validateServices(t); err != nil { 5571 mErr.Errors = append(mErr.Errors, err) 5572 } 5573 5574 if t.LogConfig != nil && ephemeralDisk != nil { 5575 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 5576 if ephemeralDisk.SizeMB <= logUsage { 5577 mErr.Errors = append(mErr.Errors, 5578 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 5579 logUsage, ephemeralDisk.SizeMB)) 5580 } 5581 } 5582 5583 for idx, artifact := range t.Artifacts { 5584 if err := artifact.Validate(); err != nil { 5585 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 5586 mErr.Errors = append(mErr.Errors, outer) 5587 } 5588 } 5589 5590 if t.Vault != nil { 5591 if err := t.Vault.Validate(); err != nil { 5592 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 5593 } 5594 } 5595 5596 destinations := make(map[string]int, len(t.Templates)) 5597 for idx, tmpl := range t.Templates { 5598 if err := tmpl.Validate(); err != nil { 5599 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 5600 mErr.Errors = append(mErr.Errors, outer) 5601 } 5602 5603 if other, ok := destinations[tmpl.DestPath]; ok { 5604 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 5605 mErr.Errors = append(mErr.Errors, outer) 5606 } else { 5607 destinations[tmpl.DestPath] = idx + 1 5608 } 5609 } 5610 5611 // Validate the dispatch payload block if there 5612 if t.DispatchPayload != nil { 5613 if err := 
t.DispatchPayload.Validate(); err != nil { 5614 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err)) 5615 } 5616 } 5617 5618 // Validation for TaskKind field which is used for Consul Connect integration 5619 if t.Kind.IsConnectProxy() { 5620 // This task is a Connect proxy so it should not have service stanzas 5621 if len(t.Services) > 0 { 5622 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have a service stanza")) 5623 } 5624 if t.Leader { 5625 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have leader set")) 5626 } 5627 5628 // Ensure the proxy task has a corresponding service entry 5629 serviceErr := ValidateConnectProxyService(t.Kind.Value(), tgServices) 5630 if serviceErr != nil { 5631 mErr.Errors = append(mErr.Errors, serviceErr) 5632 } 5633 } 5634 5635 // Validation for volumes 5636 for idx, vm := range t.VolumeMounts { 5637 if !MountPropagationModeIsValid(vm.PropagationMode) { 5638 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume Mount (%d) has an invalid propagation mode: \"%s\"", idx, vm.PropagationMode)) 5639 } 5640 } 5641 5642 return mErr.ErrorOrNil() 5643 } 5644 5645 // validateServices takes a task and validates the services within it are valid 5646 // and reference ports that exist. 5647 func validateServices(t *Task) error { 5648 var mErr multierror.Error 5649 5650 // Ensure that services don't ask for nonexistent ports and their names are 5651 // unique. 5652 servicePorts := make(map[string]map[string]struct{}) 5653 addServicePort := func(label, service string) { 5654 if _, ok := servicePorts[label]; !ok { 5655 servicePorts[label] = map[string]struct{}{} 5656 } 5657 servicePorts[label][service] = struct{}{} 5658 } 5659 knownServices := make(map[string]struct{}) 5660 for i, service := range t.Services { 5661 if err := service.Validate(); err != nil { 5662 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 5663 mErr.Errors = append(mErr.Errors, outer) 5664 } 5665 5666 // Ensure that services with the same name are not being registered for 5667 // the same port 5668 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 5669 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 5670 } 5671 knownServices[service.Name+service.PortLabel] = struct{}{} 5672 5673 if service.PortLabel != "" { 5674 if service.AddressMode == "driver" { 5675 // Numeric port labels are valid for address_mode=driver 5676 _, err := strconv.Atoi(service.PortLabel) 5677 if err != nil { 5678 // Not a numeric port label, add it to list to check 5679 addServicePort(service.PortLabel, service.Name) 5680 } 5681 } else { 5682 addServicePort(service.PortLabel, service.Name) 5683 } 5684 } 5685 5686 // Ensure that check names are unique and have valid ports 5687 knownChecks := make(map[string]struct{}) 5688 for _, check := range service.Checks { 5689 if _, ok := knownChecks[check.Name]; ok { 5690 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 5691 } 5692 knownChecks[check.Name] = struct{}{} 5693 5694 if !check.RequiresPort() { 5695 // No need to continue validating check if it doesn't need a port 5696 continue 5697 } 5698 5699 effectivePort := check.PortLabel 5700 if effectivePort == "" { 5701 // Inherits from service 5702 effectivePort = service.PortLabel 5703 } 5704 5705 if effectivePort == "" { 5706 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is missing a port", check.Name)) 5707 continue 5708 } 
5709
5710             isNumeric := false
5711             portNumber, err := strconv.Atoi(effectivePort)
5712             if err == nil {
5713                 isNumeric = true
5714             }
5715
5716             // Numeric ports are fine for address_mode = "driver"
5717             if check.AddressMode == "driver" && isNumeric {
5718                 if portNumber <= 0 {
5719                     mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber))
5720                 }
5721                 continue
5722             }
5723
5724             if isNumeric {
5725                 mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, check.Name, portNumber))
5726                 continue
5727             }
5728
5729             // PortLabel must exist, report errors by its parent service
5730             addServicePort(effectivePort, service.Name)
5731         }
5732     }
5733
5734     // Get the set of port labels.
5735     portLabels := make(map[string]struct{})
5736     if t.Resources != nil {
5737         for _, network := range t.Resources.Networks {
5738             ports := network.PortLabels()
5739             for portLabel := range ports {
5740                 portLabels[portLabel] = struct{}{}
5741             }
5742         }
5743     }
5744
5745     // Iterate over a sorted list of keys to make error listings stable
5746     keys := make([]string, 0, len(servicePorts))
5747     for p := range servicePorts {
5748         keys = append(keys, p)
5749     }
5750     sort.Strings(keys)
5751
5752     // Ensure all ports referenced in services exist.
5753     for _, servicePort := range keys {
5754         services := servicePorts[servicePort]
5755         _, ok := portLabels[servicePort]
5756         if !ok {
5757             names := make([]string, 0, len(services))
5758             for name := range services {
5759                 names = append(names, name)
5760             }
5761
5762             // Keep order deterministic
5763             sort.Strings(names)
5764             joined := strings.Join(names, ", ")
5765             err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined)
5766             mErr.Errors = append(mErr.Errors, err)
5767         }
5768     }
5769
5770     // Ensure address mode is valid
5771     return mErr.ErrorOrNil()
5772 }
5773
5774 func (t *Task) Warnings() error {
5775     var mErr multierror.Error
5776
5777     // Validate the resources
5778     if t.Resources != nil && t.Resources.IOPS != 0 {
5779         mErr.Errors = append(mErr.Errors, fmt.Errorf("IOPS has been deprecated as of Nomad 0.9.0. Please remove IOPS from resource stanza."))
5780     }
5781
5782     return mErr.ErrorOrNil()
5783 }
5784
5785 // TaskKind identifies the special kinds of tasks using the following format:
5786 // `<kind_name>(:<identifier>)`. The TaskKind can optionally include an identifier that
5787 // is opaque to the Task. This identifier can be used to relate the task to some
5788 // other entity based on the kind.
5789 //
5790 // For example, a task may have the TaskKind of `connect-proxy:service` where
5791 // 'connect-proxy' is the kind name and 'service' is the identifier that relates the
5792 // task to the service name of which it is a connect proxy for.
5793 type TaskKind string
5794
5795 // Name returns the kind name portion of the TaskKind
5796 func (k TaskKind) Name() string {
5797     return strings.Split(string(k), ":")[0]
5798 }
5799
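A quick usage sketch of the TaskKind helpers (Name above, Value below, and IsConnectProxy), again assuming the canonical import path:

package main

import (
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	k := structs.TaskKind("connect-proxy:web")
	fmt.Println(k.Name())           // connect-proxy
	fmt.Println(k.Value())          // web
	fmt.Println(k.IsConnectProxy()) // true
}

5800 // Value returns the identifier of the TaskKind or an empty string if it doesn't
5801 // include one.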
5802 func (k TaskKind) Value() string { 5803 if s := strings.SplitN(string(k), ":", 2); len(s) > 1 { 5804 return s[1] 5805 } 5806 return "" 5807 } 5808 5809 // IsConnectProxy returns true if the TaskKind is connect-proxy 5810 func (k TaskKind) IsConnectProxy() bool { 5811 return strings.HasPrefix(string(k), ConnectProxyPrefix+":") && len(k) > len(ConnectProxyPrefix)+1 5812 } 5813 5814 // ConnectProxyPrefix is the prefix used for fields referencing a Consul Connect 5815 // Proxy 5816 const ConnectProxyPrefix = "connect-proxy" 5817 5818 // ValidateConnectProxyService checks that the service being 5819 // proxied by this task exists in the task group and contains 5820 // valid Connect config. 5821 func ValidateConnectProxyService(serviceName string, tgServices []*Service) error { 5822 found := false 5823 names := make([]string, 0, len(tgServices)) 5824 for _, svc := range tgServices { 5825 if svc.Connect == nil || svc.Connect.SidecarService == nil { 5826 continue 5827 } 5828 5829 if svc.Name == serviceName { 5830 found = true 5831 break 5832 } 5833 5834 // Build up list of mismatched Connect service names for error 5835 // reporting. 5836 names = append(names, svc.Name) 5837 } 5838 5839 if !found { 5840 if len(names) == 0 { 5841 return fmt.Errorf("No Connect services in task group with Connect proxy (%q)", serviceName) 5842 } else { 5843 return fmt.Errorf("Connect proxy service name (%q) not found in Connect services from task group: %s", serviceName, names) 5844 } 5845 } 5846 5847 return nil 5848 } 5849 5850 const ( 5851 // TemplateChangeModeNoop marks that no action should be taken if the 5852 // template is re-rendered 5853 TemplateChangeModeNoop = "noop" 5854 5855 // TemplateChangeModeSignal marks that the task should be signaled if the 5856 // template is re-rendered 5857 TemplateChangeModeSignal = "signal" 5858 5859 // TemplateChangeModeRestart marks that the task should be restarted if the 5860 // template is re-rendered 5861 TemplateChangeModeRestart = "restart" 5862 ) 5863 5864 var ( 5865 // TemplateChangeModeInvalidError is the error for when an invalid change 5866 // mode is given 5867 TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart") 5868 ) 5869 5870 // Template represents a template configuration to be rendered for a given task 5871 type Template struct { 5872 // SourcePath is the path to the template to be rendered 5873 SourcePath string 5874 5875 // DestPath is the path to where the template should be rendered 5876 DestPath string 5877 5878 // EmbeddedTmpl stores the raw template. This is useful for smaller templates 5879 // where they are embedded in the job file rather than sent as an artifact 5880 EmbeddedTmpl string 5881 5882 // ChangeMode indicates what should be done if the template is re-rendered 5883 ChangeMode string 5884 5885 // ChangeSignal is the signal that should be sent if the change mode 5886 // requires it. 5887 ChangeSignal string 5888 5889 // Splay is used to avoid coordinated restarts of processes by applying a 5890 // random wait between 0 and the given splay value before signalling the 5891 // application of a change 5892 Splay time.Duration 5893 5894 // Perms is the permission the file should be written out with. 5895 Perms string 5896 5897 // LeftDelim and RightDelim are optional configurations to control which 5898 // delimiters are used when parsing the template.
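// As an illustrative sketch (all field values here are hypothetical), a
// template whose payload itself contains "{{" and "}}" can switch to bracket
// delimiters:
//
//	t := &Template{
//		EmbeddedTmpl: `port=[[ env "NOMAD_PORT_http" ]]`,
//		DestPath:     "local/app.conf",
//		ChangeMode:   TemplateChangeModeNoop,
//		LeftDelim:    "[[",
//		RightDelim:   "]]",
//	}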
5899 LeftDelim string 5900 RightDelim string 5901 5902 // Envvars enables exposing the template as environment variables 5903 // instead of as a file. The template must be of the form: 5904 // 5905 // VAR_NAME_1={{ key service/my-key }} 5906 // VAR_NAME_2=raw string and {{ env "attr.kernel.name" }} 5907 // 5908 // Lines will be split on the initial "=" with the first part being the 5909 // key name and the second part the value. 5910 // Empty lines and lines starting with # will be ignored, but to avoid 5911 // escaping issues #s within lines will not be treated as comments. 5912 Envvars bool 5913 5914 // VaultGrace is the grace duration between lease renewal and reacquiring a 5915 // secret. If the lease of a secret is less than the grace, a new secret is 5916 // acquired. 5917 VaultGrace time.Duration 5918 } 5919 5920 // DefaultTemplate returns a default template. 5921 func DefaultTemplate() *Template { 5922 return &Template{ 5923 ChangeMode: TemplateChangeModeRestart, 5924 Splay: 5 * time.Second, 5925 Perms: "0644", 5926 } 5927 } 5928 5929 func (t *Template) Copy() *Template { 5930 if t == nil { 5931 return nil 5932 } 5933 copy := new(Template) 5934 *copy = *t 5935 return copy 5936 } 5937 5938 func (t *Template) Canonicalize() { 5939 if t.ChangeSignal != "" { 5940 t.ChangeSignal = strings.ToUpper(t.ChangeSignal) 5941 } 5942 } 5943 5944 func (t *Template) Validate() error { 5945 var mErr multierror.Error 5946 5947 // Verify we have something to render 5948 if t.SourcePath == "" && t.EmbeddedTmpl == "" { 5949 multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template")) 5950 } 5951 5952 // Verify we can render somewhere 5953 if t.DestPath == "" { 5954 multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template")) 5955 } 5956 5957 // Verify the destination doesn't escape 5958 escaped, err := PathEscapesAllocDir("task", t.DestPath) 5959 if err != nil { 5960 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 5961 } else if escaped { 5962 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 5963 } 5964 5965 // Verify a proper change mode 5966 switch t.ChangeMode { 5967 case TemplateChangeModeNoop, TemplateChangeModeRestart: 5968 case TemplateChangeModeSignal: 5969 if t.ChangeSignal == "" { 5970 multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal")) 5971 } 5972 if t.Envvars { 5973 multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates")) 5974 } 5975 default: 5976 multierror.Append(&mErr, TemplateChangeModeInvalidError) 5977 } 5978 5979 // Verify the splay is positive 5980 if t.Splay < 0 { 5981 multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value")) 5982 } 5983 5984 // Verify the permissions 5985 if t.Perms != "" { 5986 if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil { 5987 multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err)) 5988 } 5989 } 5990 5991 if t.VaultGrace.Nanoseconds() < 0 { 5992 multierror.Append(&mErr, fmt.Errorf("Vault grace must be greater than zero: %v < 0", t.VaultGrace)) 5993 } 5994 5995 return mErr.ErrorOrNil() 5996 } 5997 5998 // Set of possible states for a task. 5999 const ( 6000 TaskStatePending = "pending" // The task is waiting to be run. 6001 TaskStateRunning = "running" // The task is currently running. 6002 TaskStateDead = "dead" // Terminal state of task. 
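// Taken together, these form a simplified lifecycle sketch (restart handling
// is elided here; a restarting task passes back through "pending"):
//
//	pending -> running -> dead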
6003 ) 6004 6005 // TaskState tracks the current state of a task and events that caused state 6006 // transitions. 6007 type TaskState struct { 6008 // The current state of the task. 6009 State string 6010 6011 // Failed marks a task as having failed 6012 Failed bool 6013 6014 // Restarts is the number of times the task has restarted 6015 Restarts uint64 6016 6017 // LastRestart is the time the task last restarted. It is updated each time the 6018 // task restarts 6019 LastRestart time.Time 6020 6021 // StartedAt is the time the task is started. It is updated each time the 6022 // task starts 6023 StartedAt time.Time 6024 6025 // FinishedAt is the time at which the task transitioned to dead and will 6026 // not be started again. 6027 FinishedAt time.Time 6028 6029 // Series of task events that transition the state of the task. 6030 Events []*TaskEvent 6031 } 6032 6033 // NewTaskState returns a TaskState initialized in the Pending state. 6034 func NewTaskState() *TaskState { 6035 return &TaskState{ 6036 State: TaskStatePending, 6037 } 6038 } 6039 6040 // Canonicalize ensures the TaskState has a State set. It should default to 6041 // Pending. 6042 func (ts *TaskState) Canonicalize() { 6043 if ts.State == "" { 6044 ts.State = TaskStatePending 6045 } 6046 } 6047 6048 func (ts *TaskState) Copy() *TaskState { 6049 if ts == nil { 6050 return nil 6051 } 6052 copy := new(TaskState) 6053 *copy = *ts 6054 6055 if ts.Events != nil { 6056 copy.Events = make([]*TaskEvent, len(ts.Events)) 6057 for i, e := range ts.Events { 6058 copy.Events[i] = e.Copy() 6059 } 6060 } 6061 return copy 6062 } 6063 6064 // Successful returns whether a task finished successfully. This doesn't really 6065 // have meaning on a non-batch allocation because service and system 6066 // allocations are not expected to finish. 6067 func (ts *TaskState) Successful() bool { 6068 return ts.State == TaskStateDead && !ts.Failed 6069 } 6070 6071 const ( 6072 // TaskSetupFailure indicates that the task could not be started due to 6073 // a setup failure. 6074 TaskSetupFailure = "Setup Failure" 6075 6076 // TaskDriverFailure indicates that the task could not be started due to a 6077 // failure in the driver. TaskDriverFailure is considered Recoverable. 6078 TaskDriverFailure = "Driver Failure" 6079 6080 // TaskReceived signals that the task has been pulled by the client at the 6081 // given timestamp. 6082 TaskReceived = "Received" 6083 6084 // TaskFailedValidation indicates the task was invalid and as such was not run. 6085 // TaskFailedValidation is not considered Recoverable. 6086 TaskFailedValidation = "Failed Validation" 6087 6088 // TaskStarted signals that the task was started and its timestamp can be 6089 // used to determine the running length of the task. 6090 TaskStarted = "Started" 6091 6092 // TaskTerminated indicates that the task was started and exited. 6093 TaskTerminated = "Terminated" 6094 6095 // TaskKilling indicates a kill signal has been sent to the task. 6096 TaskKilling = "Killing" 6097 6098 // TaskKilled indicates a user has killed the task. 6099 TaskKilled = "Killed" 6100 6101 // TaskRestarting indicates that the task terminated and is being restarted. 6102 TaskRestarting = "Restarting" 6103 6104 // TaskNotRestarting indicates that the task has failed and is not being 6105 // restarted because it has exceeded its restart policy.
6106 TaskNotRestarting = "Not Restarting" 6107 6108 // TaskRestartSignal indicates that the task has been signalled to be 6109 // restarted 6110 TaskRestartSignal = "Restart Signaled" 6111 6112 // TaskSignaling indicates that the task is being signalled. 6113 TaskSignaling = "Signaling" 6114 6115 // TaskDownloadingArtifacts means the task is downloading the artifacts 6116 // specified in the task. 6117 TaskDownloadingArtifacts = "Downloading Artifacts" 6118 6119 // TaskArtifactDownloadFailed indicates that downloading the artifacts 6120 // failed. 6121 TaskArtifactDownloadFailed = "Failed Artifact Download" 6122 6123 // TaskBuildingTaskDir indicates that the task directory/chroot is being 6124 // built. 6125 TaskBuildingTaskDir = "Building Task Directory" 6126 6127 // TaskSetup indicates the task runner is setting up the task environment 6128 TaskSetup = "Task Setup" 6129 6130 // TaskDiskExceeded indicates that one of the tasks in a taskgroup has 6131 // exceeded the requested disk resources. 6132 TaskDiskExceeded = "Disk Resources Exceeded" 6133 6134 // TaskSiblingFailed indicates that a sibling task in the task group has 6135 // failed. 6136 TaskSiblingFailed = "Sibling Task Failed" 6137 6138 // TaskDriverMessage is an informational event message emitted by 6139 // drivers such as when they're performing a long running action like 6140 // downloading an image. 6141 TaskDriverMessage = "Driver" 6142 6143 // TaskLeaderDead indicates that the leader task within the task group has finished. 6144 TaskLeaderDead = "Leader Task Dead" 6145 6146 // TaskHookFailed indicates that one of the hooks for a task failed. 6147 TaskHookFailed = "Task hook failed" 6148 6149 // TaskRestoreFailed indicates Nomad was unable to reattach to a 6150 // restored task. 6151 TaskRestoreFailed = "Failed Restoring Task" 6152 ) 6153 6154 // TaskEvent is an event that affects the state of a task and contains metadata 6155 // appropriate to the event's type. 6156 type TaskEvent struct { 6157 Type string 6158 Time int64 // Unix Nanosecond timestamp 6159 6160 Message string // A possible message explaining the termination of the task. 6161 6162 // DisplayMessage is a human friendly message about the event 6163 DisplayMessage string 6164 6165 // Details is a map with annotated info about the event 6166 Details map[string]string 6167 6168 // DEPRECATION NOTICE: The following fields are deprecated and will be removed 6169 // in a future release. Field values are available in the Details map. 6170 6171 // FailsTask marks whether this event fails the task. 6172 // Deprecated, use Details["fails_task"] to access this. 6173 FailsTask bool 6174 6175 // Restart fields. 6176 // Deprecated, use Details["restart_reason"] to access this. 6177 RestartReason string 6178 6179 // Setup Failure fields. 6180 // Deprecated, use Details["setup_error"] to access this. 6181 SetupError string 6182 6183 // Driver Failure fields. 6184 // Deprecated, use Details["driver_error"] to access this. 6185 DriverError string // A driver error occurred while starting the task. 6186 6187 // Task Terminated Fields. 6188 6189 // Deprecated, use Details["exit_code"] to access this. 6190 ExitCode int // The exit code of the task. 6191 6192 // Deprecated, use Details["signal"] to access this. 6193 Signal int // The signal that terminated the task. 6194 6195 // Killing fields 6196 // Deprecated, use Details["kill_timeout"] to access this. 6197 KillTimeout time.Duration 6198 6199 // Task Killed Fields. 6200 // Deprecated, use Details["kill_error"] to access this.
6201 KillError string // Error killing the task. 6202 6203 // KillReason is the reason the task was killed 6204 // Deprecated, use Details["kill_reason"] to access this. 6205 KillReason string 6206 6207 // TaskRestarting fields. 6208 // Deprecated, use Details["start_delay"] to access this. 6209 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 6210 6211 // Artifact Download fields 6212 // Deprecated, use Details["download_error"] to access this. 6213 DownloadError string // Error downloading artifacts 6214 6215 // Validation fields 6216 // Deprecated, use Details["validation_error"] to access this. 6217 ValidationError string // Validation error 6218 6219 // The maximum allowed task disk size. 6220 // Deprecated, use Details["disk_limit"] to access this. 6221 DiskLimit int64 6222 6223 // Name of the sibling task that caused termination of the task that 6224 // the TaskEvent refers to. 6225 // Deprecated, use Details["failed_sibling"] to access this. 6226 FailedSibling string 6227 6228 // VaultError is the error from token renewal 6229 // Deprecated, use Details["vault_renewal_error"] to access this. 6230 VaultError string 6231 6232 // TaskSignalReason indicates the reason the task is being signalled. 6233 // Deprecated, use Details["task_signal_reason"] to access this. 6234 TaskSignalReason string 6235 6236 // TaskSignal is the signal that was sent to the task 6237 // Deprecated, use Details["task_signal"] to access this. 6238 TaskSignal string 6239 6240 // DriverMessage indicates a driver action being taken. 6241 // Deprecated, use Details["driver_message"] to access this. 6242 DriverMessage string 6243 6244 // GenericSource is the source of a message. 6245 // Deprecated, is redundant with event type. 6246 GenericSource string 6247 } 6248 6249 func (event *TaskEvent) PopulateEventDisplayMessage() { 6250 // Build up the description based on the event type. 6251 if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why? 6252 return 6253 } 6254 6255 if event.DisplayMessage != "" { 6256 return 6257 } 6258 6259 var desc string 6260 switch event.Type { 6261 case TaskSetup: 6262 desc = event.Message 6263 case TaskStarted: 6264 desc = "Task started by client" 6265 case TaskReceived: 6266 desc = "Task received by client" 6267 case TaskFailedValidation: 6268 if event.ValidationError != "" { 6269 desc = event.ValidationError 6270 } else { 6271 desc = "Validation of task failed" 6272 } 6273 case TaskSetupFailure: 6274 if event.SetupError != "" { 6275 desc = event.SetupError 6276 } else { 6277 desc = "Task setup failed" 6278 } 6279 case TaskDriverFailure: 6280 if event.DriverError != "" { 6281 desc = event.DriverError 6282 } else { 6283 desc = "Failed to start task" 6284 } 6285 case TaskDownloadingArtifacts: 6286 desc = "Client is downloading artifacts" 6287 case TaskArtifactDownloadFailed: 6288 if event.DownloadError != "" { 6289 desc = event.DownloadError 6290 } else { 6291 desc = "Failed to download artifacts" 6292 } 6293 case TaskKilling: 6294 if event.KillReason != "" { 6295 desc = event.KillReason 6296 } else if event.KillTimeout != 0 { 6297 desc = fmt.Sprintf("Sent interrupt. 
Waiting %v before force killing", event.KillTimeout) 6298 } else { 6299 desc = "Sent interrupt" 6300 } 6301 case TaskKilled: 6302 if event.KillError != "" { 6303 desc = event.KillError 6304 } else { 6305 desc = "Task successfully killed" 6306 } 6307 case TaskTerminated: 6308 var parts []string 6309 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 6310 6311 if event.Signal != 0 { 6312 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 6313 } 6314 6315 if event.Message != "" { 6316 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 6317 } 6318 desc = strings.Join(parts, ", ") 6319 case TaskRestarting: 6320 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 6321 if event.RestartReason != "" && event.RestartReason != ReasonWithinPolicy { 6322 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 6323 } else { 6324 desc = in 6325 } 6326 case TaskNotRestarting: 6327 if event.RestartReason != "" { 6328 desc = event.RestartReason 6329 } else { 6330 desc = "Task exceeded restart policy" 6331 } 6332 case TaskSiblingFailed: 6333 if event.FailedSibling != "" { 6334 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 6335 } else { 6336 desc = "Task's sibling failed" 6337 } 6338 case TaskSignaling: 6339 sig := event.TaskSignal 6340 reason := event.TaskSignalReason 6341 6342 if sig == "" && reason == "" { 6343 desc = "Task being sent a signal" 6344 } else if sig == "" { 6345 desc = reason 6346 } else if reason == "" { 6347 desc = fmt.Sprintf("Task being sent signal %v", sig) 6348 } else { 6349 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 6350 } 6351 case TaskRestartSignal: 6352 if event.RestartReason != "" { 6353 desc = event.RestartReason 6354 } else { 6355 desc = "Task signaled to restart" 6356 } 6357 case TaskDriverMessage: 6358 desc = event.DriverMessage 6359 case TaskLeaderDead: 6360 desc = "Leader Task in Group dead" 6361 default: 6362 desc = event.Message 6363 } 6364 6365 event.DisplayMessage = desc 6366 } 6367 6368 func (te *TaskEvent) GoString() string { 6369 return fmt.Sprintf("%v - %v", te.Time, te.Type) 6370 } 6371 6372 // SetDisplayMessage sets the display message of TaskEvent 6373 func (te *TaskEvent) SetDisplayMessage(msg string) *TaskEvent { 6374 te.DisplayMessage = msg 6375 return te 6376 } 6377 6378 // SetMessage sets the message of TaskEvent 6379 func (te *TaskEvent) SetMessage(msg string) *TaskEvent { 6380 te.Message = msg 6381 te.Details["message"] = msg 6382 return te 6383 } 6384 6385 func (te *TaskEvent) Copy() *TaskEvent { 6386 if te == nil { 6387 return nil 6388 } 6389 copy := new(TaskEvent) 6390 *copy = *te 6391 return copy 6392 } 6393 6394 func NewTaskEvent(event string) *TaskEvent { 6395 return &TaskEvent{ 6396 Type: event, 6397 Time: time.Now().UnixNano(), 6398 Details: make(map[string]string), 6399 } 6400 } 6401 6402 // SetSetupError is used to store an error that occurred while setting up the 6403 // task 6404 func (e *TaskEvent) SetSetupError(err error) *TaskEvent { 6405 if err != nil { 6406 e.SetupError = err.Error() 6407 e.Details["setup_error"] = err.Error() 6408 } 6409 return e 6410 } 6411 6412 func (e *TaskEvent) SetFailsTask() *TaskEvent { 6413 e.FailsTask = true 6414 e.Details["fails_task"] = "true" 6415 return e 6416 } 6417 6418 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 6419 if err != nil { 6420 e.DriverError = err.Error() 6421 e.Details["driver_error"] = err.Error() 6422 } 6423 return e 6424 } 6425 6426 func (e *TaskEvent) SetExitCode(c 
int) *TaskEvent { 6427 e.ExitCode = c 6428 e.Details["exit_code"] = fmt.Sprintf("%d", c) 6429 return e 6430 } 6431 6432 func (e *TaskEvent) SetTimeout(t bool) *TaskEvent { 6433 e.Details["timeout"] = strconv.FormatBool(t) 6434 return e 6435 } 6436 6437 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 6438 e.Signal = s 6439 e.Details["signal"] = fmt.Sprintf("%d", s) 6440 return e 6441 } 6442 6443 func (e *TaskEvent) SetSignalText(s string) *TaskEvent { 6444 e.Details["signal"] = s 6445 return e 6446 } 6447 6448 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 6449 if err != nil { 6450 e.Message = err.Error() 6451 e.Details["exit_message"] = err.Error() 6452 } 6453 return e 6454 } 6455 6456 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 6457 if err != nil { 6458 e.KillError = err.Error() 6459 e.Details["kill_error"] = err.Error() 6460 } 6461 return e 6462 } 6463 6464 func (e *TaskEvent) SetKillReason(r string) *TaskEvent { 6465 e.KillReason = r 6466 e.Details["kill_reason"] = r 6467 return e 6468 } 6469 6470 func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent { 6471 e.StartDelay = int64(delay) 6472 e.Details["start_delay"] = fmt.Sprintf("%d", delay) 6473 return e 6474 } 6475 6476 func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent { 6477 e.RestartReason = reason 6478 e.Details["restart_reason"] = reason 6479 return e 6480 } 6481 6482 func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent { 6483 e.TaskSignalReason = r 6484 e.Details["task_signal_reason"] = r 6485 return e 6486 } 6487 6488 func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent { 6489 e.TaskSignal = s.String() 6490 e.Details["task_signal"] = s.String() 6491 return e 6492 } 6493 6494 func (e *TaskEvent) SetDownloadError(err error) *TaskEvent { 6495 if err != nil { 6496 e.DownloadError = err.Error() 6497 e.Details["download_error"] = err.Error() 6498 } 6499 return e 6500 } 6501 6502 func (e *TaskEvent) SetValidationError(err error) *TaskEvent { 6503 if err != nil { 6504 e.ValidationError = err.Error() 6505 e.Details["validation_error"] = err.Error() 6506 } 6507 return e 6508 } 6509 6510 func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { 6511 e.KillTimeout = timeout 6512 e.Details["kill_timeout"] = timeout.String() 6513 return e 6514 } 6515 6516 func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent { 6517 e.DiskLimit = limit 6518 e.Details["disk_limit"] = fmt.Sprintf("%d", limit) 6519 return e 6520 } 6521 6522 func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent { 6523 e.FailedSibling = sibling 6524 e.Details["failed_sibling"] = sibling 6525 return e 6526 } 6527 6528 func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent { 6529 if err != nil { 6530 e.VaultError = err.Error() 6531 e.Details["vault_renewal_error"] = err.Error() 6532 } 6533 return e 6534 } 6535 6536 func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent { 6537 e.DriverMessage = m 6538 e.Details["driver_message"] = m 6539 return e 6540 } 6541 6542 func (e *TaskEvent) SetOOMKilled(oom bool) *TaskEvent { 6543 e.Details["oom_killed"] = strconv.FormatBool(oom) 6544 return e 6545 } 6546 6547 // TaskArtifact is an artifact to download before running the task. 6548 type TaskArtifact struct { 6549 // GetterSource is the source to download an artifact using go-getter 6550 GetterSource string 6551 6552 // GetterOptions are options to use when downloading the artifact using 6553 // go-getter. 
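// For example (illustrative; the checksum shown is the SHA-256 of empty
// input, used here only as a well-formed sample value):
//
//	GetterOptions: map[string]string{
//		"checksum": "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
//	}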
6554 GetterOptions map[string]string 6555 6556 // GetterMode is the go-getter.ClientMode for fetching resources. 6557 // Defaults to "any" but can be set to "file" or "dir". 6558 GetterMode string 6559 6560 // RelativeDest is the download destination given relative to the task's 6561 // directory. 6562 RelativeDest string 6563 } 6564 6565 func (ta *TaskArtifact) Copy() *TaskArtifact { 6566 if ta == nil { 6567 return nil 6568 } 6569 nta := new(TaskArtifact) 6570 *nta = *ta 6571 nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions) 6572 return nta 6573 } 6574 6575 func (ta *TaskArtifact) GoString() string { 6576 return fmt.Sprintf("%+v", ta) 6577 } 6578 6579 // Hash creates a unique identifier for a TaskArtifact as the same GetterSource 6580 // may be specified multiple times with different destinations. 6581 func (ta *TaskArtifact) Hash() string { 6582 hash, err := blake2b.New256(nil) 6583 if err != nil { 6584 panic(err) 6585 } 6586 6587 hash.Write([]byte(ta.GetterSource)) 6588 6589 // Must iterate over keys in a consistent order 6590 keys := make([]string, 0, len(ta.GetterOptions)) 6591 for k := range ta.GetterOptions { 6592 keys = append(keys, k) 6593 } 6594 sort.Strings(keys) 6595 for _, k := range keys { 6596 hash.Write([]byte(k)) 6597 hash.Write([]byte(ta.GetterOptions[k])) 6598 } 6599 6600 hash.Write([]byte(ta.GetterMode)) 6601 hash.Write([]byte(ta.RelativeDest)) 6602 return base64.RawStdEncoding.EncodeToString(hash.Sum(nil)) 6603 } 6604 6605 // PathEscapesAllocDir returns if the given path escapes the allocation 6606 // directory. The prefix allows adding a prefix if the path will be joined, for 6607 // example a "task/local" prefix may be provided if the path will be joined 6608 // against that prefix. 6609 func PathEscapesAllocDir(prefix, path string) (bool, error) { 6610 // Verify the destination doesn't escape the tasks directory 6611 alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/")) 6612 if err != nil { 6613 return false, err 6614 } 6615 abs, err := filepath.Abs(filepath.Join(alloc, prefix, path)) 6616 if err != nil { 6617 return false, err 6618 } 6619 rel, err := filepath.Rel(alloc, abs) 6620 if err != nil { 6621 return false, err 6622 } 6623 6624 return strings.HasPrefix(rel, ".."), nil 6625 } 6626 6627 func (ta *TaskArtifact) Validate() error { 6628 // Verify the source 6629 var mErr multierror.Error 6630 if ta.GetterSource == "" { 6631 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 6632 } 6633 6634 switch ta.GetterMode { 6635 case "": 6636 // Default to any 6637 ta.GetterMode = GetterModeAny 6638 case GetterModeAny, GetterModeFile, GetterModeDir: 6639 // Ok 6640 default: 6641 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s", 6642 ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir)) 6643 } 6644 6645 escaped, err := PathEscapesAllocDir("task", ta.RelativeDest) 6646 if err != nil { 6647 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 6648 } else if escaped { 6649 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 6650 } 6651 6652 if err := ta.validateChecksum(); err != nil { 6653 mErr.Errors = append(mErr.Errors, err) 6654 } 6655 6656 return mErr.ErrorOrNil() 6657 } 6658 6659 func (ta *TaskArtifact) validateChecksum() error { 6660 check, ok := ta.GetterOptions["checksum"] 6661 if !ok { 6662 return nil 6663 } 6664 6665 // Job struct validation occurs before interpolation resolution can 
be effective. 6666 // Skip the check if the checksum contains a variable reference; fetching the 6667 // artifact will eventually fail if the checksum is indeed invalid. 6668 if args.ContainsEnv(check) { 6669 return nil 6670 } 6671 6672 check = strings.TrimSpace(check) 6673 if check == "" { 6674 return fmt.Errorf("checksum value cannot be empty") 6675 } 6676 6677 parts := strings.Split(check, ":") 6678 if l := len(parts); l != 2 { 6679 return fmt.Errorf(`checksum must be given as "type:value"; got %q`, check) 6680 } 6681 6682 checksumVal := parts[1] 6683 checksumBytes, err := hex.DecodeString(checksumVal) 6684 if err != nil { 6685 return fmt.Errorf("invalid checksum: %v", err) 6686 } 6687 6688 checksumType := parts[0] 6689 expectedLength := 0 6690 switch checksumType { 6691 case "md5": 6692 expectedLength = md5.Size 6693 case "sha1": 6694 expectedLength = sha1.Size 6695 case "sha256": 6696 expectedLength = sha256.Size 6697 case "sha512": 6698 expectedLength = sha512.Size 6699 default: 6700 return fmt.Errorf("unsupported checksum type: %s", checksumType) 6701 } 6702 6703 if len(checksumBytes) != expectedLength { 6704 return fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal) 6705 } 6706 6707 return nil 6708 } 6709 6710 const ( 6711 ConstraintDistinctProperty = "distinct_property" 6712 ConstraintDistinctHosts = "distinct_hosts" 6713 ConstraintRegex = "regexp" 6714 ConstraintVersion = "version" 6715 ConstraintSemver = "semver" 6716 ConstraintSetContains = "set_contains" 6717 ConstraintSetContainsAll = "set_contains_all" 6718 ConstraintSetContainsAny = "set_contains_any" 6719 ConstraintAttributeIsSet = "is_set" 6720 ConstraintAttributeIsNotSet = "is_not_set" 6721 ) 6722 6723 // Constraints are used to restrict placement options. 6724 type Constraint struct { 6725 LTarget string // Left-hand target 6726 RTarget string // Right-hand target 6727 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 6728 str string // Memoized string 6729 } 6730 6731 // Equals checks if two constraints are equal 6732 func (c *Constraint) Equals(o *Constraint) bool { 6733 return c == o || 6734 c.LTarget == o.LTarget && 6735 c.RTarget == o.RTarget && 6736 c.Operand == o.Operand 6737 } 6738 6739 func (c *Constraint) Equal(o *Constraint) bool { 6740 return c.Equals(o) 6741 } 6742 6743 func (c *Constraint) Copy() *Constraint { 6744 if c == nil { 6745 return nil 6746 } 6747 nc := new(Constraint) 6748 *nc = *c 6749 return nc 6750 } 6751 6752 func (c *Constraint) String() string { 6753 if c.str != "" { 6754 return c.str 6755 } 6756 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 6757 return c.str 6758 } 6759 6760 func (c *Constraint) Validate() error { 6761 var mErr multierror.Error 6762 if c.Operand == "" { 6763 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 6764 } 6765 6766 // requireLtarget specifies whether the constraint requires an LTarget to be 6767 // provided.
6768 requireLtarget := true 6769 6770 // Perform additional validation based on operand 6771 switch c.Operand { 6772 case ConstraintDistinctHosts: 6773 requireLtarget = false 6774 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 6775 if c.RTarget == "" { 6776 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget")) 6777 } 6778 case ConstraintRegex: 6779 if _, err := regexp.Compile(c.RTarget); err != nil { 6780 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 6781 } 6782 case ConstraintVersion: 6783 if _, err := version.NewConstraint(c.RTarget); err != nil { 6784 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 6785 } 6786 case ConstraintSemver: 6787 if _, err := semver.NewConstraint(c.RTarget); err != nil { 6788 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver constraint is invalid: %v", err)) 6789 } 6790 case ConstraintDistinctProperty: 6791 // If a count is set, make sure it is convertible to a uint64 6792 if c.RTarget != "" { 6793 count, err := strconv.ParseUint(c.RTarget, 10, 64) 6794 if err != nil { 6795 mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err)) 6796 } else if count < 1 { 6797 mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count)) 6798 } 6799 } 6800 case ConstraintAttributeIsSet, ConstraintAttributeIsNotSet: 6801 if c.RTarget != "" { 6802 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q does not support an RTarget", c.Operand)) 6803 } 6804 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 6805 if c.RTarget == "" { 6806 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand)) 6807 } 6808 default: 6809 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand)) 6810 } 6811 6812 // Ensure we have an LTarget for the constraints that need one 6813 if requireLtarget && c.LTarget == "" { 6814 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint")) 6815 } 6816 6817 return mErr.ErrorOrNil() 6818 } 6819 6820 type Constraints []*Constraint 6821 6822 // Equals compares Constraints as a set 6823 func (xs *Constraints) Equals(ys *Constraints) bool { 6824 if xs == ys { 6825 return true 6826 } 6827 if xs == nil || ys == nil { 6828 return false 6829 } 6830 if len(*xs) != len(*ys) { 6831 return false 6832 } 6833 SETEQUALS: 6834 for _, x := range *xs { 6835 for _, y := range *ys { 6836 if x.Equals(y) { 6837 continue SETEQUALS 6838 } 6839 } 6840 return false 6841 } 6842 return true 6843 } 6844 6845 // Affinity is used to score placement options based on a weight 6846 type Affinity struct { 6847 LTarget string // Left-hand target 6848 RTarget string // Right-hand target 6849 Operand string // Affinity operand (<=, <, =, !=, >, >=), set_contains_all, set_contains_any 6850 Weight int8 // Weight applied to nodes that match the affinity. 
Can be negative 6851 str string // Memoized string 6852 } 6853 6854 // Equal checks if two affinities are equal 6855 func (a *Affinity) Equals(o *Affinity) bool { 6856 return a == o || 6857 a.LTarget == o.LTarget && 6858 a.RTarget == o.RTarget && 6859 a.Operand == o.Operand && 6860 a.Weight == o.Weight 6861 } 6862 6863 func (a *Affinity) Equal(o *Affinity) bool { 6864 return a.Equals(o) 6865 } 6866 6867 func (a *Affinity) Copy() *Affinity { 6868 if a == nil { 6869 return nil 6870 } 6871 na := new(Affinity) 6872 *na = *a 6873 return na 6874 } 6875 6876 func (a *Affinity) String() string { 6877 if a.str != "" { 6878 return a.str 6879 } 6880 a.str = fmt.Sprintf("%s %s %s %v", a.LTarget, a.Operand, a.RTarget, a.Weight) 6881 return a.str 6882 } 6883 6884 func (a *Affinity) Validate() error { 6885 var mErr multierror.Error 6886 if a.Operand == "" { 6887 mErr.Errors = append(mErr.Errors, errors.New("Missing affinity operand")) 6888 } 6889 6890 // Perform additional validation based on operand 6891 switch a.Operand { 6892 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 6893 if a.RTarget == "" { 6894 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains operators require an RTarget")) 6895 } 6896 case ConstraintRegex: 6897 if _, err := regexp.Compile(a.RTarget); err != nil { 6898 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 6899 } 6900 case ConstraintVersion: 6901 if _, err := version.NewConstraint(a.RTarget); err != nil { 6902 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version affinity is invalid: %v", err)) 6903 } 6904 case ConstraintSemver: 6905 if _, err := semver.NewConstraint(a.RTarget); err != nil { 6906 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver affinity is invalid: %v", err)) 6907 } 6908 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 6909 if a.RTarget == "" { 6910 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", a.Operand)) 6911 } 6912 default: 6913 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown affinity operator %q", a.Operand)) 6914 } 6915 6916 // Ensure we have an LTarget 6917 if a.LTarget == "" { 6918 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required")) 6919 } 6920 6921 // Ensure that weight is between -100 and 100, and not zero 6922 if a.Weight == 0 { 6923 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight cannot be zero")) 6924 } 6925 6926 if a.Weight > 100 || a.Weight < -100 { 6927 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight must be within the range [-100,100]")) 6928 } 6929 6930 return mErr.ErrorOrNil() 6931 } 6932 6933 // Spread is used to specify desired distribution of allocations according to weight 6934 type Spread struct { 6935 // Attribute is the node attribute used as the spread criteria 6936 Attribute string 6937 6938 // Weight is the relative weight of this spread, useful when there are multiple 6939 // spread and affinities 6940 Weight int8 6941 6942 // SpreadTarget is used to describe desired percentages for each attribute value 6943 SpreadTarget []*SpreadTarget 6944 6945 // Memoized string representation 6946 str string 6947 } 6948 6949 type Affinities []*Affinity 6950 6951 // Equals compares Affinities as a set 6952 func (xs *Affinities) Equals(ys *Affinities) bool { 6953 if xs == ys { 6954 return true 6955 } 6956 if xs == nil || ys == nil { 6957 return false 6958 } 6959 if len(*xs) != len(*ys) { 6960 return false 6961 } 6962 SETEQUALS: 6963 for _, x := range *xs 
{ 6964 for _, y := range *ys { 6965 if x.Equals(y) { 6966 continue SETEQUALS 6967 } 6968 } 6969 return false 6970 } 6971 return true 6972 } 6973 6974 func (s *Spread) Copy() *Spread { 6975 if s == nil { 6976 return nil 6977 } 6978 ns := new(Spread) 6979 *ns = *s 6980 6981 ns.SpreadTarget = CopySliceSpreadTarget(s.SpreadTarget) 6982 return ns 6983 } 6984 6985 func (s *Spread) String() string { 6986 if s.str != "" { 6987 return s.str 6988 } 6989 s.str = fmt.Sprintf("%s %s %v", s.Attribute, s.SpreadTarget, s.Weight) 6990 return s.str 6991 } 6992 6993 func (s *Spread) Validate() error { 6994 var mErr multierror.Error 6995 if s.Attribute == "" { 6996 mErr.Errors = append(mErr.Errors, errors.New("Missing spread attribute")) 6997 } 6998 if s.Weight <= 0 || s.Weight > 100 { 6999 mErr.Errors = append(mErr.Errors, errors.New("Spread stanza must have a positive weight from 0 to 100")) 7000 } 7001 seen := make(map[string]struct{}) 7002 sumPercent := uint32(0) 7003 7004 for _, target := range s.SpreadTarget { 7005 // Make sure there are no duplicates 7006 _, ok := seen[target.Value] 7007 if !ok { 7008 seen[target.Value] = struct{}{} 7009 } else { 7010 mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target value %q already defined", target.Value)) 7011 } 7012 if target.Percent < 0 || target.Percent > 100 { 7013 mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target percentage for value %q must be between 0 and 100", target.Value)) 7014 } 7015 sumPercent += uint32(target.Percent) 7016 } 7017 if sumPercent > 100 { 7018 mErr.Errors = append(mErr.Errors, fmt.Errorf("Sum of spread target percentages must not be greater than 100%%; got %d%%", sumPercent)) 7019 } 7020 return mErr.ErrorOrNil() 7021 } 7022 7023 // SpreadTarget is used to specify desired percentages for each attribute value 7024 type SpreadTarget struct { 7025 // Value is a single attribute value, like "dc1" 7026 Value string 7027 7028 // Percent is the desired percentage of allocs 7029 Percent uint8 7030 7031 // Memoized string representation 7032 str string 7033 } 7034 7035 func (s *SpreadTarget) Copy() *SpreadTarget { 7036 if s == nil { 7037 return nil 7038 } 7039 7040 ns := new(SpreadTarget) 7041 *ns = *s 7042 return ns 7043 } 7044 7045 func (s *SpreadTarget) String() string { 7046 if s.str != "" { 7047 return s.str 7048 } 7049 s.str = fmt.Sprintf("%q %v%%", s.Value, s.Percent) 7050 return s.str 7051 } 7052 7053 // EphemeralDisk is an ephemeral disk object 7054 type EphemeralDisk struct { 7055 // Sticky indicates whether the allocation is sticky to a node 7056 Sticky bool 7057 7058 // SizeMB is the size of the local disk 7059 SizeMB int 7060 7061 // Migrate determines if Nomad client should migrate the allocation dir for 7062 // sticky allocations 7063 Migrate bool 7064 } 7065 7066 // DefaultEphemeralDisk returns an EphemeralDisk with default configurations 7067 func DefaultEphemeralDisk() *EphemeralDisk { 7068 return &EphemeralDisk{ 7069 SizeMB: 300, 7070 } 7071 } 7072 7073 // Validate validates EphemeralDisk 7074 func (d *EphemeralDisk) Validate() error { 7075 if d.SizeMB < 10 { 7076 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 7077 } 7078 return nil 7079 } 7080 7081 // Copy copies the EphemeralDisk struct and returns a new one 7082 func (d *EphemeralDisk) Copy() *EphemeralDisk { 7083 ld := new(EphemeralDisk) 7084 *ld = *d 7085 return ld 7086 } 7087 7088 var ( 7089 // VaultUnrecoverableError matches unrecoverable errors returned by a Vault 7090 // server 7091
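// For example (illustrative messages):
//
//	VaultUnrecoverableError.MatchString("Code: 403. Errors: permission denied") // true
//	VaultUnrecoverableError.MatchString("Code: 500. Errors: internal error")    // false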
VaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`) 7092 ) 7093 7094 const ( 7095 // VaultChangeModeNoop takes no action when a new token is retrieved. 7096 VaultChangeModeNoop = "noop" 7097 7098 // VaultChangeModeSignal signals the task when a new token is retrieved. 7099 VaultChangeModeSignal = "signal" 7100 7101 // VaultChangeModeRestart restarts the task when a new token is retrieved. 7102 VaultChangeModeRestart = "restart" 7103 ) 7104 7105 // Vault stores the set of permissions a task needs access to from Vault. 7106 type Vault struct { 7107 // Policies is the set of policies that the task needs access to 7108 Policies []string 7109 7110 // Env marks whether the Vault Token should be exposed as an environment 7111 // variable 7112 Env bool 7113 7114 // ChangeMode is used to configure the task's behavior when the Vault 7115 // token changes because the original token could not be renewed in time. 7116 ChangeMode string 7117 7118 // ChangeSignal is the signal sent to the task when a new token is 7119 // retrieved. This is only valid when using the signal change mode. 7120 ChangeSignal string 7121 } 7122 7123 func DefaultVaultBlock() *Vault { 7124 return &Vault{ 7125 Env: true, 7126 ChangeMode: VaultChangeModeRestart, 7127 } 7128 } 7129 7130 // Copy returns a copy of this Vault block. 7131 func (v *Vault) Copy() *Vault { 7132 if v == nil { 7133 return nil 7134 } 7135 7136 nv := new(Vault) 7137 *nv = *v 7138 return nv 7139 } 7140 7141 func (v *Vault) Canonicalize() { 7142 if v.ChangeSignal != "" { 7143 v.ChangeSignal = strings.ToUpper(v.ChangeSignal) 7144 } 7145 } 7146 7147 // Validate returns whether the Vault block is valid. 7148 func (v *Vault) Validate() error { 7149 if v == nil { 7150 return nil 7151 } 7152 7153 var mErr multierror.Error 7154 if len(v.Policies) == 0 { 7155 multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty")) 7156 } 7157 7158 for _, p := range v.Policies { 7159 if p == "root" { 7160 multierror.Append(&mErr, fmt.Errorf("Can not specify \"root\" policy")) 7161 } 7162 } 7163 7164 switch v.ChangeMode { 7165 case VaultChangeModeSignal: 7166 if v.ChangeSignal == "" { 7167 multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal)) 7168 } 7169 case VaultChangeModeNoop, VaultChangeModeRestart: 7170 default: 7171 multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode)) 7172 } 7173 7174 return mErr.ErrorOrNil() 7175 } 7176 7177 const ( 7178 // DeploymentStatuses are the various states a deployment can be in 7179 DeploymentStatusRunning = "running" 7180 DeploymentStatusPaused = "paused" 7181 DeploymentStatusFailed = "failed" 7182 DeploymentStatusSuccessful = "successful" 7183 DeploymentStatusCancelled = "cancelled" 7184 7185 // TODO Statuses and Descriptions do not match 1:1 and we sometimes use the Description as a status flag 7186 7187 // DeploymentStatusDescriptions are the various descriptions of the states a 7188 // deployment can be in.
7189 DeploymentStatusDescriptionRunning = "Deployment is running" 7190 DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires manual promotion" 7191 DeploymentStatusDescriptionRunningAutoPromotion = "Deployment is running pending automatic promotion" 7192 DeploymentStatusDescriptionPaused = "Deployment is paused" 7193 DeploymentStatusDescriptionSuccessful = "Deployment completed successfully" 7194 DeploymentStatusDescriptionStoppedJob = "Cancelled because job is stopped" 7195 DeploymentStatusDescriptionNewerJob = "Cancelled due to newer version of job" 7196 DeploymentStatusDescriptionFailedAllocations = "Failed due to unhealthy allocations" 7197 DeploymentStatusDescriptionProgressDeadline = "Failed due to progress deadline" 7198 DeploymentStatusDescriptionFailedByUser = "Deployment marked as failed" 7199 ) 7200 7201 // DeploymentStatusDescriptionRollback is used to get the status description of 7202 // a deployment when rolling back to an older job. 7203 func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string { 7204 return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion) 7205 } 7206 7207 // DeploymentStatusDescriptionRollbackNoop is used to get the status description of 7208 // a deployment when rolling back is not possible because it has the same specification 7209 func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string { 7210 return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion) 7211 } 7212 7213 // DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of 7214 // a deployment when there is no target to rollback to but autorevert is desired. 7215 func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string { 7216 return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription) 7217 } 7218 7219 // Deployment is the object that represents a job deployment which is used to 7220 // transition a job between versions. 7221 type Deployment struct { 7222 // ID is a generated UUID for the deployment 7223 ID string 7224 7225 // Namespace is the namespace the deployment is created in 7226 Namespace string 7227 7228 // JobID is the job the deployment is created for 7229 JobID string 7230 7231 // JobVersion is the version of the job that the deployment is tracking 7232 JobVersion uint64 7233 7234 // JobModifyIndex is the ModifyIndex of the job which the deployment is 7235 // tracking. 7236 JobModifyIndex uint64 7237 7238 // JobSpecModifyIndex is the JobModifyIndex of the job which the 7239 // deployment is tracking. 7240 JobSpecModifyIndex uint64 7241 7242 // JobCreateIndex is the create index of the job which the deployment is 7243 // tracking. It is needed so that if the job gets stopped and re-run we can 7244 // present the correct list of deployments for the job and not old ones. 7245 JobCreateIndex uint64 7246 7247 // TaskGroups is the set of task groups affected by the deployment and their 7248 // current deployment status. 7249 TaskGroups map[string]*DeploymentState 7250 7251 // The status of the deployment 7252 Status string 7253 7254 // StatusDescription allows a human readable description of the deployment 7255 // status. 7256 StatusDescription string 7257 7258 CreateIndex uint64 7259 ModifyIndex uint64 7260 } 7261 7262 // NewDeployment creates a new deployment given the job.
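// For example (an illustrative sketch; job is assumed to be a populated *Job):
//
//	d := NewDeployment(job)
//	// d.Status == DeploymentStatusRunning
//	// d.TaskGroups starts empty and is filled in per task group by the scheduler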
7263 func NewDeployment(job *Job) *Deployment { 7264 return &Deployment{ 7265 ID: uuid.Generate(), 7266 Namespace: job.Namespace, 7267 JobID: job.ID, 7268 JobVersion: job.Version, 7269 JobModifyIndex: job.ModifyIndex, 7270 JobSpecModifyIndex: job.JobModifyIndex, 7271 JobCreateIndex: job.CreateIndex, 7272 Status: DeploymentStatusRunning, 7273 StatusDescription: DeploymentStatusDescriptionRunning, 7274 TaskGroups: make(map[string]*DeploymentState, len(job.TaskGroups)), 7275 } 7276 } 7277 7278 func (d *Deployment) Copy() *Deployment { 7279 if d == nil { 7280 return nil 7281 } 7282 7283 c := &Deployment{} 7284 *c = *d 7285 7286 c.TaskGroups = nil 7287 if l := len(d.TaskGroups); d.TaskGroups != nil { 7288 c.TaskGroups = make(map[string]*DeploymentState, l) 7289 for tg, s := range d.TaskGroups { 7290 c.TaskGroups[tg] = s.Copy() 7291 } 7292 } 7293 7294 return c 7295 } 7296 7297 // Active returns whether the deployment is active or terminal. 7298 func (d *Deployment) Active() bool { 7299 switch d.Status { 7300 case DeploymentStatusRunning, DeploymentStatusPaused: 7301 return true 7302 default: 7303 return false 7304 } 7305 } 7306 7307 // GetID is a helper for getting the ID when the object may be nil 7308 func (d *Deployment) GetID() string { 7309 if d == nil { 7310 return "" 7311 } 7312 return d.ID 7313 } 7314 7315 // HasPlacedCanaries returns whether the deployment has placed canaries 7316 func (d *Deployment) HasPlacedCanaries() bool { 7317 if d == nil || len(d.TaskGroups) == 0 { 7318 return false 7319 } 7320 for _, group := range d.TaskGroups { 7321 if len(group.PlacedCanaries) != 0 { 7322 return true 7323 } 7324 } 7325 return false 7326 } 7327 7328 // RequiresPromotion returns whether the deployment requires promotion to 7329 // continue 7330 func (d *Deployment) RequiresPromotion() bool { 7331 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 7332 return false 7333 } 7334 for _, group := range d.TaskGroups { 7335 if group.DesiredCanaries > 0 && !group.Promoted { 7336 return true 7337 } 7338 } 7339 return false 7340 } 7341 7342 // HasAutoPromote determines if all taskgroups are marked auto_promote 7343 func (d *Deployment) HasAutoPromote() bool { 7344 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 7345 return false 7346 } 7347 for _, group := range d.TaskGroups { 7348 if !group.AutoPromote { 7349 return false 7350 } 7351 } 7352 return true 7353 } 7354 7355 func (d *Deployment) GoString() string { 7356 base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription) 7357 for group, state := range d.TaskGroups { 7358 base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state) 7359 } 7360 return base 7361 } 7362 7363 // DeploymentState tracks the state of a deployment for a given task group. 7364 type DeploymentState struct { 7365 // AutoRevert marks whether the task group has indicated the job should be 7366 // reverted on failure 7367 AutoRevert bool 7368 7369 // AutoPromote marks promotion triggered automatically by healthy canaries 7370 // copied from TaskGroup UpdateStrategy in scheduler.reconcile 7371 AutoPromote bool 7372 7373 // ProgressDeadline is the deadline by which an allocation must transition 7374 // to healthy before the deployment is considered failed. 7375 ProgressDeadline time.Duration 7376 7377 // RequireProgressBy is the time by which an allocation must transition 7378 // to healthy before the deployment is considered failed. 
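// As a rough sketch of the relationship (the exact bookkeeping lives in the
// scheduler, so treat this as an approximation): RequireProgressBy is the
// time of the most recent placement plus ProgressDeadline.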
7379 RequireProgressBy time.Time 7380 7381 // Promoted marks whether the canaries have been promoted 7382 Promoted bool 7383 7384 // PlacedCanaries is the set of placed canary allocations 7385 PlacedCanaries []string 7386 7387 // DesiredCanaries is the number of canaries that should be created. 7388 DesiredCanaries int 7389 7390 // DesiredTotal is the total number of allocations that should be created as 7391 // part of the deployment. 7392 DesiredTotal int 7393 7394 // PlacedAllocs is the number of allocations that have been placed 7395 PlacedAllocs int 7396 7397 // HealthyAllocs is the number of allocations that have been marked healthy. 7398 HealthyAllocs int 7399 7400 // UnhealthyAllocs are allocations that have been marked as unhealthy. 7401 UnhealthyAllocs int 7402 } 7403 7404 func (d *DeploymentState) GoString() string { 7405 base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal) 7406 base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries) 7407 base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries) 7408 base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted) 7409 base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs) 7410 base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs) 7411 base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs) 7412 base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert) 7413 base += fmt.Sprintf("\n\tAutoPromote: %v", d.AutoPromote) 7414 return base 7415 } 7416 7417 func (d *DeploymentState) Copy() *DeploymentState { 7418 c := &DeploymentState{} 7419 *c = *d 7420 c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries) 7421 return c 7422 } 7423 7424 // DeploymentStatusUpdate is used to update the status of a given deployment 7425 type DeploymentStatusUpdate struct { 7426 // DeploymentID is the ID of the deployment to update 7427 DeploymentID string 7428 7429 // Status is the new status of the deployment. 7430 Status string 7431 7432 // StatusDescription is the new status description of the deployment. 
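// For example (an illustrative update that fails a deployment past its
// deadline; d is assumed to be a *Deployment):
//
//	update := &DeploymentStatusUpdate{
//		DeploymentID:      d.ID,
//		Status:            DeploymentStatusFailed,
//		StatusDescription: DeploymentStatusDescriptionProgressDeadline,
//	}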
7433 StatusDescription string 7434 } 7435 7436 // RescheduleTracker encapsulates previous reschedule events 7437 type RescheduleTracker struct { 7438 Events []*RescheduleEvent 7439 } 7440 7441 func (rt *RescheduleTracker) Copy() *RescheduleTracker { 7442 if rt == nil { 7443 return nil 7444 } 7445 nt := &RescheduleTracker{} 7446 *nt = *rt 7447 rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events)) 7448 for _, tracker := range rt.Events { 7449 rescheduleEvents = append(rescheduleEvents, tracker.Copy()) 7450 } 7451 nt.Events = rescheduleEvents 7452 return nt 7453 } 7454 7455 // RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation 7456 type RescheduleEvent struct { 7457 // RescheduleTime is the timestamp of a reschedule attempt 7458 RescheduleTime int64 7459 7460 // PrevAllocID is the ID of the previous allocation being restarted 7461 PrevAllocID string 7462 7463 // PrevNodeID is the node ID of the previous allocation 7464 PrevNodeID string 7465 7466 // Delay is the reschedule delay associated with the attempt 7467 Delay time.Duration 7468 } 7469 7470 func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string, delay time.Duration) *RescheduleEvent { 7471 return &RescheduleEvent{RescheduleTime: rescheduleTime, 7472 PrevAllocID: prevAllocID, 7473 PrevNodeID: prevNodeID, 7474 Delay: delay} 7475 } 7476 7477 func (re *RescheduleEvent) Copy() *RescheduleEvent { 7478 if re == nil { 7479 return nil 7480 } 7481 copy := new(RescheduleEvent) 7482 *copy = *re 7483 return copy 7484 } 7485 7486 // DesiredTransition is used to mark an allocation as having a desired state 7487 // transition. This information can be used by the scheduler to make the 7488 // correct decision. 7489 type DesiredTransition struct { 7490 // Migrate is used to indicate that this allocation should be stopped and 7491 // migrated to another node. 7492 Migrate *bool 7493 7494 // Reschedule is used to indicate that this allocation is eligible to be 7495 // rescheduled. Most allocations are automatically eligible for 7496 // rescheduling, so this field is only required when an allocation is not 7497 // automatically eligible. An example is an allocation that is part of a 7498 // deployment. 7499 Reschedule *bool 7500 7501 // ForceReschedule is used to indicate that this allocation must be rescheduled. 7502 // This field is only used when operators want to force a placement even if 7503 // a failed allocation is not eligible to be rescheduled 7504 ForceReschedule *bool 7505 } 7506 7507 // Merge merges the two desired transitions, preferring the values from the 7508 // passed in object. 7509 func (d *DesiredTransition) Merge(o *DesiredTransition) { 7510 if o.Migrate != nil { 7511 d.Migrate = o.Migrate 7512 } 7513 7514 if o.Reschedule != nil { 7515 d.Reschedule = o.Reschedule 7516 } 7517 7518 if o.ForceReschedule != nil { 7519 d.ForceReschedule = o.ForceReschedule 7520 } 7521 } 7522 7523 // ShouldMigrate returns whether the transition object dictates a migration. 7524 func (d *DesiredTransition) ShouldMigrate() bool { 7525 return d.Migrate != nil && *d.Migrate 7526 } 7527 7528 // ShouldReschedule returns whether the transition object dictates a 7529 // rescheduling. 7530 func (d *DesiredTransition) ShouldReschedule() bool { 7531 return d.Reschedule != nil && *d.Reschedule 7532 } 7533 7534 // ShouldForceReschedule returns whether the transition object dictates a 7535 // forced rescheduling. 
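// Note that, unlike ShouldMigrate and ShouldReschedule above, this accessor
// also guards against a nil receiver (illustrative):
//
//	var dt *DesiredTransition
//	dt.ShouldForceReschedule() // false, no panic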
7536 func (d *DesiredTransition) ShouldForceReschedule() bool { 7537 if d == nil { 7538 return false 7539 } 7540 return d.ForceReschedule != nil && *d.ForceReschedule 7541 } 7542 7543 const ( 7544 AllocDesiredStatusRun = "run" // Allocation should run 7545 AllocDesiredStatusStop = "stop" // Allocation should stop 7546 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 7547 ) 7548 7549 const ( 7550 AllocClientStatusPending = "pending" 7551 AllocClientStatusRunning = "running" 7552 AllocClientStatusComplete = "complete" 7553 AllocClientStatusFailed = "failed" 7554 AllocClientStatusLost = "lost" 7555 ) 7556 7557 // Allocation is used to allocate the placement of a task group to a node. 7558 type Allocation struct { 7559 // msgpack omit empty fields during serialization 7560 _struct bool `codec:",omitempty"` // nolint: structcheck 7561 7562 // ID of the allocation (UUID) 7563 ID string 7564 7565 // Namespace is the namespace the allocation is created in 7566 Namespace string 7567 7568 // ID of the evaluation that generated this allocation 7569 EvalID string 7570 7571 // Name is a logical name of the allocation. 7572 Name string 7573 7574 // NodeID is the node this is being placed on 7575 NodeID string 7576 7577 // NodeName is the name of the node this is being placed on. 7578 NodeName string 7579 7580 // Job is the parent job of the task group being allocated. 7581 // This is copied at allocation time to avoid issues if the job 7582 // definition is updated. 7583 JobID string 7584 Job *Job 7585 7586 // TaskGroup is the name of the task group that should be run 7587 TaskGroup string 7588 7589 // COMPAT(0.11): Remove in 0.11 7590 // Resources is the total set of resources allocated as part 7591 // of this allocation of the task group. Dynamic ports will be set by 7592 // the scheduler. 7593 Resources *Resources 7594 7595 // COMPAT(0.11): Remove in 0.11 7596 // SharedResources are the resources that are shared by all the tasks in an 7597 // allocation 7598 SharedResources *Resources 7599 7600 // COMPAT(0.11): Remove in 0.11 7601 // TaskResources is the set of resources allocated to each 7602 // task. These should sum to the total Resources. Dynamic ports will be 7603 // set by the scheduler. 7604 TaskResources map[string]*Resources 7605 7606 // AllocatedResources is the total resources allocated for the task group. 7607 AllocatedResources *AllocatedResources 7608 7609 // Metrics associated with this allocation 7610 Metrics *AllocMetric 7611 7612 // Desired Status of the allocation on the client 7613 DesiredStatus string 7614 7615 // DesiredStatusDescription is meant to provide more human useful information 7616 DesiredDescription string 7617 7618 // DesiredTransition is used to indicate that a state transition 7619 // is desired for a given reason. 
7620 DesiredTransition DesiredTransition
7621
7622 // Status of the allocation on the client
7623 ClientStatus string
7624
7625 // ClientDescription is meant to provide more human-friendly information
7626 ClientDescription string
7627
7628 // TaskStates stores the state of each task.
7629 TaskStates map[string]*TaskState
7630
7631 // PreviousAllocation is the allocation that this allocation is replacing
7632 PreviousAllocation string
7633
7634 // NextAllocation is the allocation that this allocation is being replaced by
7635 NextAllocation string
7636
7637 // DeploymentID identifies an allocation as being created from a
7638 // particular deployment
7639 DeploymentID string
7640
7641 // DeploymentStatus captures the status of the allocation as part of the
7642 // given deployment
7643 DeploymentStatus *AllocDeploymentStatus
7644
7645 // RescheduleTracker captures details of previous reschedule attempts of the allocation
7646 RescheduleTracker *RescheduleTracker
7647
7648 // FollowupEvalID captures a follow up evaluation created to handle a failed allocation
7649 // that can be rescheduled in the future
7650 FollowupEvalID string
7651
7652 // PreemptedAllocations captures IDs of any allocations that were preempted
7653 // in order to place this allocation
7654 PreemptedAllocations []string
7655
7656 // PreemptedByAllocation tracks the alloc ID of the allocation that caused this allocation
7657 // to stop running because it got preempted
7658 PreemptedByAllocation string
7659
7660 // Raft Indexes
7661 CreateIndex uint64
7662 ModifyIndex uint64
7663
7664 // AllocModifyIndex is not updated when the client updates allocations. This
7665 // lets the client pull only the allocs updated by the server.
7666 AllocModifyIndex uint64
7667
7668 // CreateTime is the time the allocation has finished scheduling and been
7669 // verified by the plan applier.
7670 CreateTime int64
7671
7672 // ModifyTime is the time the allocation was last updated.
7673 ModifyTime int64
7674 }
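
// An illustrative note on the Name/Index convention used by Index below:
// allocation names follow "<job-id>.<task-group>[<index>]", so for
//
//	a := &Allocation{JobID: "example", TaskGroup: "cache", Name: "example.cache[3]"}
//
// a.Index() slices the digits between the prefix "example.cache[" and the
// trailing "]" and returns 3.

7676 // Index returns the index of the allocation. If the allocation is from a task
7677 // group with count greater than 1, there will be multiple allocations for it.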
7678 func (a *Allocation) Index() uint { 7679 l := len(a.Name) 7680 prefix := len(a.JobID) + len(a.TaskGroup) + 2 7681 if l <= 3 || l <= prefix { 7682 return uint(0) 7683 } 7684 7685 strNum := a.Name[prefix : len(a.Name)-1] 7686 num, _ := strconv.Atoi(strNum) 7687 return uint(num) 7688 } 7689 7690 // Copy provides a copy of the allocation and deep copies the job 7691 func (a *Allocation) Copy() *Allocation { 7692 return a.copyImpl(true) 7693 } 7694 7695 // CopySkipJob provides a copy of the allocation but doesn't deep copy the job 7696 func (a *Allocation) CopySkipJob() *Allocation { 7697 return a.copyImpl(false) 7698 } 7699 7700 func (a *Allocation) copyImpl(job bool) *Allocation { 7701 if a == nil { 7702 return nil 7703 } 7704 na := new(Allocation) 7705 *na = *a 7706 7707 if job { 7708 na.Job = na.Job.Copy() 7709 } 7710 7711 na.AllocatedResources = na.AllocatedResources.Copy() 7712 na.Resources = na.Resources.Copy() 7713 na.SharedResources = na.SharedResources.Copy() 7714 7715 if a.TaskResources != nil { 7716 tr := make(map[string]*Resources, len(na.TaskResources)) 7717 for task, resource := range na.TaskResources { 7718 tr[task] = resource.Copy() 7719 } 7720 na.TaskResources = tr 7721 } 7722 7723 na.Metrics = na.Metrics.Copy() 7724 na.DeploymentStatus = na.DeploymentStatus.Copy() 7725 7726 if a.TaskStates != nil { 7727 ts := make(map[string]*TaskState, len(na.TaskStates)) 7728 for task, state := range na.TaskStates { 7729 ts[task] = state.Copy() 7730 } 7731 na.TaskStates = ts 7732 } 7733 7734 na.RescheduleTracker = a.RescheduleTracker.Copy() 7735 na.PreemptedAllocations = helper.CopySliceString(a.PreemptedAllocations) 7736 return na 7737 } 7738 7739 // TerminalStatus returns if the desired or actual status is terminal and 7740 // will no longer transition. 7741 func (a *Allocation) TerminalStatus() bool { 7742 // First check the desired state and if that isn't terminal, check client 7743 // state. 
7744 return a.ServerTerminalStatus() || a.ClientTerminalStatus()
7745 }
7746
7747 // ServerTerminalStatus returns true if the desired state of the allocation is terminal
7748 func (a *Allocation) ServerTerminalStatus() bool {
7749 switch a.DesiredStatus {
7750 case AllocDesiredStatusStop, AllocDesiredStatusEvict:
7751 return true
7752 default:
7753 return false
7754 }
7755 }
7756
7757 // ClientTerminalStatus returns if the client status is terminal and will no longer transition
7758 func (a *Allocation) ClientTerminalStatus() bool {
7759 switch a.ClientStatus {
7760 case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
7761 return true
7762 default:
7763 return false
7764 }
7765 }
7766
7767 // ShouldReschedule returns if the allocation is eligible to be rescheduled according
7768 // to its status and ReschedulePolicy, given its failure time
7769 func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
7770 // First check the desired state
7771 switch a.DesiredStatus {
7772 case AllocDesiredStatusStop, AllocDesiredStatusEvict:
7773 return false
7774 default:
7775 }
7776 switch a.ClientStatus {
7777 case AllocClientStatusFailed:
7778 return a.RescheduleEligible(reschedulePolicy, failTime)
7779 default:
7780 return false
7781 }
7782 }
7783
7784 // RescheduleEligible returns if the allocation is eligible to be rescheduled according
7785 // to its ReschedulePolicy and the current state of its reschedule trackers
7786 func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
7787 if reschedulePolicy == nil {
7788 return false
7789 }
7790 attempts := reschedulePolicy.Attempts
7791 interval := reschedulePolicy.Interval
7792 enabled := attempts > 0 || reschedulePolicy.Unlimited
7793 if !enabled {
7794 return false
7795 }
7796 if reschedulePolicy.Unlimited {
7797 return true
7798 }
7799 // Early return true if there are no attempts yet and the number of allowed attempts is > 0
7800 if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 {
7801 return true
7802 }
7803 attempted := 0
7804 for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
7805 lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
7806 timeDiff := failTime.UTC().UnixNano() - lastAttempt
7807 if timeDiff < interval.Nanoseconds() {
7808 attempted += 1
7809 }
7810 }
7811 return attempted < attempts
7812 }
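
// A worked sketch of the sliding-window check in RescheduleEligible
// (illustrative only; alloc, failTime, and the policy values are stand-ins):
// with Attempts: 2 and Interval: time.Hour, only events whose RescheduleTime
// falls within the hour before failTime count against the limit, so older
// attempts age out of the window.
//
//	policy := &ReschedulePolicy{Attempts: 2, Interval: time.Hour}
//	alloc.RescheduleTracker = &RescheduleTracker{Events: []*RescheduleEvent{
//		{RescheduleTime: failTime.Add(-90 * time.Minute).UnixNano()}, // outside window, ignored
//		{RescheduleTime: failTime.Add(-10 * time.Minute).UnixNano()}, // counts
//	}}
//	alloc.RescheduleEligible(policy, failTime) // true: 1 recent attempt < 2 allowed

7814 // LastEventTime is the time of the last task event in the allocation.
7815 // It is used to determine allocation failure time. If the FinishedAt field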
7816 // is not set, the alloc's modify time is used
7817 func (a *Allocation) LastEventTime() time.Time {
7818 var lastEventTime time.Time
7819 if a.TaskStates != nil {
7820 for _, s := range a.TaskStates {
7821 if lastEventTime.IsZero() || s.FinishedAt.After(lastEventTime) {
7822 lastEventTime = s.FinishedAt
7823 }
7824 }
7825 }
7826
7827 if lastEventTime.IsZero() {
7828 return time.Unix(0, a.ModifyTime).UTC()
7829 }
7830 return lastEventTime
7831 }
7832
7833 // ReschedulePolicy returns the reschedule policy based on the task group
7834 func (a *Allocation) ReschedulePolicy() *ReschedulePolicy {
7835 tg := a.Job.LookupTaskGroup(a.TaskGroup)
7836 if tg == nil {
7837 return nil
7838 }
7839 return tg.ReschedulePolicy
7840 }
7841
7842 // NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled,
7843 // and whether that time falls within the policy's interval when the policy doesn't allow unlimited reschedules
7844 func (a *Allocation) NextRescheduleTime() (time.Time, bool) {
7845 failTime := a.LastEventTime()
7846 reschedulePolicy := a.ReschedulePolicy()
7847 if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
7848 return time.Time{}, false
7849 }
7850
7851 nextDelay := a.NextDelay()
7852 nextRescheduleTime := failTime.Add(nextDelay)
7853 rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil)
7854 if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil {
7855 // Check for eligibility based on the interval if max attempts is set
7856 attempted := 0
7857 for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
7858 lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
7859 timeDiff := failTime.UTC().UnixNano() - lastAttempt
7860 if timeDiff < reschedulePolicy.Interval.Nanoseconds() {
7861 attempted += 1
7862 }
7863 }
7864 rescheduleEligible = attempted < reschedulePolicy.Attempts && nextDelay < reschedulePolicy.Interval
7865 }
7866 return nextRescheduleTime, rescheduleEligible
7867 }
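
// For intuition, an illustrative sketch of the delay functions handled by
// NextDelay below (assuming Delay: 30s; the progression follows from the
// switch statement in the function): the default case keeps returning 30s,
// "exponential" doubles the previous event's delay (30s, 1m, 2m, 4m, ...),
// and "fibonacci" sums the previous two delays (30s, 30s, 1m, 1m30s,
// 2m30s, ...), with MaxDelay capping the series when it is set.

7869 // NextDelay returns a duration after which the allocation can be rescheduled.
7870 // It is calculated according to the delay function and previous reschedule attempts.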
7871 func (a *Allocation) NextDelay() time.Duration {
7872 policy := a.ReschedulePolicy()
7873 // Can be nil if the task group was updated to remove its reschedule policy
7874 if policy == nil {
7875 return 0
7876 }
7877 delayDur := policy.Delay
7878 if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 {
7879 return delayDur
7880 }
7881 events := a.RescheduleTracker.Events
7882 switch policy.DelayFunction {
7883 case "exponential":
7884 delayDur = a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1].Delay * 2
7885 case "fibonacci":
7886 if len(events) >= 2 {
7887 fibN1Delay := events[len(events)-1].Delay
7888 fibN2Delay := events[len(events)-2].Delay
7889 // Handle reset of delay ceiling which should cause
7890 // a new series to start
7891 if fibN2Delay == policy.MaxDelay && fibN1Delay == policy.Delay {
7892 delayDur = fibN1Delay
7893 } else {
7894 delayDur = fibN1Delay + fibN2Delay
7895 }
7896 }
7897 default:
7898 return delayDur
7899 }
7900 if policy.MaxDelay > 0 && delayDur > policy.MaxDelay {
7901 delayDur = policy.MaxDelay
7902 // Check if the delay needs to be reset
7903
7904 lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1]
7905 timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime
7906 if timeDiff > delayDur.Nanoseconds() {
7907 delayDur = policy.Delay
7908 }
7909
7910 }
7911
7912 return delayDur
7913 }
7914
7915 // Terminated returns if the allocation is in a terminal state on a client.
7916 func (a *Allocation) Terminated() bool {
7917 if a.ClientStatus == AllocClientStatusFailed ||
7918 a.ClientStatus == AllocClientStatusComplete ||
7919 a.ClientStatus == AllocClientStatusLost {
7920 return true
7921 }
7922 return false
7923 }
7924
7925 // RanSuccessfully returns whether the client has run the allocation and all
7926 // tasks finished successfully. Critically, this function returns whether the
7927 // allocation has run to completion and not just that the alloc has converged to
7928 // its desired state. That is to say that a batch allocation must have finished
7929 // with exit code 0 on all task groups. This doesn't really have meaning on a
7930 // non-batch allocation because a service and system allocation should not
7931 // finish.
7932 func (a *Allocation) RanSuccessfully() bool {
7933 // Handle the case where the client hasn't started the allocation.
7934 if len(a.TaskStates) == 0 {
7935 return false
7936 }
7937
7938 // Check to see if all the tasks finished successfully in the allocation
7939 allSuccess := true
7940 for _, state := range a.TaskStates {
7941 allSuccess = allSuccess && state.Successful()
7942 }
7943
7944 return allSuccess
7945 }
7946
7947 // ShouldMigrate returns if the allocation needs data migration
7948 func (a *Allocation) ShouldMigrate() bool {
7949 if a.PreviousAllocation == "" {
7950 return false
7951 }
7952
7953 if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict {
7954 return false
7955 }
7956
7957 tg := a.Job.LookupTaskGroup(a.TaskGroup)
7958
7959 // if the task group is nil or the ephemeral disk block isn't present then
7960 // we won't migrate
7961 if tg == nil || tg.EphemeralDisk == nil {
7962 return false
7963 }
7964
7965 // We won't migrate any data if the user hasn't enabled migration or the
7966 // disk is not marked as sticky
7967 if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky {
7968 return false
7969 }
7970
7971 return true
7972 }
7973
7974 // SetEventDisplayMessages populates the display message if it's not already set,
7975 // a temporary fix to handle old allocations that don't have it.
7976 // This method will be removed in a future release.
7977 func (a *Allocation) SetEventDisplayMessages() {
7978 setDisplayMsg(a.TaskStates)
7979 }
7980
7981 // COMPAT(0.11): Remove in 0.11
7982 // ComparableResources returns the resources on the allocation
7983 // handling upgrade paths. After 0.11 calls to this should be replaced with:
7984 // alloc.AllocatedResources.Comparable()
7985 func (a *Allocation) ComparableResources() *ComparableResources {
7986 // Alloc already has 0.9+ behavior
7987 if a.AllocatedResources != nil {
7988 return a.AllocatedResources.Comparable()
7989 }
7990
7991 var resources *Resources
7992 if a.Resources != nil {
7993 resources = a.Resources
7994 } else if a.TaskResources != nil {
7995 resources = new(Resources)
7996 resources.Add(a.SharedResources)
7997 for _, taskResource := range a.TaskResources {
7998 resources.Add(taskResource)
7999 }
8000 }
8001
8002 // Upgrade path
8003 return &ComparableResources{
8004 Flattened: AllocatedTaskResources{
8005 Cpu: AllocatedCpuResources{
8006 CpuShares: int64(resources.CPU),
8007 },
8008 Memory: AllocatedMemoryResources{
8009 MemoryMB: int64(resources.MemoryMB),
8010 },
8011 Networks: resources.Networks,
8012 },
8013 Shared: AllocatedSharedResources{
8014 DiskMB: int64(resources.DiskMB),
8015 },
8016 }
8017 }
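
// A small sketch of the upgrade path above (illustrative only): a pre-0.9
// allocation carrying only the legacy Resources block is flattened into the
// comparable form, so callers can treat old and new allocations uniformly.
//
//	a := &Allocation{Resources: &Resources{CPU: 500, MemoryMB: 256, DiskMB: 300}}
//	c := a.ComparableResources()
//	// c.Flattened.Cpu.CpuShares == 500, c.Flattened.Memory.MemoryMB == 256,
//	// c.Shared.DiskMB == 300

8019 // LookupTask by name from the Allocation. Returns nil if the Job is not set, the
8020 // TaskGroup does not exist, or the task name cannot be found.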
8021 func (a *Allocation) LookupTask(name string) *Task {
8022 if a.Job == nil {
8023 return nil
8024 }
8025
8026 tg := a.Job.LookupTaskGroup(a.TaskGroup)
8027 if tg == nil {
8028 return nil
8029 }
8030
8031 return tg.LookupTask(name)
8032 }
8033
8034 // Stub returns a list stub for the allocation
8035 func (a *Allocation) Stub() *AllocListStub {
8036 return &AllocListStub{
8037 ID: a.ID,
8038 EvalID: a.EvalID,
8039 Name: a.Name,
8040 Namespace: a.Namespace,
8041 NodeID: a.NodeID,
8042 NodeName: a.NodeName,
8043 JobID: a.JobID,
8044 JobType: a.Job.Type,
8045 JobVersion: a.Job.Version,
8046 TaskGroup: a.TaskGroup,
8047 DesiredStatus: a.DesiredStatus,
8048 DesiredDescription: a.DesiredDescription,
8049 ClientStatus: a.ClientStatus,
8050 ClientDescription: a.ClientDescription,
8051 DesiredTransition: a.DesiredTransition,
8052 TaskStates: a.TaskStates,
8053 DeploymentStatus: a.DeploymentStatus,
8054 FollowupEvalID: a.FollowupEvalID,
8055 RescheduleTracker: a.RescheduleTracker,
8056 PreemptedAllocations: a.PreemptedAllocations,
8057 PreemptedByAllocation: a.PreemptedByAllocation,
8058 CreateIndex: a.CreateIndex,
8059 ModifyIndex: a.ModifyIndex,
8060 CreateTime: a.CreateTime,
8061 ModifyTime: a.ModifyTime,
8062 }
8063 }
8064
8065 // AllocationDiff converts an Allocation type to an AllocationDiff type.
8066 // If at any time modifications are made to AllocationDiff so that an
8067 // Allocation can no longer be safely converted to AllocationDiff,
8068 // this method should be changed accordingly.
8069 func (a *Allocation) AllocationDiff() *AllocationDiff {
8070 return (*AllocationDiff)(a)
8071 }
8072
8073 // AllocationDiff is another named type for Allocation (to use the same fields),
8074 // which is used to represent the delta for an Allocation. If you need a method
8075 // defined on the underlying Allocation, convert back with (*Allocation)(diff).
8076 type AllocationDiff Allocation
8077
8078 // AllocListStub is used to return a subset of alloc information
8079 type AllocListStub struct {
8080 ID string
8081 EvalID string
8082 Name string
8083 Namespace string
8084 NodeID string
8085 NodeName string
8086 JobID string
8087 JobType string
8088 JobVersion uint64
8089 TaskGroup string
8090 DesiredStatus string
8091 DesiredDescription string
8092 ClientStatus string
8093 ClientDescription string
8094 DesiredTransition DesiredTransition
8095 TaskStates map[string]*TaskState
8096 DeploymentStatus *AllocDeploymentStatus
8097 FollowupEvalID string
8098 RescheduleTracker *RescheduleTracker
8099 PreemptedAllocations []string
8100 PreemptedByAllocation string
8101 CreateIndex uint64
8102 ModifyIndex uint64
8103 CreateTime int64
8104 ModifyTime int64
8105 }
8106
8107 // SetEventDisplayMessages populates the display message if it's not already set,
8108 // a temporary fix to handle old allocations that don't have it.
8109 // This method will be removed in a future release.
8110 func (a *AllocListStub) SetEventDisplayMessages() {
8111 setDisplayMsg(a.TaskStates)
8112 }
8113
8114 func setDisplayMsg(taskStates map[string]*TaskState) {
8115 if taskStates != nil {
8116 for _, taskState := range taskStates {
8117 for _, event := range taskState.Events {
8118 event.PopulateEventDisplayMessage()
8119 }
8120 }
8121 }
8122 }
8123
8124 // AllocMetric is used to track various metrics while attempting
8125 // to make an allocation. These are used to debug a job, or to better
8126 // understand the pressure within the system.
8127 type AllocMetric struct {
8128 // NodesEvaluated is the number of nodes that were evaluated
8129 NodesEvaluated int
8130
8131 // NodesFiltered is the number of nodes filtered due to a constraint
8132 NodesFiltered int
8133
8134 // NodesAvailable is the number of nodes available for evaluation per DC.
8135 NodesAvailable map[string]int
8136
8137 // ClassFiltered is the number of nodes filtered by class
8138 ClassFiltered map[string]int
8139
8140 // ConstraintFiltered is the number of failures caused by constraint
8141 ConstraintFiltered map[string]int
8142
8143 // NodesExhausted is the number of nodes skipped due to being
8144 // exhausted of at least one resource
8145 NodesExhausted int
8146
8147 // ClassExhausted is the number of nodes exhausted by class
8148 ClassExhausted map[string]int
8149
8150 // DimensionExhausted provides the count by dimension or reason
8151 DimensionExhausted map[string]int
8152
8153 // QuotaExhausted provides the exhausted dimensions
8154 QuotaExhausted []string
8155
8156 // Scores is the scores of the final few nodes remaining
8157 // for placement. The top score is typically selected.
8158 // Deprecated: Replaced by ScoreMetaData in Nomad 0.9
8159 Scores map[string]float64
8160
8161 // ScoreMetaData is a slice of top scoring nodes displayed in the CLI
8162 ScoreMetaData []*NodeScoreMeta
8163
8164 // nodeScoreMeta is used to keep scores for a single node id. It is cleared out after
8165 // we receive the normalized score during the last step of the scoring stack.
8166 nodeScoreMeta *NodeScoreMeta
8167
8168 // topScores is used to maintain a heap of the top K nodes with
8169 // the highest normalized score
8170 topScores *kheap.ScoreHeap
8171
8172 // AllocationTime is a measure of how long the allocation
8173 // attempt took. This can affect performance and SLAs.
8174 AllocationTime time.Duration
8175
8176 // CoalescedFailures indicates the number of other
8177 // allocations that were coalesced into this failed allocation.
8178 // This is to prevent creating many failed allocations for a
8179 // single task group.
8180 CoalescedFailures int 8181 } 8182 8183 func (a *AllocMetric) Copy() *AllocMetric { 8184 if a == nil { 8185 return nil 8186 } 8187 na := new(AllocMetric) 8188 *na = *a 8189 na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable) 8190 na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered) 8191 na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered) 8192 na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted) 8193 na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted) 8194 na.QuotaExhausted = helper.CopySliceString(na.QuotaExhausted) 8195 na.Scores = helper.CopyMapStringFloat64(na.Scores) 8196 na.ScoreMetaData = CopySliceNodeScoreMeta(na.ScoreMetaData) 8197 return na 8198 } 8199 8200 func (a *AllocMetric) EvaluateNode() { 8201 a.NodesEvaluated += 1 8202 } 8203 8204 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 8205 a.NodesFiltered += 1 8206 if node != nil && node.NodeClass != "" { 8207 if a.ClassFiltered == nil { 8208 a.ClassFiltered = make(map[string]int) 8209 } 8210 a.ClassFiltered[node.NodeClass] += 1 8211 } 8212 if constraint != "" { 8213 if a.ConstraintFiltered == nil { 8214 a.ConstraintFiltered = make(map[string]int) 8215 } 8216 a.ConstraintFiltered[constraint] += 1 8217 } 8218 } 8219 8220 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 8221 a.NodesExhausted += 1 8222 if node != nil && node.NodeClass != "" { 8223 if a.ClassExhausted == nil { 8224 a.ClassExhausted = make(map[string]int) 8225 } 8226 a.ClassExhausted[node.NodeClass] += 1 8227 } 8228 if dimension != "" { 8229 if a.DimensionExhausted == nil { 8230 a.DimensionExhausted = make(map[string]int) 8231 } 8232 a.DimensionExhausted[dimension] += 1 8233 } 8234 } 8235 8236 func (a *AllocMetric) ExhaustQuota(dimensions []string) { 8237 if a.QuotaExhausted == nil { 8238 a.QuotaExhausted = make([]string, 0, len(dimensions)) 8239 } 8240 8241 a.QuotaExhausted = append(a.QuotaExhausted, dimensions...) 
8242 }
8243
8244 // ScoreNode is used to gather top K scoring nodes in a heap
8245 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
8246 // Create nodeScoreMeta lazily if it's the first time or if it's a new node
8247 if a.nodeScoreMeta == nil || a.nodeScoreMeta.NodeID != node.ID {
8248 a.nodeScoreMeta = &NodeScoreMeta{
8249 NodeID: node.ID,
8250 Scores: make(map[string]float64),
8251 }
8252 }
8253 if name == NormScorerName {
8254 a.nodeScoreMeta.NormScore = score
8255 // Once we have the normalized score we can push to the heap
8256 // that tracks top K by normalized score
8257
8258 // Create the heap if it's not there already
8259 if a.topScores == nil {
8260 a.topScores = kheap.NewScoreHeap(MaxRetainedNodeScores)
8261 }
8262 heap.Push(a.topScores, a.nodeScoreMeta)
8263
8264 // Clear out this entry because it's now in the heap
8265 a.nodeScoreMeta = nil
8266 } else {
8267 a.nodeScoreMeta.Scores[name] = score
8268 }
8269 }
8270
8271 // PopulateScoreMetaData populates the ScoreMetaData slice with the scoring
8272 // metadata of the top K nodes. It is filled by popping elements from the
8273 // heap of top normalized scores, highest score first
8274 func (a *AllocMetric) PopulateScoreMetaData() {
8275 if a.topScores == nil {
8276 return
8277 }
8278
8279 if a.ScoreMetaData == nil {
8280 a.ScoreMetaData = make([]*NodeScoreMeta, a.topScores.Len())
8281 }
8282 heapItems := a.topScores.GetItemsReverse()
8283 for i, item := range heapItems {
8284 a.ScoreMetaData[i] = item.(*NodeScoreMeta)
8285 }
8286 }
8287
8288 // NodeScoreMeta captures scoring metadata derived from
8289 // different scoring factors.
8290 type NodeScoreMeta struct {
8291 NodeID string
8292 Scores map[string]float64
8293 NormScore float64
8294 }
8295
8296 func (s *NodeScoreMeta) Copy() *NodeScoreMeta {
8297 if s == nil {
8298 return nil
8299 }
8300 ns := new(NodeScoreMeta)
8301 *ns = *s
8302 return ns
8303 }
8304
8305 func (s *NodeScoreMeta) String() string {
8306 return fmt.Sprintf("%s %f %v", s.NodeID, s.NormScore, s.Scores)
8307 }
8308
8309 func (s *NodeScoreMeta) Score() float64 {
8310 return s.NormScore
8311 }
8312
8313 func (s *NodeScoreMeta) Data() interface{} {
8314 return s
8315 }
8316
8317 // AllocDeploymentStatus captures the status of the allocation as part of the
8318 // deployment. This can include things like whether the allocation has been marked as
8319 // healthy.
8320 type AllocDeploymentStatus struct {
8321 // Healthy marks whether the allocation has been marked healthy or unhealthy
8322 // as part of a deployment. It can be unset if it has neither been marked
8323 // healthy nor unhealthy.
8324 Healthy *bool
8325
8326 // Timestamp is the time at which the health status was set.
8327 Timestamp time.Time
8328
8329 // Canary marks whether the allocation is a canary or not. A canary that has
8330 // been promoted will have this field set to false.
8331 Canary bool
8332
8333 // ModifyIndex is the raft index in which the deployment status was last
8334 // changed.
8335 ModifyIndex uint64
8336 }
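
// A short sketch of how the scoring hooks compose (illustrative only; node
// stands in for a previously fetched *Node, and the scorer names are just
// example map keys): each scorer reports a named sub-score via ScoreNode,
// the normalized score pushes the node onto the bounded top-K heap, and
// PopulateScoreMetaData later drains the heap for display.
//
//	m := &AllocMetric{}
//	m.ScoreNode(node, "binpack", 0.82)           // buffered on nodeScoreMeta
//	m.ScoreNode(node, "job-anti-affinity", -0.2) // buffered on nodeScoreMeta
//	m.ScoreNode(node, NormScorerName, 0.31)      // pushed onto the top-K heap
//	m.PopulateScoreMetaData()                    // fills ScoreMetaData, best first

8338 // HasHealth returns true if the allocation has its health set.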
8339 func (a *AllocDeploymentStatus) HasHealth() bool { 8340 return a != nil && a.Healthy != nil 8341 } 8342 8343 // IsHealthy returns if the allocation is marked as healthy as part of a 8344 // deployment 8345 func (a *AllocDeploymentStatus) IsHealthy() bool { 8346 if a == nil { 8347 return false 8348 } 8349 8350 return a.Healthy != nil && *a.Healthy 8351 } 8352 8353 // IsUnhealthy returns if the allocation is marked as unhealthy as part of a 8354 // deployment 8355 func (a *AllocDeploymentStatus) IsUnhealthy() bool { 8356 if a == nil { 8357 return false 8358 } 8359 8360 return a.Healthy != nil && !*a.Healthy 8361 } 8362 8363 // IsCanary returns if the allocation is marked as a canary 8364 func (a *AllocDeploymentStatus) IsCanary() bool { 8365 if a == nil { 8366 return false 8367 } 8368 8369 return a.Canary 8370 } 8371 8372 func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus { 8373 if a == nil { 8374 return nil 8375 } 8376 8377 c := new(AllocDeploymentStatus) 8378 *c = *a 8379 8380 if a.Healthy != nil { 8381 c.Healthy = helper.BoolToPtr(*a.Healthy) 8382 } 8383 8384 return c 8385 } 8386 8387 const ( 8388 EvalStatusBlocked = "blocked" 8389 EvalStatusPending = "pending" 8390 EvalStatusComplete = "complete" 8391 EvalStatusFailed = "failed" 8392 EvalStatusCancelled = "canceled" 8393 ) 8394 8395 const ( 8396 EvalTriggerJobRegister = "job-register" 8397 EvalTriggerJobDeregister = "job-deregister" 8398 EvalTriggerPeriodicJob = "periodic-job" 8399 EvalTriggerNodeDrain = "node-drain" 8400 EvalTriggerNodeUpdate = "node-update" 8401 EvalTriggerAllocStop = "alloc-stop" 8402 EvalTriggerScheduled = "scheduled" 8403 EvalTriggerRollingUpdate = "rolling-update" 8404 EvalTriggerDeploymentWatcher = "deployment-watcher" 8405 EvalTriggerFailedFollowUp = "failed-follow-up" 8406 EvalTriggerMaxPlans = "max-plan-attempts" 8407 EvalTriggerRetryFailedAlloc = "alloc-failure" 8408 EvalTriggerQueuedAllocs = "queued-allocs" 8409 EvalTriggerPreemption = "preemption" 8410 ) 8411 8412 const ( 8413 // CoreJobEvalGC is used for the garbage collection of evaluations 8414 // and allocations. We periodically scan evaluations in a terminal state, 8415 // in which all the corresponding allocations are also terminal. We 8416 // delete these out of the system to bound the state. 8417 CoreJobEvalGC = "eval-gc" 8418 8419 // CoreJobNodeGC is used for the garbage collection of failed nodes. 8420 // We periodically scan nodes in a terminal state, and if they have no 8421 // corresponding allocations we delete these out of the system. 8422 CoreJobNodeGC = "node-gc" 8423 8424 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 8425 // periodically scan garbage collectible jobs and check if both their 8426 // evaluations and allocations are terminal. If so, we delete these out of 8427 // the system. 8428 CoreJobJobGC = "job-gc" 8429 8430 // CoreJobDeploymentGC is used for the garbage collection of eligible 8431 // deployments. We periodically scan garbage collectible deployments and 8432 // check if they are terminal. If so, we delete these out of the system. 8433 CoreJobDeploymentGC = "deployment-gc" 8434 8435 // CoreJobForceGC is used to force garbage collection of all GCable objects. 8436 CoreJobForceGC = "force-gc" 8437 ) 8438 8439 // Evaluation is used anytime we need to apply business logic as a result 8440 // of a change to our desired state (job specification) or the emergent state 8441 // (registered nodes). 
When the inputs change, we need to "evaluate" them,
8442 // potentially taking action (allocation of work) or doing nothing if the state
8443 // of the world does not require it.
8444 type Evaluation struct {
8445 // msgpack omit empty fields during serialization
8446 _struct bool `codec:",omitempty"` // nolint: structcheck
8447
8448 // ID is a randomly generated UUID used for this evaluation. This
8449 // is assigned upon the creation of the evaluation.
8450 ID string
8451
8452 // Namespace is the namespace the evaluation is created in
8453 Namespace string
8454
8455 // Priority is used to control scheduling importance and if this job
8456 // can preempt other jobs.
8457 Priority int
8458
8459 // Type is used to control which schedulers are available to handle
8460 // this evaluation.
8461 Type string
8462
8463 // TriggeredBy is used to give some insight into why this Eval
8464 // was created. (Job change, node failure, alloc failure, etc.)
8465 TriggeredBy string
8466
8467 // JobID is the job this evaluation is scoped to. Evaluations cannot
8468 // be run in parallel for a given JobID, so we serialize on this.
8469 JobID string
8470
8471 // JobModifyIndex is the modify index of the job at the time
8472 // the evaluation was created
8473 JobModifyIndex uint64
8474
8475 // NodeID is the node that was affected triggering the evaluation.
8476 NodeID string
8477
8478 // NodeModifyIndex is the modify index of the node at the time
8479 // the evaluation was created
8480 NodeModifyIndex uint64
8481
8482 // DeploymentID is the ID of the deployment that triggered the evaluation.
8483 DeploymentID string
8484
8485 // Status of the evaluation
8486 Status string
8487
8488 // StatusDescription is meant to provide more human-friendly information
8489 StatusDescription string
8490
8491 // Wait is a minimum wait time for running the eval. This is used to
8492 // support a rolling upgrade in versions prior to 0.7.0
8493 // Deprecated
8494 Wait time.Duration
8495
8496 // WaitUntil is the time when this eval should be run. This is used to
8497 // support delayed rescheduling of failed allocations
8498 WaitUntil time.Time
8499
8500 // NextEval is the evaluation ID for the eval created to do a follow-up.
8501 // This is used to support rolling upgrades and failed-follow-up evals, where
8502 // we need a chain of evaluations.
8503 NextEval string
8504
8505 // PreviousEval is the evaluation ID of the eval that created this one as a follow-up.
8506 // This is used to support rolling upgrades and failed-follow-up evals, where
8507 // we need a chain of evaluations.
8508 PreviousEval string
8509
8510 // BlockedEval is the evaluation ID for a created blocked eval. A
8511 // blocked eval will be created if all allocations could not be placed due
8512 // to constraints or lacking resources.
8513 BlockedEval string
8514
8515 // FailedTGAllocs are task groups which have allocations that could not be
8516 // made, but the metrics are persisted so that the user can use the feedback
8517 // to determine the cause.
8518 FailedTGAllocs map[string]*AllocMetric
8519
8520 // ClassEligibility tracks computed node classes that have been explicitly
8521 // marked as eligible or ineligible.
8522 ClassEligibility map[string]bool
8523
8524 // QuotaLimitReached marks whether a quota limit was reached for the
8525 // evaluation.
8526 QuotaLimitReached string
8527
8528 // EscapedComputedClass marks whether the job has constraints that are not
8529 // captured by computed node classes.
8530 EscapedComputedClass bool
8531
8532 // AnnotatePlan triggers the scheduler to provide additional annotations
8533 // during the evaluation. This should not be set during normal operations.
8534 AnnotatePlan bool
8535
8536 // QueuedAllocations is the number of unplaced allocations at the time the
8537 // evaluation was processed. The map is keyed by Task Group names.
8538 QueuedAllocations map[string]int
8539
8540 // LeaderACL provides the ACL token to use when issuing RPCs back to the
8541 // leader. This will be a valid management token as long as the leader is
8542 // active. This should not ever be exposed via the API.
8543 LeaderACL string
8544
8545 // SnapshotIndex is the Raft index of the snapshot used to process the
8546 // evaluation. The index will either be set when it has gone through the
8547 // scheduler or if a blocked evaluation is being created. The index is set
8548 // in this case so we can determine if an early unblocking is required since
8549 // capacity has changed since the evaluation was created. This can result in
8550 // the SnapshotIndex being less than the CreateIndex.
8551 SnapshotIndex uint64
8552
8553 // Raft Indexes
8554 CreateIndex uint64
8555 ModifyIndex uint64
8556
8557 CreateTime int64
8558 ModifyTime int64
8559 }
8560
8561 // TerminalStatus returns if the current status is terminal and
8562 // will no longer transition.
8563 func (e *Evaluation) TerminalStatus() bool {
8564 switch e.Status {
8565 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled:
8566 return true
8567 default:
8568 return false
8569 }
8570 }
8571
8572 func (e *Evaluation) GoString() string {
8573 return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace)
8574 }
8575
8576 func (e *Evaluation) Copy() *Evaluation {
8577 if e == nil {
8578 return nil
8579 }
8580 ne := new(Evaluation)
8581 *ne = *e
8582
8583 // Copy ClassEligibility
8584 if e.ClassEligibility != nil {
8585 classes := make(map[string]bool, len(e.ClassEligibility))
8586 for class, elig := range e.ClassEligibility {
8587 classes[class] = elig
8588 }
8589 ne.ClassEligibility = classes
8590 }
8591
8592 // Copy FailedTGAllocs
8593 if e.FailedTGAllocs != nil {
8594 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs))
8595 for tg, metric := range e.FailedTGAllocs {
8596 failedTGs[tg] = metric.Copy()
8597 }
8598 ne.FailedTGAllocs = failedTGs
8599 }
8600
8601 // Copy queued allocations
8602 if e.QueuedAllocations != nil {
8603 queuedAllocations := make(map[string]int, len(e.QueuedAllocations))
8604 for tg, num := range e.QueuedAllocations {
8605 queuedAllocations[tg] = num
8606 }
8607 ne.QueuedAllocations = queuedAllocations
8608 }
8609
8610 return ne
8611 }
8612
8613 // ShouldEnqueue checks if a given evaluation should be enqueued into the
8614 // eval_broker
8615 func (e *Evaluation) ShouldEnqueue() bool {
8616 switch e.Status {
8617 case EvalStatusPending:
8618 return true
8619 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled:
8620 return false
8621 default:
8622 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
8623 }
8624 }
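
// A compact sketch of how a consumer might route evals by status
// (illustrative only; broker and blocked are hypothetical stand-ins, not
// part of this file): pending evals are enqueued, blocked evals enter the
// blocked tracker, and terminal evals are dropped.
//
//	switch {
//	case eval.ShouldEnqueue():
//		broker.Enqueue(eval)
//	case eval.ShouldBlock():
//		blocked.Block(eval)
//	}

8626 // ShouldBlock checks if a given evaluation should be entered into the blocked
8627 // eval tracker.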
8628 func (e *Evaluation) ShouldBlock() bool {
8629 switch e.Status {
8630 case EvalStatusBlocked:
8631 return true
8632 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled:
8633 return false
8634 default:
8635 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
8636 }
8637 }
8638
8639 // MakePlan is used to make a plan from the given evaluation
8640 // for a given Job
8641 func (e *Evaluation) MakePlan(j *Job) *Plan {
8642 p := &Plan{
8643 EvalID: e.ID,
8644 Priority: e.Priority,
8645 Job: j,
8646 NodeUpdate: make(map[string][]*Allocation),
8647 NodeAllocation: make(map[string][]*Allocation),
8648 NodePreemptions: make(map[string][]*Allocation),
8649 }
8650 if j != nil {
8651 p.AllAtOnce = j.AllAtOnce
8652 }
8653 return p
8654 }
8655
8656 // NextRollingEval creates an evaluation to follow up this eval for rolling updates
8657 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
8658 now := time.Now().UTC().UnixNano()
8659 return &Evaluation{
8660 ID: uuid.Generate(),
8661 Namespace: e.Namespace,
8662 Priority: e.Priority,
8663 Type: e.Type,
8664 TriggeredBy: EvalTriggerRollingUpdate,
8665 JobID: e.JobID,
8666 JobModifyIndex: e.JobModifyIndex,
8667 Status: EvalStatusPending,
8668 Wait: wait,
8669 PreviousEval: e.ID,
8670 CreateTime: now,
8671 ModifyTime: now,
8672 }
8673 }
8674
8675 // CreateBlockedEval creates a blocked evaluation to follow up this eval and place any
8676 // failed allocations. It takes the classes marked explicitly eligible or
8677 // ineligible, whether the job has escaped computed node classes and whether the
8678 // quota limit was reached.
8679 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool,
8680 escaped bool, quotaReached string) *Evaluation {
8681 now := time.Now().UTC().UnixNano()
8682 return &Evaluation{
8683 ID: uuid.Generate(),
8684 Namespace: e.Namespace,
8685 Priority: e.Priority,
8686 Type: e.Type,
8687 TriggeredBy: EvalTriggerQueuedAllocs,
8688 JobID: e.JobID,
8689 JobModifyIndex: e.JobModifyIndex,
8690 Status: EvalStatusBlocked,
8691 PreviousEval: e.ID,
8692 ClassEligibility: classEligibility,
8693 EscapedComputedClass: escaped,
8694 QuotaLimitReached: quotaReached,
8695 CreateTime: now,
8696 ModifyTime: now,
8697 }
8698 }
8699
8700 // CreateFailedFollowUpEval creates a follow-up evaluation when the current one
8701 // has been marked as failed because it has hit the delivery limit and will not
8702 // be retried by the eval_broker. Callers should copy the created eval's ID
8703 // into the old eval's NextEval field.
8704 func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation {
8705 now := time.Now().UTC().UnixNano()
8706 return &Evaluation{
8707 ID: uuid.Generate(),
8708 Namespace: e.Namespace,
8709 Priority: e.Priority,
8710 Type: e.Type,
8711 TriggeredBy: EvalTriggerFailedFollowUp,
8712 JobID: e.JobID,
8713 JobModifyIndex: e.JobModifyIndex,
8714 Status: EvalStatusPending,
8715 Wait: wait,
8716 PreviousEval: e.ID,
8717 CreateTime: now,
8718 ModifyTime: now,
8719 }
8720 }
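
// A minimal sketch of the follow-up chain described above (illustrative only):
//
//	followup := eval.CreateFailedFollowUpEval(time.Minute)
//	eval.NextEval = followup.ID // link the chain, per the comment above
//	// followup.PreviousEval == eval.ID and followup.Status == EvalStatusPending

8722 // UpdateModifyTime takes into account that clocks on different servers may be
8723 // slightly out of sync. Even in case of a leader change, this method will
8724 // guarantee that ModifyTime will always be after CreateTime.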
8725 func (e *Evaluation) UpdateModifyTime() { 8726 now := time.Now().UTC().UnixNano() 8727 if now <= e.CreateTime { 8728 e.ModifyTime = e.CreateTime + 1 8729 } else { 8730 e.ModifyTime = now 8731 } 8732 } 8733 8734 // Plan is used to submit a commit plan for task allocations. These 8735 // are submitted to the leader which verifies that resources have 8736 // not been overcommitted before admitting the plan. 8737 type Plan struct { 8738 // msgpack omit empty fields during serialization 8739 _struct bool `codec:",omitempty"` // nolint: structcheck 8740 8741 // EvalID is the evaluation ID this plan is associated with 8742 EvalID string 8743 8744 // EvalToken is used to prevent a split-brain processing of 8745 // an evaluation. There should only be a single scheduler running 8746 // an Eval at a time, but this could be violated after a leadership 8747 // transition. This unique token is used to reject plans that are 8748 // being submitted from a different leader. 8749 EvalToken string 8750 8751 // Priority is the priority of the upstream job 8752 Priority int 8753 8754 // AllAtOnce is used to control if incremental scheduling of task groups 8755 // is allowed or if we must do a gang scheduling of the entire job. 8756 // If this is false, a plan may be partially applied. Otherwise, the 8757 // entire plan must be able to make progress. 8758 AllAtOnce bool 8759 8760 // Job is the parent job of all the allocations in the Plan. 8761 // Since a Plan only involves a single Job, we can reduce the size 8762 // of the plan by only including it once. 8763 Job *Job 8764 8765 // NodeUpdate contains all the allocations for each node. For each node, 8766 // this is a list of the allocations to update to either stop or evict. 8767 NodeUpdate map[string][]*Allocation 8768 8769 // NodeAllocation contains all the allocations for each node. 8770 // The evicts must be considered prior to the allocations. 8771 NodeAllocation map[string][]*Allocation 8772 8773 // Annotations contains annotations by the scheduler to be used by operators 8774 // to understand the decisions made by the scheduler. 8775 Annotations *PlanAnnotations 8776 8777 // Deployment is the deployment created or updated by the scheduler that 8778 // should be applied by the planner. 8779 Deployment *Deployment 8780 8781 // DeploymentUpdates is a set of status updates to apply to the given 8782 // deployments. This allows the scheduler to cancel any unneeded deployment 8783 // because the job is stopped or the update block is removed. 8784 DeploymentUpdates []*DeploymentStatusUpdate 8785 8786 // NodePreemptions is a map from node id to a set of allocations from other 8787 // lower priority jobs that are preempted. Preempted allocations are marked 8788 // as evicted. 8789 NodePreemptions map[string][]*Allocation 8790 8791 // SnapshotIndex is the Raft index of the snapshot used to create the 8792 // Plan. The leader will wait to evaluate the plan until its StateStore 8793 // has reached at least this index. 8794 SnapshotIndex uint64 8795 } 8796 8797 // AppendStoppedAlloc marks an allocation to be stopped. The clientStatus of the 8798 // allocation may be optionally set by passing in a non-empty value. 8799 func (p *Plan) AppendStoppedAlloc(alloc *Allocation, desiredDesc, clientStatus string) { 8800 newAlloc := new(Allocation) 8801 *newAlloc = *alloc 8802 8803 // If the job is not set in the plan we are deregistering a job so we 8804 // extract the job from the allocation. 
8805 if p.Job == nil && newAlloc.Job != nil {
8806 p.Job = newAlloc.Job
8807 }
8808
8809 // Normalize the job
8810 newAlloc.Job = nil
8811
8812 // Strip the resources as they can be rebuilt.
8813 newAlloc.Resources = nil
8814
8815 newAlloc.DesiredStatus = AllocDesiredStatusStop
8816 newAlloc.DesiredDescription = desiredDesc
8817
8818 if clientStatus != "" {
8819 newAlloc.ClientStatus = clientStatus
8820 }
8821
8822 node := alloc.NodeID
8823 existing := p.NodeUpdate[node]
8824 p.NodeUpdate[node] = append(existing, newAlloc)
8825 }
8826
8827 // AppendPreemptedAlloc is used to append an allocation that's being preempted to the plan.
8828 // To minimize the size of the plan, this only sets a minimal set of fields in the allocation.
8829 func (p *Plan) AppendPreemptedAlloc(alloc *Allocation, preemptingAllocID string) {
8830 newAlloc := &Allocation{}
8831 newAlloc.ID = alloc.ID
8832 newAlloc.JobID = alloc.JobID
8833 newAlloc.Namespace = alloc.Namespace
8834 newAlloc.DesiredStatus = AllocDesiredStatusEvict
8835 newAlloc.PreemptedByAllocation = preemptingAllocID
8836
8837 desiredDesc := fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocID)
8838 newAlloc.DesiredDescription = desiredDesc
8839
8840 // TaskResources are needed by the plan applier to check if allocations fit
8841 // after removing preempted allocations
8842 if alloc.AllocatedResources != nil {
8843 newAlloc.AllocatedResources = alloc.AllocatedResources
8844 } else {
8845 // COMPAT Remove in version 0.11
8846 newAlloc.TaskResources = alloc.TaskResources
8847 newAlloc.SharedResources = alloc.SharedResources
8848 }
8849
8850 // Append this alloc to slice for this node
8851 node := alloc.NodeID
8852 existing := p.NodePreemptions[node]
8853 p.NodePreemptions[node] = append(existing, newAlloc)
8854 }
8855
8856 func (p *Plan) PopUpdate(alloc *Allocation) {
8857 existing := p.NodeUpdate[alloc.NodeID]
8858 n := len(existing)
8859 if n > 0 && existing[n-1].ID == alloc.ID {
8860 existing = existing[:n-1]
8861 if len(existing) > 0 {
8862 p.NodeUpdate[alloc.NodeID] = existing
8863 } else {
8864 delete(p.NodeUpdate, alloc.NodeID)
8865 }
8866 }
8867 }
8868
8869 func (p *Plan) AppendAlloc(alloc *Allocation) {
8870 node := alloc.NodeID
8871 existing := p.NodeAllocation[node]
8872
8873 // Normalize the job
8874 alloc.Job = nil
8875
8876 p.NodeAllocation[node] = append(existing, alloc)
8877 }
8878
8879 // IsNoOp checks if this plan would do nothing
8880 func (p *Plan) IsNoOp() bool {
8881 return len(p.NodeUpdate) == 0 &&
8882 len(p.NodeAllocation) == 0 &&
8883 p.Deployment == nil &&
8884 len(p.DeploymentUpdates) == 0
8885 }
8886
8887 // NormalizeAllocations normalizes allocations to remove fields that can
8888 // be fetched from the MemDB instead of sending over the wire
8889 func (p *Plan) NormalizeAllocations() {
8890 for _, allocs := range p.NodeUpdate {
8891 for i, alloc := range allocs {
8892 allocs[i] = &Allocation{
8893 ID: alloc.ID,
8894 DesiredDescription: alloc.DesiredDescription,
8895 ClientStatus: alloc.ClientStatus,
8896 }
8897 }
8898 }
8899
8900 for _, allocs := range p.NodePreemptions {
8901 for i, alloc := range allocs {
8902 allocs[i] = &Allocation{
8903 ID: alloc.ID,
8904 PreemptedByAllocation: alloc.PreemptedByAllocation,
8905 }
8906 }
8907 }
8908 }
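
// A brief sketch of the plan-size optimization above (illustrative only;
// eval, job, and alloc are stand-ins): stopped allocations enter the plan
// with Job and Resources stripped, and NormalizeAllocations then reduces
// each entry to the few fields the receiver cannot rebuild from its own
// state store.
//
//	plan := eval.MakePlan(job)
//	plan.AppendStoppedAlloc(alloc, "node drained", AllocClientStatusComplete)
//	plan.NormalizeAllocations() // NodeUpdate entries keep only ID,
//	                            // DesiredDescription, and ClientStatus

8910 // PlanResult is the result of a plan submitted to the leader.
8911 type PlanResult struct {
8912 // NodeUpdate contains all the updates that were committed.
8913 NodeUpdate map[string][]*Allocation
8914
8915 // NodeAllocation contains all the allocations that were committed.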
8916 NodeAllocation map[string][]*Allocation
8917
8918 // Deployment is the deployment that was committed.
8919 Deployment *Deployment
8920
8921 // DeploymentUpdates is the set of deployment updates that were committed.
8922 DeploymentUpdates []*DeploymentStatusUpdate
8923
8924 // NodePreemptions is a map from node id to a set of allocations from other
8925 // lower priority jobs that are preempted. Preempted allocations are marked
8926 // as stopped.
8927 NodePreemptions map[string][]*Allocation
8928
8929 // RefreshIndex is the index the worker should refresh state up to.
8930 // This allows all evictions and allocations to be materialized.
8931 // If any allocations were rejected due to stale data (node state,
8932 // over committed) this can be used to force a worker refresh.
8933 RefreshIndex uint64
8934
8935 // AllocIndex is the Raft index in which the evictions and
8936 // allocations took place. This is used for the write index.
8937 AllocIndex uint64
8938 }
8939
8940 // IsNoOp checks if this plan result would do nothing
8941 func (p *PlanResult) IsNoOp() bool {
8942 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 &&
8943 len(p.DeploymentUpdates) == 0 && p.Deployment == nil
8944 }
8945
8946 // FullCommit is used to check if all the allocations in a plan
8947 // were committed as part of the result. Returns whether there was
8948 // a match, and the number of expected and actual allocations.
8949 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
8950 expected := 0
8951 actual := 0
8952 for name, allocList := range plan.NodeAllocation {
8953 didAlloc := p.NodeAllocation[name]
8954 expected += len(allocList)
8955 actual += len(didAlloc)
8956 }
8957 return actual == expected, expected, actual
8958 }
8959
8960 // PlanAnnotations holds annotations made by the scheduler to give further debug
8961 // information to operators.
8962 type PlanAnnotations struct {
8963 // DesiredTGUpdates is the set of desired updates per task group.
8964 DesiredTGUpdates map[string]*DesiredUpdates
8965
8966 // PreemptedAllocs is the set of allocations to be preempted to make the placement successful.
8967 PreemptedAllocs []*AllocListStub
8968 }
8969
8970 // DesiredUpdates is the set of changes the scheduler would like to make given
8971 // sufficient resources and cluster capacity.
8972 type DesiredUpdates struct {
8973 Ignore uint64
8974 Place uint64
8975 Migrate uint64
8976 Stop uint64
8977 InPlaceUpdate uint64
8978 DestructiveUpdate uint64
8979 Canary uint64
8980 Preemptions uint64
8981 }
8982
8983 func (d *DesiredUpdates) GoString() string {
8984 return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
8985 d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
8986 }
8987
8988 // MsgpackHandle is a shared handle for encoding/decoding of structs
8989 var MsgpackHandle = func() *codec.MsgpackHandle {
8990 h := &codec.MsgpackHandle{}
8991 h.RawToString = true
8992
8993 // maintain binary format from time prior to upgrading latest ugorji
8994 h.BasicHandle.TimeNotBuiltin = true
8995
8996 // Sets the default type for decoding a map into a nil interface{}.
8997 // This is necessary in particular because we store the driver configs as a
8998 // nil interface{}.
8999 h.MapType = reflect.TypeOf(map[string]interface{}(nil))
9000
9001 return h
9002 }()
9003
9004 var (
9005 // JsonHandle and JsonHandlePretty are the codec handles to JSON encode
9006 // structs. The pretty handle will add indents for easier human consumption.
9007 JsonHandle = &codec.JsonHandle{
9008 HTMLCharsAsIs: true,
9009 }
9010 JsonHandlePretty = &codec.JsonHandle{
9011 HTMLCharsAsIs: true,
9012 Indent: 4,
9013 }
9014 )
9015
9016 // TODO Figure out if we can remove this. This is our fork that is just way
9017 // behind. I feel like its original purpose was to pin at a stable version but
9018 // now we can accomplish this with vendoring.
9019 var HashiMsgpackHandle = func() *hcodec.MsgpackHandle {
9020 h := &hcodec.MsgpackHandle{}
9021 h.RawToString = true
9022
9023 // maintain binary format from time prior to upgrading latest ugorji
9024 h.BasicHandle.TimeNotBuiltin = true
9025
9026 // Sets the default type for decoding a map into a nil interface{}.
9027 // This is necessary in particular because we store the driver configs as a
9028 // nil interface{}.
9029 h.MapType = reflect.TypeOf(map[string]interface{}(nil))
9030 return h
9031 }()
9032
9033 // Decode is used to decode a MsgPack encoded object
9034 func Decode(buf []byte, out interface{}) error {
9035 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
9036 }
9037
9038 // Encode is used to encode a MsgPack object with type prefix
9039 func Encode(t MessageType, msg interface{}) ([]byte, error) {
9040 var buf bytes.Buffer
9041 buf.WriteByte(uint8(t))
9042 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
9043 return buf.Bytes(), err
9044 }
9045
9046 // KeyringResponse is a unified key response and can be used for install,
9047 // remove, use, as well as listing key queries.
9048 type KeyringResponse struct {
9049 Messages map[string]string
9050 Keys map[string]int
9051 NumNodes int
9052 }
9053
9054 // KeyringRequest is the request object for serf key operations.
9055 type KeyringRequest struct {
9056 Key string
9057 }
9058
9059 // RecoverableError wraps an error and marks whether it is recoverable and could
9060 // be retried or it is fatal.
9061 type RecoverableError struct {
9062 Err string
9063 Recoverable bool
9064 }
9065
9066 // NewRecoverableError is used to wrap an error and mark it as recoverable or
9067 // not.
9068 func NewRecoverableError(e error, recoverable bool) error {
9069 if e == nil {
9070 return nil
9071 }
9072
9073 return &RecoverableError{
9074 Err: e.Error(),
9075 Recoverable: recoverable,
9076 }
9077 }
9078
9079 // WrapRecoverable wraps an existing error in a new RecoverableError with a new
9080 // message. If the error was recoverable before, the returned error is as well;
9081 // otherwise it is unrecoverable.
9082 func WrapRecoverable(msg string, err error) error {
9083 return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)}
9084 }
9085
9086 func (r *RecoverableError) Error() string {
9087 return r.Err
9088 }
9089
9090 func (r *RecoverableError) IsRecoverable() bool {
9091 return r.Recoverable
9092 }
9093
9094 func (r *RecoverableError) IsUnrecoverable() bool {
9095 return !r.Recoverable
9096 }
9097
9098 // Recoverable is an interface for errors to implement to indicate whether or
9099 // not they are fatal or recoverable.
9100 type Recoverable interface {
9101 error
9102 IsRecoverable() bool
9103 }
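
// A tiny round-trip sketch of the framing used by Encode/Decode above
// (illustrative only; msg and out stand in for a matching msgpack-encodable
// request struct): the first byte of an encoded Raft log entry carries the
// MessageType and the remainder is the msgpack body, so decoding strips the
// one-byte prefix first.
//
//	buf, err := Encode(EvalUpdateRequestType, msg) // buf[0] holds the MessageType
//	if err == nil {
//		err = Decode(buf[1:], &out) // decode only the body
//	}

9105 // IsRecoverable returns true if error is a RecoverableError with
9106 // Recoverable=true. Otherwise false is returned.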
9107 func IsRecoverable(e error) bool {
9108 if re, ok := e.(Recoverable); ok {
9109 return re.IsRecoverable()
9110 }
9111 return false
9112 }
9113
9114 // WrappedServerError wraps an error and satisfies
9115 // both the Recoverable and the ServerSideError interfaces
9116 type WrappedServerError struct {
9117 Err error
9118 }
9119
9120 // NewWrappedServerError is used to create a wrapped server side error
9121 func NewWrappedServerError(e error) error {
9122 return &WrappedServerError{
9123 Err: e,
9124 }
9125 }
9126
9127 func (r *WrappedServerError) IsRecoverable() bool {
9128 return IsRecoverable(r.Err)
9129 }
9130
9131 func (r *WrappedServerError) Error() string {
9132 return r.Err.Error()
9133 }
9134
9135 func (r *WrappedServerError) IsServerSide() bool {
9136 return true
9137 }
9138
9139 // ServerSideError is an interface for errors to implement to indicate
9140 // errors occurring after the request makes it to a server
9141 type ServerSideError interface {
9142 error
9143 IsServerSide() bool
9144 }
9145
9146 // IsServerSide returns true if error is a wrapped
9147 // server side error
9148 func IsServerSide(e error) bool {
9149 if se, ok := e.(ServerSideError); ok {
9150 return se.IsServerSide()
9151 }
9152 return false
9153 }
9154
9155 // ACLPolicy is used to represent an ACL policy
9156 type ACLPolicy struct {
9157 Name string // Unique name
9158 Description string // Human readable
9159 Rules string // HCL or JSON format
9160 RulesJSON *acl.Policy // Generated from Rules on read
9161 Hash []byte
9162 CreateIndex uint64
9163 ModifyIndex uint64
9164 }
9165
9166 // SetHash is used to compute and set the hash of the ACL policy
9167 func (c *ACLPolicy) SetHash() []byte {
9168 // Initialize a 256bit Blake2 hash (32 bytes)
9169 hash, err := blake2b.New256(nil)
9170 if err != nil {
9171 panic(err)
9172 }
9173
9174 // Write all the user set fields
9175 hash.Write([]byte(c.Name))
9176 hash.Write([]byte(c.Description))
9177 hash.Write([]byte(c.Rules))
9178
9179 // Finalize the hash
9180 hashVal := hash.Sum(nil)
9181
9182 // Set and return the hash
9183 c.Hash = hashVal
9184 return hashVal
9185 }
9186
9187 func (a *ACLPolicy) Stub() *ACLPolicyListStub {
9188 return &ACLPolicyListStub{
9189 Name: a.Name,
9190 Description: a.Description,
9191 Hash: a.Hash,
9192 CreateIndex: a.CreateIndex,
9193 ModifyIndex: a.ModifyIndex,
9194 }
9195 }
9196
9197 func (a *ACLPolicy) Validate() error {
9198 var mErr multierror.Error
9199 if !validPolicyName.MatchString(a.Name) {
9200 err := fmt.Errorf("invalid name '%s'", a.Name)
9201 mErr.Errors = append(mErr.Errors, err)
9202 }
9203 if _, err := acl.Parse(a.Rules); err != nil {
9204 err = fmt.Errorf("failed to parse rules: %v", err)
9205 mErr.Errors = append(mErr.Errors, err)
9206 }
9207 if len(a.Description) > maxPolicyDescriptionLength {
9208 err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength)
9209 mErr.Errors = append(mErr.Errors, err)
9210 }
9211 return mErr.ErrorOrNil()
9212 }
9213
9214 // ACLPolicyListStub is used for listing ACL policies
9215 type ACLPolicyListStub struct {
9216 Name string
9217 Description string
9218 Hash []byte
9219 CreateIndex uint64
9220 ModifyIndex uint64
9221 }
9222
9223 // ACLPolicyListRequest is used to request a list of policies
9224 type ACLPolicyListRequest struct {
9225 QueryOptions
9226 }
9227
9228 // ACLPolicySpecificRequest is used to query a specific policy
9229 type ACLPolicySpecificRequest struct {
9230 Name string
9231 QueryOptions
9232 }
9233
9234 // ACLPolicySetRequest is used to query a set of policies
9235 type ACLPolicySetRequest struct {
9236 Names []string
9237 QueryOptions
9238 }
9239
9240 // ACLPolicyListResponse is used for a list request
9241 type ACLPolicyListResponse struct {
9242 Policies []*ACLPolicyListStub
9243 QueryMeta
9244 }
9245
9246 // SingleACLPolicyResponse is used to return a single policy
9247 type SingleACLPolicyResponse struct {
9248 Policy *ACLPolicy
9249 QueryMeta
9250 }
9251
9252 // ACLPolicySetResponse is used to return a set of policies
9253 type ACLPolicySetResponse struct {
9254 Policies map[string]*ACLPolicy
9255 QueryMeta
9256 }
9257
9258 // ACLPolicyDeleteRequest is used to delete a set of policies
9259 type ACLPolicyDeleteRequest struct {
9260 Names []string
9261 WriteRequest
9262 }
9263
9264 // ACLPolicyUpsertRequest is used to upsert a set of policies
9265 type ACLPolicyUpsertRequest struct {
9266 Policies []*ACLPolicy
9267 WriteRequest
9268 }
9269
9270 // ACLToken represents a client token which is used to authenticate
9271 type ACLToken struct {
9272 AccessorID string // Public Accessor ID (UUID)
9273 SecretID string // Secret ID, private (UUID)
9274 Name string // Human friendly name
9275 Type string // Client or Management
9276 Policies []string // Policies this token ties to
9277 Global bool // Global or Region local
9278 Hash []byte
9279 CreateTime time.Time // Time of creation
9280 CreateIndex uint64
9281 ModifyIndex uint64
9282 }
9283
9284 var (
9285 // AnonymousACLToken is used when no SecretID is provided, and the
9286 // request is made anonymously.
9287 AnonymousACLToken = &ACLToken{
9288 AccessorID: "anonymous",
9289 Name: "Anonymous Token",
9290 Type: ACLClientToken,
9291 Policies: []string{"anonymous"},
9292 Global: false,
9293 }
9294 )
9295
9296 type ACLTokenListStub struct {
9297 AccessorID string
9298 Name string
9299 Type string
9300 Policies []string
9301 Global bool
9302 Hash []byte
9303 CreateTime time.Time
9304 CreateIndex uint64
9305 ModifyIndex uint64
9306 }
9307
9308 // SetHash is used to compute and set the hash of the ACL token
9309 func (a *ACLToken) SetHash() []byte {
9310 // Initialize a 256bit Blake2 hash (32 bytes)
9311 hash, err := blake2b.New256(nil)
9312 if err != nil {
9313 panic(err)
9314 }
9315
9316 // Write all the user set fields
9317 hash.Write([]byte(a.Name))
9318 hash.Write([]byte(a.Type))
9319 for _, policyName := range a.Policies {
9320 hash.Write([]byte(policyName))
9321 }
9322 if a.Global {
9323 hash.Write([]byte("global"))
9324 } else {
9325 hash.Write([]byte("local"))
9326 }
9327
9328 // Finalize the hash
9329 hashVal := hash.Sum(nil)
9330
9331 // Set and return the hash
9332 a.Hash = hashVal
9333 return hashVal
9334 }
9335
9336 func (a *ACLToken) Stub() *ACLTokenListStub {
9337 return &ACLTokenListStub{
9338 AccessorID: a.AccessorID,
9339 Name: a.Name,
9340 Type: a.Type,
9341 Policies: a.Policies,
9342 Global: a.Global,
9343 Hash: a.Hash,
9344 CreateTime: a.CreateTime,
9345 CreateIndex: a.CreateIndex,
9346 ModifyIndex: a.ModifyIndex,
9347 }
9348 }
9349
9350 // Validate is used to sanity check a token
9351 func (a *ACLToken) Validate() error {
9352 var mErr multierror.Error
9353 if len(a.Name) > maxTokenNameLength {
9354 mErr.Errors = append(mErr.Errors, fmt.Errorf("token name too long"))
9355 }
9356 switch a.Type {
9357 case ACLClientToken:
9358 if len(a.Policies) == 0 {
9359 mErr.Errors = append(mErr.Errors, fmt.Errorf("client token missing policies"))
9360 }
9361 case ACLManagementToken:
9362 if len(a.Policies) != 0 {
9363 mErr.Errors = append(mErr.Errors, fmt.Errorf("management token cannot be associated with policies"))
9364 }
9365 default:
9366 mErr.Errors = append(mErr.Errors, fmt.Errorf("token type must be client or management"))
9367 }
9368 return mErr.ErrorOrNil()
9369 }
9370
9371 // PolicySubset checks if a given set of policies is a subset of the token's policies
9372 func (a *ACLToken) PolicySubset(policies []string) bool {
9373 // Hot-path the management tokens, superset of all policies.
9374 if a.Type == ACLManagementToken {
9375 return true
9376 }
9377 associatedPolicies := make(map[string]struct{}, len(a.Policies))
9378 for _, policy := range a.Policies {
9379 associatedPolicies[policy] = struct{}{}
9380 }
9381 for _, policy := range policies {
9382 if _, ok := associatedPolicies[policy]; !ok {
9383 return false
9384 }
9385 }
9386 return true
9387 }
9388
9389 // ACLTokenListRequest is used to request a list of tokens
9390 type ACLTokenListRequest struct {
9391 GlobalOnly bool
9392 QueryOptions
9393 }
9394
9395 // ACLTokenSpecificRequest is used to query a specific token
9396 type ACLTokenSpecificRequest struct {
9397 AccessorID string
9398 QueryOptions
9399 }
9400
9401 // ACLTokenSetRequest is used to query a set of tokens
9402 type ACLTokenSetRequest struct {
9403 AccessorIDS []string
9404 QueryOptions
9405 }
9406
9407 // ACLTokenListResponse is used for a list request
9408 type ACLTokenListResponse struct {
9409 Tokens []*ACLTokenListStub
9410 QueryMeta
9411 }
9412
9413 // SingleACLTokenResponse is used to return a single token
9414 type SingleACLTokenResponse struct {
9415 Token *ACLToken
9416 QueryMeta
9417 }
9418
9419 // ACLTokenSetResponse is used to return a set of tokens
9420 type ACLTokenSetResponse struct {
9421 Tokens map[string]*ACLToken // Keyed by Accessor ID
9422 QueryMeta
9423 }
9424
9425 // ResolveACLTokenRequest is used to resolve a specific token
9426 type ResolveACLTokenRequest struct {
9427 SecretID string
9428 QueryOptions
9429 }
9430
9431 // ResolveACLTokenResponse is used to resolve a single token
9432 type ResolveACLTokenResponse struct {
9433 Token *ACLToken
9434 QueryMeta
9435 }
9436
9437 // ACLTokenDeleteRequest is used to delete a set of tokens
9438 type ACLTokenDeleteRequest struct {
9439 AccessorIDs []string
9440 WriteRequest
9441 }
9442
9443 // ACLTokenBootstrapRequest is used to bootstrap ACLs
9444 type ACLTokenBootstrapRequest struct {
9445 Token *ACLToken // Not client specifiable
9446 ResetIndex uint64 // Reset index is used to clear the bootstrap token
9447 WriteRequest
9448 }
9449
9450 // ACLTokenUpsertRequest is used to upsert a set of tokens
9451 type ACLTokenUpsertRequest struct {
9452 Tokens []*ACLToken
9453 WriteRequest
9454 }
9455
9456 // ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest
9457 type ACLTokenUpsertResponse struct {
9458 Tokens []*ACLToken
9459 WriteMeta
9460 }