// github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/nomad/structs/structs.go

package structs

import (
	"bytes"
	"container/heap"
	"crypto/md5"
	"crypto/sha1"
	"crypto/sha256"
	"crypto/sha512"
	"encoding/base32"
	"encoding/base64"
	"encoding/hex"
	"errors"
	"fmt"
	"math"
	"net"
	"os"
	"path/filepath"
	"reflect"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/hashicorp/cronexpr"
	"github.com/hashicorp/go-msgpack/codec"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/go-version"
	"github.com/mitchellh/copystructure"
	"golang.org/x/crypto/blake2b"

	"github.com/hashicorp/nomad/acl"
	"github.com/hashicorp/nomad/command/agent/pprof"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/args"
	"github.com/hashicorp/nomad/helper/constraints/semver"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/lib/kheap"
	psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
)

var (
	// validPolicyName is used to validate a policy name
	validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$")

	// b32 is a lowercase base32 encoding for use in URL-friendly service hashes
	b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567"))
)

type MessageType uint8

// note: new raft message types need to be added to the end of this list
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	VaultAccessorDeregisterRequestType
	ApplyPlanResultsRequestType
	DeploymentStatusUpdateRequestType
	DeploymentPromoteRequestType
	DeploymentAllocHealthRequestType
	DeploymentDeleteRequestType
	JobStabilityRequestType
	ACLPolicyUpsertRequestType
	ACLPolicyDeleteRequestType
	ACLTokenUpsertRequestType
	ACLTokenDeleteRequestType
	ACLTokenBootstrapRequestType
	AutopilotRequestType
	UpsertNodeEventsType
	JobBatchDeregisterRequestType
	AllocUpdateDesiredTransitionRequestType
	NodeUpdateEligibilityRequestType
	BatchNodeUpdateDrainRequestType
	SchedulerConfigRequestType
	NodeBatchDeregisterRequestType
	ClusterMetadataRequestType
	ServiceIdentityAccessorRegisterRequestType
	ServiceIdentityAccessorDeregisterRequestType
	CSIVolumeRegisterRequestType
	CSIVolumeDeregisterRequestType
	CSIVolumeClaimRequestType
	ScalingEventRegisterRequestType
	CSIVolumeClaimBatchRequestType
	CSIPluginDeleteRequestType
)

const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"

	GetterModeAny  = "any"
	GetterModeFile = "file"
	GetterModeDir  = "dir"

	// maxPolicyDescriptionLength limits a policy description length
	maxPolicyDescriptionLength = 256

	// maxTokenNameLength limits an ACL token name's length
	maxTokenNameLength = 256

	// ACLClientToken and ACLManagementToken are the only types of tokens
	ACLClientToken     = "client"
	ACLManagementToken = "management"

	// DefaultNamespace is the default namespace.
	DefaultNamespace            = "default"
	DefaultNamespaceDescription = "Default shared namespace"

	// JitterFraction is the limit on the amount of jitter we apply to a
	// user-specified MaxQueryTime. We divide the specified time by the
	// fraction, so 16 == a 6.25% limit of jitter. This jitter is also
	// applied to RPCHoldTimeout.
	JitterFraction = 16

	// MaxRetainedNodeEvents is the maximum number of node events that will be
	// retained for a single node
	MaxRetainedNodeEvents = 10

	// MaxRetainedNodeScores is the number of top scoring nodes for which we
	// retain scoring metadata
	MaxRetainedNodeScores = 5

	// NormScorerName is the name of the normalized scorer
	NormScorerName = "normalized-score"
)
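// The following sketch is illustrative and not part of the original file: it
// mirrors how an FSM applier can honor IgnoreUnknownTypeFlag when applying a
// Raft log entry. Known types dispatch normally; an unknown type is skipped
// only when the flag bit is set. The function name and the panic fallback
// are assumptions for illustration.
func exampleApplyRaftEntry(buf []byte) interface{} {
	msgType := MessageType(buf[0])
	switch msgType {
	case NodeRegisterRequestType:
		// ... decode buf[1:] and apply the node registration ...
		return nil
	default:
		if msgType&IgnoreUnknownTypeFlag == IgnoreUnknownTypeFlag {
			// A newer server flagged this command as safe for old
			// servers to skip rather than crash on.
			return nil
		}
		panic(fmt.Errorf("failed to apply request: %#v", buf))
	}
}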
// Context defines the scope in which a search for a Nomad object operates,
// and is also used to query the matching index value for this context.
type Context string

const (
	Allocs      Context = "allocs"
	Deployments Context = "deployment"
	Evals       Context = "evals"
	Jobs        Context = "jobs"
	Nodes       Context = "nodes"
	Namespaces  Context = "namespaces"
	Quotas      Context = "quotas"
	All         Context = "all"
	Plugins     Context = "plugins"
	Volumes     Context = "volumes"
)

// NamespacedID is a tuple of an ID and a namespace
type NamespacedID struct {
	ID        string
	Namespace string
}

// NewNamespacedID returns a new namespaced ID given the ID and namespace
func NewNamespacedID(id, ns string) NamespacedID {
	return NamespacedID{
		ID:        id,
		Namespace: ns,
	}
}

func (n NamespacedID) String() string {
	return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID)
}

// RPCInfo is used to describe common information about a query
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
	IsForwarded() bool
	SetForwarded()
}

// InternalRpcInfo allows adding internal RPC metadata to an RPC. This struct
// should NOT be replicated in the API package as it is internal only.
type InternalRpcInfo struct {
	// Forwarded marks whether the RPC has been forwarded.
	Forwarded bool
}

// IsForwarded returns whether the RPC is forwarded from another server.
func (i *InternalRpcInfo) IsForwarded() bool {
	return i.Forwarded
}

// SetForwarded marks that the RPC is being forwarded from another server.
func (i *InternalRpcInfo) SetForwarded() {
	i.Forwarded = true
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// Namespace is the target namespace for the query.
	//
	// Since handlers do not have a default value set they should access
	// the Namespace via the RequestNamespace method.
	//
	// Requests accessing specific namespaced objects must check ACLs
	// against the namespace of the object, not the namespace in the
	// request.
	Namespace string

	// If set, wait until the query exceeds the given index. Must be
	// provided with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
	AllowStale bool

	// If set, used as prefix for resource list searches
	Prefix string

	// If set, used as token for resource list searches
	Token string

	// AuthToken is the secret portion of the ACL token used for the request
	AuthToken string

	InternalRpcInfo
}

func (q QueryOptions) RequestRegion() string {
	return q.Region
}

// RequestNamespace returns the request's namespace, or the default namespace
// if no explicit namespace was sent.
//
// Requests accessing specific namespaced objects must check ACLs against the
// namespace of the object, not the namespace in the request.
func (q QueryOptions) RequestNamespace() string {
	if q.Namespace == "" {
		return DefaultNamespace
	}
	return q.Namespace
}

// IsRead is always true: QueryOptions only applies to reads.
func (q QueryOptions) IsRead() bool {
	return true
}

func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}
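// An illustrative sketch, not part of the original file: populating
// QueryOptions for a blocking read. MinQueryIndex and MaxQueryTime travel
// together, and the server trims MaxQueryTime by a random jitter of up to
// 1/JitterFraction (16 == 6.25%) of the requested duration. The function
// name and the lastIndex parameter are hypothetical.
func exampleBlockingQueryOptions(lastIndex uint64) QueryOptions {
	return QueryOptions{
		Region:        "global",
		Namespace:     DefaultNamespace,
		MinQueryIndex: lastIndex,       // block until state passes this index
		MaxQueryTime:  5 * time.Minute, // upper bound on how long to block
		AllowStale:    true,            // any follower may answer; may be stale
	}
}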
// AgentPprofRequest is used to request a pprof report for a given node.
type AgentPprofRequest struct {
	// ReqType specifies the profile to use
	ReqType pprof.ReqType

	// Profile specifies the runtime/pprof profile to lookup and generate.
	Profile string

	// Seconds is the number of seconds to capture a profile
	Seconds int

	// Debug specifies if the pprof profile should include debug output
	Debug int

	// GC specifies if the profile should call runtime.GC() before
	// running its profile. This is only used for "heap" profiles
	GC int

	// NodeID is the node we want to track the logs of
	NodeID string

	// ServerID is the server we want to track the logs of
	ServerID string

	QueryOptions
}

// AgentPprofResponse is used to return a generated pprof profile
type AgentPprofResponse struct {
	// ID of the agent that fulfilled the request
	AgentID string

	// Payload is the generated pprof profile
	Payload []byte

	// HTTPHeaders are a set of key value pairs to be applied as
	// HTTP headers for a specific runtime profile
	HTTPHeaders map[string]string
}

type WriteRequest struct {
	// The target region for this write
	Region string

	// Namespace is the target namespace for the write.
	//
	// Since RPC handlers do not have a default value set they should
	// access the Namespace via the RequestNamespace method.
	//
	// Requests accessing specific namespaced objects must check ACLs
	// against the namespace of the object, not the namespace in the
	// request.
	Namespace string

	// AuthToken is the secret portion of the ACL token used for the request
	AuthToken string

	InternalRpcInfo
}

func (w WriteRequest) RequestRegion() string {
	// The target region for this request
	return w.Region
}

// RequestNamespace returns the request's namespace, or the default namespace
// if no explicit namespace was sent.
//
// Requests accessing specific namespaced objects must check ACLs against the
// namespace of the object, not the namespace in the request.
func (w WriteRequest) RequestNamespace() string {
	if w.Namespace == "" {
		return DefaultNamespace
	}
	return w.Namespace
}

// IsRead is always false: WriteRequest only applies to writes.
func (w WriteRequest) IsRead() bool {
	return false
}

func (w WriteRequest) AllowStaleRead() bool {
	return false
}
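// The zero value of a request leaves Namespace empty, which is why handlers
// must go through RequestNamespace rather than reading the field directly.
// An illustrative check, not part of the original file:
func exampleRequestNamespace() string {
	var w WriteRequest // Namespace intentionally left empty
	return w.RequestNamespace() // yields DefaultNamespace ("default")
}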
// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}

// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node      *Node
	NodeEvent *NodeEvent
	WriteRequest
}

// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeBatchDeregisterRequest is used for Node.BatchDeregister endpoint
// to deregister a batch of nodes from being schedulable entities.
type NodeBatchDeregisterRequest struct {
	NodeIDs []string
	WriteRequest
}

// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to
	Datacenter string
}
// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID    string
	Status    string
	NodeEvent *NodeEvent
	UpdatedAt int64
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain strategy
type NodeUpdateDrainRequest struct {
	NodeID        string
	DrainStrategy *DrainStrategy

	// COMPAT Remove in version 0.10
	// As part of Nomad 0.8 we have deprecated the drain boolean in favor of a
	// drain strategy but we need to handle the upgrade path where the Raft log
	// contains drain updates with just the drain boolean being manipulated.
	Drain bool

	// MarkEligible marks the node as eligible if removing the drain strategy.
	MarkEligible bool

	// NodeEvent is the event added to the node
	NodeEvent *NodeEvent

	// UpdatedAt represents server time of receiving request
	UpdatedAt int64

	WriteRequest
}

// BatchNodeUpdateDrainRequest is used for updating the drain strategy for a
// batch of nodes
type BatchNodeUpdateDrainRequest struct {
	// Updates is a mapping of nodes to their updated drain strategy
	Updates map[string]*DrainUpdate

	// NodeEvents is a mapping of the node to the event to add to the node
	NodeEvents map[string]*NodeEvent

	// UpdatedAt represents server time of receiving request
	UpdatedAt int64

	WriteRequest
}

// DrainUpdate is used to update the drain of a node
type DrainUpdate struct {
	// DrainStrategy is the new strategy for the node
	DrainStrategy *DrainStrategy

	// MarkEligible marks the node as eligible if removing the drain strategy.
	MarkEligible bool
}

// NodeUpdateEligibilityRequest is used for updating the scheduling eligibility
type NodeUpdateEligibilityRequest struct {
	NodeID      string
	Eligibility string

	// NodeEvent is the event added to the node
	NodeEvent *NodeEvent

	// UpdatedAt represents server time of receiving request
	UpdatedAt int64

	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	NodeID   string
	SecretID string
	QueryOptions
}

// SearchResponse is used to return matches and information about whether
// the match list is truncated specific to each type of context.
type SearchResponse struct {
	// Matches is a map of context types to IDs which match the specified
	// prefix
	Matches map[Context][]string

	// Truncations indicates whether the matches for a particular context have
	// been truncated
	Truncations map[Context]bool

	QueryMeta
}

// SearchRequest is used to parameterize a request, and returns a
// list of matches made up of jobs, allocations, evaluations, and/or nodes,
// along with whether or not the information returned is truncated.
type SearchRequest struct {
	// Prefix is what IDs are matched to. For example, if the given prefix
	// were "a", potential matches might be "abcd" or "aabb"
	Prefix string

	// Context is the type that can be matched against. A context can be a
	// job, node, evaluation, allocation, or empty (indicating every context
	// should be matched)
	Context Context

	QueryOptions
}
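// An illustrative sketch, not part of the original file: a prefix search for
// IDs beginning with "ab" across every context. Passing a specific Context
// such as Jobs restricts matching to that object type instead.
func exampleSearchRequest() *SearchRequest {
	return &SearchRequest{
		Prefix:  "ab", // would match IDs like "abcd" or "ab12"
		Context: All,  // search jobs, nodes, evals, allocs, etc. together
	}
}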
// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current job's index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	// PolicyOverride is set when the user is attempting to override any policies
	PolicyOverride bool

	WriteRequest
}

// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string

	// Purge controls whether the deregister purges the job from the system or
	// whether the job is just marked as stopped and will be removed by the
	// garbage collector
	Purge bool

	WriteRequest
}

// JobBatchDeregisterRequest is used to batch deregister jobs and upsert
// evaluations.
type JobBatchDeregisterRequest struct {
	// Jobs is the set of jobs to deregister
	Jobs map[NamespacedID]*JobDeregisterOptions

	// Evals is the set of evaluations to create.
	Evals []*Evaluation

	WriteRequest
}

// JobDeregisterOptions configures how a job is deregistered.
type JobDeregisterOptions struct {
	// Purge controls whether the deregister purges the job from the system or
	// whether the job is just marked as stopped and will be removed by the
	// garbage collector
	Purge bool
}

// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	JobID       string
	EvalOptions EvalOptions
	WriteRequest
}

// EvalOptions is used to encapsulate options when forcing a job evaluation
type EvalOptions struct {
	ForceReschedule bool
}

// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
	JobID string
	All   bool
	QueryOptions
}

// JobListRequest is used to parameterize a list request
type JobListRequest struct {
	QueryOptions
}
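// An illustrative sketch, not part of the original file: a check-and-set job
// registration. With EnforceIndex set, the write succeeds only if the
// cluster's current JobModifyIndex equals prevIndex; a prevIndex of zero
// means "register only if the job is new". The function name and parameters
// are hypothetical.
func exampleCheckAndSetRegister(job *Job, prevIndex uint64) *JobRegisterRequest {
	return &JobRegisterRequest{
		Job:            job,
		EnforceIndex:   true,
		JobModifyIndex: prevIndex,
	}
}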
// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
	Job  *Job
	Diff bool // Toggles an annotated diff
	// PolicyOverride is set when the user is attempting to override any policies
	PolicyOverride bool
	WriteRequest
}

// JobScaleRequest is used for the Job.Scale endpoint to scale one of the
// scaling targets in a job
type JobScaleRequest struct {
	Namespace string
	JobID     string
	Target    map[string]string
	Count     *int64
	Message   string
	Error     bool
	Meta      map[string]interface{}
	// PolicyOverride is set when the user is attempting to override any policies
	PolicyOverride bool
	WriteRequest
}

// JobSummaryRequest is used when we just need to get a specific job summary
type JobSummaryRequest struct {
	JobID string
	QueryOptions
}

// JobScaleStatusRequest is used to get the scale status for a job
type JobScaleStatusRequest struct {
	JobID string
	QueryOptions
}

// JobDispatchRequest is used to dispatch a job based on a parameterized job
type JobDispatchRequest struct {
	JobID   string
	Payload []byte
	Meta    map[string]string
	WriteRequest
}

// JobValidateRequest is used to validate a job
type JobValidateRequest struct {
	Job *Job
	WriteRequest
}

// JobRevertRequest is used to revert a job to a prior version.
type JobRevertRequest struct {
	// JobID is the ID of the job being reverted
	JobID string

	// JobVersion is the version to revert to.
	JobVersion uint64

	// EnforcePriorVersion if set will enforce that the job is at the given
	// version before reverting.
	EnforcePriorVersion *uint64

	// ConsulToken is the Consul token that proves the submitter of the job
	// revert has access to the Service Identity policies associated with the
	// job's Consul Connect enabled services. This field is only used to
	// transfer the token and is not stored after the job revert.
	ConsulToken string

	// VaultToken is the Vault token that proves the submitter of the job
	// revert has access to any Vault policies specified in the targeted job
	// version. This field is only used to transfer the token and is not
	// stored after the job revert.
	VaultToken string

	WriteRequest
}

// JobStabilityRequest is used to mark a job as stable.
type JobStabilityRequest struct {
	// Job to set the stability on
	JobID      string
	JobVersion uint64

	// Set the stability
	Stable bool
	WriteRequest
}

// JobStabilityResponse is the response when marking a job as stable.
type JobStabilityResponse struct {
	WriteMeta
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
	QueryOptions
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}
// EvalDeleteRequest is used for deleting an evaluation.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers       []string
	Timeout          time.Duration
	SchedulerVersion uint16
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// ApplyPlanResultsRequest is used by the planner to apply a Raft transaction
// committing the result of a plan.
type ApplyPlanResultsRequest struct {
	// AllocUpdateRequest holds the allocation updates to be made by the
	// scheduler.
	AllocUpdateRequest

	// Deployment is the deployment created or updated as a result of a
	// scheduling event.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// EvalID is the eval ID of the plan being applied. The modify index of the
	// evaluation is updated as part of applying the plan to ensure that subsequent
	// scheduling events for the same job will wait for the index that last produced
	// state changes. This is necessary for blocked evaluations since they can be
	// processed many times, potentially making state updates, without the state of
	// the evaluation itself being updated.
	EvalID string

	// COMPAT 0.11
	// NodePreemptions is a slice of allocations from other lower priority jobs
	// that are preempted. Preempted allocations are marked as evicted.
	// Deprecated: Replaced with AllocsPreempted which contains only the diff
	NodePreemptions []*Allocation

	// AllocsPreempted is a slice of allocation diffs from other lower priority jobs
	// that are preempted. Preempted allocations are marked as evicted.
	AllocsPreempted []*AllocationDiff

	// PreemptionEvals is a slice of follow up evals for jobs whose allocations
	// have been preempted to place allocs in this plan
	PreemptionEvals []*Evaluation
}
// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// COMPAT 0.11
	// Alloc is the list of new allocations to assign
	// Deprecated: Replaced with two separate slices, one containing stopped
	// allocations and another containing updated allocations
	Alloc []*Allocation

	// AllocsStopped are the allocations to stop. Contains only the diff, not
	// the entire allocation
	AllocsStopped []*AllocationDiff

	// AllocsUpdated are new or updated allocations
	AllocsUpdated []*Allocation

	// Evals is the list of new evaluations to create
	// Evals are valid only when used in the Raft RPC
	Evals []*Evaluation

	// Job is the shared parent job of the allocations.
	// It is pulled out, since it is common, to reduce payload size.
	Job *Job

	WriteRequest
}

// AllocUpdateDesiredTransitionRequest is used to submit changes to
// allocations' desired transition state.
type AllocUpdateDesiredTransitionRequest struct {
	// Allocs is the mapping of allocation ids to their desired state
	// transition
	Allocs map[string]*DesiredTransition

	// Evals is the set of evaluations to create
	Evals []*Evaluation

	WriteRequest
}

// AllocStopRequest is used to stop and reschedule a running Allocation.
type AllocStopRequest struct {
	AllocID string

	WriteRequest
}

// AllocStopResponse is the response to an `AllocStopRequest`
type AllocStopResponse struct {
	// EvalID is the id of the follow up evaluation for the rescheduled alloc.
	EvalID string

	WriteMeta
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// AllocSignalRequest is used to signal a specific allocation
type AllocSignalRequest struct {
	AllocID string
	Task    string
	Signal  string
	QueryOptions
}

// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}

// AllocRestartRequest is used to restart a specific allocation's tasks.
type AllocRestartRequest struct {
	AllocID  string
	TaskName string

	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}

// ServerMembersResponse has the list of servers in a cluster
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
}

// ServerMember holds information about a Nomad server agent in a cluster
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}

// ClusterMetadata is used to store per-cluster metadata.
type ClusterMetadata struct {
	ClusterID  string
	CreateTime int64
}

// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	QueryOptions
}

// VaultAccessorsRequest is used to operate on a set of Vault accessors
type VaultAccessorsRequest struct {
	Accessors []*VaultAccessor
}
// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
	AllocID     string
	Task        string
	NodeID      string
	Accessor    string
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}

// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token
	Tasks map[string]string

	// Error stores any error that occurred. Errors are stored here so we can
	// communicate whether it is retryable
	Error *RecoverableError

	QueryMeta
}

// GenericRequest is used for requests where no specific information is
// needed.
type GenericRequest struct {
	QueryOptions
}

// DeploymentListRequest is used to list the deployments
type DeploymentListRequest struct {
	QueryOptions
}

// DeploymentDeleteRequest is used for deleting deployments.
type DeploymentDeleteRequest struct {
	Deployments []string
	WriteRequest
}

// DeploymentStatusUpdateRequest is used to update the status of a deployment as
// well as optionally creating an evaluation atomically.
type DeploymentStatusUpdateRequest struct {
	// Eval, if set, is used to create an evaluation at the same time as
	// updating the status of a deployment.
	Eval *Evaluation

	// DeploymentUpdate is a status update to apply to the given
	// deployment.
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job
}

// DeploymentAllocHealthRequest is used to set the health of a set of
// allocations as part of a deployment.
type DeploymentAllocHealthRequest struct {
	DeploymentID string

	// HealthyAllocationIDs marks these allocations as healthy, allowing
	// further allocations to be rolled.
	HealthyAllocationIDs []string

	// UnhealthyAllocationIDs marks these allocations as unhealthy; any
	// unhealthy allocations fail the deployment
	UnhealthyAllocationIDs []string

	WriteRequest
}
// ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft
type ApplyDeploymentAllocHealthRequest struct {
	DeploymentAllocHealthRequest

	// Timestamp is the timestamp to use when setting the allocations health.
	Timestamp time.Time

	// An optional field to update the status of a deployment
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}

// DeploymentPromoteRequest is used to promote task groups in a deployment
type DeploymentPromoteRequest struct {
	DeploymentID string

	// All is to promote all task groups
	All bool

	// Groups is used to set the promotion status per task group
	Groups []string

	WriteRequest
}

// ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft
type ApplyDeploymentPromoteRequest struct {
	DeploymentPromoteRequest

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}

// DeploymentPauseRequest is used to pause a deployment
type DeploymentPauseRequest struct {
	DeploymentID string

	// Pause sets the pause status
	Pause bool

	WriteRequest
}

// DeploymentSpecificRequest is used to make a request specific to a particular
// deployment
type DeploymentSpecificRequest struct {
	DeploymentID string
	QueryOptions
}

// DeploymentFailRequest is used to fail a particular deployment
type DeploymentFailRequest struct {
	DeploymentID string
	WriteRequest
}

// ScalingPolicySpecificRequest is used when we just need to specify a target
// scaling policy
type ScalingPolicySpecificRequest struct {
	ID string
	QueryOptions
}

// SingleScalingPolicyResponse is used to return a single scaling policy
type SingleScalingPolicyResponse struct {
	Policy *ScalingPolicy
	QueryMeta
}

// ScalingPolicyListRequest is used to parameterize a scaling policy list request
type ScalingPolicyListRequest struct {
	QueryOptions
}

// ScalingPolicyListResponse is used for a list request
type ScalingPolicyListResponse struct {
	Policies []*ScalingPolicyListStub
	QueryMeta
}

// SingleDeploymentResponse is used to respond with a single deployment
type SingleDeploymentResponse struct {
	Deployment *Deployment
	QueryMeta
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}
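// The Versions map is keyed by the version constants defined near the top of
// this file. An illustrative sketch of how a server might populate it, not
// part of the original file; the exact set of keys a real server fills in is
// an assumption here.
func exampleVersionResponse() *VersionResponse {
	return &VersionResponse{
		Versions: map[string]int{
			APIMajorVersion: ApiMajorVersion, // "api.major" -> 1
			APIMinorVersion: ApiMinorVersion, // "api.minor" -> 1
		},
	}
}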
// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	VolumeEvalID    string
	VolumeEvalIndex uint64
	QueryMeta
}

// JobBatchDeregisterResponse is used to respond to a batch job deregistration
type JobBatchDeregisterResponse struct {
	// JobEvals maps the job to its created evaluation
	JobEvals map[NamespacedID]string
	QueryMeta
}

// JobValidateResponse is the response from a validate request
type JobValidateResponse struct {
	// DriverConfigValidated indicates whether the agent validated the driver
	// config
	DriverConfigValidated bool

	// ValidationErrors is a list of validation errors
	ValidationErrors []string

	// Error is a string version of any error that may have occurred
	Error string

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string
}
// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader. If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response. This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// NodeEligibilityUpdateResponse is used to respond to a node eligibility update
type NodeEligibilityUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// NodeClientAllocsResponse is used to return alloc metadata for a single node
type NodeClientAllocsResponse struct {
	Allocs map[string]uint64

	// MigrateTokens are used when ACLs are enabled to allow cross node,
	// authenticated access to sticky volumes
	MigrateTokens map[string]string

	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
	JobSummary *JobSummary
	QueryMeta
}

// JobScaleStatusResponse is used to return the scale status for a job
type JobScaleStatusResponse struct {
	JobScaleStatus *JobScaleStatus
	QueryMeta
}

type JobScaleStatus struct {
	JobID          string
	JobCreateIndex uint64
	JobModifyIndex uint64
	JobStopped     bool
	TaskGroups     map[string]*TaskGroupScaleStatus
}

// TaskGroupScaleStatus is used to return the scale status for a given task group
type TaskGroupScaleStatus struct {
	Desired   int
	Placed    int
	Running   int
	Healthy   int
	Unhealthy int
	Events    []*ScalingEvent
}

type JobDispatchResponse struct {
	DispatchedJobID string
	EvalID          string
	EvalCreateIndex uint64
	JobCreateIndex  uint64
	WriteMeta
}

// JobListResponse is used for a list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// JobVersionsRequest is used to get a job's versions
type JobVersionsRequest struct {
	JobID string
	Diffs bool
	QueryOptions
}

// JobVersionsResponse is used for a job get versions request
type JobVersionsResponse struct {
	Versions []*Job
	Diffs    []*JobDiff
	QueryMeta
}
// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	// NextPeriodicLaunch is the time at which the job would next be launched
	// if it were submitted.
	NextPeriodicLaunch time.Time

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	WriteMeta
}

// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string

	// WaitIndex is the Raft index the worker should wait until invoking the
	// scheduler.
	WaitIndex uint64

	QueryMeta
}

// GetWaitIndex is used to retrieve the Raft index in which state should be at
// or beyond before invoking the scheduler.
func (e *EvalDequeueResponse) GetWaitIndex() uint64 {
	// Prefer the wait index sent. This will be populated on all responses from
	// 0.7.0 and above
	if e.WaitIndex != 0 {
		return e.WaitIndex
	} else if e.Eval != nil {
		return e.Eval.ModifyIndex
	}

	// This should never happen
	return 1
}
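// An illustrative sketch, not part of the original file: after dequeueing,
// a scheduler worker waits for its local state store to reach GetWaitIndex
// before invoking the scheduler, so it never plans against state older than
// what produced the evaluation. waitForIndex is a hypothetical helper.
func exampleWorkerDequeue(resp *EvalDequeueResponse, waitForIndex func(uint64) error) error {
	if err := waitForIndex(resp.GetWaitIndex()); err != nil {
		return err
	}
	// ... now safe to snapshot state and schedule resp.Eval ...
	return nil
}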
// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}

// AllocListResponse is used for a list request
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// DeploymentListResponse is used for a list request
type DeploymentListResponse struct {
	Deployments []*Deployment
	QueryMeta
}

// EvalListResponse is used for a list request
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}

// DeploymentUpdateResponse is used to respond to a deployment change. The
// response will include the modify index of the deployment as well as details
// of any triggered evaluation.
type DeploymentUpdateResponse struct {
	EvalID                string
	EvalCreateIndex       uint64
	DeploymentModifyIndex uint64

	// RevertedJobVersion is the version the job was reverted to. If unset,
	// the job wasn't reverted
	RevertedJobVersion *uint64

	WriteMeta
}

// NodeConnQueryResponse is used to respond to a query of whether a server has
// a connection to a specific Node
type NodeConnQueryResponse struct {
	// Connected indicates whether a connection to the Client exists
	Connected bool

	// Established marks the time at which the connection was established
	Established time.Time

	QueryMeta
}

// EmitNodeEventsRequest is a request to update the node events source
// with a new client-side event
type EmitNodeEventsRequest struct {
	// NodeEvents are a map where the key is a node id, and value is a list of
	// events for that node
	NodeEvents map[string][]*NodeEvent

	WriteRequest
}

// EmitNodeEventsResponse is a response to the client about the status of
// the node event source update.
type EmitNodeEventsResponse struct {
	WriteMeta
}

const (
	NodeEventSubsystemDrain     = "Drain"
	NodeEventSubsystemDriver    = "Driver"
	NodeEventSubsystemHeartbeat = "Heartbeat"
	NodeEventSubsystemCluster   = "Cluster"
	NodeEventSubsystemStorage   = "Storage"
)

// NodeEvent is a single unit representing a node's state change
type NodeEvent struct {
	Message     string
	Subsystem   string
	Details     map[string]string
	Timestamp   time.Time
	CreateIndex uint64
}

func (ne *NodeEvent) String() string {
	var details []string
	for k, v := range ne.Details {
		details = append(details, fmt.Sprintf("%s: %s", k, v))
	}

	return fmt.Sprintf("Message: %s, Subsystem: %s, Details: %s, Timestamp: %s", ne.Message, ne.Subsystem, strings.Join(details, ","), ne.Timestamp.String())
}

func (ne *NodeEvent) Copy() *NodeEvent {
	c := new(NodeEvent)
	*c = *ne
	c.Details = helper.CopyMapStringString(ne.Details)
	return c
}

// NewNodeEvent generates a new node event storing the current time as the
// timestamp
func NewNodeEvent() *NodeEvent {
	return &NodeEvent{Timestamp: time.Now()}
}

// SetMessage is used to set the message on the node event
func (ne *NodeEvent) SetMessage(msg string) *NodeEvent {
	ne.Message = msg
	return ne
}

// SetSubsystem is used to set the subsystem on the node event
func (ne *NodeEvent) SetSubsystem(sys string) *NodeEvent {
	ne.Subsystem = sys
	return ne
}

// SetTimestamp is used to set the timestamp on the node event
func (ne *NodeEvent) SetTimestamp(ts time.Time) *NodeEvent {
	ne.Timestamp = ts
	return ne
}

// AddDetail is used to add a detail to the node event
func (ne *NodeEvent) AddDetail(k, v string) *NodeEvent {
	if ne.Details == nil {
		ne.Details = make(map[string]string, 1)
	}
	ne.Details[k] = v
	return ne
}
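// The setters above return the event itself, so events are typically built
// fluently. An illustrative sketch, not part of the original file; the
// message and detail values are made up:
func exampleNodeEvent() *NodeEvent {
	return NewNodeEvent().
		SetSubsystem(NodeEventSubsystemDrain).
		SetMessage("Node drain strategy set").
		AddDetail("deadline", "1h") // Details map is allocated on first use
}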
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)

// ShouldDrainNode checks if a given node status should trigger an
// evaluation. Some states don't require any further action.
func ShouldDrainNode(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady:
		return false
	case NodeStatusDown:
		return true
	default:
		panic(fmt.Sprintf("unhandled node status %s", status))
	}
}

// ValidNodeStatus is used to check if a node status is valid
func ValidNodeStatus(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
		return true
	default:
		return false
	}
}

const (
	// NodeSchedulingEligible and Ineligible mark the node as eligible or not,
	// respectively, for receiving allocations. This is orthogonal to the node
	// status being ready.
	NodeSchedulingEligible   = "eligible"
	NodeSchedulingIneligible = "ineligible"
)

// DrainSpec describes a Node's desired drain behavior.
type DrainSpec struct {
	// Deadline is the duration after StartTime when the remaining
	// allocations on a draining Node should be told to stop.
	Deadline time.Duration

	// IgnoreSystemJobs allows system jobs to remain on the node even though it
	// has been marked for draining.
	IgnoreSystemJobs bool
}

// DrainStrategy describes a Node's drain behavior.
type DrainStrategy struct {
	// DrainSpec is the user declared drain specification
	DrainSpec

	// ForceDeadline is the deadline time for the drain after which drains will
	// be forced
	ForceDeadline time.Time

	// StartedAt is the time the drain process started
	StartedAt time.Time
}

func (d *DrainStrategy) Copy() *DrainStrategy {
	if d == nil {
		return nil
	}

	nd := new(DrainStrategy)
	*nd = *d
	return nd
}

// DeadlineTime returns whether the drain strategy allows an infinite
// duration, and otherwise the deadline time. A force drain is captured by
// the deadline time being in the past.
func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) {
	// Treat the nil case as a force drain so during an upgrade where a node may
	// not have a drain strategy but has Drain set to true, it is treated as a
	// force to mimic old behavior.
	if d == nil {
		return false, time.Time{}
	}

	ns := d.Deadline.Nanoseconds()
	switch {
	case ns < 0: // Force
		return false, time.Time{}
	case ns == 0: // Infinite
		return true, time.Time{}
	default:
		return false, d.ForceDeadline
	}
}

func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
	if d == nil && o == nil {
		return true
	} else if o != nil && d == nil {
		return false
	} else if d != nil && o == nil {
		return false
	}

	// Compare values
	if d.ForceDeadline != o.ForceDeadline {
		return false
	} else if d.Deadline != o.Deadline {
		return false
	} else if d.IgnoreSystemJobs != o.IgnoreSystemJobs {
		return false
	}

	return true
}
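// DeadlineTime folds three behaviors into the sign of Deadline. An
// illustrative sketch, not part of the original file:
func exampleDeadlineCases() {
	force := &DrainStrategy{DrainSpec: DrainSpec{Deadline: -1 * time.Second}}
	inf, t := force.DeadlineTime() // inf == false, t is the zero time (already past, so force)

	infinite := &DrainStrategy{DrainSpec: DrainSpec{Deadline: 0}}
	inf, t = infinite.DeadlineTime() // inf == true, no deadline applies

	_, _ = inf, t
}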
// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
	TLSEnabled bool

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// NodeResources captures the available resources on the client.
	NodeResources *NodeResources

	// ReservedResources captures the set of resources on the client that are
	// reserved from scheduling.
	ReservedResources *NodeReservedResources

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	// COMPAT(0.10): Remove in 0.10
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may provide certain
	// high-watermark tolerances or account for external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// Token is the SecretID of an ACL token to use to authenticate RPC
	// requests
	Token string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// COMPAT: Remove in Nomad 0.9
	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained. Superseded by DrainStrategy in Nomad
	// 0.8 but kept for backward compat.
	Drain bool

	// DrainStrategy determines the node's draining behavior. Will be nil
	// when Drain=false.
	DrainStrategy *DrainStrategy

	// SchedulingEligibility determines whether this node will receive new
	// placements.
	SchedulingEligibility string

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// StatusUpdatedAt is the time stamp at which the state of the node was
	// updated
	StatusUpdatedAt int64

	// Events is the most recent set of events generated for the node,
	// retaining only MaxRetainedNodeEvents number at a time
	Events []*NodeEvent

	// Drivers is a map of driver names to current driver information
	Drivers map[string]*DriverInfo

	// CSIControllerPlugins is a map of plugin names to current CSI Plugin info
	CSIControllerPlugins map[string]*CSIInfo
	// CSINodePlugins is a map of plugin names to current CSI Plugin info
	CSINodePlugins map[string]*CSIInfo

	// HostVolumes is a map of host volume names to their configuration
	HostVolumes map[string]*ClientHostVolumeConfig

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// Ready returns true if the node is ready for running allocations
func (n *Node) Ready() bool {
	// Drain is checked directly to support pre-0.8 Node data
	return n.Status == NodeStatusReady && !n.Drain && n.SchedulingEligibility == NodeSchedulingEligible
}

func (n *Node) Canonicalize() {
	if n == nil {
		return
	}

	// COMPAT Remove in 0.10
	// In v0.8.0 we introduced scheduling eligibility, so we need to set it for
	// upgrading nodes
	if n.SchedulingEligibility == "" {
		if n.Drain {
			n.SchedulingEligibility = NodeSchedulingIneligible
		} else {
			n.SchedulingEligibility = NodeSchedulingEligible
		}
	}
}

func (n *Node) Copy() *Node {
	if n == nil {
		return nil
	}
	nn := new(Node)
	*nn = *n
	nn.Attributes = helper.CopyMapStringString(nn.Attributes)
	nn.Resources = nn.Resources.Copy()
	nn.Reserved = nn.Reserved.Copy()
	nn.NodeResources = nn.NodeResources.Copy()
	nn.ReservedResources = nn.ReservedResources.Copy()
	nn.Links = helper.CopyMapStringString(nn.Links)
	nn.Meta = helper.CopyMapStringString(nn.Meta)
	nn.Events = copyNodeEvents(n.Events)
	nn.DrainStrategy = nn.DrainStrategy.Copy()
	nn.CSIControllerPlugins = copyNodeCSI(nn.CSIControllerPlugins)
	nn.CSINodePlugins = copyNodeCSI(nn.CSINodePlugins)
	nn.Drivers = copyNodeDrivers(n.Drivers)
	nn.HostVolumes = copyNodeHostVolumes(n.HostVolumes)
	return nn
}

// copyNodeEvents is a helper to copy a list of NodeEvents
func copyNodeEvents(events []*NodeEvent) []*NodeEvent {
	l := len(events)
	if l == 0 {
		return nil
	}

	c := make([]*NodeEvent, l)
	for i, event := range events {
		c[i] = event.Copy()
	}
	return c
}

// copyNodeCSI is a helper to copy a map of CSIInfo
func copyNodeCSI(plugins map[string]*CSIInfo) map[string]*CSIInfo {
	l := len(plugins)
	if l == 0 {
		return nil
	}

	c := make(map[string]*CSIInfo, l)
	for plugin, info := range plugins {
		c[plugin] = info.Copy()
	}

	return c
}

// copyNodeDrivers is a helper to copy a map of DriverInfo
func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo {
	l := len(drivers)
	if l == 0 {
		return nil
	}

	c := make(map[string]*DriverInfo, l)
	for driver, info := range drivers {
		c[driver] = info.Copy()
	}
	return c
}
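// Copy and the helpers above produce a deep copy: mutating the copy's maps
// and slices must not leak into the original. An illustrative check, not
// part of the original file (it assumes n.Meta is non-empty, since the copy
// of an empty map is nil):
func exampleNodeCopyIsDeep(n *Node) bool {
	c := n.Copy()
	c.Meta["canary"] = "true" // only the copy's Meta is modified
	return n.Meta["canary"] != "true"
}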
// copyNodeHostVolumes is a helper to copy a map of string to Volume
func copyNodeHostVolumes(volumes map[string]*ClientHostVolumeConfig) map[string]*ClientHostVolumeConfig {
	l := len(volumes)
	if l == 0 {
		return nil
	}

	c := make(map[string]*ClientHostVolumeConfig, l)
	for volume, v := range volumes {
		c[volume] = v.Copy()
	}

	return c
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
func (n *Node) TerminalStatus() bool {
	switch n.Status {
	case NodeStatusDown:
		return true
	default:
		return false
	}
}

// COMPAT(0.11): Remove in 0.11
// ComparableReservedResources returns the reserved resources on the node
// handling upgrade paths. Reserved networks must be handled separately. After
// 0.11 calls to this should be replaced with:
// node.ReservedResources.Comparable()
func (n *Node) ComparableReservedResources() *ComparableResources {
	// See if we can no-op
	if n.Reserved == nil && n.ReservedResources == nil {
		return nil
	}

	// Node already has 0.9+ behavior
	if n.ReservedResources != nil {
		return n.ReservedResources.Comparable()
	}

	// Upgrade path
	return &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares: int64(n.Reserved.CPU),
			},
			Memory: AllocatedMemoryResources{
				MemoryMB: int64(n.Reserved.MemoryMB),
			},
		},
		Shared: AllocatedSharedResources{
			DiskMB: int64(n.Reserved.DiskMB),
		},
	}
}
After 0.11 1911 // calls to this should be replaced with: node.NodeResources.Comparable() 1912 func (n *Node) ComparableResources() *ComparableResources { 1913 // Node already has 0.9+ behavior 1914 if n.NodeResources != nil { 1915 return n.NodeResources.Comparable() 1916 } 1917 1918 // Upgrade path 1919 return &ComparableResources{ 1920 Flattened: AllocatedTaskResources{ 1921 Cpu: AllocatedCpuResources{ 1922 CpuShares: int64(n.Resources.CPU), 1923 }, 1924 Memory: AllocatedMemoryResources{ 1925 MemoryMB: int64(n.Resources.MemoryMB), 1926 }, 1927 }, 1928 Shared: AllocatedSharedResources{ 1929 DiskMB: int64(n.Resources.DiskMB), 1930 }, 1931 } 1932 } 1933 1934 // Stub returns a summarized version of the node 1935 func (n *Node) Stub() *NodeListStub { 1936 1937 addr, _, _ := net.SplitHostPort(n.HTTPAddr) 1938 1939 return &NodeListStub{ 1940 Address: addr, 1941 ID: n.ID, 1942 Datacenter: n.Datacenter, 1943 Name: n.Name, 1944 NodeClass: n.NodeClass, 1945 Version: n.Attributes["nomad.version"], 1946 Drain: n.Drain, 1947 SchedulingEligibility: n.SchedulingEligibility, 1948 Status: n.Status, 1949 StatusDescription: n.StatusDescription, 1950 Drivers: n.Drivers, 1951 HostVolumes: n.HostVolumes, 1952 CreateIndex: n.CreateIndex, 1953 ModifyIndex: n.ModifyIndex, 1954 } 1955 } 1956 1957 // NodeListStub is used to return a subset of node information 1958 // for the node list 1959 type NodeListStub struct { 1960 Address string 1961 ID string 1962 Datacenter string 1963 Name string 1964 NodeClass string 1965 Version string 1966 Drain bool 1967 SchedulingEligibility string 1968 Status string 1969 StatusDescription string 1970 Drivers map[string]*DriverInfo 1971 HostVolumes map[string]*ClientHostVolumeConfig 1972 CreateIndex uint64 1973 ModifyIndex uint64 1974 } 1975 1976 // Resources is used to define the resources available 1977 // on a client 1978 type Resources struct { 1979 CPU int 1980 MemoryMB int 1981 DiskMB int 1982 IOPS int // COMPAT(0.10): Only being used to issue warnings 1983 Networks Networks 1984 Devices ResourceDevices 1985 } 1986 1987 const ( 1988 BytesInMegabyte = 1024 * 1024 1989 ) 1990 1991 // DefaultResources is a small resources object that contains the 1992 // default resources requests that we will provide to an object. 1993 // --- THIS FUNCTION IS REPLICATED IN api/resources.go and should 1994 // be kept in sync. 1995 func DefaultResources() *Resources { 1996 return &Resources{ 1997 CPU: 100, 1998 MemoryMB: 300, 1999 } 2000 } 2001 2002 // MinResources is a small resources object that contains the 2003 // absolute minimum resources that we will provide to an object. 2004 // This should not be confused with the defaults which are 2005 // provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN 2006 // api/resources.go and should be kept in sync. 2007 func MinResources() *Resources { 2008 return &Resources{ 2009 CPU: 20, 2010 MemoryMB: 10, 2011 } 2012 } 2013 2014 // DiskInBytes returns the amount of disk resources in bytes.
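// For example, a group with DiskMB = 300 reports
// 300 * 1024 * 1024 = 314572800 bytes.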
2015 func (r *Resources) DiskInBytes() int64 { 2016 return int64(r.DiskMB * BytesInMegabyte) 2017 } 2018 2019 func (r *Resources) Validate() error { 2020 var mErr multierror.Error 2021 if err := r.MeetsMinResources(); err != nil { 2022 mErr.Errors = append(mErr.Errors, err) 2023 } 2024 2025 // Ensure the task isn't asking for disk resources 2026 if r.DiskMB > 0 { 2027 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 2028 } 2029 2030 for i, d := range r.Devices { 2031 if err := d.Validate(); err != nil { 2032 mErr.Errors = append(mErr.Errors, fmt.Errorf("device %d failed validation: %v", i+1, err)) 2033 } 2034 } 2035 2036 return mErr.ErrorOrNil() 2037 } 2038 2039 // Merge merges this resource with another resource. 2040 // COMPAT(0.10): Remove in 0.10 2041 func (r *Resources) Merge(other *Resources) { 2042 if other.CPU != 0 { 2043 r.CPU = other.CPU 2044 } 2045 if other.MemoryMB != 0 { 2046 r.MemoryMB = other.MemoryMB 2047 } 2048 if other.DiskMB != 0 { 2049 r.DiskMB = other.DiskMB 2050 } 2051 if len(other.Networks) != 0 { 2052 r.Networks = other.Networks 2053 } 2054 if len(other.Devices) != 0 { 2055 r.Devices = other.Devices 2056 } 2057 } 2058 2059 // COMPAT(0.10): Remove in 0.10 2060 func (r *Resources) Equals(o *Resources) bool { 2061 if r == o { 2062 return true 2063 } 2064 if r == nil || o == nil { 2065 return false 2066 } 2067 return r.CPU == o.CPU && 2068 r.MemoryMB == o.MemoryMB && 2069 r.DiskMB == o.DiskMB && 2070 r.IOPS == o.IOPS && 2071 r.Networks.Equals(&o.Networks) && 2072 r.Devices.Equals(&o.Devices) 2073 } 2074 2075 // COMPAT(0.10): Remove in 0.10 2076 // ResourceDevices are part of Resources 2077 type ResourceDevices []*RequestedDevice 2078 2079 // COMPAT(0.10): Remove in 0.10 2080 // Equals compares ResourceDevices as a set keyed by Name 2081 func (d *ResourceDevices) Equals(o *ResourceDevices) bool { 2082 if d == o { 2083 return true 2084 } 2085 if d == nil || o == nil { 2086 return false 2087 } 2088 if len(*d) != len(*o) { 2089 return false 2090 } 2091 m := make(map[string]*RequestedDevice, len(*d)) 2092 for _, e := range *d { 2093 m[e.Name] = e 2094 } 2095 for _, oe := range *o { 2096 de, ok := m[oe.Name] 2097 if !ok || !de.Equals(oe) { 2098 return false 2099 } 2100 } 2101 return true 2102 } 2103 2104 // COMPAT(0.10): Remove in 0.10 2105 func (r *Resources) Canonicalize() { 2106 // Ensure that empty and nil slices are treated the same to avoid scheduling 2107 // problems since we use reflect DeepEquals. 2108 if len(r.Networks) == 0 { 2109 r.Networks = nil 2110 } 2111 if len(r.Devices) == 0 { 2112 r.Devices = nil 2113 } 2114 2115 for _, n := range r.Networks { 2116 n.Canonicalize() 2117 } 2118 } 2119 2120 // MeetsMinResources returns an error if the resources specified are less than 2121 // the minimum allowed.
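// (At the time of writing that floor is CPU = 20 and MemoryMB = 10, per
// MinResources above.)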
2122 // This is based on the minimums defined in the Resources type 2123 // COMPAT(0.10): Remove in 0.10 2124 func (r *Resources) MeetsMinResources() error { 2125 var mErr multierror.Error 2126 minResources := MinResources() 2127 if r.CPU < minResources.CPU { 2128 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU)) 2129 } 2130 if r.MemoryMB < minResources.MemoryMB { 2131 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB)) 2132 } 2133 for i, n := range r.Networks { 2134 if err := n.MeetsMinResources(); err != nil { 2135 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 2136 } 2137 } 2138 2139 return mErr.ErrorOrNil() 2140 } 2141 2142 // Copy returns a deep copy of the resources 2143 func (r *Resources) Copy() *Resources { 2144 if r == nil { 2145 return nil 2146 } 2147 newR := new(Resources) 2148 *newR = *r 2149 2150 // Copy the network objects 2151 newR.Networks = r.Networks.Copy() 2152 2153 // Copy the devices 2154 if r.Devices != nil { 2155 n := len(r.Devices) 2156 newR.Devices = make([]*RequestedDevice, n) 2157 for i := 0; i < n; i++ { 2158 newR.Devices[i] = r.Devices[i].Copy() 2159 } 2160 } 2161 2162 return newR 2163 } 2164 2165 // NetIndex finds the matching net index using device name 2166 // COMPAT(0.10): Remove in 0.10 2167 func (r *Resources) NetIndex(n *NetworkResource) int { 2168 return r.Networks.NetIndex(n) 2169 } 2170 2171 // Superset checks if one set of resources is a superset 2172 // of another. This ignores network resources, and the NetworkIndex 2173 // should be used for that. 2174 // COMPAT(0.10): Remove in 0.10 2175 func (r *Resources) Superset(other *Resources) (bool, string) { 2176 if r.CPU < other.CPU { 2177 return false, "cpu" 2178 } 2179 if r.MemoryMB < other.MemoryMB { 2180 return false, "memory" 2181 } 2182 if r.DiskMB < other.DiskMB { 2183 return false, "disk" 2184 } 2185 return true, "" 2186 } 2187 2188 // Add adds the resources of the delta to this, potentially 2189 // returning an error if not possible. 
2190 // COMPAT(0.10): Remove in 0.10 2191 func (r *Resources) Add(delta *Resources) error { 2192 if delta == nil { 2193 return nil 2194 } 2195 r.CPU += delta.CPU 2196 r.MemoryMB += delta.MemoryMB 2197 r.DiskMB += delta.DiskMB 2198 2199 for _, n := range delta.Networks { 2200 // Find the matching interface by IP or CIDR 2201 idx := r.NetIndex(n) 2202 if idx == -1 { 2203 r.Networks = append(r.Networks, n.Copy()) 2204 } else { 2205 r.Networks[idx].Add(n) 2206 } 2207 } 2208 return nil 2209 } 2210 2211 // COMPAT(0.10): Remove in 0.10 2212 func (r *Resources) GoString() string { 2213 return fmt.Sprintf("*%#v", *r) 2214 } 2215 2216 type Port struct { 2217 Label string 2218 Value int 2219 To int 2220 } 2221 2222 // NetworkResource is used to represent available network 2223 // resources 2224 type NetworkResource struct { 2225 Mode string // Mode of the network 2226 Device string // Name of the device 2227 CIDR string // CIDR block of addresses 2228 IP string // Host IP address 2229 MBits int // Throughput 2230 ReservedPorts []Port // Host Reserved ports 2231 DynamicPorts []Port // Host Dynamically assigned ports 2232 } 2233 2234 func (nr *NetworkResource) Equals(other *NetworkResource) bool { 2235 if nr.Mode != other.Mode { 2236 return false 2237 } 2238 2239 if nr.Device != other.Device { 2240 return false 2241 } 2242 2243 if nr.CIDR != other.CIDR { 2244 return false 2245 } 2246 2247 if nr.IP != other.IP { 2248 return false 2249 } 2250 2251 if nr.MBits != other.MBits { 2252 return false 2253 } 2254 2255 if len(nr.ReservedPorts) != len(other.ReservedPorts) { 2256 return false 2257 } 2258 2259 for i, port := range nr.ReservedPorts { 2260 if len(other.ReservedPorts) <= i { 2261 return false 2262 } 2263 if port != other.ReservedPorts[i] { 2264 return false 2265 } 2266 } 2267 2268 if len(nr.DynamicPorts) != len(other.DynamicPorts) { 2269 return false 2270 } 2271 for i, port := range nr.DynamicPorts { 2272 if len(other.DynamicPorts) <= i { 2273 return false 2274 } 2275 if port != other.DynamicPorts[i] { 2276 return false 2277 } 2278 } 2279 2280 return true 2281 } 2282 2283 func (n *NetworkResource) Canonicalize() { 2284 // Ensure that empty and nil slices are treated the same to avoid scheduling 2285 // problems since we use reflect DeepEquals. 2286 if len(n.ReservedPorts) == 0 { 2287 n.ReservedPorts = nil 2288 } 2289 if len(n.DynamicPorts) == 0 { 2290 n.DynamicPorts = nil 2291 } 2292 } 2293 2294 // MeetsMinResources returns an error if the resources specified are less than 2295 // the minimum allowed. 2296 func (n *NetworkResource) MeetsMinResources() error { 2297 var mErr multierror.Error 2298 if n.MBits < 1 { 2299 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits)) 2300 } 2301 return mErr.ErrorOrNil() 2302 } 2303 2304 // Copy returns a deep copy of the network resource 2305 func (n *NetworkResource) Copy() *NetworkResource { 2306 if n == nil { 2307 return nil 2308 } 2309 newR := new(NetworkResource) 2310 *newR = *n 2311 if n.ReservedPorts != nil { 2312 newR.ReservedPorts = make([]Port, len(n.ReservedPorts)) 2313 copy(newR.ReservedPorts, n.ReservedPorts) 2314 } 2315 if n.DynamicPorts != nil { 2316 newR.DynamicPorts = make([]Port, len(n.DynamicPorts)) 2317 copy(newR.DynamicPorts, n.DynamicPorts) 2318 } 2319 return newR 2320 } 2321 2322 // Add adds the resources of the delta to this. Unlike Resources.Add above, 2323 // it does not return an error.
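// As an illustrative sketch (made-up values): adding a delta with
// MBits = 50 and one dynamic port to a network that already has
// MBits = 100 yields MBits = 150, with the delta's reserved and dynamic
// ports appended to the receiver's port lists.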
2324 func (n *NetworkResource) Add(delta *NetworkResource) { 2325 if len(delta.ReservedPorts) > 0 { 2326 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...) 2327 } 2328 n.MBits += delta.MBits 2329 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...) 2330 } 2331 2332 func (n *NetworkResource) GoString() string { 2333 return fmt.Sprintf("*%#v", *n) 2334 } 2335 2336 // PortLabels returns a map of port labels to their assigned host ports. 2337 func (n *NetworkResource) PortLabels() map[string]int { 2338 num := len(n.ReservedPorts) + len(n.DynamicPorts) 2339 labelValues := make(map[string]int, num) 2340 for _, port := range n.ReservedPorts { 2341 labelValues[port.Label] = port.Value 2342 } 2343 for _, port := range n.DynamicPorts { 2344 labelValues[port.Label] = port.Value 2345 } 2346 return labelValues 2347 } 2348 2349 // PortForService returns the Connect port for the given service. Returns false if 2350 // no port was found for a service with that name. 2351 func (n *NetworkResource) PortForService(serviceName string) (Port, bool) { 2352 label := fmt.Sprintf("%s-%s", ConnectProxyPrefix, serviceName) 2353 for _, port := range n.ReservedPorts { 2354 if port.Label == label { 2355 return port, true 2356 } 2357 } 2358 for _, port := range n.DynamicPorts { 2359 if port.Label == label { 2360 return port, true 2361 } 2362 } 2363 2364 return Port{}, false 2365 } 2366 2367 // Networks defined for a task on the Resources struct. 2368 type Networks []*NetworkResource 2369 2370 func (ns Networks) Copy() Networks { 2371 if len(ns) == 0 { 2372 return nil 2373 } 2374 2375 out := make([]*NetworkResource, len(ns)) 2376 for i := range ns { 2377 out[i] = ns[i].Copy() 2378 } 2379 return out 2380 } 2381 2382 // Port returns the port assignment and IP for the given label, or empty values. 2383 func (ns Networks) Port(label string) (string, int) { 2384 for _, n := range ns { 2385 for _, p := range n.ReservedPorts { 2386 if p.Label == label { 2387 return n.IP, p.Value 2388 } 2389 } 2390 for _, p := range n.DynamicPorts { 2391 if p.Label == label { 2392 return n.IP, p.Value 2393 } 2394 } 2395 } 2396 return "", 0 2397 } 2398 2399 func (ns Networks) NetIndex(n *NetworkResource) int { 2400 for idx, net := range ns { 2401 if net.Device == n.Device { 2402 return idx 2403 } 2404 } 2405 return -1 2406 } 2407 2408 // RequestedDevice is used to request a device for a task. 2409 type RequestedDevice struct { 2410 // Name is the request name. The possible values are as follows: 2411 // * <type>: A single value only specifies the type of request. 2412 // * <vendor>/<type>: A single slash delimiter assumes the vendor and type of device is specified. 2413 // * <vendor>/<type>/<name>: Two slash delimiters assume vendor, type and specific model are specified. 2414 // 2415 // Examples are as follows: 2416 // * "gpu" 2417 // * "nvidia/gpu" 2418 // * "nvidia/gpu/GTX2080Ti" 2419 Name string 2420 2421 // Count is the number of requested devices 2422 Count uint64 2423 2424 // Constraints are a set of constraints to apply when selecting the device 2425 // to use. 2426 Constraints Constraints 2427 2428 // Affinities are a set of affinities to apply when selecting the device 2429 // to use.
2430 Affinities Affinities 2431 } 2432 2433 func (r *RequestedDevice) Equals(o *RequestedDevice) bool { 2434 if r == o { 2435 return true 2436 } 2437 if r == nil || o == nil { 2438 return false 2439 } 2440 return r.Name == o.Name && 2441 r.Count == o.Count && 2442 r.Constraints.Equals(&o.Constraints) && 2443 r.Affinities.Equals(&o.Affinities) 2444 } 2445 2446 func (r *RequestedDevice) Copy() *RequestedDevice { 2447 if r == nil { 2448 return nil 2449 } 2450 2451 nr := *r 2452 nr.Constraints = CopySliceConstraints(nr.Constraints) 2453 nr.Affinities = CopySliceAffinities(nr.Affinities) 2454 2455 return &nr 2456 } 2457 2458 func (r *RequestedDevice) ID() *DeviceIdTuple { 2459 if r == nil || r.Name == "" { 2460 return nil 2461 } 2462 2463 parts := strings.SplitN(r.Name, "/", 3) 2464 switch len(parts) { 2465 case 1: 2466 return &DeviceIdTuple{ 2467 Type: parts[0], 2468 } 2469 case 2: 2470 return &DeviceIdTuple{ 2471 Vendor: parts[0], 2472 Type: parts[1], 2473 } 2474 default: 2475 return &DeviceIdTuple{ 2476 Vendor: parts[0], 2477 Type: parts[1], 2478 Name: parts[2], 2479 } 2480 } 2481 } 2482 2483 func (r *RequestedDevice) Validate() error { 2484 if r == nil { 2485 return nil 2486 } 2487 2488 var mErr multierror.Error 2489 if r.Name == "" { 2490 multierror.Append(&mErr, errors.New("device name must be given as one of the following: type, vendor/type, or vendor/type/name")) 2491 } 2492 2493 for idx, constr := range r.Constraints { 2494 // Ensure that the constraint doesn't use an operand we do not allow 2495 switch constr.Operand { 2496 case ConstraintDistinctHosts, ConstraintDistinctProperty: 2497 outer := fmt.Errorf("Constraint %d validation failed: using unsupported operand %q", idx+1, constr.Operand) 2498 multierror.Append(&mErr, outer) 2499 default: 2500 if err := constr.Validate(); err != nil { 2501 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 2502 multierror.Append(&mErr, outer) 2503 } 2504 } 2505 } 2506 for idx, affinity := range r.Affinities { 2507 if err := affinity.Validate(); err != nil { 2508 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 2509 multierror.Append(&mErr, outer) 2510 } 2511 } 2512 2513 return mErr.ErrorOrNil() 2514 } 2515 2516 // NodeResources is used to define the resources available on a client node. 2517 type NodeResources struct { 2518 Cpu NodeCpuResources 2519 Memory NodeMemoryResources 2520 Disk NodeDiskResources 2521 Networks Networks 2522 Devices []*NodeDeviceResource 2523 } 2524 2525 func (n *NodeResources) Copy() *NodeResources { 2526 if n == nil { 2527 return nil 2528 } 2529 2530 newN := new(NodeResources) 2531 *newN = *n 2532 2533 // Copy the networks 2534 newN.Networks = n.Networks.Copy() 2535 2536 // Copy the devices 2537 if n.Devices != nil { 2538 devices := len(n.Devices) 2539 newN.Devices = make([]*NodeDeviceResource, devices) 2540 for i := 0; i < devices; i++ { 2541 newN.Devices[i] = n.Devices[i].Copy() 2542 } 2543 } 2544 2545 return newN 2546 } 2547 2548 // Comparable returns a comparable version of the nodes resources. This 2549 // conversion can be lossy so care must be taken when using it. 
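// As a rough illustration (made-up values): a node with
// Cpu.CpuShares = 4000, Memory.MemoryMB = 8192 and Disk.DiskMB = 100000
// flattens to the same CPU and memory figures plus Shared.DiskMB = 100000;
// device information is dropped, which is where the lossiness comes from.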
2550 func (n *NodeResources) Comparable() *ComparableResources { 2551 if n == nil { 2552 return nil 2553 } 2554 2555 c := &ComparableResources{ 2556 Flattened: AllocatedTaskResources{ 2557 Cpu: AllocatedCpuResources{ 2558 CpuShares: n.Cpu.CpuShares, 2559 }, 2560 Memory: AllocatedMemoryResources{ 2561 MemoryMB: n.Memory.MemoryMB, 2562 }, 2563 Networks: n.Networks, 2564 }, 2565 Shared: AllocatedSharedResources{ 2566 DiskMB: n.Disk.DiskMB, 2567 }, 2568 } 2569 return c 2570 } 2571 2572 func (n *NodeResources) Merge(o *NodeResources) { 2573 if o == nil { 2574 return 2575 } 2576 2577 n.Cpu.Merge(&o.Cpu) 2578 n.Memory.Merge(&o.Memory) 2579 n.Disk.Merge(&o.Disk) 2580 2581 if len(o.Networks) != 0 { 2582 n.Networks = o.Networks 2583 } 2584 2585 if len(o.Devices) != 0 { 2586 n.Devices = o.Devices 2587 } 2588 } 2589 2590 func (n *NodeResources) Equals(o *NodeResources) bool { 2591 if o == nil && n == nil { 2592 return true 2593 } else if o == nil { 2594 return false 2595 } else if n == nil { 2596 return false 2597 } 2598 2599 if !n.Cpu.Equals(&o.Cpu) { 2600 return false 2601 } 2602 if !n.Memory.Equals(&o.Memory) { 2603 return false 2604 } 2605 if !n.Disk.Equals(&o.Disk) { 2606 return false 2607 } 2608 if !n.Networks.Equals(&o.Networks) { 2609 return false 2610 } 2611 2612 // Check the devices 2613 if !DevicesEquals(n.Devices, o.Devices) { 2614 return false 2615 } 2616 2617 return true 2618 } 2619 2620 // Equals equates Networks as a set 2621 func (ns *Networks) Equals(o *Networks) bool { 2622 if ns == o { 2623 return true 2624 } 2625 if ns == nil || o == nil { 2626 return false 2627 } 2628 if len(*ns) != len(*o) { 2629 return false 2630 } 2631 SETEQUALS: 2632 for _, ne := range *ns { 2633 for _, oe := range *o { 2634 if ne.Equals(oe) { 2635 continue SETEQUALS 2636 } 2637 } 2638 return false 2639 } 2640 return true 2641 } 2642 2643 // DevicesEquals returns true if the two device arrays are set equal 2644 func DevicesEquals(d1, d2 []*NodeDeviceResource) bool { 2645 if len(d1) != len(d2) { 2646 return false 2647 } 2648 idMap := make(map[DeviceIdTuple]*NodeDeviceResource, len(d1)) 2649 for _, d := range d1 { 2650 idMap[*d.ID()] = d 2651 } 2652 for _, otherD := range d2 { 2653 if d, ok := idMap[*otherD.ID()]; !ok || !d.Equals(otherD) { 2654 return false 2655 } 2656 } 2657 2658 return true 2659 } 2660 2661 // NodeCpuResources captures the CPU resources of the node. 2662 type NodeCpuResources struct { 2663 // CpuShares is the CPU shares available. This is calculated by number of 2664 // cores multiplied by the core frequency. 
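// For example, a node with 4 cores at 2500 MHz would report
// 4 * 2500 = 10000 CPU shares.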
2665 CpuShares int64 2666 } 2667 2668 func (n *NodeCpuResources) Merge(o *NodeCpuResources) { 2669 if o == nil { 2670 return 2671 } 2672 2673 if o.CpuShares != 0 { 2674 n.CpuShares = o.CpuShares 2675 } 2676 } 2677 2678 func (n *NodeCpuResources) Equals(o *NodeCpuResources) bool { 2679 if o == nil && n == nil { 2680 return true 2681 } else if o == nil { 2682 return false 2683 } else if n == nil { 2684 return false 2685 } 2686 2687 if n.CpuShares != o.CpuShares { 2688 return false 2689 } 2690 2691 return true 2692 } 2693 2694 // NodeMemoryResources captures the memory resources of the node 2695 type NodeMemoryResources struct { 2696 // MemoryMB is the total available memory on the node 2697 MemoryMB int64 2698 } 2699 2700 func (n *NodeMemoryResources) Merge(o *NodeMemoryResources) { 2701 if o == nil { 2702 return 2703 } 2704 2705 if o.MemoryMB != 0 { 2706 n.MemoryMB = o.MemoryMB 2707 } 2708 } 2709 2710 func (n *NodeMemoryResources) Equals(o *NodeMemoryResources) bool { 2711 if o == nil && n == nil { 2712 return true 2713 } else if o == nil { 2714 return false 2715 } else if n == nil { 2716 return false 2717 } 2718 2719 if n.MemoryMB != o.MemoryMB { 2720 return false 2721 } 2722 2723 return true 2724 } 2725 2726 // NodeDiskResources captures the disk resources of the node 2727 type NodeDiskResources struct { 2728 // DiskMB is the total available disk space on the node 2729 DiskMB int64 2730 } 2731 2732 func (n *NodeDiskResources) Merge(o *NodeDiskResources) { 2733 if o == nil { 2734 return 2735 } 2736 if o.DiskMB != 0 { 2737 n.DiskMB = o.DiskMB 2738 } 2739 } 2740 2741 func (n *NodeDiskResources) Equals(o *NodeDiskResources) bool { 2742 if o == nil && n == nil { 2743 return true 2744 } else if o == nil { 2745 return false 2746 } else if n == nil { 2747 return false 2748 } 2749 2750 if n.DiskMB != o.DiskMB { 2751 return false 2752 } 2753 2754 return true 2755 } 2756 2757 // DeviceIdTuple is the tuple that identifies a device 2758 type DeviceIdTuple struct { 2759 Vendor string 2760 Type string 2761 Name string 2762 } 2763 2764 func (d *DeviceIdTuple) String() string { 2765 if d == nil { 2766 return "" 2767 } 2768 2769 return fmt.Sprintf("%s/%s/%s", d.Vendor, d.Type, d.Name) 2770 } 2771 2772 // Matches returns if this Device ID is a superset of the passed ID. 2773 func (id *DeviceIdTuple) Matches(other *DeviceIdTuple) bool { 2774 if other == nil { 2775 return false 2776 } 2777 2778 if other.Name != "" && other.Name != id.Name { 2779 return false 2780 } 2781 2782 if other.Vendor != "" && other.Vendor != id.Vendor { 2783 return false 2784 } 2785 2786 if other.Type != "" && other.Type != id.Type { 2787 return false 2788 } 2789 2790 return true 2791 } 2792 2793 // Equals returns if this Device ID is the same as the passed ID. 2794 func (id *DeviceIdTuple) Equals(o *DeviceIdTuple) bool { 2795 if id == nil && o == nil { 2796 return true 2797 } else if id == nil || o == nil { 2798 return false 2799 } 2800 2801 return o.Vendor == id.Vendor && o.Type == id.Type && o.Name == id.Name 2802 } 2803 2804 // NodeDeviceResource captures a set of devices sharing a common 2805 // vendor/type/device_name tuple. 
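// As an illustrative sketch (made-up values): two GPUs of the same model
// would be grouped as a single NodeDeviceResource with Vendor = "nvidia",
// Type = "gpu", Name = "GTX2080Ti" and one entry in Instances per
// physical device.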
2806 type NodeDeviceResource struct { 2807 Vendor string 2808 Type string 2809 Name string 2810 Instances []*NodeDevice 2811 Attributes map[string]*psstructs.Attribute 2812 } 2813 2814 func (n *NodeDeviceResource) ID() *DeviceIdTuple { 2815 if n == nil { 2816 return nil 2817 } 2818 2819 return &DeviceIdTuple{ 2820 Vendor: n.Vendor, 2821 Type: n.Type, 2822 Name: n.Name, 2823 } 2824 } 2825 2826 func (n *NodeDeviceResource) Copy() *NodeDeviceResource { 2827 if n == nil { 2828 return nil 2829 } 2830 2831 // Copy the primitives 2832 nn := *n 2833 2834 // Copy the device instances 2835 if l := len(nn.Instances); l != 0 { 2836 nn.Instances = make([]*NodeDevice, 0, l) 2837 for _, d := range n.Instances { 2838 nn.Instances = append(nn.Instances, d.Copy()) 2839 } 2840 } 2841 2842 // Copy the Attributes 2843 nn.Attributes = psstructs.CopyMapStringAttribute(nn.Attributes) 2844 2845 return &nn 2846 } 2847 2848 func (n *NodeDeviceResource) Equals(o *NodeDeviceResource) bool { 2849 if o == nil && n == nil { 2850 return true 2851 } else if o == nil { 2852 return false 2853 } else if n == nil { 2854 return false 2855 } 2856 2857 if n.Vendor != o.Vendor { 2858 return false 2859 } else if n.Type != o.Type { 2860 return false 2861 } else if n.Name != o.Name { 2862 return false 2863 } 2864 2865 // Check the attributes 2866 if len(n.Attributes) != len(o.Attributes) { 2867 return false 2868 } 2869 for k, v := range n.Attributes { 2870 if otherV, ok := o.Attributes[k]; !ok || v != otherV { 2871 return false 2872 } 2873 } 2874 2875 // Check the instances 2876 if len(n.Instances) != len(o.Instances) { 2877 return false 2878 } 2879 idMap := make(map[string]*NodeDevice, len(n.Instances)) 2880 for _, d := range n.Instances { 2881 idMap[d.ID] = d 2882 } 2883 for _, otherD := range o.Instances { 2884 if d, ok := idMap[otherD.ID]; !ok || !d.Equals(otherD) { 2885 return false 2886 } 2887 } 2888 2889 return true 2890 } 2891 2892 // NodeDevice is an instance of a particular device. 2893 type NodeDevice struct { 2894 // ID is the ID of the device. 2895 ID string 2896 2897 // Healthy captures whether the device is healthy. 2898 Healthy bool 2899 2900 // HealthDescription is used to provide a human readable description of why 2901 // the device may be unhealthy. 2902 HealthDescription string 2903 2904 // Locality stores HW locality information for the node to optionally be 2905 // used when making placement decisions. 2906 Locality *NodeDeviceLocality 2907 } 2908 2909 func (n *NodeDevice) Equals(o *NodeDevice) bool { 2910 if o == nil && n == nil { 2911 return true 2912 } else if o == nil { 2913 return false 2914 } else if n == nil { 2915 return false 2916 } 2917 2918 if n.ID != o.ID { 2919 return false 2920 } else if n.Healthy != o.Healthy { 2921 return false 2922 } else if n.HealthDescription != o.HealthDescription { 2923 return false 2924 } else if !n.Locality.Equals(o.Locality) { 2925 return false 2926 } 2927 2928 return true 2929 } 2930 2931 func (n *NodeDevice) Copy() *NodeDevice { 2932 if n == nil { 2933 return nil 2934 } 2935 2936 // Copy the primitives 2937 nn := *n 2938 2939 // Copy the locality 2940 nn.Locality = nn.Locality.Copy() 2941 2942 return &nn 2943 } 2944 2945 // NodeDeviceLocality stores information about the device's hardware locality on 2946 // the node. 2947 type NodeDeviceLocality struct { 2948 // PciBusID is the PCI Bus ID for the device.
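// A PCI bus ID is typically of the form "0000:03:00.0"
// (domain:bus:device.function).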
2949 PciBusID string 2950 } 2951 2952 func (n *NodeDeviceLocality) Equals(o *NodeDeviceLocality) bool { 2953 if o == nil && n == nil { 2954 return true 2955 } else if o == nil { 2956 return false 2957 } else if n == nil { 2958 return false 2959 } 2960 2961 if n.PciBusID != o.PciBusID { 2962 return false 2963 } 2964 2965 return true 2966 } 2967 2968 func (n *NodeDeviceLocality) Copy() *NodeDeviceLocality { 2969 if n == nil { 2970 return nil 2971 } 2972 2973 // Copy the primitives 2974 nn := *n 2975 return &nn 2976 } 2977 2978 // NodeReservedResources is used to capture the resources on a client node that 2979 // should be reserved and not made available to jobs. 2980 type NodeReservedResources struct { 2981 Cpu NodeReservedCpuResources 2982 Memory NodeReservedMemoryResources 2983 Disk NodeReservedDiskResources 2984 Networks NodeReservedNetworkResources 2985 } 2986 2987 func (n *NodeReservedResources) Copy() *NodeReservedResources { 2988 if n == nil { 2989 return nil 2990 } 2991 newN := new(NodeReservedResources) 2992 *newN = *n 2993 return newN 2994 } 2995 2996 // Comparable returns a comparable version of the node's reserved resources. The 2997 // returned resources don't contain any network information. This conversion 2998 // can be lossy so care must be taken when using it. 2999 func (n *NodeReservedResources) Comparable() *ComparableResources { 3000 if n == nil { 3001 return nil 3002 } 3003 3004 c := &ComparableResources{ 3005 Flattened: AllocatedTaskResources{ 3006 Cpu: AllocatedCpuResources{ 3007 CpuShares: n.Cpu.CpuShares, 3008 }, 3009 Memory: AllocatedMemoryResources{ 3010 MemoryMB: n.Memory.MemoryMB, 3011 }, 3012 }, 3013 Shared: AllocatedSharedResources{ 3014 DiskMB: n.Disk.DiskMB, 3015 }, 3016 } 3017 return c 3018 } 3019 3020 // NodeReservedCpuResources captures the reserved CPU resources of the node. 3021 type NodeReservedCpuResources struct { 3022 CpuShares int64 3023 } 3024 3025 // NodeReservedMemoryResources captures the reserved memory resources of the node. 3026 type NodeReservedMemoryResources struct { 3027 MemoryMB int64 3028 } 3029 3030 // NodeReservedDiskResources captures the reserved disk resources of the node. 3031 type NodeReservedDiskResources struct { 3032 DiskMB int64 3033 } 3034 3035 // NodeReservedNetworkResources captures the reserved network resources of the node. 3036 type NodeReservedNetworkResources struct { 3037 // ReservedHostPorts is the set of ports reserved on all host network 3038 // interfaces. Its format is a comma-separated list of integers or integer 3039 // ranges. (80,443,1000-2000,2005) 3040 ReservedHostPorts string 3041 } 3042 3043 // ParseReservedHostPorts returns the reserved host ports. 3044 func (n *NodeReservedNetworkResources) ParseReservedHostPorts() ([]uint64, error) { 3045 return ParsePortRanges(n.ReservedHostPorts) 3046 } 3047 3048 // AllocatedResources is the set of resources to be used by an allocation. 3049 type AllocatedResources struct { 3050 // Tasks is a mapping of task name to the resources for the task. 3051 Tasks map[string]*AllocatedTaskResources 3052 TaskLifecycles map[string]*TaskLifecycleConfig 3053 3054 // Shared is the set of resources that are shared by all tasks in the group.
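// At the time of writing this covers the group's networks and its
// ephemeral disk; see AllocatedSharedResources below.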
3055 Shared AllocatedSharedResources 3056 } 3057 3058 func (a *AllocatedResources) Copy() *AllocatedResources { 3059 if a == nil { 3060 return nil 3061 } 3062 3063 out := AllocatedResources{ 3064 Shared: a.Shared.Copy(), 3065 } 3066 3067 if a.Tasks != nil { 3068 out.Tasks = make(map[string]*AllocatedTaskResources, len(a.Tasks)) 3069 for task, resource := range a.Tasks { 3070 out.Tasks[task] = resource.Copy() 3071 } 3072 } 3073 if a.TaskLifecycles != nil { 3074 out.TaskLifecycles = make(map[string]*TaskLifecycleConfig, len(a.TaskLifecycles)) 3075 for task, lifecycle := range a.TaskLifecycles { 3076 out.TaskLifecycles[task] = lifecycle.Copy() 3077 } 3078 3079 } 3080 3081 return &out 3082 } 3083 3084 // Comparable returns a comparable version of the allocation's allocated 3085 // resources. This conversion can be lossy so care must be taken when using it. 3086 func (a *AllocatedResources) Comparable() *ComparableResources { 3087 if a == nil { 3088 return nil 3089 } 3090 3091 c := &ComparableResources{ 3092 Shared: a.Shared, 3093 } 3094 3095 prestartSidecarTasks := &AllocatedTaskResources{} 3096 prestartEphemeralTasks := &AllocatedTaskResources{} 3097 main := &AllocatedTaskResources{} 3098 3099 for taskName, r := range a.Tasks { 3100 lc := a.TaskLifecycles[taskName] 3101 if lc == nil { 3102 main.Add(r) 3103 } else if lc.Hook == TaskLifecycleHookPrestart { 3104 if lc.Sidecar { 3105 prestartSidecarTasks.Add(r) 3106 } else { 3107 prestartEphemeralTasks.Add(r) 3108 } 3109 } 3110 } 3111 3112 // Ephemeral prestart tasks do not run concurrently with the main tasks, so flatten to the larger of the two; prestart sidecars run throughout and are always added. 3113 prestartEphemeralTasks.Max(main) 3114 prestartSidecarTasks.Add(prestartEphemeralTasks) 3115 c.Flattened.Add(prestartSidecarTasks) 3116 3117 // Add network resources that are at the task group level 3118 for _, network := range a.Shared.Networks { 3119 c.Flattened.Add(&AllocatedTaskResources{ 3120 Networks: []*NetworkResource{network}, 3121 }) 3122 } 3123 3124 return c 3125 } 3126 3127 // OldTaskResources returns the pre-0.9.0 map of task resources 3128 func (a *AllocatedResources) OldTaskResources() map[string]*Resources { 3129 m := make(map[string]*Resources, len(a.Tasks)) 3130 for name, res := range a.Tasks { 3131 m[name] = &Resources{ 3132 CPU: int(res.Cpu.CpuShares), 3133 MemoryMB: int(res.Memory.MemoryMB), 3134 Networks: res.Networks, 3135 } 3136 } 3137 3138 return m 3139 } 3140 3141 // AllocatedTaskResources are the set of resources allocated to a task.
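// As a rough example of the lifecycle flattening performed by
// AllocatedResources.Comparable above (made-up values): with a main task
// at 500 CPU shares, an ephemeral prestart task at 200 and a prestart
// sidecar at 100, the flattened total is max(200, 500) + 100 = 600, since
// the ephemeral prestart task never runs concurrently with the main task
// while the sidecar runs throughout.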
3142 type AllocatedTaskResources struct { 3143 Cpu AllocatedCpuResources 3144 Memory AllocatedMemoryResources 3145 Networks Networks 3146 Devices []*AllocatedDeviceResource 3147 } 3148 3149 func (a *AllocatedTaskResources) Copy() *AllocatedTaskResources { 3150 if a == nil { 3151 return nil 3152 } 3153 newA := new(AllocatedTaskResources) 3154 *newA = *a 3155 3156 // Copy the networks 3157 newA.Networks = a.Networks.Copy() 3158 3159 // Copy the devices 3160 if newA.Devices != nil { 3161 n := len(a.Devices) 3162 newA.Devices = make([]*AllocatedDeviceResource, n) 3163 for i := 0; i < n; i++ { 3164 newA.Devices[i] = a.Devices[i].Copy() 3165 } 3166 } 3167 3168 return newA 3169 } 3170 3171 // NetIndex finds the matching net index using device name 3172 func (a *AllocatedTaskResources) NetIndex(n *NetworkResource) int { 3173 return a.Networks.NetIndex(n) 3174 } 3175 3176 func (a *AllocatedTaskResources) Add(delta *AllocatedTaskResources) { 3177 if delta == nil { 3178 return 3179 } 3180 3181 a.Cpu.Add(&delta.Cpu) 3182 a.Memory.Add(&delta.Memory) 3183 3184 for _, n := range delta.Networks { 3185 // Find the matching interface by IP or CIDR 3186 idx := a.NetIndex(n) 3187 if idx == -1 { 3188 a.Networks = append(a.Networks, n.Copy()) 3189 } else { 3190 a.Networks[idx].Add(n) 3191 } 3192 } 3193 3194 for _, d := range delta.Devices { 3195 // Find the matching device 3196 idx := AllocatedDevices(a.Devices).Index(d) 3197 if idx == -1 { 3198 a.Devices = append(a.Devices, d.Copy()) 3199 } else { 3200 a.Devices[idx].Add(d) 3201 } 3202 } 3203 } 3204 3205 func (a *AllocatedTaskResources) Max(other *AllocatedTaskResources) { 3206 if other == nil { 3207 return 3208 } 3209 3210 a.Cpu.Max(&other.Cpu) 3211 a.Memory.Max(&other.Memory) 3212 3213 for _, n := range other.Networks { 3214 // Find the matching interface by IP or CIDR 3215 idx := a.NetIndex(n) 3216 if idx == -1 { 3217 a.Networks = append(a.Networks, n.Copy()) 3218 } else { 3219 a.Networks[idx].Add(n) 3220 } 3221 } 3222 3223 for _, d := range other.Devices { 3224 // Find the matching device 3225 idx := AllocatedDevices(a.Devices).Index(d) 3226 if idx == -1 { 3227 a.Devices = append(a.Devices, d.Copy()) 3228 } else { 3229 a.Devices[idx].Add(d) 3230 } 3231 } 3232 } 3233 3234 // Comparable turns AllocatedTaskResources into ComparableResources 3235 // as a helper step in preemption 3236 func (a *AllocatedTaskResources) Comparable() *ComparableResources { 3237 ret := &ComparableResources{ 3238 Flattened: AllocatedTaskResources{ 3239 Cpu: AllocatedCpuResources{ 3240 CpuShares: a.Cpu.CpuShares, 3241 }, 3242 Memory: AllocatedMemoryResources{ 3243 MemoryMB: a.Memory.MemoryMB, 3244 }, 3245 }, 3246 } 3247 if len(a.Networks) > 0 { 3248 for _, net := range a.Networks { 3249 ret.Flattened.Networks = append(ret.Flattened.Networks, net) 3250 } 3251 } 3252 return ret 3253 } 3254 3255 // Subtract only subtracts CPU and Memory resources. Network utilization 3256 // is managed separately in NetworkIndex 3257 func (a *AllocatedTaskResources) Subtract(delta *AllocatedTaskResources) { 3258 if delta == nil { 3259 return 3260 } 3261 3262 a.Cpu.Subtract(&delta.Cpu) 3263 a.Memory.Subtract(&delta.Memory) 3264 } 3265 3266 // AllocatedSharedResources are the set of resources allocated to a task group. 
3267 type AllocatedSharedResources struct { 3268 Networks Networks 3269 DiskMB int64 3270 } 3271 3272 func (a AllocatedSharedResources) Copy() AllocatedSharedResources { 3273 return AllocatedSharedResources{ 3274 Networks: a.Networks.Copy(), 3275 DiskMB: a.DiskMB, 3276 } 3277 } 3278 3279 func (a *AllocatedSharedResources) Add(delta *AllocatedSharedResources) { 3280 if delta == nil { 3281 return 3282 } 3283 a.Networks = append(a.Networks, delta.Networks...) 3284 a.DiskMB += delta.DiskMB 3285 3286 } 3287 3288 func (a *AllocatedSharedResources) Subtract(delta *AllocatedSharedResources) { 3289 if delta == nil { 3290 return 3291 } 3292 3293 diff := map[*NetworkResource]bool{} 3294 for _, n := range delta.Networks { 3295 diff[n] = true 3296 } 3297 var nets Networks 3298 for _, n := range a.Networks { 3299 if _, ok := diff[n]; !ok { 3300 nets = append(nets, n) 3301 } 3302 } 3303 a.Networks = nets 3304 a.DiskMB -= delta.DiskMB 3305 } 3306 3307 // AllocatedCpuResources captures the allocated CPU resources. 3308 type AllocatedCpuResources struct { 3309 CpuShares int64 3310 } 3311 3312 func (a *AllocatedCpuResources) Add(delta *AllocatedCpuResources) { 3313 if delta == nil { 3314 return 3315 } 3316 3317 a.CpuShares += delta.CpuShares 3318 } 3319 3320 func (a *AllocatedCpuResources) Subtract(delta *AllocatedCpuResources) { 3321 if delta == nil { 3322 return 3323 } 3324 3325 a.CpuShares -= delta.CpuShares 3326 } 3327 3328 func (a *AllocatedCpuResources) Max(other *AllocatedCpuResources) { 3329 if other == nil { 3330 return 3331 } 3332 3333 if other.CpuShares > a.CpuShares { 3334 a.CpuShares = other.CpuShares 3335 } 3336 } 3337 3338 // AllocatedMemoryResources captures the allocated memory resources. 3339 type AllocatedMemoryResources struct { 3340 MemoryMB int64 3341 } 3342 3343 func (a *AllocatedMemoryResources) Add(delta *AllocatedMemoryResources) { 3344 if delta == nil { 3345 return 3346 } 3347 3348 a.MemoryMB += delta.MemoryMB 3349 } 3350 3351 func (a *AllocatedMemoryResources) Subtract(delta *AllocatedMemoryResources) { 3352 if delta == nil { 3353 return 3354 } 3355 3356 a.MemoryMB -= delta.MemoryMB 3357 } 3358 3359 func (a *AllocatedMemoryResources) Max(other *AllocatedMemoryResources) { 3360 if other == nil { 3361 return 3362 } 3363 3364 if other.MemoryMB > a.MemoryMB { 3365 a.MemoryMB = other.MemoryMB 3366 } 3367 } 3368 3369 type AllocatedDevices []*AllocatedDeviceResource 3370 3371 // Index finds the matching index using the passed device. If not found, -1 is 3372 // returned. 3373 func (a AllocatedDevices) Index(d *AllocatedDeviceResource) int { 3374 if d == nil { 3375 return -1 3376 } 3377 3378 for i, o := range a { 3379 if o.ID().Equals(d.ID()) { 3380 return i 3381 } 3382 } 3383 3384 return -1 3385 } 3386 3387 // AllocatedDeviceResource captures a set of allocated devices. 3388 type AllocatedDeviceResource struct { 3389 // Vendor, Type, and Name are used to select the plugin to request the 3390 // device IDs from. 3391 Vendor string 3392 Type string 3393 Name string 3394 3395 // DeviceIDs is the set of allocated devices 3396 DeviceIDs []string 3397 } 3398 3399 func (a *AllocatedDeviceResource) ID() *DeviceIdTuple { 3400 if a == nil { 3401 return nil 3402 } 3403 3404 return &DeviceIdTuple{ 3405 Vendor: a.Vendor, 3406 Type: a.Type, 3407 Name: a.Name, 3408 } 3409 } 3410 3411 func (a *AllocatedDeviceResource) Add(delta *AllocatedDeviceResource) { 3412 if delta == nil { 3413 return 3414 } 3415 3416 a.DeviceIDs = append(a.DeviceIDs, delta.DeviceIDs...) 
3417 } 3418 3419 func (a *AllocatedDeviceResource) Copy() *AllocatedDeviceResource { 3420 if a == nil { 3421 return a 3422 } 3423 3424 na := *a 3425 3426 // Copy the devices 3427 na.DeviceIDs = make([]string, len(a.DeviceIDs)) 3428 for i, id := range a.DeviceIDs { 3429 na.DeviceIDs[i] = id 3430 } 3431 3432 return &na 3433 } 3434 3435 // ComparableResources is the set of resources allocated to a task group but 3436 // not keyed by Task, making it easier to compare. 3437 type ComparableResources struct { 3438 Flattened AllocatedTaskResources 3439 Shared AllocatedSharedResources 3440 } 3441 3442 func (c *ComparableResources) Add(delta *ComparableResources) { 3443 if delta == nil { 3444 return 3445 } 3446 3447 c.Flattened.Add(&delta.Flattened) 3448 c.Shared.Add(&delta.Shared) 3449 } 3450 3451 func (c *ComparableResources) Subtract(delta *ComparableResources) { 3452 if delta == nil { 3453 return 3454 } 3455 3456 c.Flattened.Subtract(&delta.Flattened) 3457 c.Shared.Subtract(&delta.Shared) 3458 } 3459 3460 func (c *ComparableResources) Copy() *ComparableResources { 3461 if c == nil { 3462 return nil 3463 } 3464 newR := new(ComparableResources) 3465 *newR = *c 3466 return newR 3467 } 3468 3469 // Superset checks if one set of resources is a superset of another. This 3470 // ignores network resources, and the NetworkIndex should be used for that. 3471 func (c *ComparableResources) Superset(other *ComparableResources) (bool, string) { 3472 if c.Flattened.Cpu.CpuShares < other.Flattened.Cpu.CpuShares { 3473 return false, "cpu" 3474 } 3475 if c.Flattened.Memory.MemoryMB < other.Flattened.Memory.MemoryMB { 3476 return false, "memory" 3477 } 3478 if c.Shared.DiskMB < other.Shared.DiskMB { 3479 return false, "disk" 3480 } 3481 return true, "" 3482 } 3483 3484 // NetIndex finds the matching net index using device name 3485 func (c *ComparableResources) NetIndex(n *NetworkResource) int { 3486 return c.Flattened.Networks.NetIndex(n) 3487 } 3488 3489 const ( 3490 // JobTypeCore is reserved for internal system tasks and is 3491 // always handled by the CoreScheduler. 3492 JobTypeCore = "_core" 3493 JobTypeService = "service" 3494 JobTypeBatch = "batch" 3495 JobTypeSystem = "system" 3496 ) 3497 3498 const ( 3499 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 3500 JobStatusRunning = "running" // Running means the job has non-terminal allocations 3501 JobStatusDead = "dead" // Dead means all evaluations and allocations are terminal 3502 ) 3503 3504 const ( 3505 // JobMinPriority is the minimum allowed priority 3506 JobMinPriority = 1 3507 3508 // JobDefaultPriority is the default priority if not 3509 // specified. 3510 JobDefaultPriority = 50 3511 3512 // JobMaxPriority is the maximum allowed priority 3513 JobMaxPriority = 100 3514 3515 // Ensure CoreJobPriority is higher than any user 3516 // specified job so that it gets priority. This is important 3517 // for the system to remain healthy. 3518 CoreJobPriority = JobMaxPriority * 2 3519 3520 // JobTrackedVersions is the number of historic job versions that are 3521 // kept. 3522 JobTrackedVersions = 6 3523 3524 // JobTrackedScalingEvents is the number of scaling events that are 3525 // kept for a single task group. 3526 JobTrackedScalingEvents = 20 3527 ) 3528 3529 // Job is the scope of a scheduling request to Nomad. It is the largest 3530 // scoped object, and is a named collection of task groups. Each task group 3531 // is further composed of tasks. A task group (TG) is the unit of scheduling, 3532 // however.
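// As a minimal illustrative sketch of the shape of a Job (made-up values,
// most fields omitted):
//
//	job := &Job{
//		Region:      "global",
//		Namespace:   DefaultNamespace,
//		ID:          "example",
//		Name:        "example",
//		Type:        JobTypeService,
//		Priority:    JobDefaultPriority,
//		Datacenters: []string{"dc1"},
//		TaskGroups:  []*TaskGroup{{Name: "web", Count: 1}},
//	}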
3533 type Job struct { 3534 // Stop marks whether the user has stopped the job. A stopped job will 3535 // have all created allocations stopped and acts as a way to stop a job 3536 // without purging it from the system. This allows existing allocs to be 3537 // queried and the job to be inspected as it is being killed. 3538 Stop bool 3539 3540 // Region is the Nomad region that handles scheduling this job 3541 Region string 3542 3543 // Namespace is the namespace the job is submitted into. 3544 Namespace string 3545 3546 // ID is a unique identifier for the job per region. It can be 3547 // specified hierarchically like LineOfBiz/OrgName/Team/Project 3548 ID string 3549 3550 // ParentID is the unique identifier of the job that spawned this job. 3551 ParentID string 3552 3553 // Name is the logical name of the job used to refer to it. This is unique 3554 // per region, but not unique globally. 3555 Name string 3556 3557 // Type is used to control various behaviors about the job. Most jobs 3558 // are service jobs, meaning they are expected to be long lived. 3559 // Some jobs are batch oriented meaning they run and then terminate. 3560 // This can be extended in the future to support custom schedulers. 3561 Type string 3562 3563 // Priority is used to control scheduling importance and if this job 3564 // can preempt other jobs. 3565 Priority int 3566 3567 // AllAtOnce is used to control if incremental scheduling of task groups 3568 // is allowed or if we must do a gang scheduling of the entire job. This 3569 // can slow down larger jobs if resources are not available. 3570 AllAtOnce bool 3571 3572 // Datacenters contains all the datacenters this job is allowed to span 3573 Datacenters []string 3574 3575 // Constraints can be specified at a job level and apply to 3576 // all the task groups and tasks. 3577 Constraints []*Constraint 3578 3579 // Affinities can be specified at the job level to express 3580 // scheduling preferences that apply to all groups and tasks 3581 Affinities []*Affinity 3582 3583 // Spread can be specified at the job level to express spreading 3584 // allocations across a desired attribute, such as datacenter 3585 Spreads []*Spread 3586 3587 // TaskGroups are the collections of task groups that this job needs 3588 // to run. Each task group is an atomic unit of scheduling and placement. 3589 TaskGroups []*TaskGroup 3590 3591 // See agent.ApiJobToStructJob 3592 // Update provides defaults for the TaskGroup Update stanzas 3593 Update UpdateStrategy 3594 3595 // Periodic is used to define the interval the job is run at. 3596 Periodic *PeriodicConfig 3597 3598 // ParameterizedJob is used to specify the job as a parameterized job 3599 // for dispatching. 3600 ParameterizedJob *ParameterizedJobConfig 3601 3602 // Dispatched is used to identify if the Job has been dispatched from a 3603 // parameterized job. 3604 Dispatched bool 3605 3606 // Payload is the payload supplied when the job was dispatched. 3607 Payload []byte 3608 3609 // Meta is used to associate arbitrary metadata with this 3610 // job. This is opaque to Nomad. 3611 Meta map[string]string 3612 3613 // ConsulToken is the Consul token that proves the submitter of the job has 3614 // access to the Service Identity policies associated with the job's 3615 // Consul Connect enabled services. This field is only used to transfer the 3616 // token and is not stored after Job submission. 
3617 ConsulToken string 3618 3619 // VaultToken is the Vault token that proves the submitter of the job has 3620 // access to the specified Vault policies. This field is only used to 3621 // transfer the token and is not stored after Job submission. 3622 VaultToken string 3623 3624 // Job status 3625 Status string 3626 3627 // StatusDescription is meant to provide more human useful information 3628 StatusDescription string 3629 3630 // Stable marks a job as stable. Stability is only defined on "service" and 3631 // "system" jobs. The stability of a job will be set automatically as part 3632 // of a deployment and can be manually set via APIs. This field is updated 3633 // when the status of a corresponding deployment transitions to Failed 3634 // or Successful. This field is not meaningful for jobs that don't have an 3635 // update stanza. 3636 Stable bool 3637 3638 // Version is a monotonically increasing version number that is incremented 3639 // on each job register. 3640 Version uint64 3641 3642 // SubmitTime is the time at which the job was submitted as a UnixNano in 3643 // UTC 3644 SubmitTime int64 3645 3646 // Raft Indexes 3647 CreateIndex uint64 3648 ModifyIndex uint64 3649 JobModifyIndex uint64 3650 } 3651 3652 // NamespacedID returns the namespaced id useful for logging 3653 func (j *Job) NamespacedID() *NamespacedID { 3654 return &NamespacedID{ 3655 ID: j.ID, 3656 Namespace: j.Namespace, 3657 } 3658 } 3659 3660 // Canonicalize is used to canonicalize fields in the Job. This should be called 3661 // when registering a Job. A set of warnings is returned if the job was changed 3662 // in any way that the user should be made aware of. 3663 func (j *Job) Canonicalize() (warnings error) { 3664 if j == nil { 3665 return nil 3666 } 3667 3668 var mErr multierror.Error 3669 // Ensure that empty and nil maps are treated the same to avoid scheduling 3670 // problems since we use reflect DeepEquals. 3671 if len(j.Meta) == 0 { 3672 j.Meta = nil 3673 } 3674 3675 // Ensure the job is in a namespace. 3676 if j.Namespace == "" { 3677 j.Namespace = DefaultNamespace 3678 } 3679 3680 for _, tg := range j.TaskGroups { 3681 tg.Canonicalize(j) 3682 } 3683 3684 if j.ParameterizedJob != nil { 3685 j.ParameterizedJob.Canonicalize() 3686 } 3687 3688 if j.Periodic != nil { 3689 j.Periodic.Canonicalize() 3690 } 3691 3692 return mErr.ErrorOrNil() 3693 } 3694 3695 // Copy returns a deep copy of the Job. It is expected that callers use recover. 3696 // This method can panic if the deep copy fails as it uses reflection.
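// A minimal sketch of the guarded call the comment above suggests
// (safeCopy is a hypothetical helper, not part of this package):
//
//	func safeCopy(j *Job) (c *Job) {
//		defer func() {
//			if r := recover(); r != nil {
//				c = nil
//			}
//		}()
//		return j.Copy()
//	}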
3697 func (j *Job) Copy() *Job { 3698 if j == nil { 3699 return nil 3700 } 3701 nj := new(Job) 3702 *nj = *j 3703 nj.Datacenters = helper.CopySliceString(nj.Datacenters) 3704 nj.Constraints = CopySliceConstraints(nj.Constraints) 3705 nj.Affinities = CopySliceAffinities(nj.Affinities) 3706 3707 if j.TaskGroups != nil { 3708 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 3709 for i, tg := range nj.TaskGroups { 3710 tgs[i] = tg.Copy() 3711 } 3712 nj.TaskGroups = tgs 3713 } 3714 3715 nj.Periodic = nj.Periodic.Copy() 3716 nj.Meta = helper.CopyMapStringString(nj.Meta) 3717 nj.ParameterizedJob = nj.ParameterizedJob.Copy() 3718 return nj 3719 } 3720 3721 // Validate is used to sanity check a job input 3722 func (j *Job) Validate() error { 3723 var mErr multierror.Error 3724 3725 if j.Region == "" { 3726 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 3727 } 3728 if j.ID == "" { 3729 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 3730 } else if strings.Contains(j.ID, " ") { 3731 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 3732 } 3733 if j.Name == "" { 3734 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 3735 } 3736 if j.Namespace == "" { 3737 mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace")) 3738 } 3739 switch j.Type { 3740 case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem: 3741 case "": 3742 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 3743 default: 3744 mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type)) 3745 } 3746 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 3747 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 3748 } 3749 if len(j.Datacenters) == 0 { 3750 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 3751 } else { 3752 for _, v := range j.Datacenters { 3753 if v == "" { 3754 mErr.Errors = append(mErr.Errors, errors.New("Job datacenter must be non-empty string")) 3755 } 3756 } 3757 } 3758 if len(j.TaskGroups) == 0 { 3759 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 3760 } 3761 for idx, constr := range j.Constraints { 3762 if err := constr.Validate(); err != nil { 3763 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 3764 mErr.Errors = append(mErr.Errors, outer) 3765 } 3766 } 3767 if j.Type == JobTypeSystem { 3768 if j.Affinities != nil { 3769 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 3770 } 3771 } else { 3772 for idx, affinity := range j.Affinities { 3773 if err := affinity.Validate(); err != nil { 3774 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 3775 mErr.Errors = append(mErr.Errors, outer) 3776 } 3777 } 3778 } 3779 3780 if j.Type == JobTypeSystem { 3781 if j.Spreads != nil { 3782 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza")) 3783 } 3784 } else { 3785 for idx, spread := range j.Spreads { 3786 if err := spread.Validate(); err != nil { 3787 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err) 3788 mErr.Errors = append(mErr.Errors, outer) 3789 } 3790 } 3791 } 3792 3793 // Check for duplicate task groups 3794 taskGroups := make(map[string]int) 3795 for idx, tg := range j.TaskGroups { 3796 if tg.Name == "" { 3797 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 3798 } else if existing, ok := 
taskGroups[tg.Name]; ok { 3799 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 3800 } else { 3801 taskGroups[tg.Name] = idx 3802 } 3803 3804 if tg.ShutdownDelay != nil && *tg.ShutdownDelay < 0 { 3805 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 3806 } 3807 3808 if tg.StopAfterClientDisconnect != nil && *tg.StopAfterClientDisconnect < 0 { 3809 mErr.Errors = append(mErr.Errors, errors.New("StopAfterClientDisconnect must be a positive value")) 3810 } 3811 3812 if j.Type == "system" && tg.Count > 1 { 3813 mErr.Errors = append(mErr.Errors, 3814 fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler", 3815 tg.Name, tg.Count)) 3816 } 3817 } 3818 3819 // Validate the task group 3820 for _, tg := range j.TaskGroups { 3821 if err := tg.Validate(j); err != nil { 3822 outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err) 3823 mErr.Errors = append(mErr.Errors, outer) 3824 } 3825 } 3826 3827 // Validate periodic is only used with batch jobs. 3828 if j.IsPeriodic() && j.Periodic.Enabled { 3829 if j.Type != JobTypeBatch { 3830 mErr.Errors = append(mErr.Errors, 3831 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 3832 } 3833 3834 if err := j.Periodic.Validate(); err != nil { 3835 mErr.Errors = append(mErr.Errors, err) 3836 } 3837 } 3838 3839 if j.IsParameterized() { 3840 if j.Type != JobTypeBatch { 3841 mErr.Errors = append(mErr.Errors, 3842 fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch)) 3843 } 3844 3845 if err := j.ParameterizedJob.Validate(); err != nil { 3846 mErr.Errors = append(mErr.Errors, err) 3847 } 3848 } 3849 3850 return mErr.ErrorOrNil() 3851 } 3852 3853 // Warnings returns a list of warnings that may be from dubious settings or 3854 // deprecation warnings. 3855 func (j *Job) Warnings() error { 3856 var mErr multierror.Error 3857 3858 // Check the groups 3859 ap := 0 3860 for _, tg := range j.TaskGroups { 3861 if err := tg.Warnings(j); err != nil { 3862 outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err) 3863 mErr.Errors = append(mErr.Errors, outer) 3864 } 3865 if tg.Update != nil && tg.Update.AutoPromote { 3866 ap += 1 3867 } 3868 } 3869 3870 // Check AutoPromote, should be all or none 3871 if ap > 0 && ap < len(j.TaskGroups) { 3872 err := fmt.Errorf("auto_promote must be true for all groups to enable automatic promotion") 3873 mErr.Errors = append(mErr.Errors, err) 3874 } 3875 3876 return mErr.ErrorOrNil() 3877 } 3878 3879 // LookupTaskGroup finds a task group by name 3880 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 3881 for _, tg := range j.TaskGroups { 3882 if tg.Name == name { 3883 return tg 3884 } 3885 } 3886 return nil 3887 } 3888 3889 // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined 3890 // meta data for the task. When joining Job, Group and Task Meta, the precedence 3891 // is by deepest scope (Task > Group > Job). 
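// As an illustrative example (made-up values): with job meta
// {"team": "infra", "env": "prod"}, group meta {"env": "qa"} and task meta
// {"env": "dev"}, the combined result is {"team": "infra", "env": "dev"}.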
3892 func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string { 3893 group := j.LookupTaskGroup(groupName) 3894 if group == nil { 3895 return j.Meta 3896 } 3897 3898 var meta map[string]string 3899 3900 task := group.LookupTask(taskName) 3901 if task != nil { 3902 meta = helper.CopyMapStringString(task.Meta) 3903 } 3904 3905 if meta == nil { 3906 meta = make(map[string]string, len(group.Meta)+len(j.Meta)) 3907 } 3908 3909 // Add the group specific meta 3910 for k, v := range group.Meta { 3911 if _, ok := meta[k]; !ok { 3912 meta[k] = v 3913 } 3914 } 3915 3916 // Add the job specific meta 3917 for k, v := range j.Meta { 3918 if _, ok := meta[k]; !ok { 3919 meta[k] = v 3920 } 3921 } 3922 3923 return meta 3924 } 3925 3926 // Stopped returns if a job is stopped. 3927 func (j *Job) Stopped() bool { 3928 return j == nil || j.Stop 3929 } 3930 3931 // HasUpdateStrategy returns if any task group in the job has an update strategy 3932 func (j *Job) HasUpdateStrategy() bool { 3933 for _, tg := range j.TaskGroups { 3934 if !tg.Update.IsEmpty() { 3935 return true 3936 } 3937 } 3938 3939 return false 3940 } 3941 3942 // Stub is used to return a summary of the job 3943 func (j *Job) Stub(summary *JobSummary) *JobListStub { 3944 return &JobListStub{ 3945 ID: j.ID, 3946 ParentID: j.ParentID, 3947 Name: j.Name, 3948 Datacenters: j.Datacenters, 3949 Type: j.Type, 3950 Priority: j.Priority, 3951 Periodic: j.IsPeriodic(), 3952 ParameterizedJob: j.IsParameterized(), 3953 Stop: j.Stop, 3954 Status: j.Status, 3955 StatusDescription: j.StatusDescription, 3956 CreateIndex: j.CreateIndex, 3957 ModifyIndex: j.ModifyIndex, 3958 JobModifyIndex: j.JobModifyIndex, 3959 SubmitTime: j.SubmitTime, 3960 JobSummary: summary, 3961 } 3962 } 3963 3964 // IsPeriodic returns whether a job is periodic. 3965 func (j *Job) IsPeriodic() bool { 3966 return j.Periodic != nil 3967 } 3968 3969 // IsPeriodicActive returns whether the job is an active periodic job that will 3970 // create child jobs 3971 func (j *Job) IsPeriodicActive() bool { 3972 return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized() 3973 } 3974 3975 // IsParameterized returns whether a job is a parameterized job. 3976 func (j *Job) IsParameterized() bool { 3977 return j.ParameterizedJob != nil && !j.Dispatched 3978 } 3979 3980 // VaultPolicies returns the set of Vault policies per task group, per task 3981 func (j *Job) VaultPolicies() map[string]map[string]*Vault { 3982 policies := make(map[string]map[string]*Vault, len(j.TaskGroups)) 3983 3984 for _, tg := range j.TaskGroups { 3985 tgPolicies := make(map[string]*Vault, len(tg.Tasks)) 3986 3987 for _, task := range tg.Tasks { 3988 if task.Vault == nil { 3989 continue 3990 } 3991 3992 tgPolicies[task.Name] = task.Vault 3993 } 3994 3995 if len(tgPolicies) != 0 { 3996 policies[tg.Name] = tgPolicies 3997 } 3998 } 3999 4000 return policies 4001 } 4002 4003 // ConnectTasks returns the set of Consul Connect enabled tasks that will 4004 // require a Service Identity token, if Consul ACLs are enabled. 4005 // 4006 // This method is meaningful only after the Job has passed through the job 4007 // submission Mutator functions. 4008 // 4009 // task group -> []task 4010 func (j *Job) ConnectTasks() map[string][]string { 4011 m := make(map[string][]string) 4012 for _, tg := range j.TaskGroups { 4013 for _, task := range tg.Tasks { 4014 if task.Kind.IsConnectProxy() { 4015 // todo(shoenig): when we support native, probably need to check 4016 // an additional TBD TaskKind as well.
4017 m[tg.Name] = append(m[tg.Name], task.Name) 4018 } 4019 } 4020 } 4021 return m 4022 } 4023 4024 // RequiredSignals returns a mapping of task groups to tasks to their required 4025 // set of signals 4026 func (j *Job) RequiredSignals() map[string]map[string][]string { 4027 signals := make(map[string]map[string][]string) 4028 4029 for _, tg := range j.TaskGroups { 4030 for _, task := range tg.Tasks { 4031 // Use this local one as a set 4032 taskSignals := make(map[string]struct{}) 4033 4034 // Check if the Vault change mode uses signals 4035 if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal { 4036 taskSignals[task.Vault.ChangeSignal] = struct{}{} 4037 } 4038 4039 // If a user has specified a KillSignal, add it to required signals 4040 if task.KillSignal != "" { 4041 taskSignals[task.KillSignal] = struct{}{} 4042 } 4043 4044 // Check if any template change mode uses signals 4045 for _, t := range task.Templates { 4046 if t.ChangeMode != TemplateChangeModeSignal { 4047 continue 4048 } 4049 4050 taskSignals[t.ChangeSignal] = struct{}{} 4051 } 4052 4053 // Flatten and sort the signals 4054 l := len(taskSignals) 4055 if l == 0 { 4056 continue 4057 } 4058 4059 flat := make([]string, 0, l) 4060 for sig := range taskSignals { 4061 flat = append(flat, sig) 4062 } 4063 4064 sort.Strings(flat) 4065 tgSignals, ok := signals[tg.Name] 4066 if !ok { 4067 tgSignals = make(map[string][]string) 4068 signals[tg.Name] = tgSignals 4069 } 4070 tgSignals[task.Name] = flat 4071 } 4072 4073 } 4074 4075 return signals 4076 } 4077 4078 // SpecChanged determines if the functional specification has changed between 4079 // two job versions. 4080 func (j *Job) SpecChanged(new *Job) bool { 4081 if j == nil { 4082 return new != nil 4083 } 4084 4085 // Create a copy of the new job 4086 c := new.Copy() 4087 4088 // Update the new job so we can do a reflect 4089 c.Status = j.Status 4090 c.StatusDescription = j.StatusDescription 4091 c.Stable = j.Stable 4092 c.Version = j.Version 4093 c.CreateIndex = j.CreateIndex 4094 c.ModifyIndex = j.ModifyIndex 4095 c.JobModifyIndex = j.JobModifyIndex 4096 c.SubmitTime = j.SubmitTime 4097 4098 // cgbaker: FINISH: probably need some consideration of scaling policy ID here 4099 4100 // Deep equals the jobs 4101 return !reflect.DeepEqual(j, c) 4102 } 4103 4104 func (j *Job) SetSubmitTime() { 4105 j.SubmitTime = time.Now().UTC().UnixNano() 4106 } 4107 4108 // JobListStub is used to return a subset of job information 4109 // for the job list 4110 type JobListStub struct { 4111 ID string 4112 ParentID string 4113 Name string 4114 Datacenters []string 4115 Type string 4116 Priority int 4117 Periodic bool 4118 ParameterizedJob bool 4119 Stop bool 4120 Status string 4121 StatusDescription string 4122 JobSummary *JobSummary 4123 CreateIndex uint64 4124 ModifyIndex uint64 4125 JobModifyIndex uint64 4126 SubmitTime int64 4127 } 4128 4129 // JobSummary summarizes the state of the allocations of a job 4130 type JobSummary struct { 4131 // JobID is the ID of the job the summary is for 4132 JobID string 4133 4134 // Namespace is the namespace of the job and its summary 4135 Namespace string 4136 4137 // Summary contains the summary per task group for the Job 4138 Summary map[string]TaskGroupSummary 4139 4140 // Children contains a summary for the children of this job. 
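	// In practice only parent jobs (periodic or parameterized) are expected
	// to populate this; it is nil for other jobs.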
4141 Children *JobChildrenSummary 4142 4143 // Raft Indexes 4144 CreateIndex uint64 4145 ModifyIndex uint64 4146 } 4147 4148 // Copy returns a new copy of JobSummary 4149 func (js *JobSummary) Copy() *JobSummary { 4150 newJobSummary := new(JobSummary) 4151 *newJobSummary = *js 4152 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary)) 4153 for k, v := range js.Summary { 4154 newTGSummary[k] = v 4155 } 4156 newJobSummary.Summary = newTGSummary 4157 newJobSummary.Children = newJobSummary.Children.Copy() 4158 return newJobSummary 4159 } 4160 4161 // JobChildrenSummary contains the summary of children job statuses 4162 type JobChildrenSummary struct { 4163 Pending int64 4164 Running int64 4165 Dead int64 4166 } 4167 4168 // Copy returns a new copy of a JobChildrenSummary 4169 func (jc *JobChildrenSummary) Copy() *JobChildrenSummary { 4170 if jc == nil { 4171 return nil 4172 } 4173 4174 njc := new(JobChildrenSummary) 4175 *njc = *jc 4176 return njc 4177 } 4178 4179 // TaskGroup summarizes the state of all the allocations of a particular 4180 // TaskGroup 4181 type TaskGroupSummary struct { 4182 Queued int 4183 Complete int 4184 Failed int 4185 Running int 4186 Starting int 4187 Lost int 4188 } 4189 4190 const ( 4191 // Checks uses any registered health check state in combination with task 4192 // states to determine if a allocation is healthy. 4193 UpdateStrategyHealthCheck_Checks = "checks" 4194 4195 // TaskStates uses the task states of an allocation to determine if the 4196 // allocation is healthy. 4197 UpdateStrategyHealthCheck_TaskStates = "task_states" 4198 4199 // Manual allows the operator to manually signal to Nomad when an 4200 // allocations is healthy. This allows more advanced health checking that is 4201 // outside of the scope of Nomad. 4202 UpdateStrategyHealthCheck_Manual = "manual" 4203 ) 4204 4205 var ( 4206 // DefaultUpdateStrategy provides a baseline that can be used to upgrade 4207 // jobs with the old policy or for populating field defaults. 4208 DefaultUpdateStrategy = &UpdateStrategy{ 4209 Stagger: 30 * time.Second, 4210 MaxParallel: 1, 4211 HealthCheck: UpdateStrategyHealthCheck_Checks, 4212 MinHealthyTime: 10 * time.Second, 4213 HealthyDeadline: 5 * time.Minute, 4214 ProgressDeadline: 10 * time.Minute, 4215 AutoRevert: false, 4216 AutoPromote: false, 4217 Canary: 0, 4218 } 4219 ) 4220 4221 // UpdateStrategy is used to modify how updates are done 4222 type UpdateStrategy struct { 4223 // Stagger is used to determine the rate at which allocations are migrated 4224 // due to down or draining nodes. 4225 Stagger time.Duration 4226 4227 // MaxParallel is how many updates can be done in parallel 4228 MaxParallel int 4229 4230 // HealthCheck specifies the mechanism in which allocations are marked 4231 // healthy or unhealthy as part of a deployment. 4232 HealthCheck string 4233 4234 // MinHealthyTime is the minimum time an allocation must be in the healthy 4235 // state before it is marked as healthy, unblocking more allocations to be 4236 // rolled. 4237 MinHealthyTime time.Duration 4238 4239 // HealthyDeadline is the time in which an allocation must be marked as 4240 // healthy before it is automatically transitioned to unhealthy. This time 4241 // period doesn't count against the MinHealthyTime. 4242 HealthyDeadline time.Duration 4243 4244 // ProgressDeadline is the time in which an allocation as part of the 4245 // deployment must transition to healthy. If no allocation becomes healthy 4246 // after the deadline, the deployment is marked as failed. 
If the deadline 4247 // is zero, the first failure causes the deployment to fail. 4248 ProgressDeadline time.Duration 4249 4250 // AutoRevert declares that if a deployment fails because of unhealthy 4251 // allocations, there should be an attempt to auto-revert the job to a 4252 // stable version. 4253 AutoRevert bool 4254 4255 // AutoPromote declares that the deployment should be promoted when all canaries are 4256 // healthy 4257 AutoPromote bool 4258 4259 // Canary is the number of canaries to deploy when a change to the task 4260 // group is detected. 4261 Canary int 4262 } 4263 4264 func (u *UpdateStrategy) Copy() *UpdateStrategy { 4265 if u == nil { 4266 return nil 4267 } 4268 4269 copy := new(UpdateStrategy) 4270 *copy = *u 4271 return copy 4272 } 4273 4274 func (u *UpdateStrategy) Validate() error { 4275 if u == nil { 4276 return nil 4277 } 4278 4279 var mErr multierror.Error 4280 switch u.HealthCheck { 4281 case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual: 4282 default: 4283 multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck)) 4284 } 4285 4286 if u.MaxParallel < 0 { 4287 multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than zero: %d < 0", u.MaxParallel)) 4288 } 4289 if u.Canary < 0 { 4290 multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary)) 4291 } 4292 if u.Canary == 0 && u.AutoPromote { 4293 multierror.Append(&mErr, fmt.Errorf("Auto Promote requires a Canary count greater than zero")) 4294 } 4295 if u.MinHealthyTime < 0 { 4296 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime)) 4297 } 4298 if u.HealthyDeadline <= 0 { 4299 multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline)) 4300 } 4301 if u.ProgressDeadline < 0 { 4302 multierror.Append(&mErr, fmt.Errorf("Progress deadline must be zero or greater: %v", u.ProgressDeadline)) 4303 } 4304 if u.MinHealthyTime >= u.HealthyDeadline { 4305 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v > %v", u.MinHealthyTime, u.HealthyDeadline)) 4306 } 4307 if u.ProgressDeadline != 0 && u.HealthyDeadline >= u.ProgressDeadline { 4308 multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be less than progress deadline: %v > %v", u.HealthyDeadline, u.ProgressDeadline)) 4309 } 4310 if u.Stagger <= 0 { 4311 multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger)) 4312 } 4313 4314 return mErr.ErrorOrNil() 4315 } 4316 4317 func (u *UpdateStrategy) IsEmpty() bool { 4318 if u == nil { 4319 return true 4320 } 4321 4322 return u.MaxParallel == 0 4323 } 4324 4325 // TODO(alexdadgar): Remove once no longer used by the scheduler. 4326 // Rolling returns if a rolling strategy should be used 4327 func (u *UpdateStrategy) Rolling() bool { 4328 return u.Stagger > 0 && u.MaxParallel > 0 4329 } 4330 4331 const ( 4332 // PeriodicSpecCron is used for a cron spec. 4333 PeriodicSpecCron = "cron" 4334 4335 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 4336 // separated list of unix timestamps at which to launch. 4337 PeriodicSpecTest = "_internal_test" 4338 ) 4339 4340 // Periodic defines the interval a job should be run at. 4341 type PeriodicConfig struct { 4342 // Enabled determines if the job should be run periodically. 4343 Enabled bool 4344 4345 // Spec specifies the interval the job should be run as. 
It is parsed based
4346 	// on the SpecType.
4347 	Spec string
4348
4349 	// SpecType defines the format of the spec.
4350 	SpecType string
4351
4352 	// ProhibitOverlap enforces that spawned jobs do not run in parallel.
4353 	ProhibitOverlap bool
4354
4355 	// TimeZone is the user-specified string that determines the time zone to
4356 	// launch against. The time zones must be specified from the IANA Time Zone
4357 	// database, such as "America/New_York".
4358 	// Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
4359 	// Reference: https://www.iana.org/time-zones
4360 	TimeZone string
4361
4362 	// location is the time zone to evaluate the launch time against
4363 	location *time.Location
4364 }
4365
4366 func (p *PeriodicConfig) Copy() *PeriodicConfig {
4367 	if p == nil {
4368 		return nil
4369 	}
4370 	np := new(PeriodicConfig)
4371 	*np = *p
4372 	return np
4373 }
4374
4375 func (p *PeriodicConfig) Validate() error {
4376 	if !p.Enabled {
4377 		return nil
4378 	}
4379
4380 	var mErr multierror.Error
4381 	if p.Spec == "" {
4382 		multierror.Append(&mErr, fmt.Errorf("Must specify a spec"))
4383 	}
4384
4385 	// Check if we got a valid time zone
4386 	if p.TimeZone != "" {
4387 		if _, err := time.LoadLocation(p.TimeZone); err != nil {
4388 			multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err))
4389 		}
4390 	}
4391
4392 	switch p.SpecType {
4393 	case PeriodicSpecCron:
4394 		// Validate the cron spec
4395 		if _, err := cronexpr.Parse(p.Spec); err != nil {
4396 			multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err))
4397 		}
4398 	case PeriodicSpecTest:
4399 		// No-op
4400 	default:
4401 		multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType))
4402 	}
4403
4404 	return mErr.ErrorOrNil()
4405 }
4406
4407 func (p *PeriodicConfig) Canonicalize() {
4408 	// Load the location; fall back to UTC when the time zone cannot be loaded
4409 	l, err := time.LoadLocation(p.TimeZone)
4410 	if err != nil {
4411 		p.location = time.UTC
4412 		return
4413 	}
4414 	p.location = l
4415 }
4416
4417 // CronParseNext is a helper that parses the next time for the given expression
4418 // but captures any panic that may occur in the underlying library.
4419 func CronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) {
4420 	defer func() {
4421 		if recover() != nil {
4422 			t = time.Time{}
4423 			err = fmt.Errorf("failed parsing cron expression: %q", spec)
4424 		}
4425 	}()
4426
4427 	return e.Next(fromTime), nil
4428 }
4429
4430 // Next returns the closest time instant matching the spec that is after the
4431 // passed time. If no matching instance exists, the zero value of time.Time is
4432 // returned. The `time.Location` of the returned value matches that of the
4433 // passed time.
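//
// A hedged usage sketch (the spec and timestamps are invented; assumes the
// standard five-field cron form accepted by cronexpr):
//
//	p := &PeriodicConfig{Enabled: true, SpecType: PeriodicSpecCron, Spec: "*/15 * * * *"}
//	next, err := p.Next(time.Date(2020, 1, 1, 10, 20, 0, 0, time.UTC))
//	// err == nil, next == 2020-01-01 10:30:00 +0000 UTC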
4434 func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) { 4435 switch p.SpecType { 4436 case PeriodicSpecCron: 4437 e, err := cronexpr.Parse(p.Spec) 4438 if err != nil { 4439 return time.Time{}, fmt.Errorf("failed parsing cron expression: %q: %v", p.Spec, err) 4440 } 4441 return CronParseNext(e, fromTime, p.Spec) 4442 case PeriodicSpecTest: 4443 split := strings.Split(p.Spec, ",") 4444 if len(split) == 1 && split[0] == "" { 4445 return time.Time{}, nil 4446 } 4447 4448 // Parse the times 4449 times := make([]time.Time, len(split)) 4450 for i, s := range split { 4451 unix, err := strconv.Atoi(s) 4452 if err != nil { 4453 return time.Time{}, nil 4454 } 4455 4456 times[i] = time.Unix(int64(unix), 0) 4457 } 4458 4459 // Find the next match 4460 for _, next := range times { 4461 if fromTime.Before(next) { 4462 return next, nil 4463 } 4464 } 4465 } 4466 4467 return time.Time{}, nil 4468 } 4469 4470 // GetLocation returns the location to use for determining the time zone to run 4471 // the periodic job against. 4472 func (p *PeriodicConfig) GetLocation() *time.Location { 4473 // Jobs pre 0.5.5 will not have this 4474 if p.location != nil { 4475 return p.location 4476 } 4477 4478 return time.UTC 4479 } 4480 4481 const ( 4482 // PeriodicLaunchSuffix is the string appended to the periodic jobs ID 4483 // when launching derived instances of it. 4484 PeriodicLaunchSuffix = "/periodic-" 4485 ) 4486 4487 // PeriodicLaunch tracks the last launch time of a periodic job. 4488 type PeriodicLaunch struct { 4489 ID string // ID of the periodic job. 4490 Namespace string // Namespace of the periodic job 4491 Launch time.Time // The last launch time. 4492 4493 // Raft Indexes 4494 CreateIndex uint64 4495 ModifyIndex uint64 4496 } 4497 4498 const ( 4499 DispatchPayloadForbidden = "forbidden" 4500 DispatchPayloadOptional = "optional" 4501 DispatchPayloadRequired = "required" 4502 4503 // DispatchLaunchSuffix is the string appended to the parameterized job's ID 4504 // when dispatching instances of it. 4505 DispatchLaunchSuffix = "/dispatch-" 4506 ) 4507 4508 // ParameterizedJobConfig is used to configure the parameterized job 4509 type ParameterizedJobConfig struct { 4510 // Payload configure the payload requirements 4511 Payload string 4512 4513 // MetaRequired is metadata keys that must be specified by the dispatcher 4514 MetaRequired []string 4515 4516 // MetaOptional is metadata keys that may be specified by the dispatcher 4517 MetaOptional []string 4518 } 4519 4520 func (d *ParameterizedJobConfig) Validate() error { 4521 var mErr multierror.Error 4522 switch d.Payload { 4523 case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden: 4524 default: 4525 multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload)) 4526 } 4527 4528 // Check that the meta configurations are disjoint sets 4529 disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional) 4530 if !disjoint { 4531 multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. 
Following keys exist in both: %v", offending)) 4532 } 4533 4534 return mErr.ErrorOrNil() 4535 } 4536 4537 func (d *ParameterizedJobConfig) Canonicalize() { 4538 if d.Payload == "" { 4539 d.Payload = DispatchPayloadOptional 4540 } 4541 } 4542 4543 func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig { 4544 if d == nil { 4545 return nil 4546 } 4547 nd := new(ParameterizedJobConfig) 4548 *nd = *d 4549 nd.MetaOptional = helper.CopySliceString(nd.MetaOptional) 4550 nd.MetaRequired = helper.CopySliceString(nd.MetaRequired) 4551 return nd 4552 } 4553 4554 // DispatchedID returns an ID appropriate for a job dispatched against a 4555 // particular parameterized job 4556 func DispatchedID(templateID string, t time.Time) string { 4557 u := uuid.Generate()[:8] 4558 return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u) 4559 } 4560 4561 // DispatchPayloadConfig configures how a task gets its input from a job dispatch 4562 type DispatchPayloadConfig struct { 4563 // File specifies a relative path to where the input data should be written 4564 File string 4565 } 4566 4567 func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig { 4568 if d == nil { 4569 return nil 4570 } 4571 nd := new(DispatchPayloadConfig) 4572 *nd = *d 4573 return nd 4574 } 4575 4576 func (d *DispatchPayloadConfig) Validate() error { 4577 // Verify the destination doesn't escape 4578 escaped, err := PathEscapesAllocDir("task/local/", d.File) 4579 if err != nil { 4580 return fmt.Errorf("invalid destination path: %v", err) 4581 } else if escaped { 4582 return fmt.Errorf("destination escapes allocation directory") 4583 } 4584 4585 return nil 4586 } 4587 4588 const ( 4589 TaskLifecycleHookPrestart = "prestart" 4590 ) 4591 4592 type TaskLifecycleConfig struct { 4593 Hook string 4594 Sidecar bool 4595 } 4596 4597 func (d *TaskLifecycleConfig) Copy() *TaskLifecycleConfig { 4598 if d == nil { 4599 return nil 4600 } 4601 nd := new(TaskLifecycleConfig) 4602 *nd = *d 4603 return nd 4604 } 4605 4606 func (d *TaskLifecycleConfig) Validate() error { 4607 if d == nil { 4608 return nil 4609 } 4610 4611 switch d.Hook { 4612 case TaskLifecycleHookPrestart: 4613 case "": 4614 return fmt.Errorf("no lifecycle hook provided") 4615 default: 4616 return fmt.Errorf("invalid hook: %v", d.Hook) 4617 } 4618 4619 return nil 4620 } 4621 4622 var ( 4623 // These default restart policies needs to be in sync with 4624 // Canonicalize in api/tasks.go 4625 4626 DefaultServiceJobRestartPolicy = RestartPolicy{ 4627 Delay: 15 * time.Second, 4628 Attempts: 2, 4629 Interval: 30 * time.Minute, 4630 Mode: RestartPolicyModeFail, 4631 } 4632 DefaultBatchJobRestartPolicy = RestartPolicy{ 4633 Delay: 15 * time.Second, 4634 Attempts: 3, 4635 Interval: 24 * time.Hour, 4636 Mode: RestartPolicyModeFail, 4637 } 4638 ) 4639 4640 var ( 4641 // These default reschedule policies needs to be in sync with 4642 // NewDefaultReschedulePolicy in api/tasks.go 4643 4644 DefaultServiceJobReschedulePolicy = ReschedulePolicy{ 4645 Delay: 30 * time.Second, 4646 DelayFunction: "exponential", 4647 MaxDelay: 1 * time.Hour, 4648 Unlimited: true, 4649 } 4650 DefaultBatchJobReschedulePolicy = ReschedulePolicy{ 4651 Attempts: 1, 4652 Interval: 24 * time.Hour, 4653 Delay: 5 * time.Second, 4654 DelayFunction: "constant", 4655 } 4656 ) 4657 4658 const ( 4659 // RestartPolicyModeDelay causes an artificial delay till the next interval is 4660 // reached when the specified attempts have been reached in the interval. 
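//
// A hedged illustration (values invented): with Mode = "delay", a policy of
// Attempts = 2 within Interval = 30m delays a third failed restart until the
// interval rolls over, whereas Mode = "fail" fails the allocation instead.
//
//	rp := &RestartPolicy{Attempts: 2, Interval: 30 * time.Minute,
//		Delay: 15 * time.Second, Mode: RestartPolicyModeDelay}
//	_ = rp.Validate() // nil: 2 restarts * 15s delay fit within 30m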
4661 	RestartPolicyModeDelay = "delay"
4662
4663 	// RestartPolicyModeFail causes a job to fail if the specified number of
4664 	// attempts are reached within an interval.
4665 	RestartPolicyModeFail = "fail"
4666
4667 	// RestartPolicyMinInterval is the minimum interval that is accepted for a
4668 	// restart policy.
4669 	RestartPolicyMinInterval = 5 * time.Second
4670
4671 	// ReasonWithinPolicy describes restart events that are within policy
4672 	ReasonWithinPolicy = "Restart within policy"
4673 )
4674
4675 // JobScalingEvents contains the scaling events for a given job
4676 type JobScalingEvents struct {
4677 	Namespace string
4678 	JobID     string
4679
4680 	// This map is indexed by target; currently, the only target is a task
4681 	// group. Each indexed array is sorted from newest to oldest event and
4682 	// should hold fewer than JobTrackedScalingEvents entries.
4683 	ScalingEvents map[string][]*ScalingEvent
4684
4685 	// Raft index
4686 	ModifyIndex uint64
4687 }
4688
4689 // NewScalingEvent is a factory method for ScalingEvent objects
4690 func NewScalingEvent(message string) *ScalingEvent {
4691 	return &ScalingEvent{
4692 		Time:    time.Now().Unix(),
4693 		Message: message,
4694 	}
4695 }
4696
4697 // ScalingEvent describes a scaling event against a Job
4698 type ScalingEvent struct {
4699 	// Time is the Unix timestamp of the scaling event, in seconds (see NewScalingEvent)
4700 	Time int64
4701
4702 	// Count is the new scaling count, if provided
4703 	Count *int64
4704
4705 	// Message is the message describing a scaling event
4706 	Message string
4707
4708 	// Error indicates an error state for this scaling event
4709 	Error bool
4710
4711 	// Meta is a map of metadata returned during a scaling event
4712 	Meta map[string]interface{}
4713
4714 	// EvalID is the ID for an evaluation if one was created as part of a scaling event
4715 	EvalID *string
4716
4717 	// Raft index
4718 	CreateIndex uint64
4719 }
4720
4721 func (e *ScalingEvent) SetError(error bool) *ScalingEvent {
4722 	e.Error = error
4723 	return e
4724 }
4725
4726 func (e *ScalingEvent) SetMeta(meta map[string]interface{}) *ScalingEvent {
4727 	e.Meta = meta
4728 	return e
4729 }
4730
4731 func (e *ScalingEvent) SetEvalID(evalID string) *ScalingEvent {
4732 	e.EvalID = &evalID
4733 	return e
4734 }
4735
4736 // ScalingEventRequest is used by the Job.Scale endpoint
4737 // to register scaling events
4738 type ScalingEventRequest struct {
4739 	Namespace string
4740 	JobID     string
4741 	TaskGroup string
4742
4743 	ScalingEvent *ScalingEvent
4744 }
4745
4746 // ScalingPolicy specifies the scaling policy for a scaling target
4747 type ScalingPolicy struct {
4748 	// ID is a generated UUID used for looking up the scaling policy
4749 	ID string
4750
4751 	// Target contains information about the target of the scaling policy, like job and group
4752 	Target map[string]string
4753
4754 	// Policy is an opaque description of the scaling policy, passed to the autoscaler
4755 	Policy map[string]interface{}
4756
4757 	// Min is the minimum allowable scaling count for this target
4758 	Min int64
4759
4760 	// Max is the maximum allowable scaling count for this target
4761 	Max int64
4762
4763 	// Enabled indicates whether this policy is enabled
4764 	Enabled bool
4765
4766 	CreateIndex uint64
4767 	ModifyIndex uint64
4768 }
4769
4770 const (
4771 	ScalingTargetNamespace = "Namespace"
4772 	ScalingTargetJob       = "Job"
4773 	ScalingTargetGroup     = "Group"
4774 )
4775
4776 // Diff indicates whether the specification for a given scaling policy has changed
4777 func (p *ScalingPolicy) Diff(p2 *ScalingPolicy) bool {
4778 	copy := *p2
4779 	copy.ID = p.ID
4780 	copy.CreateIndex = p.CreateIndex
4781 	copy.ModifyIndex = p.ModifyIndex
4782 	return !reflect.DeepEqual(*p, copy)
4783 }
4784
4785 func (p *ScalingPolicy) TargetTaskGroup(job *Job, tg *TaskGroup) *ScalingPolicy {
4786 	p.Target = map[string]string{
4787 		ScalingTargetNamespace: job.Namespace,
4788 		ScalingTargetJob:       job.ID,
4789 		ScalingTargetGroup:     tg.Name,
4790 	}
4791 	return p
4792 }
4793
4794 func (p *ScalingPolicy) Stub() *ScalingPolicyListStub {
4795 	stub := &ScalingPolicyListStub{
4796 		ID:          p.ID,
4797 		Target:      make(map[string]string),
4798 		Enabled:     p.Enabled,
4799 		CreateIndex: p.CreateIndex,
4800 		ModifyIndex: p.ModifyIndex,
4801 	}
4802 	for k, v := range p.Target {
4803 		stub.Target[k] = v
4804 	}
4805 	return stub
4806 }
4807
4808 // GetScalingPolicies returns a slice of all scaling policies for this job
4809 func (j *Job) GetScalingPolicies() []*ScalingPolicy {
4810 	ret := make([]*ScalingPolicy, 0)
4811
4812 	for _, tg := range j.TaskGroups {
4813 		if tg.Scaling != nil {
4814 			ret = append(ret, tg.Scaling)
4815 		}
4816 	}
4817
4818 	return ret
4819 }
4820
4821 // ScalingPolicyListStub is used to return a subset of scaling policy information
4822 // for the scaling policy list
4823 type ScalingPolicyListStub struct {
4824 	ID          string
4825 	Enabled     bool
4826 	Target      map[string]string
4827 	CreateIndex uint64
4828 	ModifyIndex uint64
4829 }
4830
4831 // RestartPolicy configures how Tasks are restarted when they crash or fail.
4832 type RestartPolicy struct {
4833 	// Attempts is the number of restarts that will occur in an interval.
4834 	Attempts int
4835
4836 	// Interval is the duration within which the number of restarts is
4837 	// limited.
4838 	Interval time.Duration
4839
4840 	// Delay is the time between a failure and a restart.
4841 	Delay time.Duration
4842
4843 	// Mode controls what happens when the task restarts more than Attempts
4844 	// times in an interval.
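	// Valid values are RestartPolicyModeDelay and RestartPolicyModeFail.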
4845 Mode string 4846 } 4847 4848 func (r *RestartPolicy) Copy() *RestartPolicy { 4849 if r == nil { 4850 return nil 4851 } 4852 nrp := new(RestartPolicy) 4853 *nrp = *r 4854 return nrp 4855 } 4856 4857 func (r *RestartPolicy) Validate() error { 4858 var mErr multierror.Error 4859 switch r.Mode { 4860 case RestartPolicyModeDelay, RestartPolicyModeFail: 4861 default: 4862 multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode)) 4863 } 4864 4865 // Check for ambiguous/confusing settings 4866 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 4867 multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts)) 4868 } 4869 4870 if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() { 4871 multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval)) 4872 } 4873 if time.Duration(r.Attempts)*r.Delay > r.Interval { 4874 multierror.Append(&mErr, 4875 fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)) 4876 } 4877 return mErr.ErrorOrNil() 4878 } 4879 4880 func NewRestartPolicy(jobType string) *RestartPolicy { 4881 switch jobType { 4882 case JobTypeService, JobTypeSystem: 4883 rp := DefaultServiceJobRestartPolicy 4884 return &rp 4885 case JobTypeBatch: 4886 rp := DefaultBatchJobRestartPolicy 4887 return &rp 4888 } 4889 return nil 4890 } 4891 4892 const ReschedulePolicyMinInterval = 15 * time.Second 4893 const ReschedulePolicyMinDelay = 5 * time.Second 4894 4895 var RescheduleDelayFunctions = [...]string{"constant", "exponential", "fibonacci"} 4896 4897 // ReschedulePolicy configures how Tasks are rescheduled when they crash or fail. 4898 type ReschedulePolicy struct { 4899 // Attempts limits the number of rescheduling attempts that can occur in an interval. 4900 Attempts int 4901 4902 // Interval is a duration in which we can limit the number of reschedule attempts. 4903 Interval time.Duration 4904 4905 // Delay is a minimum duration to wait between reschedule attempts. 4906 // The delay function determines how much subsequent reschedule attempts are delayed by. 4907 Delay time.Duration 4908 4909 // DelayFunction determines how the delay progressively changes on subsequent reschedule 4910 // attempts. Valid values are "exponential", "constant", and "fibonacci". 4911 DelayFunction string 4912 4913 // MaxDelay is an upper bound on the delay. 4914 MaxDelay time.Duration 4915 4916 // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set 4917 // between reschedule attempts. 
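	// Unlimited is mutually exclusive with a positive Attempts; Validate
	// below rejects policies that set both.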
4918 	Unlimited bool
4919 }
4920
4921 func (r *ReschedulePolicy) Copy() *ReschedulePolicy {
4922 	if r == nil {
4923 		return nil
4924 	}
4925 	nrp := new(ReschedulePolicy)
4926 	*nrp = *r
4927 	return nrp
4928 }
4929
4930 func (r *ReschedulePolicy) Enabled() bool {
4931 	enabled := r != nil && (r.Attempts > 0 || r.Unlimited)
4932 	return enabled
4933 }
4934
4935 // Validate uses multiple criteria to validate the reschedule policy:
4936 // Delay must be at least ReschedulePolicyMinDelay (5 seconds);
4937 // the delay ceiling (MaxDelay) is ignored if the delay function is "constant";
4938 // and the number of possible attempts is validated given the interval, delay and delay function.
4939 func (r *ReschedulePolicy) Validate() error {
4940 	if !r.Enabled() {
4941 		return nil
4942 	}
4943 	var mErr multierror.Error
4944 	// Check for ambiguous/confusing settings
4945 	if r.Attempts > 0 {
4946 		if r.Interval <= 0 {
4947 			multierror.Append(&mErr, fmt.Errorf("Interval must be a non-zero value if Attempts > 0"))
4948 		}
4949 		if r.Unlimited {
4950 			multierror.Append(&mErr, fmt.Errorf("Reschedule Policy with Attempts = %v, Interval = %v, "+
4951 				"and Unlimited = %v is ambiguous", r.Attempts, r.Interval, r.Unlimited))
4952 			multierror.Append(&mErr, errors.New("If Attempts > 0, Unlimited cannot also be set to true"))
4953 		}
4954 	}
4955
4956 	delayPreCheck := true
4957 	// Delay should be bigger than the default
4958 	if r.Delay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() {
4959 		multierror.Append(&mErr, fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay))
4960 		delayPreCheck = false
4961 	}
4962
4963 	// Must use a valid delay function
4964 	if !isValidDelayFunction(r.DelayFunction) {
4965 		multierror.Append(&mErr, fmt.Errorf("Invalid delay function %q, must be one of %q", r.DelayFunction, RescheduleDelayFunctions))
4966 		delayPreCheck = false
4967 	}
4968
4969 	// Validate MaxDelay if not using a constant delay progression
4970 	if r.DelayFunction != "constant" {
4971 		if r.MaxDelay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() {
4972 			multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.MaxDelay))
4973 			delayPreCheck = false
4974 		}
4975 		if r.MaxDelay < r.Delay {
4976 			multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than Delay %v (got %v)", r.Delay, r.MaxDelay))
4977 			delayPreCheck = false
4978 		}
4979
4980 	}
4981
4982 	// Validate Interval and other delay parameters if attempts are limited
4983 	if !r.Unlimited {
4984 		if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() {
4985 			multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval))
4986 		}
4987 		if !delayPreCheck {
4988 			// We can't cross-validate the rest of the delay params if delayPreCheck fails, so return early
4989 			return mErr.ErrorOrNil()
4990 		}
4991 		crossValidationErr := r.validateDelayParams()
4992 		if crossValidationErr != nil {
4993 			multierror.Append(&mErr, crossValidationErr)
4994 		}
4995 	}
4996 	return mErr.ErrorOrNil()
4997 }
4998
4999 func isValidDelayFunction(delayFunc string) bool {
5000 	for _, value := range RescheduleDelayFunctions {
5001 		if value == delayFunc {
5002 			return true
5003 		}
5004 	}
5005 	return false
5006 }
5007
5008 func (r *ReschedulePolicy) validateDelayParams() error {
5009 	ok, possibleAttempts, recommendedInterval := r.viableAttempts()
5010 	if ok {
5011 		return nil
5012 	}
5013 	var mErr multierror.Error
5014 	if r.DelayFunction == "constant" {
5015 		multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts
in %v with initial delay %v and "+ 5016 "delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction)) 5017 } else { 5018 multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ 5019 "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay)) 5020 } 5021 multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts)) 5022 return mErr.ErrorOrNil() 5023 } 5024 5025 func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) { 5026 var possibleAttempts int 5027 var recommendedInterval time.Duration 5028 valid := true 5029 switch r.DelayFunction { 5030 case "constant": 5031 recommendedInterval = time.Duration(r.Attempts) * r.Delay 5032 if r.Interval < recommendedInterval { 5033 possibleAttempts = int(r.Interval / r.Delay) 5034 valid = false 5035 } 5036 case "exponential": 5037 for i := 0; i < r.Attempts; i++ { 5038 nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay 5039 if nextDelay > r.MaxDelay { 5040 nextDelay = r.MaxDelay 5041 recommendedInterval += nextDelay 5042 } else { 5043 recommendedInterval = nextDelay 5044 } 5045 if recommendedInterval < r.Interval { 5046 possibleAttempts++ 5047 } 5048 } 5049 if possibleAttempts < r.Attempts { 5050 valid = false 5051 } 5052 case "fibonacci": 5053 var slots []time.Duration 5054 slots = append(slots, r.Delay) 5055 slots = append(slots, r.Delay) 5056 reachedCeiling := false 5057 for i := 2; i < r.Attempts; i++ { 5058 var nextDelay time.Duration 5059 if reachedCeiling { 5060 //switch to linear 5061 nextDelay = slots[i-1] + r.MaxDelay 5062 } else { 5063 nextDelay = slots[i-1] + slots[i-2] 5064 if nextDelay > r.MaxDelay { 5065 nextDelay = r.MaxDelay 5066 reachedCeiling = true 5067 } 5068 } 5069 slots = append(slots, nextDelay) 5070 } 5071 recommendedInterval = slots[len(slots)-1] 5072 if r.Interval < recommendedInterval { 5073 valid = false 5074 // calculate possible attempts 5075 for i := 0; i < len(slots); i++ { 5076 if slots[i] > r.Interval { 5077 possibleAttempts = i 5078 break 5079 } 5080 } 5081 } 5082 default: 5083 return false, 0, 0 5084 } 5085 if possibleAttempts < 0 { // can happen if delay is bigger than interval 5086 possibleAttempts = 0 5087 } 5088 return valid, possibleAttempts, recommendedInterval 5089 } 5090 5091 func NewReschedulePolicy(jobType string) *ReschedulePolicy { 5092 switch jobType { 5093 case JobTypeService: 5094 rp := DefaultServiceJobReschedulePolicy 5095 return &rp 5096 case JobTypeBatch: 5097 rp := DefaultBatchJobReschedulePolicy 5098 return &rp 5099 } 5100 return nil 5101 } 5102 5103 const ( 5104 MigrateStrategyHealthChecks = "checks" 5105 MigrateStrategyHealthStates = "task_states" 5106 ) 5107 5108 type MigrateStrategy struct { 5109 MaxParallel int 5110 HealthCheck string 5111 MinHealthyTime time.Duration 5112 HealthyDeadline time.Duration 5113 } 5114 5115 // DefaultMigrateStrategy is used for backwards compat with pre-0.8 Allocations 5116 // that lack an update strategy. 
5117 // 5118 // This function should match its counterpart in api/tasks.go 5119 func DefaultMigrateStrategy() *MigrateStrategy { 5120 return &MigrateStrategy{ 5121 MaxParallel: 1, 5122 HealthCheck: MigrateStrategyHealthChecks, 5123 MinHealthyTime: 10 * time.Second, 5124 HealthyDeadline: 5 * time.Minute, 5125 } 5126 } 5127 5128 func (m *MigrateStrategy) Validate() error { 5129 var mErr multierror.Error 5130 5131 if m.MaxParallel < 0 { 5132 multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel)) 5133 } 5134 5135 switch m.HealthCheck { 5136 case MigrateStrategyHealthChecks, MigrateStrategyHealthStates: 5137 // ok 5138 case "": 5139 if m.MaxParallel > 0 { 5140 multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck")) 5141 } 5142 default: 5143 multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck)) 5144 } 5145 5146 if m.MinHealthyTime < 0 { 5147 multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime)) 5148 } 5149 5150 if m.HealthyDeadline < 0 { 5151 multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline)) 5152 } 5153 5154 if m.MinHealthyTime > m.HealthyDeadline { 5155 multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline")) 5156 } 5157 5158 return mErr.ErrorOrNil() 5159 } 5160 5161 // TaskGroup is an atomic unit of placement. Each task group belongs to 5162 // a job and may contain any number of tasks. A task group support running 5163 // in many replicas using the same configuration.. 5164 type TaskGroup struct { 5165 // Name of the task group 5166 Name string 5167 5168 // Count is the number of replicas of this task group that should 5169 // be scheduled. 5170 Count int 5171 5172 // Update is used to control the update strategy for this task group 5173 Update *UpdateStrategy 5174 5175 // Migrate is used to control the migration strategy for this task group 5176 Migrate *MigrateStrategy 5177 5178 // Constraints can be specified at a task group level and apply to 5179 // all the tasks contained. 5180 Constraints []*Constraint 5181 5182 // Scaling is the list of autoscaling policies for the TaskGroup 5183 Scaling *ScalingPolicy 5184 5185 // RestartPolicy of a TaskGroup 5186 RestartPolicy *RestartPolicy 5187 5188 // Tasks are the collection of tasks that this task group needs to run 5189 Tasks []*Task 5190 5191 // EphemeralDisk is the disk resources that the task group requests 5192 EphemeralDisk *EphemeralDisk 5193 5194 // Meta is used to associate arbitrary metadata with this 5195 // task group. This is opaque to Nomad. 5196 Meta map[string]string 5197 5198 // ReschedulePolicy is used to configure how the scheduler should 5199 // retry failed allocations. 5200 ReschedulePolicy *ReschedulePolicy 5201 5202 // Affinities can be specified at the task group level to express 5203 // scheduling preferences. 5204 Affinities []*Affinity 5205 5206 // Spread can be specified at the task group level to express spreading 5207 // allocations across a desired attribute, such as datacenter 5208 Spreads []*Spread 5209 5210 // Networks are the network configuration for the task group. This can be 5211 // overridden in the task. 5212 Networks Networks 5213 5214 // Services this group provides 5215 Services []*Service 5216 5217 // Volumes is a map of volumes that have been requested by the task group. 
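	// The map is keyed by the volume name that task VolumeMounts reference.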
5218 Volumes map[string]*VolumeRequest 5219 5220 // ShutdownDelay is the amount of time to wait between deregistering 5221 // group services in consul and stopping tasks. 5222 ShutdownDelay *time.Duration 5223 5224 // StopAfterClientDisconnect, if set, configures the client to stop the task group 5225 // after this duration since the last known good heartbeat 5226 StopAfterClientDisconnect *time.Duration 5227 } 5228 5229 func (tg *TaskGroup) Copy() *TaskGroup { 5230 if tg == nil { 5231 return nil 5232 } 5233 ntg := new(TaskGroup) 5234 *ntg = *tg 5235 ntg.Update = ntg.Update.Copy() 5236 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 5237 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 5238 ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy() 5239 ntg.Affinities = CopySliceAffinities(ntg.Affinities) 5240 ntg.Spreads = CopySliceSpreads(ntg.Spreads) 5241 ntg.Volumes = CopyMapVolumeRequest(ntg.Volumes) 5242 ntg.Scaling = CopyScalingPolicy(ntg.Scaling) 5243 5244 // Copy the network objects 5245 if tg.Networks != nil { 5246 n := len(tg.Networks) 5247 ntg.Networks = make([]*NetworkResource, n) 5248 for i := 0; i < n; i++ { 5249 ntg.Networks[i] = tg.Networks[i].Copy() 5250 } 5251 } 5252 5253 if tg.Tasks != nil { 5254 tasks := make([]*Task, len(ntg.Tasks)) 5255 for i, t := range ntg.Tasks { 5256 tasks[i] = t.Copy() 5257 } 5258 ntg.Tasks = tasks 5259 } 5260 5261 ntg.Meta = helper.CopyMapStringString(ntg.Meta) 5262 5263 if tg.EphemeralDisk != nil { 5264 ntg.EphemeralDisk = tg.EphemeralDisk.Copy() 5265 } 5266 5267 if tg.Services != nil { 5268 ntg.Services = make([]*Service, len(tg.Services)) 5269 for i, s := range tg.Services { 5270 ntg.Services[i] = s.Copy() 5271 } 5272 } 5273 5274 if tg.ShutdownDelay != nil { 5275 ntg.ShutdownDelay = tg.ShutdownDelay 5276 } 5277 5278 if tg.StopAfterClientDisconnect != nil { 5279 ntg.StopAfterClientDisconnect = tg.StopAfterClientDisconnect 5280 } 5281 5282 return ntg 5283 } 5284 5285 // Canonicalize is used to canonicalize fields in the TaskGroup. 5286 func (tg *TaskGroup) Canonicalize(job *Job) { 5287 // Ensure that an empty and nil map are treated the same to avoid scheduling 5288 // problems since we use reflect DeepEquals. 5289 if len(tg.Meta) == 0 { 5290 tg.Meta = nil 5291 } 5292 5293 // Set the default restart policy. 
5294 if tg.RestartPolicy == nil { 5295 tg.RestartPolicy = NewRestartPolicy(job.Type) 5296 } 5297 5298 if tg.ReschedulePolicy == nil { 5299 tg.ReschedulePolicy = NewReschedulePolicy(job.Type) 5300 } 5301 5302 // Canonicalize Migrate for service jobs 5303 if job.Type == JobTypeService && tg.Migrate == nil { 5304 tg.Migrate = DefaultMigrateStrategy() 5305 } 5306 5307 // Set a default ephemeral disk object if the user has not requested for one 5308 if tg.EphemeralDisk == nil { 5309 tg.EphemeralDisk = DefaultEphemeralDisk() 5310 } 5311 5312 for _, service := range tg.Services { 5313 service.Canonicalize(job.Name, tg.Name, "group") 5314 } 5315 5316 for _, network := range tg.Networks { 5317 network.Canonicalize() 5318 } 5319 5320 for _, task := range tg.Tasks { 5321 task.Canonicalize(job, tg) 5322 } 5323 } 5324 5325 // Validate is used to sanity check a task group 5326 func (tg *TaskGroup) Validate(j *Job) error { 5327 var mErr multierror.Error 5328 if tg.Name == "" { 5329 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 5330 } 5331 if tg.Count < 0 { 5332 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative")) 5333 } 5334 if len(tg.Tasks) == 0 { 5335 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 5336 } 5337 for idx, constr := range tg.Constraints { 5338 if err := constr.Validate(); err != nil { 5339 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 5340 mErr.Errors = append(mErr.Errors, outer) 5341 } 5342 } 5343 if j.Type == JobTypeSystem { 5344 if tg.Affinities != nil { 5345 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 5346 } 5347 } else { 5348 for idx, affinity := range tg.Affinities { 5349 if err := affinity.Validate(); err != nil { 5350 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 5351 mErr.Errors = append(mErr.Errors, outer) 5352 } 5353 } 5354 } 5355 5356 if tg.RestartPolicy != nil { 5357 if err := tg.RestartPolicy.Validate(); err != nil { 5358 mErr.Errors = append(mErr.Errors, err) 5359 } 5360 } else { 5361 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 5362 } 5363 5364 if j.Type == JobTypeSystem { 5365 if tg.Spreads != nil { 5366 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza")) 5367 } 5368 } else { 5369 for idx, spread := range tg.Spreads { 5370 if err := spread.Validate(); err != nil { 5371 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err) 5372 mErr.Errors = append(mErr.Errors, outer) 5373 } 5374 } 5375 } 5376 5377 if j.Type == JobTypeSystem { 5378 if tg.ReschedulePolicy != nil { 5379 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy")) 5380 } 5381 } else { 5382 if tg.ReschedulePolicy != nil { 5383 if err := tg.ReschedulePolicy.Validate(); err != nil { 5384 mErr.Errors = append(mErr.Errors, err) 5385 } 5386 } else { 5387 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name)) 5388 } 5389 } 5390 5391 if tg.EphemeralDisk != nil { 5392 if err := tg.EphemeralDisk.Validate(); err != nil { 5393 mErr.Errors = append(mErr.Errors, err) 5394 } 5395 } else { 5396 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name)) 5397 } 5398 5399 // Validate the update strategy 5400 if u := tg.Update; u != nil { 5401 switch j.Type { 5402 case JobTypeService, JobTypeSystem: 5403 
default: 5404 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type)) 5405 } 5406 if err := u.Validate(); err != nil { 5407 mErr.Errors = append(mErr.Errors, err) 5408 } 5409 } 5410 5411 // Validate the migration strategy 5412 switch j.Type { 5413 case JobTypeService: 5414 if tg.Migrate != nil { 5415 if err := tg.Migrate.Validate(); err != nil { 5416 mErr.Errors = append(mErr.Errors, err) 5417 } 5418 } 5419 default: 5420 if tg.Migrate != nil { 5421 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow migrate block", j.Type)) 5422 } 5423 } 5424 5425 // Check that there is only one leader task if any 5426 tasks := make(map[string]int) 5427 leaderTasks := 0 5428 for idx, task := range tg.Tasks { 5429 if task.Name == "" { 5430 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 5431 } else if existing, ok := tasks[task.Name]; ok { 5432 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 5433 } else { 5434 tasks[task.Name] = idx 5435 } 5436 5437 if task.Leader { 5438 leaderTasks++ 5439 } 5440 } 5441 5442 if leaderTasks > 1 { 5443 mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader")) 5444 } 5445 5446 // Validate the Host Volumes 5447 for name, decl := range tg.Volumes { 5448 if !(decl.Type == VolumeTypeHost || 5449 decl.Type == VolumeTypeCSI) { 5450 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has unrecognised type %s", name, decl.Type)) 5451 continue 5452 } 5453 5454 if decl.Source == "" { 5455 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has an empty source", name)) 5456 } 5457 } 5458 5459 // Validate task group and task network resources 5460 if err := tg.validateNetworks(); err != nil { 5461 outer := fmt.Errorf("Task group network validation failed: %v", err) 5462 mErr.Errors = append(mErr.Errors, outer) 5463 } 5464 5465 // Validate task group and task services 5466 if err := tg.validateServices(); err != nil { 5467 outer := fmt.Errorf("Task group service validation failed: %v", err) 5468 mErr.Errors = append(mErr.Errors, outer) 5469 } 5470 5471 // Validate the scaling policy 5472 if err := tg.validateScalingPolicy(); err != nil { 5473 outer := fmt.Errorf("Task group scaling policy validation failed: %v", err) 5474 mErr.Errors = append(mErr.Errors, outer) 5475 } 5476 5477 // Validate the tasks 5478 for _, task := range tg.Tasks { 5479 // Validate the task does not reference undefined volume mounts 5480 for i, mnt := range task.VolumeMounts { 5481 if mnt.Volume == "" { 5482 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing an empty volume", task.Name, i)) 5483 continue 5484 } 5485 5486 if _, ok := tg.Volumes[mnt.Volume]; !ok { 5487 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing undefined volume %s", task.Name, i, mnt.Volume)) 5488 continue 5489 } 5490 } 5491 5492 if err := task.Validate(tg.EphemeralDisk, j.Type, tg.Services); err != nil { 5493 outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err) 5494 mErr.Errors = append(mErr.Errors, outer) 5495 } 5496 } 5497 return mErr.ErrorOrNil() 5498 } 5499 5500 func (tg *TaskGroup) validateNetworks() error { 5501 var mErr multierror.Error 5502 portLabels := make(map[string]string) 5503 staticPorts := make(map[int]string) 5504 mappedPorts := make(map[int]string) 5505 5506 for _, net := range tg.Networks { 5507 for _, port := range append(net.ReservedPorts, 
net.DynamicPorts...) {
5508 			if other, ok := portLabels[port.Label]; ok {
5509 				mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other))
5510 			} else {
5511 				portLabels[port.Label] = "taskgroup network"
5512 			}
5513
5514 			if port.Value != 0 {
5515 				// static port
5516 				if other, ok := staticPorts[port.Value]; ok {
5517 					err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
5518 					mErr.Errors = append(mErr.Errors, err)
5519 				} else {
5520 					staticPorts[port.Value] = fmt.Sprintf("taskgroup network:%s", port.Label)
5521 				}
5522 			}
5523
5524 			if port.To > 0 {
5525 				if other, ok := mappedPorts[port.To]; ok {
5526 					err := fmt.Errorf("Port mapped to %d already in use by %s", port.To, other)
5527 					mErr.Errors = append(mErr.Errors, err)
5528 				} else {
5529 					mappedPorts[port.To] = fmt.Sprintf("taskgroup network:%s", port.Label)
5530 				}
5531 			} else if port.To < -1 {
5532 				err := fmt.Errorf("Port %q cannot be mapped to negative value %d", port.Label, port.To)
5533 				mErr.Errors = append(mErr.Errors, err)
5534 			}
5535 		}
5536 	}
5537 	// Check task-level networks for duplicate port labels and duplicated static or mapped ports
5538 	for _, task := range tg.Tasks {
5539 		if task.Resources == nil {
5540 			continue
5541 		}
5542
5543 		for _, net := range task.Resources.Networks {
5544 			for _, port := range append(net.ReservedPorts, net.DynamicPorts...) {
5545 				if other, ok := portLabels[port.Label]; ok {
5546 					mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other))
5547 				}
5548
5549 				if port.Value != 0 {
5550 					if other, ok := staticPorts[port.Value]; ok {
5551 						err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
5552 						mErr.Errors = append(mErr.Errors, err)
5553 					} else {
5554 						staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label)
5555 					}
5556 				}
5557
5558 				if port.To != 0 {
5559 					if other, ok := mappedPorts[port.To]; ok {
5560 						err := fmt.Errorf("Port mapped to %d already in use by %s", port.To, other)
5561 						mErr.Errors = append(mErr.Errors, err)
5562 					} else {
5563 						mappedPorts[port.To] = fmt.Sprintf("%s:%s", task.Name, port.Label)
5564 					}
5565 				}
5566 			}
5567 		}
5568 	}
5569 	return mErr.ErrorOrNil()
5570 }
5571
5572 // validateServices runs Service.Validate() on group-level services,
5573 // checks that group services do not conflict with task services and that
5574 // group service checks that refer to tasks only refer to tasks that exist.
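//
// A hedged sketch of the task-reference rule (all names here are invented):
//
//	tg := &TaskGroup{
//		Name:  "web",
//		Tasks: []*Task{{Name: "server"}},
//		Services: []*Service{{
//			Name:      "web-svc",
//			PortLabel: "http",
//			Checks: []*ServiceCheck{{
//				Name:     "healthz",
//				Type:     ServiceCheckScript,
//				TaskName: "server", // must name a task in this group
//			}},
//		}},
//	}
//
// The TaskName reference above is accepted; TaskName: "db" would be rejected
// because no such task exists, and a TaskName on a task-level service check is
// always rejected.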
5575 func (tg *TaskGroup) validateServices() error {
5576 	var mErr multierror.Error
5577 	knownTasks := make(map[string]struct{})
5578 	knownServices := make(map[string]struct{})
5579
5580 	// Create a map of known tasks and their services so we can compare
5581 	// vs the group-level services and checks
5582 	for _, task := range tg.Tasks {
5583 		knownTasks[task.Name] = struct{}{}
5584 		if task.Services == nil {
5585 			continue
5586 		}
5587 		for _, service := range task.Services {
5588 			if _, ok := knownServices[service.Name+service.PortLabel]; ok {
5589 				mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name))
5590 			}
5591 			for _, check := range service.Checks {
5592 				if check.TaskName != "" {
5593 					mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %s is invalid: only task group service checks can be assigned tasks", check.Name))
5594 				}
5595 			}
5596 			knownServices[service.Name+service.PortLabel] = struct{}{}
5597 		}
5598 	}
5599 	for i, service := range tg.Services {
5600 		if err := service.Validate(); err != nil {
5601 			outer := fmt.Errorf("Service[%d] %s validation failed: %s", i, service.Name, err)
5602 			mErr.Errors = append(mErr.Errors, outer)
5603 			// we continue here to avoid the risk of crashing on null-pointer
5604 			// access in a later step, accepting that we might miss out on
5605 			// error messages to provide the user.
5606 			continue
5607 		}
5608 		if _, ok := knownServices[service.Name+service.PortLabel]; ok {
5609 			mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name))
5610 		}
5611 		knownServices[service.Name+service.PortLabel] = struct{}{}
5612 		for _, check := range service.Checks {
5613 			if check.TaskName != "" {
5614 				if check.Type != ServiceCheckScript && check.Type != ServiceCheckGRPC {
5615 					mErr.Errors = append(mErr.Errors,
5616 						fmt.Errorf("Check %s invalid: only script and gRPC checks should have tasks", check.Name))
5617 				}
5618 				if _, ok := knownTasks[check.TaskName]; !ok {
5619 					mErr.Errors = append(mErr.Errors,
5620 						fmt.Errorf("Check %s invalid: refers to non-existent task %s", check.Name, check.TaskName))
5621 				}
5622 			}
5623 		}
5624 	}
5625 	return mErr.ErrorOrNil()
5626 }
5627
5628 // validateScalingPolicy ensures that the scaling policy has consistent
5629 // min and max, not in conflict with the task group count
5630 func (tg *TaskGroup) validateScalingPolicy() error {
5631 	if tg.Scaling == nil {
5632 		return nil
5633 	}
5634
5635 	var mErr multierror.Error
5636
5637 	if tg.Scaling.Min > tg.Scaling.Max {
5638 		mErr.Errors = append(mErr.Errors,
5639 			fmt.Errorf("Scaling policy invalid: maximum count must not be less than minimum count"))
5640 	}
5641
5642 	if int64(tg.Count) < tg.Scaling.Min {
5643 		mErr.Errors = append(mErr.Errors,
5644 			fmt.Errorf("Scaling policy invalid: task group count must not be less than minimum count in scaling policy"))
5645 	}
5646
5647 	if tg.Scaling.Max < int64(tg.Count) {
5648 		mErr.Errors = append(mErr.Errors,
5649 			fmt.Errorf("Scaling policy invalid: task group count must not be greater than maximum count in scaling policy"))
5650 	}
5651
5652 	return mErr.ErrorOrNil()
5653 }
5654
5655 // Warnings returns a list of warnings that may be from dubious settings or
5656 // deprecation warnings.
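//
// For example (values invented), a group with Count = 2 and an update strategy
// with MaxParallel = 5 yields the max-parallel warning below, since a
// destructive change would replace every allocation at once:
//
//	tg := &TaskGroup{Name: "web", Count: 2,
//		Update: &UpdateStrategy{MaxParallel: 5}}
//	err := tg.Warnings(job) // job is some *Job; err mentions "(5 > 2)"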
5657 func (tg *TaskGroup) Warnings(j *Job) error { 5658 var mErr multierror.Error 5659 5660 // Validate the update strategy 5661 if u := tg.Update; u != nil { 5662 // Check the counts are appropriate 5663 if u.MaxParallel > tg.Count { 5664 mErr.Errors = append(mErr.Errors, 5665 fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+ 5666 "A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count)) 5667 } 5668 } 5669 5670 for _, t := range tg.Tasks { 5671 if err := t.Warnings(); err != nil { 5672 err = multierror.Prefix(err, fmt.Sprintf("Task %q:", t.Name)) 5673 mErr.Errors = append(mErr.Errors, err) 5674 } 5675 } 5676 5677 return mErr.ErrorOrNil() 5678 } 5679 5680 // LookupTask finds a task by name 5681 func (tg *TaskGroup) LookupTask(name string) *Task { 5682 for _, t := range tg.Tasks { 5683 if t.Name == name { 5684 return t 5685 } 5686 } 5687 return nil 5688 } 5689 5690 func (tg *TaskGroup) UsesConnect() bool { 5691 for _, service := range tg.Services { 5692 if service.Connect != nil { 5693 if service.Connect.Native || service.Connect.SidecarService != nil { 5694 return true 5695 } 5696 } 5697 } 5698 return false 5699 } 5700 5701 func (tg *TaskGroup) GoString() string { 5702 return fmt.Sprintf("*%#v", *tg) 5703 } 5704 5705 // CheckRestart describes if and when a task should be restarted based on 5706 // failing health checks. 5707 type CheckRestart struct { 5708 Limit int // Restart task after this many unhealthy intervals 5709 Grace time.Duration // Grace time to give tasks after starting to get healthy 5710 IgnoreWarnings bool // If true treat checks in `warning` as passing 5711 } 5712 5713 func (c *CheckRestart) Copy() *CheckRestart { 5714 if c == nil { 5715 return nil 5716 } 5717 5718 nc := new(CheckRestart) 5719 *nc = *c 5720 return nc 5721 } 5722 5723 func (c *CheckRestart) Equals(o *CheckRestart) bool { 5724 if c == nil || o == nil { 5725 return c == o 5726 } 5727 5728 if c.Limit != o.Limit { 5729 return false 5730 } 5731 5732 if c.Grace != o.Grace { 5733 return false 5734 } 5735 5736 if c.IgnoreWarnings != o.IgnoreWarnings { 5737 return false 5738 } 5739 5740 return true 5741 } 5742 5743 func (c *CheckRestart) Validate() error { 5744 if c == nil { 5745 return nil 5746 } 5747 5748 var mErr multierror.Error 5749 if c.Limit < 0 { 5750 mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit)) 5751 } 5752 5753 if c.Grace < 0 { 5754 mErr.Errors = append(mErr.Errors, fmt.Errorf("grace period must be greater than or equal to 0 but found %d", c.Grace)) 5755 } 5756 5757 return mErr.ErrorOrNil() 5758 } 5759 5760 const ( 5761 // DefaultKillTimeout is the default timeout between signaling a task it 5762 // will be killed and killing it. 5763 DefaultKillTimeout = 5 * time.Second 5764 ) 5765 5766 // LogConfig provides configuration for log rotation 5767 type LogConfig struct { 5768 MaxFiles int 5769 MaxFileSizeMB int 5770 } 5771 5772 func (l *LogConfig) Copy() *LogConfig { 5773 if l == nil { 5774 return nil 5775 } 5776 return &LogConfig{ 5777 MaxFiles: l.MaxFiles, 5778 MaxFileSizeMB: l.MaxFileSizeMB, 5779 } 5780 } 5781 5782 // DefaultLogConfig returns the default LogConfig values. 5783 func DefaultLogConfig() *LogConfig { 5784 return &LogConfig{ 5785 MaxFiles: 10, 5786 MaxFileSizeMB: 10, 5787 } 5788 } 5789 5790 // Validate returns an error if the log config specified are less than 5791 // the minimum allowed. 
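//
// For instance:
//
//	(&LogConfig{MaxFiles: 0, MaxFileSizeMB: 10}).Validate() // error: minimum number of files is 1
//	DefaultLogConfig().Validate()                           // nil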
5792 func (l *LogConfig) Validate() error {
5793 	var mErr multierror.Error
5794 	if l.MaxFiles < 1 {
5795 		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
5796 	}
5797 	if l.MaxFileSizeMB < 1 {
5798 		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
5799 	}
5800 	return mErr.ErrorOrNil()
5801 }
5802
5803 // Task is a single process, typically executed as part of a task group.
5804 type Task struct {
5805 	// Name of the task
5806 	Name string
5807
5808 	// Driver is used to control which driver is used
5809 	Driver string
5810
5811 	// User is used to determine which user will run the task. It defaults to
5812 	// the same user the Nomad client is being run as.
5813 	User string
5814
5815 	// Config is provided to the driver to initialize
5816 	Config map[string]interface{}
5817
5818 	// Env is a map of environment variables to be used by the driver
5819 	Env map[string]string
5820
5821 	// Services is the list of service definitions exposed by the Task
5822 	Services []*Service
5823
5824 	// Vault is used to define the set of Vault policies that this task should
5825 	// have access to.
5826 	Vault *Vault
5827
5828 	// Templates are the set of templates to be rendered for the task.
5829 	Templates []*Template
5830
5831 	// Constraints can be specified at a task level and apply only to
5832 	// the particular task.
5833 	Constraints []*Constraint
5834
5835 	// Affinities can be specified at the task level to express
5836 	// scheduling preferences
5837 	Affinities []*Affinity
5838
5839 	// Resources is the resources needed by this task
5840 	Resources *Resources
5841
5842 	// RestartPolicy is the task's restart policy; it defaults to the group's
5843 	RestartPolicy *RestartPolicy
5844
5845 	// DispatchPayload configures how the task retrieves its input from a dispatch
5846 	DispatchPayload *DispatchPayloadConfig
5847
5848 	Lifecycle *TaskLifecycleConfig
5849
5850 	// Meta is used to associate arbitrary metadata with this
5851 	// task. This is opaque to Nomad.
5852 	Meta map[string]string
5853
5854 	// KillTimeout is the time between signaling a task that it will be
5855 	// killed and killing it.
5856 	KillTimeout time.Duration
5857
5858 	// LogConfig provides configuration for log rotation
5859 	LogConfig *LogConfig
5860
5861 	// Artifacts is a list of artifacts to download and extract before running
5862 	// the task.
5863 	Artifacts []*TaskArtifact
5864
5865 	// Leader marks the task as the leader within the group. When the leader
5866 	// task exits, other tasks will be gracefully terminated.
5867 	Leader bool
5868
5869 	// ShutdownDelay is the duration of the delay between deregistering a
5870 	// task from Consul and sending it a signal to shutdown. See #2441
5871 	ShutdownDelay time.Duration
5872
5873 	// VolumeMounts is a list of Volume name <-> mount configurations that will be
5874 	// attached to this task.
5875 	VolumeMounts []*VolumeMount
5876
5877 	// KillSignal is the kill signal to use for the task.
5878 	//
5879 	// This is an optional specification and defaults to SIGINT if it is
5880 	// not set.
5881 	KillSignal string
5882
5883 	// Kind is used internally to manage tasks according to their TaskKind.
5884 	// The initial use case is for Consul Connect.
5885 	Kind TaskKind
5886
5887 	// CSIPluginConfig is used to configure the plugin supervisor for the task.
5888 	CSIPluginConfig *TaskCSIPluginConfig
5889 }
5890
5891 // UsesConnect is for conveniently detecting if the Task is able to make use
5892 // of Consul Connect features.
This will be indicated in the TaskKind of the 5893 // Task, which exports known types of Tasks. 5894 // 5895 // Currently only Consul Connect Proxy tasks are known. 5896 // (Consul Connect Native tasks will be supported soon). 5897 func (t *Task) UsesConnect() bool { 5898 // todo(shoenig): native tasks 5899 switch { 5900 case t.Kind.IsConnectProxy(): 5901 return true 5902 default: 5903 return false 5904 } 5905 } 5906 5907 func (t *Task) Copy() *Task { 5908 if t == nil { 5909 return nil 5910 } 5911 nt := new(Task) 5912 *nt = *t 5913 nt.Env = helper.CopyMapStringString(nt.Env) 5914 5915 if t.Services != nil { 5916 services := make([]*Service, len(nt.Services)) 5917 for i, s := range nt.Services { 5918 services[i] = s.Copy() 5919 } 5920 nt.Services = services 5921 } 5922 5923 nt.Constraints = CopySliceConstraints(nt.Constraints) 5924 nt.Affinities = CopySliceAffinities(nt.Affinities) 5925 nt.VolumeMounts = CopySliceVolumeMount(nt.VolumeMounts) 5926 nt.CSIPluginConfig = nt.CSIPluginConfig.Copy() 5927 5928 nt.Vault = nt.Vault.Copy() 5929 nt.Resources = nt.Resources.Copy() 5930 nt.LogConfig = nt.LogConfig.Copy() 5931 nt.Meta = helper.CopyMapStringString(nt.Meta) 5932 nt.DispatchPayload = nt.DispatchPayload.Copy() 5933 nt.Lifecycle = nt.Lifecycle.Copy() 5934 5935 if t.Artifacts != nil { 5936 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 5937 for _, a := range nt.Artifacts { 5938 artifacts = append(artifacts, a.Copy()) 5939 } 5940 nt.Artifacts = artifacts 5941 } 5942 5943 if i, err := copystructure.Copy(nt.Config); err != nil { 5944 panic(err.Error()) 5945 } else { 5946 nt.Config = i.(map[string]interface{}) 5947 } 5948 5949 if t.Templates != nil { 5950 templates := make([]*Template, len(t.Templates)) 5951 for i, tmpl := range nt.Templates { 5952 templates[i] = tmpl.Copy() 5953 } 5954 nt.Templates = templates 5955 } 5956 5957 return nt 5958 } 5959 5960 // Canonicalize canonicalizes fields in the task. 5961 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 5962 // Ensure that an empty and nil map are treated the same to avoid scheduling 5963 // problems since we use reflect DeepEquals. 5964 if len(t.Meta) == 0 { 5965 t.Meta = nil 5966 } 5967 if len(t.Config) == 0 { 5968 t.Config = nil 5969 } 5970 if len(t.Env) == 0 { 5971 t.Env = nil 5972 } 5973 5974 for _, service := range t.Services { 5975 service.Canonicalize(job.Name, tg.Name, t.Name) 5976 } 5977 5978 // If Resources are nil initialize them to defaults, otherwise canonicalize 5979 if t.Resources == nil { 5980 t.Resources = DefaultResources() 5981 } else { 5982 t.Resources.Canonicalize() 5983 } 5984 5985 if t.RestartPolicy == nil { 5986 t.RestartPolicy = tg.RestartPolicy 5987 } 5988 5989 // Set the default timeout if it is not specified. 5990 if t.KillTimeout == 0 { 5991 t.KillTimeout = DefaultKillTimeout 5992 } 5993 5994 if t.Vault != nil { 5995 t.Vault.Canonicalize() 5996 } 5997 5998 for _, template := range t.Templates { 5999 template.Canonicalize() 6000 } 6001 } 6002 6003 func (t *Task) GoString() string { 6004 return fmt.Sprintf("*%#v", *t) 6005 } 6006 6007 // Validate is used to sanity check a task 6008 func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string, tgServices []*Service) error { 6009 var mErr multierror.Error 6010 if t.Name == "" { 6011 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 6012 } 6013 if strings.ContainsAny(t.Name, `/\`) { 6014 // We enforce this so that when creating the directory on disk it will 6015 // not have any slashes. 
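// (For example, a task named "web/api" would otherwise produce a nested
// path under the allocation directory; a hypothetical name for illustration.)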
6016 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes"))
6017 }
6018 if t.Driver == "" {
6019 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver"))
6020 }
6021 if t.KillTimeout < 0 {
6022 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must not be negative"))
6023 }
6024 if t.ShutdownDelay < 0 {
6025 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must not be negative"))
6026 }
6027
6028 // Validate the resources.
6029 if t.Resources == nil {
6030 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources"))
6031 } else if err := t.Resources.Validate(); err != nil {
6032 mErr.Errors = append(mErr.Errors, err)
6033 }
6034
6035 // Validate the log config
6036 if t.LogConfig == nil {
6037 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config"))
6038 } else if err := t.LogConfig.Validate(); err != nil {
6039 mErr.Errors = append(mErr.Errors, err)
6040 }
6041
6042 for idx, constr := range t.Constraints {
6043 if err := constr.Validate(); err != nil {
6044 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
6045 mErr.Errors = append(mErr.Errors, outer)
6046 }
6047
6048 switch constr.Operand {
6049 case ConstraintDistinctHosts, ConstraintDistinctProperty:
6050 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand)
6051 mErr.Errors = append(mErr.Errors, outer)
6052 }
6053 }
6054
6055 if jobType == JobTypeSystem {
6056 if t.Affinities != nil {
6057 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza"))
6058 }
6059 } else {
6060 for idx, affinity := range t.Affinities {
6061 if err := affinity.Validate(); err != nil {
6062 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
6063 mErr.Errors = append(mErr.Errors, outer)
6064 }
6065 }
6066 }
6067
6068 // Validate Services
6069 if err := validateServices(t); err != nil {
6070 mErr.Errors = append(mErr.Errors, err)
6071 }
6072
6073 if t.LogConfig != nil && ephemeralDisk != nil {
6074 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB)
6075 if ephemeralDisk.SizeMB <= logUsage {
6076 mErr.Errors = append(mErr.Errors,
6077 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)",
6078 logUsage, ephemeralDisk.SizeMB))
6079 }
6080 }
6081
6082 for idx, artifact := range t.Artifacts {
6083 if err := artifact.Validate(); err != nil {
6084 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err)
6085 mErr.Errors = append(mErr.Errors, outer)
6086 }
6087 }
6088
6089 if t.Vault != nil {
6090 if err := t.Vault.Validate(); err != nil {
6091 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err))
6092 }
6093 }
6094
6095 destinations := make(map[string]int, len(t.Templates))
6096 for idx, tmpl := range t.Templates {
6097 if err := tmpl.Validate(); err != nil {
6098 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err)
6099 mErr.Errors = append(mErr.Errors, outer)
6100 }
6101
6102 if other, ok := destinations[tmpl.DestPath]; ok {
6103 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other)
6104 mErr.Errors = append(mErr.Errors, outer)
6105 } else {
6106 destinations[tmpl.DestPath] = idx + 1
6107 }
6108 }
6109
6110 // Validate the dispatch payload block if present
6111 if t.DispatchPayload != nil {
6112 if err := t.DispatchPayload.Validate(); err != nil {
6113 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err))
6114 }
6115 }
6116
6117 // Validate the Lifecycle block if present
6118 if t.Lifecycle != nil {
6119 if err := t.Lifecycle.Validate(); err != nil {
6120 mErr.Errors = append(mErr.Errors, fmt.Errorf("Lifecycle validation failed: %v", err))
6121 }
6122
6123 }
6124
6125 // Validation for the TaskKind field, which is used for Consul Connect integration
6126 if t.Kind.IsConnectProxy() {
6127 // This task is a Connect proxy so it should not have service stanzas
6128 if len(t.Services) > 0 {
6129 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have a service stanza"))
6130 }
6131 if t.Leader {
6132 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have leader set"))
6133 }
6134
6135 // Ensure the proxy task has a corresponding service entry
6136 serviceErr := ValidateConnectProxyService(t.Kind.Value(), tgServices)
6137 if serviceErr != nil {
6138 mErr.Errors = append(mErr.Errors, serviceErr)
6139 }
6140 }
6141
6142 // Validation for volumes
6143 for idx, vm := range t.VolumeMounts {
6144 if !MountPropagationModeIsValid(vm.PropagationMode) {
6145 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume Mount (%d) has an invalid propagation mode: \"%s\"", idx, vm.PropagationMode))
6146 }
6147 }
6148
6149 // Validate CSI Plugin Config
6150 if t.CSIPluginConfig != nil {
6151 if t.CSIPluginConfig.ID == "" {
6152 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig must have a non-empty PluginID"))
6153 }
6154
6155 if !CSIPluginTypeIsValid(t.CSIPluginConfig.Type) {
6156 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig PluginType must be one of 'node', 'controller', or 'monolith', got: \"%s\"", t.CSIPluginConfig.Type))
6157 }
6158
6159 // TODO: Investigate validation of the PluginMountDir. Not much we can do apart from check IsAbs until after we understand its execution environment though :(
6160 }
6161
6162 return mErr.ErrorOrNil()
6163 }
6164
6165 // validateServices takes a task and validates the services within it are valid
6166 // and reference ports that exist.
6167 func validateServices(t *Task) error {
6168 var mErr multierror.Error
6169
6170 // Ensure that services don't ask for nonexistent ports and their names are
6171 // unique.
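// servicePorts, built below, maps a port label to the set of service names
// that reference it; e.g. {"http": {"web": {}}} for a hypothetical service
// "web" registered on port label "http".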
6172 servicePorts := make(map[string]map[string]struct{}) 6173 addServicePort := func(label, service string) { 6174 if _, ok := servicePorts[label]; !ok { 6175 servicePorts[label] = map[string]struct{}{} 6176 } 6177 servicePorts[label][service] = struct{}{} 6178 } 6179 knownServices := make(map[string]struct{}) 6180 for i, service := range t.Services { 6181 if err := service.Validate(); err != nil { 6182 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 6183 mErr.Errors = append(mErr.Errors, outer) 6184 } 6185 6186 // Ensure that services with the same name are not being registered for 6187 // the same port 6188 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 6189 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 6190 } 6191 knownServices[service.Name+service.PortLabel] = struct{}{} 6192 6193 if service.PortLabel != "" { 6194 if service.AddressMode == "driver" { 6195 // Numeric port labels are valid for address_mode=driver 6196 _, err := strconv.Atoi(service.PortLabel) 6197 if err != nil { 6198 // Not a numeric port label, add it to list to check 6199 addServicePort(service.PortLabel, service.Name) 6200 } 6201 } else { 6202 addServicePort(service.PortLabel, service.Name) 6203 } 6204 } 6205 6206 // Ensure that check names are unique and have valid ports 6207 knownChecks := make(map[string]struct{}) 6208 for _, check := range service.Checks { 6209 if _, ok := knownChecks[check.Name]; ok { 6210 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 6211 } 6212 knownChecks[check.Name] = struct{}{} 6213 6214 if !check.RequiresPort() { 6215 // No need to continue validating check if it doesn't need a port 6216 continue 6217 } 6218 6219 effectivePort := check.PortLabel 6220 if effectivePort == "" { 6221 // Inherits from service 6222 effectivePort = service.PortLabel 6223 } 6224 6225 if effectivePort == "" { 6226 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is missing a port", check.Name)) 6227 continue 6228 } 6229 6230 isNumeric := false 6231 portNumber, err := strconv.Atoi(effectivePort) 6232 if err == nil { 6233 isNumeric = true 6234 } 6235 6236 // Numeric ports are fine for address_mode = "driver" 6237 if check.AddressMode == "driver" && isNumeric { 6238 if portNumber <= 0 { 6239 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber)) 6240 } 6241 continue 6242 } 6243 6244 if isNumeric { 6245 mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, check.Name, portNumber)) 6246 continue 6247 } 6248 6249 // PortLabel must exist, report errors by its parent service 6250 addServicePort(effectivePort, service.Name) 6251 } 6252 } 6253 6254 // Get the set of port labels. 6255 portLabels := make(map[string]struct{}) 6256 if t.Resources != nil { 6257 for _, network := range t.Resources.Networks { 6258 ports := network.PortLabels() 6259 for portLabel := range ports { 6260 portLabels[portLabel] = struct{}{} 6261 } 6262 } 6263 } 6264 6265 // Iterate over a sorted list of keys to make error listings stable 6266 keys := make([]string, 0, len(servicePorts)) 6267 for p := range servicePorts { 6268 keys = append(keys, p) 6269 } 6270 sort.Strings(keys) 6271 6272 // Ensure all ports referenced in services exist. 
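// For example (hypothetical labels), a service naming port "https" fails
// here unless some network stanza above declared a port labeled "https".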
6273 for _, servicePort := range keys {
6274 services := servicePorts[servicePort]
6275 _, ok := portLabels[servicePort]
6276 if !ok {
6277 names := make([]string, 0, len(services))
6278 for name := range services {
6279 names = append(names, name)
6280 }
6281
6282 // Keep order deterministic
6283 sort.Strings(names)
6284 joined := strings.Join(names, ", ")
6285 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined)
6286 mErr.Errors = append(mErr.Errors, err)
6287 }
6288 }
6289
6290 // Ensure address mode is valid
6291 return mErr.ErrorOrNil()
6292 }
6293
6294 func (t *Task) Warnings() error {
6295 var mErr multierror.Error
6296
6297 // Validate the resources
6298 if t.Resources != nil && t.Resources.IOPS != 0 {
6299 mErr.Errors = append(mErr.Errors, fmt.Errorf("IOPS has been deprecated as of Nomad 0.9.0. Please remove IOPS from resource stanza."))
6300 }
6301
6302 for idx, tmpl := range t.Templates {
6303 if err := tmpl.Warnings(); err != nil {
6304 err = multierror.Prefix(err, fmt.Sprintf("Template[%d]", idx))
6305 mErr.Errors = append(mErr.Errors, err)
6306 }
6307 }
6308
6309 return mErr.ErrorOrNil()
6310 }
6311
6312 // TaskKind identifies the special kinds of tasks using the following format:
6313 // `<kind_name>(:<identifier>)`. The TaskKind can optionally include an identifier that
6314 // is opaque to the Task. This identifier can be used to relate the task to some
6315 // other entity based on the kind.
6316 //
6317 // For example, a task may have the TaskKind of `connect-proxy:service` where
6318 // 'connect-proxy' is the kind name and 'service' is the identifier that relates the
6319 // task to the name of the service for which it is a connect proxy.
6320 type TaskKind string
6321
6322 // Name returns the kind name portion of the TaskKind
6323 func (k TaskKind) Name() string {
6324 return strings.Split(string(k), ":")[0]
6325 }
6326
6327 // Value returns the identifier of the TaskKind or an empty string if it doesn't
6328 // include one.
6329 func (k TaskKind) Value() string {
6330 if s := strings.SplitN(string(k), ":", 2); len(s) > 1 {
6331 return s[1]
6332 }
6333 return ""
6334 }
6335
6336 // IsConnectProxy returns true if the TaskKind is connect-proxy
6337 func (k TaskKind) IsConnectProxy() bool {
6338 return strings.HasPrefix(string(k), ConnectProxyPrefix+":") && len(k) > len(ConnectProxyPrefix)+1
6339 }
6340
6341 // ConnectProxyPrefix is the prefix used for fields referencing a Consul Connect
6342 // Proxy
6343 const ConnectProxyPrefix = "connect-proxy"
6344
6345 // ValidateConnectProxyService checks that the service that is being
6346 // proxied by this task exists in the task group and contains
6347 // valid Connect config.
6348 func ValidateConnectProxyService(serviceName string, tgServices []*Service) error {
6349 found := false
6350 names := make([]string, 0, len(tgServices))
6351 for _, svc := range tgServices {
6352 if svc.Connect == nil || svc.Connect.SidecarService == nil {
6353 continue
6354 }
6355
6356 if svc.Name == serviceName {
6357 found = true
6358 break
6359 }
6360
6361 // Build up list of mismatched Connect service names for error
6362 // reporting.
6363 names = append(names, svc.Name)
6364 }
6365
6366 if !found {
6367 if len(names) == 0 {
6368 return fmt.Errorf("No Connect services in task group with Connect proxy (%q)", serviceName)
6369 } else {
6370 return fmt.Errorf("Connect proxy service name (%q) not found in Connect services from task group: %s", serviceName, names)
6371 }
6372 }
6373
6374 return nil
6375 }
6376
6377 const (
6378 // TemplateChangeModeNoop marks that no action should be taken if the
6379 // template is re-rendered
6380 TemplateChangeModeNoop = "noop"
6381
6382 // TemplateChangeModeSignal marks that the task should be signaled if the
6383 // template is re-rendered
6384 TemplateChangeModeSignal = "signal"
6385
6386 // TemplateChangeModeRestart marks that the task should be restarted if the
6387 // template is re-rendered
6388 TemplateChangeModeRestart = "restart"
6389 )
6390
6391 var (
6392 // TemplateChangeModeInvalidError is the error for when an invalid change
6393 // mode is given
6394 TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart")
6395 )
6396
6397 // Template represents a template configuration to be rendered for a given task
6398 type Template struct {
6399 // SourcePath is the path to the template to be rendered
6400 SourcePath string
6401
6402 // DestPath is the path to where the template should be rendered
6403 DestPath string
6404
6405 // EmbeddedTmpl stores the raw template. This is useful for smaller templates
6406 // where they are embedded in the job file rather than sent as an artifact
6407 EmbeddedTmpl string
6408
6409 // ChangeMode indicates what should be done if the template is re-rendered
6410 ChangeMode string
6411
6412 // ChangeSignal is the signal that should be sent if the change mode
6413 // requires it.
6414 ChangeSignal string
6415
6416 // Splay is used to avoid coordinated restarts of processes by applying a
6417 // random wait between 0 and the given splay value before signalling the
6418 // application of a change
6419 Splay time.Duration
6420
6421 // Perms is the permission the file should be written out with.
6422 Perms string
6423
6424 // LeftDelim and RightDelim are optional configurations to control what
6425 // delimiter is utilized when parsing the template.
6426 LeftDelim string
6427 RightDelim string
6428
6429 // Envvars enables exposing the template as environment variables
6430 // instead of as a file. The template must be of the form:
6431 //
6432 // VAR_NAME_1={{ key service/my-key }}
6433 // VAR_NAME_2=raw string and {{ env "attr.kernel.name" }}
6434 //
6435 // Lines will be split on the initial "=" with the first part being the
6436 // key name and the second part the value.
6437 // Empty lines and lines starting with # will be ignored, but to avoid
6438 // escaping issues #s within lines will not be treated as comments.
6439 Envvars bool
6440
6441 // VaultGrace is the grace duration between lease renewal and reacquiring a
6442 // secret. If the lease of a secret is less than the grace, a new secret is
6443 // acquired.
6444 // COMPAT(0.12) VaultGrace has been ignored by Vault since Vault v0.5.
6445 VaultGrace time.Duration
6446 }
6447
6448 // DefaultTemplate returns a default template.
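// A hedged usage sketch (values as set by the function below):
//
//	t := DefaultTemplate()
//	// t.ChangeMode == TemplateChangeModeRestart
//	// t.Splay == 5*time.Second, t.Perms == "0644"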
6449 func DefaultTemplate() *Template {
6450 return &Template{
6451 ChangeMode: TemplateChangeModeRestart,
6452 Splay: 5 * time.Second,
6453 Perms: "0644",
6454 }
6455 }
6456
6457 func (t *Template) Copy() *Template {
6458 if t == nil {
6459 return nil
6460 }
6461 copy := new(Template)
6462 *copy = *t
6463 return copy
6464 }
6465
6466 func (t *Template) Canonicalize() {
6467 if t.ChangeSignal != "" {
6468 t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
6469 }
6470 }
6471
6472 func (t *Template) Validate() error {
6473 var mErr multierror.Error
6474
6475 // Verify we have something to render
6476 if t.SourcePath == "" && t.EmbeddedTmpl == "" {
6477 multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template"))
6478 }
6479
6480 // Verify we can render somewhere
6481 if t.DestPath == "" {
6482 multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template"))
6483 }
6484
6485 // Verify the destination doesn't escape
6486 escaped, err := PathEscapesAllocDir("task", t.DestPath)
6487 if err != nil {
6488 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
6489 } else if escaped {
6490 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
6491 }
6492
6493 // Verify a proper change mode
6494 switch t.ChangeMode {
6495 case TemplateChangeModeNoop, TemplateChangeModeRestart:
6496 case TemplateChangeModeSignal:
6497 if t.ChangeSignal == "" {
6498 multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal"))
6499 }
6500 if t.Envvars {
6501 multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates"))
6502 }
6503 default:
6504 multierror.Append(&mErr, TemplateChangeModeInvalidError)
6505 }
6506
6507 // Verify the splay is non-negative
6508 if t.Splay < 0 {
6509 multierror.Append(&mErr, fmt.Errorf("Splay value must not be negative"))
6510 }
6511
6512 // Verify the permissions
6513 if t.Perms != "" {
6514 if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil {
6515 multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err))
6516 }
6517 }
6518
6519 return mErr.ErrorOrNil()
6520 }
6521
6522 func (t *Template) Warnings() error {
6523 var mErr multierror.Error
6524
6525 // Deprecation notice for vault_grace
6526 if t.VaultGrace != 0 {
6527 mErr.Errors = append(mErr.Errors, fmt.Errorf("VaultGrace has been deprecated as of Nomad 0.11 and ignored since Vault 0.5. Please remove VaultGrace / vault_grace from template stanza."))
6528 }
6529
6530 return mErr.ErrorOrNil()
6531 }
6532
6533 // AllocStateField identifies the field an AllocState refers to; an AllocState records a single event that changes the state of the whole allocation
6534 type AllocStateField uint8
6535
6536 const (
6537 AllocStateFieldClientStatus AllocStateField = iota
6538 )
6539
6540 type AllocState struct {
6541 Field AllocStateField
6542 Value string
6543 Time time.Time
6544 }
6545
6546 // Set of possible states for a task.
6547 const (
6548 TaskStatePending = "pending" // The task is waiting to be run.
6549 TaskStateRunning = "running" // The task is currently running.
6550 TaskStateDead = "dead" // Terminal state of task.
6551 )
6552
6553 // TaskState tracks the current state of a task and events that caused state
6554 // transitions.
6555 type TaskState struct {
6556 // The current state of the task.
6557 State string
6558
6559 // Failed marks a task as having failed
6560 Failed bool
6561
6562 // Restarts is the number of times the task has restarted
6563 Restarts uint64
6564
6565 // LastRestart is the time the task last restarted. It is updated each time the
6566 // task restarts
6567 LastRestart time.Time
6568
6569 // StartedAt is the time the task was started. It is updated each time the
6570 // task starts
6571 StartedAt time.Time
6572
6573 // FinishedAt is the time at which the task transitioned to dead and will
6574 // not be started again.
6575 FinishedAt time.Time
6576
6577 // Series of task events that transition the state of the task.
6578 Events []*TaskEvent
6579 }
6580
6581 // NewTaskState returns a TaskState initialized in the Pending state.
6582 func NewTaskState() *TaskState {
6583 return &TaskState{
6584 State: TaskStatePending,
6585 }
6586 }
6587
6588 // Canonicalize ensures the TaskState has a State set. It should default to
6589 // Pending.
6590 func (ts *TaskState) Canonicalize() {
6591 if ts.State == "" {
6592 ts.State = TaskStatePending
6593 }
6594 }
6595
6596 func (ts *TaskState) Copy() *TaskState {
6597 if ts == nil {
6598 return nil
6599 }
6600 copy := new(TaskState)
6601 *copy = *ts
6602
6603 if ts.Events != nil {
6604 copy.Events = make([]*TaskEvent, len(ts.Events))
6605 for i, e := range ts.Events {
6606 copy.Events[i] = e.Copy()
6607 }
6608 }
6609 return copy
6610 }
6611
6612 // Successful returns whether a task finished successfully. This doesn't really
6613 // have meaning on a non-batch allocation because a service and system
6614 // allocation should not finish.
6615 func (ts *TaskState) Successful() bool {
6616 return ts.State == TaskStateDead && !ts.Failed
6617 }
6618
6619 const (
6620 // TaskSetupFailure indicates that the task could not be started due to
6621 // a setup failure.
6622 TaskSetupFailure = "Setup Failure"
6623
6624 // TaskDriverFailure indicates that the task could not be started due to a
6625 // failure in the driver. TaskDriverFailure is considered Recoverable.
6626 TaskDriverFailure = "Driver Failure"
6627
6628 // TaskReceived signals that the task has been pulled by the client at the
6629 // given timestamp.
6630 TaskReceived = "Received"
6631
6632 // TaskFailedValidation indicates the task was invalid and as such was not run.
6633 // TaskFailedValidation is not considered Recoverable.
6634 TaskFailedValidation = "Failed Validation"
6635
6636 // TaskStarted signals that the task was started and its timestamp can be
6637 // used to determine the running length of the task.
6638 TaskStarted = "Started"
6639
6640 // TaskTerminated indicates that the task was started and exited.
6641 TaskTerminated = "Terminated"
6642
6643 // TaskKilling indicates a kill signal has been sent to the task.
6644 TaskKilling = "Killing"
6645
6646 // TaskKilled indicates a user has killed the task.
6647 TaskKilled = "Killed"
6648
6649 // TaskRestarting indicates that the task terminated and is being restarted.
6650 TaskRestarting = "Restarting"
6651
6652 // TaskNotRestarting indicates that the task has failed and is not being
6653 // restarted because it has exceeded its restart policy.
6654 TaskNotRestarting = "Not Restarting"
6655
6656 // TaskRestartSignal indicates that the task has been signalled to be
6657 // restarted
6658 TaskRestartSignal = "Restart Signaled"
6659
6660 // TaskSignaling indicates that the task is being signalled.
6661 TaskSignaling = "Signaling"
6662
6663 // TaskDownloadingArtifacts means the task is downloading the artifacts
6664 // specified in the task.
6665 TaskDownloadingArtifacts = "Downloading Artifacts"
6666
6667 // TaskArtifactDownloadFailed indicates that downloading the artifacts
6668 // failed.
6669 TaskArtifactDownloadFailed = "Failed Artifact Download"
6670
6671 // TaskBuildingTaskDir indicates that the task directory/chroot is being
6672 // built.
6673 TaskBuildingTaskDir = "Building Task Directory"
6674
6675 // TaskSetup indicates the task runner is setting up the task environment
6676 TaskSetup = "Task Setup"
6677
6678 // TaskDiskExceeded indicates that one of the tasks in a taskgroup has
6679 // exceeded the requested disk resources.
6680 TaskDiskExceeded = "Disk Resources Exceeded"
6681
6682 // TaskSiblingFailed indicates that a sibling task in the task group has
6683 // failed.
6684 TaskSiblingFailed = "Sibling Task Failed"
6685
6686 // TaskDriverMessage is an informational event message emitted by
6687 // drivers such as when they're performing a long running action like
6688 // downloading an image.
6689 TaskDriverMessage = "Driver"
6690
6691 // TaskLeaderDead indicates that the leader task within the task group has finished.
6692 TaskLeaderDead = "Leader Task Dead"
6693
6694 // TaskHookFailed indicates that one of the hooks for a task failed.
6695 TaskHookFailed = "Task hook failed"
6696
6697 // TaskRestoreFailed indicates Nomad was unable to reattach to a
6698 // restored task.
6699 TaskRestoreFailed = "Failed Restoring Task"
6700
6701 // TaskPluginUnhealthy indicates that a plugin managed by Nomad became unhealthy
6702 TaskPluginUnhealthy = "Plugin became unhealthy"
6703
6704 // TaskPluginHealthy indicates that a plugin managed by Nomad became healthy
6705 TaskPluginHealthy = "Plugin became healthy"
6706 )
6707
6708 // TaskEvent is an event that affects the state of a task and contains meta-data
6709 // appropriate to the event's type.
6710 type TaskEvent struct {
6711 Type string
6712 Time int64 // Unix Nanosecond timestamp
6713
6714 Message string // A possible message explaining the termination of the task.
6715
6716 // DisplayMessage is a human friendly message about the event
6717 DisplayMessage string
6718
6719 // Details is a map with annotated info about the event
6720 Details map[string]string
6721
6722 // DEPRECATION NOTICE: The following fields are deprecated and will be removed
6723 // in a future release. Field values are available in the Details map.
6724
6725 // FailsTask marks whether this event fails the task.
6726 // Deprecated, use Details["fails_task"] to access this.
6727 FailsTask bool
6728
6729 // Restart fields.
6730 // Deprecated, use Details["restart_reason"] to access this.
6731 RestartReason string
6732
6733 // Setup Failure fields.
6734 // Deprecated, use Details["setup_error"] to access this.
6735 SetupError string
6736
6737 // Driver Failure fields.
6738 // Deprecated, use Details["driver_error"] to access this.
6739 DriverError string // A driver error occurred while starting the task.
6740
6741 // Task Terminated Fields.
6742
6743 // Deprecated, use Details["exit_code"] to access this.
6744 ExitCode int // The exit code of the task.
6745
6746 // Deprecated, use Details["signal"] to access this.
6747 Signal int // The signal that terminated the task.
6748
6749 // Killing fields
6750 // Deprecated, use Details["kill_timeout"] to access this.
6751 KillTimeout time.Duration
6752
6753 // Task Killed Fields.
6754 // Deprecated, use Details["kill_error"] to access this. 6755 KillError string // Error killing the task. 6756 6757 // KillReason is the reason the task was killed 6758 // Deprecated, use Details["kill_reason"] to access this. 6759 KillReason string 6760 6761 // TaskRestarting fields. 6762 // Deprecated, use Details["start_delay"] to access this. 6763 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 6764 6765 // Artifact Download fields 6766 // Deprecated, use Details["download_error"] to access this. 6767 DownloadError string // Error downloading artifacts 6768 6769 // Validation fields 6770 // Deprecated, use Details["validation_error"] to access this. 6771 ValidationError string // Validation error 6772 6773 // The maximum allowed task disk size. 6774 // Deprecated, use Details["disk_limit"] to access this. 6775 DiskLimit int64 6776 6777 // Name of the sibling task that caused termination of the task that 6778 // the TaskEvent refers to. 6779 // Deprecated, use Details["failed_sibling"] to access this. 6780 FailedSibling string 6781 6782 // VaultError is the error from token renewal 6783 // Deprecated, use Details["vault_renewal_error"] to access this. 6784 VaultError string 6785 6786 // TaskSignalReason indicates the reason the task is being signalled. 6787 // Deprecated, use Details["task_signal_reason"] to access this. 6788 TaskSignalReason string 6789 6790 // TaskSignal is the signal that was sent to the task 6791 // Deprecated, use Details["task_signal"] to access this. 6792 TaskSignal string 6793 6794 // DriverMessage indicates a driver action being taken. 6795 // Deprecated, use Details["driver_message"] to access this. 6796 DriverMessage string 6797 6798 // GenericSource is the source of a message. 6799 // Deprecated, is redundant with event type. 6800 GenericSource string 6801 } 6802 6803 func (event *TaskEvent) PopulateEventDisplayMessage() { 6804 // Build up the description based on the event type. 6805 if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why? 6806 return 6807 } 6808 6809 if event.DisplayMessage != "" { 6810 return 6811 } 6812 6813 var desc string 6814 switch event.Type { 6815 case TaskSetup: 6816 desc = event.Message 6817 case TaskStarted: 6818 desc = "Task started by client" 6819 case TaskReceived: 6820 desc = "Task received by client" 6821 case TaskFailedValidation: 6822 if event.ValidationError != "" { 6823 desc = event.ValidationError 6824 } else { 6825 desc = "Validation of task failed" 6826 } 6827 case TaskSetupFailure: 6828 if event.SetupError != "" { 6829 desc = event.SetupError 6830 } else { 6831 desc = "Task setup failed" 6832 } 6833 case TaskDriverFailure: 6834 if event.DriverError != "" { 6835 desc = event.DriverError 6836 } else { 6837 desc = "Failed to start task" 6838 } 6839 case TaskDownloadingArtifacts: 6840 desc = "Client is downloading artifacts" 6841 case TaskArtifactDownloadFailed: 6842 if event.DownloadError != "" { 6843 desc = event.DownloadError 6844 } else { 6845 desc = "Failed to download artifacts" 6846 } 6847 case TaskKilling: 6848 if event.KillReason != "" { 6849 desc = event.KillReason 6850 } else if event.KillTimeout != 0 { 6851 desc = fmt.Sprintf("Sent interrupt. 
Waiting %v before force killing", event.KillTimeout) 6852 } else { 6853 desc = "Sent interrupt" 6854 } 6855 case TaskKilled: 6856 if event.KillError != "" { 6857 desc = event.KillError 6858 } else { 6859 desc = "Task successfully killed" 6860 } 6861 case TaskTerminated: 6862 var parts []string 6863 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 6864 6865 if event.Signal != 0 { 6866 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 6867 } 6868 6869 if event.Message != "" { 6870 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 6871 } 6872 desc = strings.Join(parts, ", ") 6873 case TaskRestarting: 6874 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 6875 if event.RestartReason != "" && event.RestartReason != ReasonWithinPolicy { 6876 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 6877 } else { 6878 desc = in 6879 } 6880 case TaskNotRestarting: 6881 if event.RestartReason != "" { 6882 desc = event.RestartReason 6883 } else { 6884 desc = "Task exceeded restart policy" 6885 } 6886 case TaskSiblingFailed: 6887 if event.FailedSibling != "" { 6888 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 6889 } else { 6890 desc = "Task's sibling failed" 6891 } 6892 case TaskSignaling: 6893 sig := event.TaskSignal 6894 reason := event.TaskSignalReason 6895 6896 if sig == "" && reason == "" { 6897 desc = "Task being sent a signal" 6898 } else if sig == "" { 6899 desc = reason 6900 } else if reason == "" { 6901 desc = fmt.Sprintf("Task being sent signal %v", sig) 6902 } else { 6903 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 6904 } 6905 case TaskRestartSignal: 6906 if event.RestartReason != "" { 6907 desc = event.RestartReason 6908 } else { 6909 desc = "Task signaled to restart" 6910 } 6911 case TaskDriverMessage: 6912 desc = event.DriverMessage 6913 case TaskLeaderDead: 6914 desc = "Leader Task in Group dead" 6915 default: 6916 desc = event.Message 6917 } 6918 6919 event.DisplayMessage = desc 6920 } 6921 6922 func (te *TaskEvent) GoString() string { 6923 return fmt.Sprintf("%v - %v", te.Time, te.Type) 6924 } 6925 6926 // SetDisplayMessage sets the display message of TaskEvent 6927 func (te *TaskEvent) SetDisplayMessage(msg string) *TaskEvent { 6928 te.DisplayMessage = msg 6929 return te 6930 } 6931 6932 // SetMessage sets the message of TaskEvent 6933 func (te *TaskEvent) SetMessage(msg string) *TaskEvent { 6934 te.Message = msg 6935 te.Details["message"] = msg 6936 return te 6937 } 6938 6939 func (te *TaskEvent) Copy() *TaskEvent { 6940 if te == nil { 6941 return nil 6942 } 6943 copy := new(TaskEvent) 6944 *copy = *te 6945 return copy 6946 } 6947 6948 func NewTaskEvent(event string) *TaskEvent { 6949 return &TaskEvent{ 6950 Type: event, 6951 Time: time.Now().UnixNano(), 6952 Details: make(map[string]string), 6953 } 6954 } 6955 6956 // SetSetupError is used to store an error that occurred while setting up the 6957 // task 6958 func (e *TaskEvent) SetSetupError(err error) *TaskEvent { 6959 if err != nil { 6960 e.SetupError = err.Error() 6961 e.Details["setup_error"] = err.Error() 6962 } 6963 return e 6964 } 6965 6966 func (e *TaskEvent) SetFailsTask() *TaskEvent { 6967 e.FailsTask = true 6968 e.Details["fails_task"] = "true" 6969 return e 6970 } 6971 6972 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 6973 if err != nil { 6974 e.DriverError = err.Error() 6975 e.Details["driver_error"] = err.Error() 6976 } 6977 return e 6978 } 6979 6980 func (e *TaskEvent) SetExitCode(c 
int) *TaskEvent { 6981 e.ExitCode = c 6982 e.Details["exit_code"] = fmt.Sprintf("%d", c) 6983 return e 6984 } 6985 6986 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 6987 e.Signal = s 6988 e.Details["signal"] = fmt.Sprintf("%d", s) 6989 return e 6990 } 6991 6992 func (e *TaskEvent) SetSignalText(s string) *TaskEvent { 6993 e.Details["signal"] = s 6994 return e 6995 } 6996 6997 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 6998 if err != nil { 6999 e.Message = err.Error() 7000 e.Details["exit_message"] = err.Error() 7001 } 7002 return e 7003 } 7004 7005 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 7006 if err != nil { 7007 e.KillError = err.Error() 7008 e.Details["kill_error"] = err.Error() 7009 } 7010 return e 7011 } 7012 7013 func (e *TaskEvent) SetKillReason(r string) *TaskEvent { 7014 e.KillReason = r 7015 e.Details["kill_reason"] = r 7016 return e 7017 } 7018 7019 func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent { 7020 e.StartDelay = int64(delay) 7021 e.Details["start_delay"] = fmt.Sprintf("%d", delay) 7022 return e 7023 } 7024 7025 func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent { 7026 e.RestartReason = reason 7027 e.Details["restart_reason"] = reason 7028 return e 7029 } 7030 7031 func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent { 7032 e.TaskSignalReason = r 7033 e.Details["task_signal_reason"] = r 7034 return e 7035 } 7036 7037 func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent { 7038 e.TaskSignal = s.String() 7039 e.Details["task_signal"] = s.String() 7040 return e 7041 } 7042 7043 func (e *TaskEvent) SetDownloadError(err error) *TaskEvent { 7044 if err != nil { 7045 e.DownloadError = err.Error() 7046 e.Details["download_error"] = err.Error() 7047 } 7048 return e 7049 } 7050 7051 func (e *TaskEvent) SetValidationError(err error) *TaskEvent { 7052 if err != nil { 7053 e.ValidationError = err.Error() 7054 e.Details["validation_error"] = err.Error() 7055 } 7056 return e 7057 } 7058 7059 func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { 7060 e.KillTimeout = timeout 7061 e.Details["kill_timeout"] = timeout.String() 7062 return e 7063 } 7064 7065 func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent { 7066 e.DiskLimit = limit 7067 e.Details["disk_limit"] = fmt.Sprintf("%d", limit) 7068 return e 7069 } 7070 7071 func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent { 7072 e.FailedSibling = sibling 7073 e.Details["failed_sibling"] = sibling 7074 return e 7075 } 7076 7077 func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent { 7078 if err != nil { 7079 e.VaultError = err.Error() 7080 e.Details["vault_renewal_error"] = err.Error() 7081 } 7082 return e 7083 } 7084 7085 func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent { 7086 e.DriverMessage = m 7087 e.Details["driver_message"] = m 7088 return e 7089 } 7090 7091 func (e *TaskEvent) SetOOMKilled(oom bool) *TaskEvent { 7092 e.Details["oom_killed"] = strconv.FormatBool(oom) 7093 return e 7094 } 7095 7096 // TaskArtifact is an artifact to download before running the task. 7097 type TaskArtifact struct { 7098 // GetterSource is the source to download an artifact using go-getter 7099 GetterSource string 7100 7101 // GetterOptions are options to use when downloading the artifact using 7102 // go-getter. 7103 GetterOptions map[string]string 7104 7105 // GetterMode is the go-getter.ClientMode for fetching resources. 7106 // Defaults to "any" but can be set to "file" or "dir". 
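// For example (an illustrative literal, not from the original source):
//
//	TaskArtifact{GetterSource: "https://example.com/app", GetterMode: GetterModeFile}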
7107 GetterMode string
7108
7109 // RelativeDest is the download destination given relative to the task's
7110 // directory.
7111 RelativeDest string
7112 }
7113
7114 func (ta *TaskArtifact) Copy() *TaskArtifact {
7115 if ta == nil {
7116 return nil
7117 }
7118 nta := new(TaskArtifact)
7119 *nta = *ta
7120 nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions)
7121 return nta
7122 }
7123
7124 func (ta *TaskArtifact) GoString() string {
7125 return fmt.Sprintf("%+v", ta)
7126 }
7127
7128 // Hash creates a unique identifier for a TaskArtifact as the same GetterSource
7129 // may be specified multiple times with different destinations.
7130 func (ta *TaskArtifact) Hash() string {
7131 hash, err := blake2b.New256(nil)
7132 if err != nil {
7133 panic(err)
7134 }
7135
7136 hash.Write([]byte(ta.GetterSource))
7137
7138 // Must iterate over keys in a consistent order
7139 keys := make([]string, 0, len(ta.GetterOptions))
7140 for k := range ta.GetterOptions {
7141 keys = append(keys, k)
7142 }
7143 sort.Strings(keys)
7144 for _, k := range keys {
7145 hash.Write([]byte(k))
7146 hash.Write([]byte(ta.GetterOptions[k]))
7147 }
7148
7149 hash.Write([]byte(ta.GetterMode))
7150 hash.Write([]byte(ta.RelativeDest))
7151 return base64.RawStdEncoding.EncodeToString(hash.Sum(nil))
7152 }
7153
7154 // PathEscapesAllocDir returns whether the given path escapes the allocation
7155 // directory. The prefix allows adding a prefix if the path will be joined, for
7156 // example a "task/local" prefix may be provided if the path will be joined
7157 // against that prefix.
7158 func PathEscapesAllocDir(prefix, path string) (bool, error) {
7159 // Verify the destination doesn't escape the task's directory
7160 alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/"))
7161 if err != nil {
7162 return false, err
7163 }
7164 abs, err := filepath.Abs(filepath.Join(alloc, prefix, path))
7165 if err != nil {
7166 return false, err
7167 }
7168 rel, err := filepath.Rel(alloc, abs)
7169 if err != nil {
7170 return false, err
7171 }
7172
7173 return strings.HasPrefix(rel, ".."), nil
7174 }
7175
7176 func (ta *TaskArtifact) Validate() error {
7177 // Verify the source
7178 var mErr multierror.Error
7179 if ta.GetterSource == "" {
7180 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified"))
7181 }
7182
7183 switch ta.GetterMode {
7184 case "":
7185 // Default to any
7186 ta.GetterMode = GetterModeAny
7187 case GetterModeAny, GetterModeFile, GetterModeDir:
7188 // Ok
7189 default:
7190 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s",
7191 ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir))
7192 }
7193
7194 escaped, err := PathEscapesAllocDir("task", ta.RelativeDest)
7195 if err != nil {
7196 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
7197 } else if escaped {
7198 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
7199 }
7200
7201 if err := ta.validateChecksum(); err != nil {
7202 mErr.Errors = append(mErr.Errors, err)
7203 }
7204
7205 return mErr.ErrorOrNil()
7206 }
7207
7208 func (ta *TaskArtifact) validateChecksum() error {
7209 check, ok := ta.GetterOptions["checksum"]
7210 if !ok {
7211 return nil
7212 }
7213
7214 // Job struct validation occurs before interpolation resolution can be effective.
7215 // Skip checking if the checksum contains a variable reference; artifact fetching will
7216 // eventually fail if the checksum is indeed invalid.
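// A well-formed option value looks like "sha256:<64 hex chars>"; e.g.
// "md5:d41d8cd98f00b204e9800998ecf8427e" (the md5 digest of the empty string).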
7217 if args.ContainsEnv(check) {
7218 return nil
7219 }
7220
7221 check = strings.TrimSpace(check)
7222 if check == "" {
7223 return fmt.Errorf("checksum value cannot be empty")
7224 }
7225
7226 parts := strings.Split(check, ":")
7227 if l := len(parts); l != 2 {
7228 return fmt.Errorf(`checksum must be given as "type:value"; got %q`, check)
7229 }
7230
7231 checksumVal := parts[1]
7232 checksumBytes, err := hex.DecodeString(checksumVal)
7233 if err != nil {
7234 return fmt.Errorf("invalid checksum: %v", err)
7235 }
7236
7237 checksumType := parts[0]
7238 expectedLength := 0
7239 switch checksumType {
7240 case "md5":
7241 expectedLength = md5.Size
7242 case "sha1":
7243 expectedLength = sha1.Size
7244 case "sha256":
7245 expectedLength = sha256.Size
7246 case "sha512":
7247 expectedLength = sha512.Size
7248 default:
7249 return fmt.Errorf("unsupported checksum type: %s", checksumType)
7250 }
7251
7252 if len(checksumBytes) != expectedLength {
7253 return fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal)
7254 }
7255
7256 return nil
7257 }
7258
7259 const (
7260 ConstraintDistinctProperty = "distinct_property"
7261 ConstraintDistinctHosts = "distinct_hosts"
7262 ConstraintRegex = "regexp"
7263 ConstraintVersion = "version"
7264 ConstraintSemver = "semver"
7265 ConstraintSetContains = "set_contains"
7266 ConstraintSetContainsAll = "set_contains_all"
7267 ConstraintSetContainsAny = "set_contains_any"
7268 ConstraintAttributeIsSet = "is_set"
7269 ConstraintAttributeIsNotSet = "is_not_set"
7270 )
7271
7272 // Constraints are used to restrict placement options.
7273 type Constraint struct {
7274 LTarget string // Left-hand target
7275 RTarget string // Right-hand target
7276 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
7277 str string // Memoized string
7278 }
7279
7280 // Equals checks if two constraints are equal
7281 func (c *Constraint) Equals(o *Constraint) bool {
7282 return c == o ||
7283 c.LTarget == o.LTarget &&
7284 c.RTarget == o.RTarget &&
7285 c.Operand == o.Operand
7286 }
7287
7288 func (c *Constraint) Equal(o *Constraint) bool {
7289 return c.Equals(o)
7290 }
7291
7292 func (c *Constraint) Copy() *Constraint {
7293 if c == nil {
7294 return nil
7295 }
7296 nc := new(Constraint)
7297 *nc = *c
7298 return nc
7299 }
7300
7301 func (c *Constraint) String() string {
7302 if c.str != "" {
7303 return c.str
7304 }
7305 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
7306 return c.str
7307 }
7308
7309 func (c *Constraint) Validate() error {
7310 var mErr multierror.Error
7311 if c.Operand == "" {
7312 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
7313 }
7314
7315 // requireLtarget specifies whether the constraint requires an LTarget to be
7316 // provided.
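// (Only distinct_hosts clears this below, since it applies to the node as
// a whole rather than to a single node attribute.)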
7317 requireLtarget := true 7318 7319 // Perform additional validation based on operand 7320 switch c.Operand { 7321 case ConstraintDistinctHosts: 7322 requireLtarget = false 7323 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 7324 if c.RTarget == "" { 7325 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget")) 7326 } 7327 case ConstraintRegex: 7328 if _, err := regexp.Compile(c.RTarget); err != nil { 7329 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 7330 } 7331 case ConstraintVersion: 7332 if _, err := version.NewConstraint(c.RTarget); err != nil { 7333 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 7334 } 7335 case ConstraintSemver: 7336 if _, err := semver.NewConstraint(c.RTarget); err != nil { 7337 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver constraint is invalid: %v", err)) 7338 } 7339 case ConstraintDistinctProperty: 7340 // If a count is set, make sure it is convertible to a uint64 7341 if c.RTarget != "" { 7342 count, err := strconv.ParseUint(c.RTarget, 10, 64) 7343 if err != nil { 7344 mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err)) 7345 } else if count < 1 { 7346 mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count)) 7347 } 7348 } 7349 case ConstraintAttributeIsSet, ConstraintAttributeIsNotSet: 7350 if c.RTarget != "" { 7351 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q does not support an RTarget", c.Operand)) 7352 } 7353 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 7354 if c.RTarget == "" { 7355 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand)) 7356 } 7357 default: 7358 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand)) 7359 } 7360 7361 // Ensure we have an LTarget for the constraints that need one 7362 if requireLtarget && c.LTarget == "" { 7363 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint")) 7364 } 7365 7366 return mErr.ErrorOrNil() 7367 } 7368 7369 type Constraints []*Constraint 7370 7371 // Equals compares Constraints as a set 7372 func (xs *Constraints) Equals(ys *Constraints) bool { 7373 if xs == ys { 7374 return true 7375 } 7376 if xs == nil || ys == nil { 7377 return false 7378 } 7379 if len(*xs) != len(*ys) { 7380 return false 7381 } 7382 SETEQUALS: 7383 for _, x := range *xs { 7384 for _, y := range *ys { 7385 if x.Equals(y) { 7386 continue SETEQUALS 7387 } 7388 } 7389 return false 7390 } 7391 return true 7392 } 7393 7394 // Affinity is used to score placement options based on a weight 7395 type Affinity struct { 7396 LTarget string // Left-hand target 7397 RTarget string // Right-hand target 7398 Operand string // Affinity operand (<=, <, =, !=, >, >=), set_contains_all, set_contains_any 7399 Weight int8 // Weight applied to nodes that match the affinity. 
Can be negative
7400 str string // Memoized string
7401 }
7402
7403 // Equals checks if two affinities are equal
7404 func (a *Affinity) Equals(o *Affinity) bool {
7405 return a == o ||
7406 a.LTarget == o.LTarget &&
7407 a.RTarget == o.RTarget &&
7408 a.Operand == o.Operand &&
7409 a.Weight == o.Weight
7410 }
7411
7412 func (a *Affinity) Equal(o *Affinity) bool {
7413 return a.Equals(o)
7414 }
7415
7416 func (a *Affinity) Copy() *Affinity {
7417 if a == nil {
7418 return nil
7419 }
7420 na := new(Affinity)
7421 *na = *a
7422 return na
7423 }
7424
7425 func (a *Affinity) String() string {
7426 if a.str != "" {
7427 return a.str
7428 }
7429 a.str = fmt.Sprintf("%s %s %s %v", a.LTarget, a.Operand, a.RTarget, a.Weight)
7430 return a.str
7431 }
7432
7433 func (a *Affinity) Validate() error {
7434 var mErr multierror.Error
7435 if a.Operand == "" {
7436 mErr.Errors = append(mErr.Errors, errors.New("Missing affinity operand"))
7437 }
7438
7439 // Perform additional validation based on operand
7440 switch a.Operand {
7441 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains:
7442 if a.RTarget == "" {
7443 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains operators require an RTarget"))
7444 }
7445 case ConstraintRegex:
7446 if _, err := regexp.Compile(a.RTarget); err != nil {
7447 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
7448 }
7449 case ConstraintVersion:
7450 if _, err := version.NewConstraint(a.RTarget); err != nil {
7451 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version affinity is invalid: %v", err))
7452 }
7453 case ConstraintSemver:
7454 if _, err := semver.NewConstraint(a.RTarget); err != nil {
7455 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver affinity is invalid: %v", err))
7456 }
7457 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=":
7458 if a.RTarget == "" {
7459 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", a.Operand))
7460 }
7461 default:
7462 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown affinity operator %q", a.Operand))
7463 }
7464
7465 // Ensure we have an LTarget
7466 if a.LTarget == "" {
7467 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required"))
7468 }
7469
7470 // Ensure that weight is between -100 and 100, and not zero
7471 if a.Weight == 0 {
7472 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight cannot be zero"))
7473 }
7474
7475 if a.Weight > 100 || a.Weight < -100 {
7476 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight must be within the range [-100,100]"))
7477 }
7478
7479 return mErr.ErrorOrNil()
7480 }
7481
7482 // Spread is used to specify desired distribution of allocations according to weight
7483 type Spread struct {
7484 // Attribute is the node attribute used as the spread criteria
7485 Attribute string
7486
7487 // Weight is the relative weight of this spread, useful when there are multiple
7488 // spreads and affinities
7489 Weight int8
7490
7491 // SpreadTarget is used to describe desired percentages for each attribute value
7492 SpreadTarget []*SpreadTarget
7493
7494 // Memoized string representation
7495 str string
7496 }
7497
7498 type Affinities []*Affinity
7499
7500 // Equals compares Affinities as a set
7501 func (xs *Affinities) Equals(ys *Affinities) bool {
7502 if xs == ys {
7503 return true
7504 }
7505 if xs == nil || ys == nil {
7506 return false
7507 }
7508 if len(*xs) != len(*ys) {
7509 return false
7510 }
7511 SETEQUALS:
7512 for _, x := range *xs {
7513 for _, y := range *ys {
7514 if x.Equals(y) {
7515 continue SETEQUALS
7516 }
7517 }
7518 return false
7519 }
7520 return true
7521 }
7522
7523 func (s *Spread) Copy() *Spread {
7524 if s == nil {
7525 return nil
7526 }
7527 ns := new(Spread)
7528 *ns = *s
7529
7530 ns.SpreadTarget = CopySliceSpreadTarget(s.SpreadTarget)
7531 return ns
7532 }
7533
7534 func (s *Spread) String() string {
7535 if s.str != "" {
7536 return s.str
7537 }
7538 s.str = fmt.Sprintf("%s %s %v", s.Attribute, s.SpreadTarget, s.Weight)
7539 return s.str
7540 }
7541
7542 func (s *Spread) Validate() error {
7543 var mErr multierror.Error
7544 if s.Attribute == "" {
7545 mErr.Errors = append(mErr.Errors, errors.New("Missing spread attribute"))
7546 }
7547 if s.Weight <= 0 || s.Weight > 100 {
7548 mErr.Errors = append(mErr.Errors, errors.New("Spread stanza must have a positive weight from 1 to 100"))
7549 }
7550 seen := make(map[string]struct{})
7551 sumPercent := uint32(0)
7552
7553 for _, target := range s.SpreadTarget {
7554 // Make sure there are no duplicates
7555 _, ok := seen[target.Value]
7556 if !ok {
7557 seen[target.Value] = struct{}{}
7558 } else {
7559 mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target value %q already defined", target.Value))
7560 }
7561 if target.Percent < 0 || target.Percent > 100 {
7562 mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target percentage for value %q must be between 0 and 100", target.Value))
7563 }
7564 sumPercent += uint32(target.Percent)
7565 }
7566 if sumPercent > 100 {
7567 mErr.Errors = append(mErr.Errors, fmt.Errorf("Sum of spread target percentages must not be greater than 100%%; got %d%%", sumPercent))
7568 }
7569 return mErr.ErrorOrNil()
7570 }
7571
7572 // SpreadTarget is used to specify desired percentages for each attribute value
7573 type SpreadTarget struct {
7574 // Value is a single attribute value, like "dc1"
7575 Value string
7576
7577 // Percent is the desired percentage of allocs
7578 Percent uint8
7579
7580 // Memoized string representation
7581 str string
7582 }
7583
7584 func (s *SpreadTarget) Copy() *SpreadTarget {
7585 if s == nil {
7586 return nil
7587 }
7588
7589 ns := new(SpreadTarget)
7590 *ns = *s
7591 return ns
7592 }
7593
7594 func (s *SpreadTarget) String() string {
7595 if s.str != "" {
7596 return s.str
7597 }
7598 s.str = fmt.Sprintf("%q %v%%", s.Value, s.Percent)
7599 return s.str
7600 }
7601
7602 // EphemeralDisk is an ephemeral disk object
7603 type EphemeralDisk struct {
7604 // Sticky indicates whether the allocation is sticky to a node
7605 Sticky bool
7606
7607 // SizeMB is the size of the local disk
7608 SizeMB int
7609
7610 // Migrate determines if Nomad client should migrate the allocation dir for
7611 // sticky allocations
7612 Migrate bool
7613 }
7614
7615 // DefaultEphemeralDisk returns an EphemeralDisk with default configurations
7616 func DefaultEphemeralDisk() *EphemeralDisk {
7617 return &EphemeralDisk{
7618 SizeMB: 300,
7619 }
7620 }
7621
7622 // Validate validates EphemeralDisk
7623 func (d *EphemeralDisk) Validate() error {
7624 if d.SizeMB < 10 {
7625 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB)
7626 }
7627 return nil
7628 }
7629
7630 // Copy copies the EphemeralDisk struct and returns a new one
7631 func (d *EphemeralDisk) Copy() *EphemeralDisk {
7632 ld := new(EphemeralDisk)
7633 *ld = *d
7634 return ld
7635 }
7636
7637 var (
7638 // VaultUnrecoverableError matches unrecoverable errors returned by a Vault
7639 // server
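// For example, a response containing "Code: 403" matches, while "Code: 500"
// does not; the pattern below covers status codes 400, 403 and 404.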
7640 VaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`)
7641 )
7642
7643 const (
7644 // VaultChangeModeNoop takes no action when a new token is retrieved.
7645 VaultChangeModeNoop = "noop"
7646
7647 // VaultChangeModeSignal signals the task when a new token is retrieved.
7648 VaultChangeModeSignal = "signal"
7649
7650 // VaultChangeModeRestart restarts the task when a new token is retrieved.
7651 VaultChangeModeRestart = "restart"
7652 )
7653
7654 // Vault stores the set of permissions a task needs access to from Vault.
7655 type Vault struct {
7656 // Policies is the set of policies that the task needs access to
7657 Policies []string
7658
7659 // Env marks whether the Vault Token should be exposed as an environment
7660 // variable
7661 Env bool
7662
7663 // ChangeMode is used to configure the task's behavior when the Vault
7664 // token changes because the original token could not be renewed in time.
7665 ChangeMode string
7666
7667 // ChangeSignal is the signal sent to the task when a new token is
7668 // retrieved. This is only valid when using the signal change mode.
7669 ChangeSignal string
7670 }
7671
7672 func DefaultVaultBlock() *Vault {
7673 return &Vault{
7674 Env: true,
7675 ChangeMode: VaultChangeModeRestart,
7676 }
7677 }
7678
7679 // Copy returns a copy of this Vault block.
7680 func (v *Vault) Copy() *Vault {
7681 if v == nil {
7682 return nil
7683 }
7684
7685 nv := new(Vault)
7686 *nv = *v
7687 return nv
7688 }
7689
7690 func (v *Vault) Canonicalize() {
7691 if v.ChangeSignal != "" {
7692 v.ChangeSignal = strings.ToUpper(v.ChangeSignal)
7693 }
7694 }
7695
7696 // Validate returns whether the Vault block is valid.
7697 func (v *Vault) Validate() error {
7698 if v == nil {
7699 return nil
7700 }
7701
7702 var mErr multierror.Error
7703 if len(v.Policies) == 0 {
7704 multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty"))
7705 }
7706
7707 for _, p := range v.Policies {
7708 if p == "root" {
7709 multierror.Append(&mErr, fmt.Errorf("Cannot specify \"root\" policy"))
7710 }
7711 }
7712
7713 switch v.ChangeMode {
7714 case VaultChangeModeSignal:
7715 if v.ChangeSignal == "" {
7716 multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal))
7717 }
7718 case VaultChangeModeNoop, VaultChangeModeRestart:
7719 default:
7720 multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode))
7721 }
7722
7723 return mErr.ErrorOrNil()
7724 }
7725
7726 const (
7727 // DeploymentStatuses are the various states a deployment can be in
7728 DeploymentStatusRunning = "running"
7729 DeploymentStatusPaused = "paused"
7730 DeploymentStatusFailed = "failed"
7731 DeploymentStatusSuccessful = "successful"
7732 DeploymentStatusCancelled = "cancelled"
7733
7734 // TODO Statuses and Descriptions do not match 1:1 and we sometimes use the Description as a status flag
7735
7736 // DeploymentStatusDescriptions are the various descriptions of the states a
7737 // deployment can be in.
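// For example, a deployment whose canaries await manual promotion would keep
// Status DeploymentStatusRunning while its StatusDescription is
// DeploymentStatusDescriptionRunningNeedsPromotion (a reading of the names
// below; see also the TODO above).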
7726 const (
7727 	// DeploymentStatuses are the various states a deployment can be in
7728 	DeploymentStatusRunning    = "running"
7729 	DeploymentStatusPaused     = "paused"
7730 	DeploymentStatusFailed     = "failed"
7731 	DeploymentStatusSuccessful = "successful"
7732 	DeploymentStatusCancelled  = "cancelled"
7733
7734 	// TODO Statuses and Descriptions do not match 1:1 and we sometimes use the Description as a status flag
7735
7736 	// DeploymentStatusDescriptions are the various descriptions of the states a
7737 	// deployment can be in.
7738 	DeploymentStatusDescriptionRunning               = "Deployment is running"
7739 	DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires manual promotion"
7740 	DeploymentStatusDescriptionRunningAutoPromotion  = "Deployment is running pending automatic promotion"
7741 	DeploymentStatusDescriptionPaused                = "Deployment is paused"
7742 	DeploymentStatusDescriptionSuccessful            = "Deployment completed successfully"
7743 	DeploymentStatusDescriptionStoppedJob            = "Cancelled because job is stopped"
7744 	DeploymentStatusDescriptionNewerJob              = "Cancelled due to newer version of job"
7745 	DeploymentStatusDescriptionFailedAllocations     = "Failed due to unhealthy allocations"
7746 	DeploymentStatusDescriptionProgressDeadline      = "Failed due to progress deadline"
7747 	DeploymentStatusDescriptionFailedByUser          = "Deployment marked as failed"
7748 )
7749
7750 // DeploymentStatusDescriptionRollback is used to get the status description of
7751 // a deployment when rolling back to an older job.
7752 func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string {
7753 	return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion)
7754 }
7755
7756 // DeploymentStatusDescriptionRollbackNoop is used to get the status description of
7757 // a deployment when rolling back is not possible because it has the same specification
7758 func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string {
7759 	return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion)
7760 }
7761
7762 // DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of
7763 // a deployment when there is no target to roll back to but autorevert is desired.
7764 func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string {
7765 	return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription)
7766 }
7767
7768 // Deployment is the object that represents a job deployment which is used to
7769 // transition a job between versions.
7770 type Deployment struct {
7771 	// ID is a generated UUID for the deployment
7772 	ID string
7773
7774 	// Namespace is the namespace the deployment is created in
7775 	Namespace string
7776
7777 	// JobID is the job the deployment is created for
7778 	JobID string
7779
7780 	// JobVersion is the version of the job that the deployment is tracking
7781 	JobVersion uint64
7782
7783 	// JobModifyIndex is the ModifyIndex of the job which the deployment is
7784 	// tracking.
7785 	JobModifyIndex uint64
7786
7787 	// JobSpecModifyIndex is the JobModifyIndex of the job which the
7788 	// deployment is tracking.
7789 	JobSpecModifyIndex uint64
7790
7791 	// JobCreateIndex is the create index of the job which the deployment is
7792 	// tracking. It is needed so that if the job gets stopped and re-run we can
7793 	// present the correct list of deployments for the job and not old ones.
7794 	JobCreateIndex uint64
7795
7796 	// TaskGroups is the set of task groups affected by the deployment and their
7797 	// current deployment status.
7798 	TaskGroups map[string]*DeploymentState
7799
7800 	// The status of the deployment
7801 	Status string
7802
7803 	// StatusDescription allows a human readable description of the deployment
7804 	// status.
7805 	StatusDescription string
7806
7807 	CreateIndex uint64
7808 	ModifyIndex uint64
7809 }
7810
7811 // NewDeployment creates a new deployment given the job.
7812 func NewDeployment(job *Job) *Deployment {
7813 	return &Deployment{
7814 		ID:                 uuid.Generate(),
7815 		Namespace:          job.Namespace,
7816 		JobID:              job.ID,
7817 		JobVersion:         job.Version,
7818 		JobModifyIndex:     job.ModifyIndex,
7819 		JobSpecModifyIndex: job.JobModifyIndex,
7820 		JobCreateIndex:     job.CreateIndex,
7821 		Status:             DeploymentStatusRunning,
7822 		StatusDescription:  DeploymentStatusDescriptionRunning,
7823 		TaskGroups:         make(map[string]*DeploymentState, len(job.TaskGroups)),
7824 	}
7825 }
7826
7827 func (d *Deployment) Copy() *Deployment {
7828 	if d == nil {
7829 		return nil
7830 	}
7831
7832 	c := &Deployment{}
7833 	*c = *d
7834
7835 	c.TaskGroups = nil
7836 	if l := len(d.TaskGroups); d.TaskGroups != nil {
7837 		c.TaskGroups = make(map[string]*DeploymentState, l)
7838 		for tg, s := range d.TaskGroups {
7839 			c.TaskGroups[tg] = s.Copy()
7840 		}
7841 	}
7842
7843 	return c
7844 }
7845
7846 // Active returns whether the deployment is in an active (non-terminal) state.
7847 func (d *Deployment) Active() bool {
7848 	switch d.Status {
7849 	case DeploymentStatusRunning, DeploymentStatusPaused:
7850 		return true
7851 	default:
7852 		return false
7853 	}
7854 }
7855
7856 // GetID is a helper for getting the ID when the object may be nil
7857 func (d *Deployment) GetID() string {
7858 	if d == nil {
7859 		return ""
7860 	}
7861 	return d.ID
7862 }
7863
7864 // HasPlacedCanaries returns whether the deployment has placed canaries
7865 func (d *Deployment) HasPlacedCanaries() bool {
7866 	if d == nil || len(d.TaskGroups) == 0 {
7867 		return false
7868 	}
7869 	for _, group := range d.TaskGroups {
7870 		if len(group.PlacedCanaries) != 0 {
7871 			return true
7872 		}
7873 	}
7874 	return false
7875 }
7876
7877 // RequiresPromotion returns whether the deployment requires promotion to
7878 // continue
7879 func (d *Deployment) RequiresPromotion() bool {
7880 	if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning {
7881 		return false
7882 	}
7883 	for _, group := range d.TaskGroups {
7884 		if group.DesiredCanaries > 0 && !group.Promoted {
7885 			return true
7886 		}
7887 	}
7888 	return false
7889 }
7890
7891 // HasAutoPromote determines if all task groups are marked auto_promote
7892 func (d *Deployment) HasAutoPromote() bool {
7893 	if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning {
7894 		return false
7895 	}
7896 	for _, group := range d.TaskGroups {
7897 		if !group.AutoPromote {
7898 			return false
7899 		}
7900 	}
7901 	return true
7902 }
7903
7904 func (d *Deployment) GoString() string {
7905 	base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription)
7906 	for group, state := range d.TaskGroups {
7907 		base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state)
7908 	}
7909 	return base
7910 }
7911
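// Illustrative sketch (not from the original source): how the helpers above
// combine to gate a canary deployment. The job and task group name are
// hypothetical.
//
//	d := NewDeployment(job)
//	d.TaskGroups["web"] = &DeploymentState{DesiredCanaries: 2}
//	requires := d.RequiresPromotion() // true: canaries desired, none promoted
//	auto := d.HasAutoPromote()        // false: the group does not set auto_promote
//	_, _ = requires, auto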
7912 // DeploymentState tracks the state of a deployment for a given task group.
7913 type DeploymentState struct {
7914 	// AutoRevert marks whether the task group has indicated the job should be
7915 	// reverted on failure
7916 	AutoRevert bool
7917
7918 	// AutoPromote marks whether promotion is triggered automatically by healthy
7919 	// canaries; copied from the TaskGroup UpdateStrategy in scheduler.reconcile
7920 	AutoPromote bool
7921
7922 	// ProgressDeadline is the deadline by which an allocation must transition
7923 	// to healthy before the deployment is considered failed.
7924 	ProgressDeadline time.Duration
7925
7926 	// RequireProgressBy is the time by which an allocation must transition
7927 	// to healthy before the deployment is considered failed.
7928 	RequireProgressBy time.Time
7929
7930 	// Promoted marks whether the canaries have been promoted
7931 	Promoted bool
7932
7933 	// PlacedCanaries is the set of placed canary allocations
7934 	PlacedCanaries []string
7935
7936 	// DesiredCanaries is the number of canaries that should be created.
7937 	DesiredCanaries int
7938
7939 	// DesiredTotal is the total number of allocations that should be created as
7940 	// part of the deployment.
7941 	DesiredTotal int
7942
7943 	// PlacedAllocs is the number of allocations that have been placed
7944 	PlacedAllocs int
7945
7946 	// HealthyAllocs is the number of allocations that have been marked healthy.
7947 	HealthyAllocs int
7948
7949 	// UnhealthyAllocs is the number of allocations that have been marked as unhealthy.
7950 	UnhealthyAllocs int
7951 }
7952
7953 func (d *DeploymentState) GoString() string {
7954 	base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal)
7955 	base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries)
7956 	base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries)
7957 	base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted)
7958 	base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs)
7959 	base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs)
7960 	base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs)
7961 	base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert)
7962 	base += fmt.Sprintf("\n\tAutoPromote: %v", d.AutoPromote)
7963 	return base
7964 }
7965
7966 func (d *DeploymentState) Copy() *DeploymentState {
7967 	c := &DeploymentState{}
7968 	*c = *d
7969 	c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries)
7970 	return c
7971 }
7972
7973 // DeploymentStatusUpdate is used to update the status of a given deployment
7974 type DeploymentStatusUpdate struct {
7975 	// DeploymentID is the ID of the deployment to update
7976 	DeploymentID string
7977
7978 	// Status is the new status of the deployment.
7979 	Status string
7980
7981 	// StatusDescription is the new status description of the deployment.
7982 StatusDescription string 7983 } 7984 7985 // RescheduleTracker encapsulates previous reschedule events 7986 type RescheduleTracker struct { 7987 Events []*RescheduleEvent 7988 } 7989 7990 func (rt *RescheduleTracker) Copy() *RescheduleTracker { 7991 if rt == nil { 7992 return nil 7993 } 7994 nt := &RescheduleTracker{} 7995 *nt = *rt 7996 rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events)) 7997 for _, tracker := range rt.Events { 7998 rescheduleEvents = append(rescheduleEvents, tracker.Copy()) 7999 } 8000 nt.Events = rescheduleEvents 8001 return nt 8002 } 8003 8004 // RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation 8005 type RescheduleEvent struct { 8006 // RescheduleTime is the timestamp of a reschedule attempt 8007 RescheduleTime int64 8008 8009 // PrevAllocID is the ID of the previous allocation being restarted 8010 PrevAllocID string 8011 8012 // PrevNodeID is the node ID of the previous allocation 8013 PrevNodeID string 8014 8015 // Delay is the reschedule delay associated with the attempt 8016 Delay time.Duration 8017 } 8018 8019 func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string, delay time.Duration) *RescheduleEvent { 8020 return &RescheduleEvent{RescheduleTime: rescheduleTime, 8021 PrevAllocID: prevAllocID, 8022 PrevNodeID: prevNodeID, 8023 Delay: delay} 8024 } 8025 8026 func (re *RescheduleEvent) Copy() *RescheduleEvent { 8027 if re == nil { 8028 return nil 8029 } 8030 copy := new(RescheduleEvent) 8031 *copy = *re 8032 return copy 8033 } 8034 8035 // DesiredTransition is used to mark an allocation as having a desired state 8036 // transition. This information can be used by the scheduler to make the 8037 // correct decision. 8038 type DesiredTransition struct { 8039 // Migrate is used to indicate that this allocation should be stopped and 8040 // migrated to another node. 8041 Migrate *bool 8042 8043 // Reschedule is used to indicate that this allocation is eligible to be 8044 // rescheduled. Most allocations are automatically eligible for 8045 // rescheduling, so this field is only required when an allocation is not 8046 // automatically eligible. An example is an allocation that is part of a 8047 // deployment. 8048 Reschedule *bool 8049 8050 // ForceReschedule is used to indicate that this allocation must be rescheduled. 8051 // This field is only used when operators want to force a placement even if 8052 // a failed allocation is not eligible to be rescheduled 8053 ForceReschedule *bool 8054 } 8055 8056 // Merge merges the two desired transitions, preferring the values from the 8057 // passed in object. 8058 func (d *DesiredTransition) Merge(o *DesiredTransition) { 8059 if o.Migrate != nil { 8060 d.Migrate = o.Migrate 8061 } 8062 8063 if o.Reschedule != nil { 8064 d.Reschedule = o.Reschedule 8065 } 8066 8067 if o.ForceReschedule != nil { 8068 d.ForceReschedule = o.ForceReschedule 8069 } 8070 } 8071 8072 // ShouldMigrate returns whether the transition object dictates a migration. 8073 func (d *DesiredTransition) ShouldMigrate() bool { 8074 return d.Migrate != nil && *d.Migrate 8075 } 8076 8077 // ShouldReschedule returns whether the transition object dictates a 8078 // rescheduling. 8079 func (d *DesiredTransition) ShouldReschedule() bool { 8080 return d.Reschedule != nil && *d.Reschedule 8081 } 8082 8083 // ShouldForceReschedule returns whether the transition object dictates a 8084 // forced rescheduling. 
8085 func (d *DesiredTransition) ShouldForceReschedule() bool { 8086 if d == nil { 8087 return false 8088 } 8089 return d.ForceReschedule != nil && *d.ForceReschedule 8090 } 8091 8092 const ( 8093 AllocDesiredStatusRun = "run" // Allocation should run 8094 AllocDesiredStatusStop = "stop" // Allocation should stop 8095 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 8096 ) 8097 8098 const ( 8099 AllocClientStatusPending = "pending" 8100 AllocClientStatusRunning = "running" 8101 AllocClientStatusComplete = "complete" 8102 AllocClientStatusFailed = "failed" 8103 AllocClientStatusLost = "lost" 8104 ) 8105 8106 // Allocation is used to allocate the placement of a task group to a node. 8107 type Allocation struct { 8108 // msgpack omit empty fields during serialization 8109 _struct bool `codec:",omitempty"` // nolint: structcheck 8110 8111 // ID of the allocation (UUID) 8112 ID string 8113 8114 // Namespace is the namespace the allocation is created in 8115 Namespace string 8116 8117 // ID of the evaluation that generated this allocation 8118 EvalID string 8119 8120 // Name is a logical name of the allocation. 8121 Name string 8122 8123 // NodeID is the node this is being placed on 8124 NodeID string 8125 8126 // NodeName is the name of the node this is being placed on. 8127 NodeName string 8128 8129 // Job is the parent job of the task group being allocated. 8130 // This is copied at allocation time to avoid issues if the job 8131 // definition is updated. 8132 JobID string 8133 Job *Job 8134 8135 // TaskGroup is the name of the task group that should be run 8136 TaskGroup string 8137 8138 // COMPAT(0.11): Remove in 0.11 8139 // Resources is the total set of resources allocated as part 8140 // of this allocation of the task group. Dynamic ports will be set by 8141 // the scheduler. 8142 Resources *Resources 8143 8144 // SharedResources are the resources that are shared by all the tasks in an 8145 // allocation 8146 // Deprecated: use AllocatedResources.Shared instead. 8147 // Keep field to allow us to handle upgrade paths from old versions 8148 SharedResources *Resources 8149 8150 // TaskResources is the set of resources allocated to each 8151 // task. These should sum to the total Resources. Dynamic ports will be 8152 // set by the scheduler. 8153 // Deprecated: use AllocatedResources.Tasks instead. 8154 // Keep field to allow us to handle upgrade paths from old versions 8155 TaskResources map[string]*Resources 8156 8157 // AllocatedResources is the total resources allocated for the task group. 8158 AllocatedResources *AllocatedResources 8159 8160 // Metrics associated with this allocation 8161 Metrics *AllocMetric 8162 8163 // Desired Status of the allocation on the client 8164 DesiredStatus string 8165 8166 // DesiredStatusDescription is meant to provide more human useful information 8167 DesiredDescription string 8168 8169 // DesiredTransition is used to indicate that a state transition 8170 // is desired for a given reason. 
8171 	DesiredTransition DesiredTransition
8172
8173 	// Status of the allocation on the client
8174 	ClientStatus string
8175
8176 	// ClientDescription is meant to provide more human-useful information
8177 	ClientDescription string
8178
8179 	// TaskStates stores the state of each task
8180 	TaskStates map[string]*TaskState
8181
8182 	// AllocStates tracks metadata associated with changes to the state of the whole allocation, like becoming lost
8183 	AllocStates []*AllocState
8184
8185 	// PreviousAllocation is the allocation that this allocation is replacing
8186 	PreviousAllocation string
8187
8188 	// NextAllocation is the allocation that this allocation is being replaced by
8189 	NextAllocation string
8190
8191 	// DeploymentID identifies an allocation as being created from a
8192 	// particular deployment
8193 	DeploymentID string
8194
8195 	// DeploymentStatus captures the status of the allocation as part of the
8196 	// given deployment
8197 	DeploymentStatus *AllocDeploymentStatus
8198
8199 	// RescheduleTracker captures details of previous reschedule attempts of the allocation
8200 	RescheduleTracker *RescheduleTracker
8201
8202 	// FollowupEvalID captures a follow up evaluation created to handle a failed allocation
8203 	// that can be rescheduled in the future
8204 	FollowupEvalID string
8205
8206 	// PreemptedAllocations captures IDs of any allocations that were preempted
8207 	// in order to place this allocation
8208 	PreemptedAllocations []string
8209
8210 	// PreemptedByAllocation tracks the alloc ID of the allocation that caused this allocation
8211 	// to stop running because it got preempted
8212 	PreemptedByAllocation string
8213
8214 	// Raft Indexes
8215 	CreateIndex uint64
8216 	ModifyIndex uint64
8217
8218 	// AllocModifyIndex is not updated when the client updates allocations. This
8219 	// lets the client pull only the allocs updated by the server.
8220 	AllocModifyIndex uint64
8221
8222 	// CreateTime is the time the allocation has finished scheduling and been
8223 	// verified by the plan applier.
8224 	CreateTime int64
8225
8226 	// ModifyTime is the time the allocation was last updated.
8227 	ModifyTime int64
8228 }
8229
8230 // Index returns the index of the allocation. If the allocation is from a task
8231 // group with count greater than 1, there will be multiple allocations for it.
8232 func (a *Allocation) Index() uint {
8233 	l := len(a.Name)
8234 	prefix := len(a.JobID) + len(a.TaskGroup) + 2
8235 	if l <= 3 || l <= prefix {
8236 		return uint(0)
8237 	}
8238
8239 	strNum := a.Name[prefix : len(a.Name)-1]
8240 	num, _ := strconv.Atoi(strNum)
8241 	return uint(num)
8242 }
8243
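// Illustrative sketch (not from the original source): Index parses the numeric
// suffix out of an allocation name of the form "<job>.<group>[<n>]"; the
// prefix length accounts for the "." and "[" separators and the trailing "]"
// is dropped. The names below are hypothetical.
//
//	a := &Allocation{Name: "example.cache[3]", JobID: "example", TaskGroup: "cache"}
//	idx := a.Index() // 3: "example.cache[" and "]" are stripped before parsing
//	_ = idx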
8244 // Copy provides a copy of the allocation and deep copies the job
8245 func (a *Allocation) Copy() *Allocation {
8246 	return a.copyImpl(true)
8247 }
8248
8249 // CopySkipJob provides a copy of the allocation but doesn't deep copy the job
8250 func (a *Allocation) CopySkipJob() *Allocation {
8251 	return a.copyImpl(false)
8252 }
8253
8254 // Canonicalize Allocation to ensure fields are initialized to the expectations
8255 // of this version of Nomad. Should be called when restoring persisted
8256 // Allocations or receiving Allocations from Nomad agents potentially on an
8257 // older version of Nomad.
8258 func (a *Allocation) Canonicalize() {
8259 	if a.AllocatedResources == nil && a.TaskResources != nil {
8260 		ar := AllocatedResources{}
8261
8262 		tasks := make(map[string]*AllocatedTaskResources, len(a.TaskResources))
8263 		for name, tr := range a.TaskResources {
8264 			atr := AllocatedTaskResources{}
8265 			atr.Cpu.CpuShares = int64(tr.CPU)
8266 			atr.Memory.MemoryMB = int64(tr.MemoryMB)
8267 			atr.Networks = tr.Networks.Copy()
8268
8269 			tasks[name] = &atr
8270 		}
8271 		ar.Tasks = tasks
8272
8273 		if a.SharedResources != nil {
8274 			ar.Shared.DiskMB = int64(a.SharedResources.DiskMB)
8275 			ar.Shared.Networks = a.SharedResources.Networks.Copy()
8276 		}
8277
8278 		a.AllocatedResources = &ar
8279 	}
8280
8281 	a.Job.Canonicalize()
8282 }
8283
8284 func (a *Allocation) copyImpl(job bool) *Allocation {
8285 	if a == nil {
8286 		return nil
8287 	}
8288 	na := new(Allocation)
8289 	*na = *a
8290
8291 	if job {
8292 		na.Job = na.Job.Copy()
8293 	}
8294
8295 	na.AllocatedResources = na.AllocatedResources.Copy()
8296 	na.Resources = na.Resources.Copy()
8297 	na.SharedResources = na.SharedResources.Copy()
8298
8299 	if a.TaskResources != nil {
8300 		tr := make(map[string]*Resources, len(na.TaskResources))
8301 		for task, resource := range na.TaskResources {
8302 			tr[task] = resource.Copy()
8303 		}
8304 		na.TaskResources = tr
8305 	}
8306
8307 	na.Metrics = na.Metrics.Copy()
8308 	na.DeploymentStatus = na.DeploymentStatus.Copy()
8309
8310 	if a.TaskStates != nil {
8311 		ts := make(map[string]*TaskState, len(na.TaskStates))
8312 		for task, state := range na.TaskStates {
8313 			ts[task] = state.Copy()
8314 		}
8315 		na.TaskStates = ts
8316 	}
8317
8318 	na.RescheduleTracker = a.RescheduleTracker.Copy()
8319 	na.PreemptedAllocations = helper.CopySliceString(a.PreemptedAllocations)
8320 	return na
8321 }
8322
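// Illustrative sketch (not from the original source): the terminal-status
// checks below split along the server's desired state and the client's
// observed state; TerminalStatus is the OR of the two.
//
//	a := &Allocation{
//		DesiredStatus: AllocDesiredStatusRun,
//		ClientStatus:  AllocClientStatusFailed,
//	}
//	_ = a.ServerTerminalStatus() // false: the server still wants it running
//	_ = a.ClientTerminalStatus() // true: the client reported a terminal state
//	_ = a.TerminalStatus()       // true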
8323 // TerminalStatus returns if the desired or actual status is terminal and
8324 // will no longer transition.
8325 func (a *Allocation) TerminalStatus() bool {
8326 	// First check the desired state and if that isn't terminal, check client
8327 	// state.
8328 	return a.ServerTerminalStatus() || a.ClientTerminalStatus()
8329 }
8330
8331 // ServerTerminalStatus returns true if the desired state of the allocation is terminal
8332 func (a *Allocation) ServerTerminalStatus() bool {
8333 	switch a.DesiredStatus {
8334 	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
8335 		return true
8336 	default:
8337 		return false
8338 	}
8339 }
8340
8341 // ClientTerminalStatus returns if the client status is terminal and will no longer transition
8342 func (a *Allocation) ClientTerminalStatus() bool {
8343 	switch a.ClientStatus {
8344 	case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
8345 		return true
8346 	default:
8347 		return false
8348 	}
8349 }
8350
8351 // ShouldReschedule returns if the allocation is eligible to be rescheduled according
8352 // to its status and ReschedulePolicy given its failure time
8353 func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
8354 	// First check the desired state
8355 	switch a.DesiredStatus {
8356 	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
8357 		return false
8358 	default:
8359 	}
8360 	switch a.ClientStatus {
8361 	case AllocClientStatusFailed:
8362 		return a.RescheduleEligible(reschedulePolicy, failTime)
8363 	default:
8364 		return false
8365 	}
8366 }
8367
8368 // RescheduleEligible returns if the allocation is eligible to be rescheduled according
8369 // to its ReschedulePolicy and the current state of its reschedule trackers
8370 func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
8371 	if reschedulePolicy == nil {
8372 		return false
8373 	}
8374 	attempts := reschedulePolicy.Attempts
8375 	interval := reschedulePolicy.Interval
8376 	enabled := attempts > 0 || reschedulePolicy.Unlimited
8377 	if !enabled {
8378 		return false
8379 	}
8380 	if reschedulePolicy.Unlimited {
8381 		return true
8382 	}
8383 	// Early return true if there are no attempts yet and the number of allowed attempts is > 0
8384 	if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 {
8385 		return true
8386 	}
8387 	attempted := 0
8388 	for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
8389 		lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
8390 		timeDiff := failTime.UTC().UnixNano() - lastAttempt
8391 		if timeDiff < interval.Nanoseconds() {
8392 			attempted++
8393 		}
8394 	}
8395 	return attempted < attempts
8396 }
8397
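// Illustrative sketch (not from the original source): with attempts-limited
// policies, only reschedule events inside the sliding interval count against
// the limit. The policy values are hypothetical.
//
//	policy := &ReschedulePolicy{Attempts: 2, Interval: time.Hour}
//	now := time.Now()
//	a := &Allocation{
//		ClientStatus: AllocClientStatusFailed,
//		RescheduleTracker: &RescheduleTracker{Events: []*RescheduleEvent{
//			{RescheduleTime: now.Add(-30 * time.Minute).UnixNano()},
//		}},
//	}
//	ok := a.RescheduleEligible(policy, now) // true: one attempt in the last hour, two allowed
//	_ = ok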
8398 // LastEventTime is the time of the last task event in the allocation.
8399 // It is used to determine allocation failure time. If the FinishedAt field
8400 // is not set, the alloc's modify time is used.
8401 func (a *Allocation) LastEventTime() time.Time {
8402 	var lastEventTime time.Time
8403 	if a.TaskStates != nil {
8404 		for _, s := range a.TaskStates {
8405 			if lastEventTime.IsZero() || s.FinishedAt.After(lastEventTime) {
8406 				lastEventTime = s.FinishedAt
8407 			}
8408 		}
8409 	}
8410
8411 	if lastEventTime.IsZero() {
8412 		return time.Unix(0, a.ModifyTime).UTC()
8413 	}
8414 	return lastEventTime
8415 }
8416
8417 // ReschedulePolicy returns the reschedule policy based on the task group
8418 func (a *Allocation) ReschedulePolicy() *ReschedulePolicy {
8419 	tg := a.Job.LookupTaskGroup(a.TaskGroup)
8420 	if tg == nil {
8421 		return nil
8422 	}
8423 	return tg.ReschedulePolicy
8424 }
8425
8426 // NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled,
8427 // and whether that time falls within the policy's interval when the policy doesn't allow unlimited reschedules
8428 func (a *Allocation) NextRescheduleTime() (time.Time, bool) {
8429 	failTime := a.LastEventTime()
8430 	reschedulePolicy := a.ReschedulePolicy()
8431 	if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
8432 		return time.Time{}, false
8433 	}
8434
8435 	nextDelay := a.NextDelay()
8436 	nextRescheduleTime := failTime.Add(nextDelay)
8437 	rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil)
8438 	if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil {
8439 		// Check for eligibility based on the interval if max attempts is set
8440 		attempted := 0
8441 		for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
8442 			lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
8443 			timeDiff := failTime.UTC().UnixNano() - lastAttempt
8444 			if timeDiff < reschedulePolicy.Interval.Nanoseconds() {
8445 				attempted++
8446 			}
8447 		}
8448 		rescheduleEligible = attempted < reschedulePolicy.Attempts && nextDelay < reschedulePolicy.Interval
8449 	}
8450 	return nextRescheduleTime, rescheduleEligible
8451 }
8452
8453 // ShouldClientStop tests an alloc for StopAfterClientDisconnect configuration
8454 func (a *Allocation) ShouldClientStop() bool {
8455 	tg := a.Job.LookupTaskGroup(a.TaskGroup)
8456 	if tg == nil ||
8457 		tg.StopAfterClientDisconnect == nil ||
8458 		*tg.StopAfterClientDisconnect == 0*time.Nanosecond {
8459 		return false
8460 	}
8461 	return true
8462 }
8463
8464 // WaitClientStop uses the reschedule delay mechanism to block rescheduling until
8465 // StopAfterClientDisconnect's block interval passes
8466 func (a *Allocation) WaitClientStop() time.Time {
8467 	tg := a.Job.LookupTaskGroup(a.TaskGroup)
8468
8469 	// An alloc can only be marked lost once, so use the first lost transition
8470 	var t time.Time
8471 	for _, s := range a.AllocStates {
8472 		if s.Field == AllocStateFieldClientStatus &&
8473 			s.Value == AllocClientStatusLost {
8474 			t = s.Time
8475 			break
8476 		}
8477 	}
8478
8479 	// On the first pass, the alloc hasn't been marked lost yet, and so we start
8480 	// counting from now
8481 	if t.IsZero() {
8482 		t = time.Now().UTC()
8483 	}
8484
8485 	// Find the max kill timeout
8486 	kill := DefaultKillTimeout
8487 	for _, t := range tg.Tasks {
8488 		if t.KillTimeout > kill {
8489 			kill = t.KillTimeout
8490 		}
8491 	}
8492
8493 	return t.Add(*tg.StopAfterClientDisconnect + kill)
8494 }
8495
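// Illustrative sketch (not from the original source): with
// stop_after_client_disconnect set on the task group, a lost alloc is held
// back from rescheduling until the configured window plus the largest kill
// timeout has passed since its first "lost" transition. The durations are
// hypothetical.
//
//	// task group: stop_after_client_disconnect = 5m, largest kill_timeout = 30s
//	if a.ShouldClientStop() {
//		until := a.WaitClientStop() // time of first "lost" + 5m + 30s
//		_ = until                   // rescheduling is blocked until then
//	}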
8496 // NextDelay returns a duration after which the allocation can be rescheduled.
8497 // It is calculated according to the delay function and previous reschedule attempts.
8498 func (a *Allocation) NextDelay() time.Duration {
8499 	policy := a.ReschedulePolicy()
8500 	// Can be nil if the task group was updated to remove its reschedule policy
8501 	if policy == nil {
8502 		return 0
8503 	}
8504 	delayDur := policy.Delay
8505 	if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 {
8506 		return delayDur
8507 	}
8508 	events := a.RescheduleTracker.Events
8509 	switch policy.DelayFunction {
8510 	case "exponential":
8511 		delayDur = a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1].Delay * 2
8512 	case "fibonacci":
8513 		if len(events) >= 2 {
8514 			fibN1Delay := events[len(events)-1].Delay
8515 			fibN2Delay := events[len(events)-2].Delay
8516 			// Handle a reset of the delay ceiling, which should cause
8517 			// a new series to start
8518 			if fibN2Delay == policy.MaxDelay && fibN1Delay == policy.Delay {
8519 				delayDur = fibN1Delay
8520 			} else {
8521 				delayDur = fibN1Delay + fibN2Delay
8522 			}
8523 		}
8524 	default:
8525 		return delayDur
8526 	}
8527 	if policy.MaxDelay > 0 && delayDur > policy.MaxDelay {
8528 		delayDur = policy.MaxDelay
8529 		// Check if the delay needs to be reset
8530
8531 		lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1]
8532 		timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime
8533 		if timeDiff > delayDur.Nanoseconds() {
8534 			delayDur = policy.Delay
8535 		}
8536
8537 	}
8538
8539 	return delayDur
8540 }
8541
8542 // Terminated returns if the allocation is in a terminal state on a client.
8543 func (a *Allocation) Terminated() bool {
8544 	if a.ClientStatus == AllocClientStatusFailed ||
8545 		a.ClientStatus == AllocClientStatusComplete ||
8546 		a.ClientStatus == AllocClientStatusLost {
8547 		return true
8548 	}
8549 	return false
8550 }
8551
8552 // SetStop updates the allocation in place to a DesiredStatus of stop, with the given ClientStatus
8553 func (a *Allocation) SetStop(clientStatus, clientDesc string) {
8554 	a.DesiredStatus = AllocDesiredStatusStop
8555 	a.ClientStatus = clientStatus
8556 	a.ClientDescription = clientDesc
8557 	a.AppendState(AllocStateFieldClientStatus, clientStatus)
8558 }
8559
8560 // AppendState creates and appends an AllocState entry recording the time of the state
8561 // transition. Used to mark the transition to lost
8562 func (a *Allocation) AppendState(field AllocStateField, value string) {
8563 	a.AllocStates = append(a.AllocStates, &AllocState{
8564 		Field: field,
8565 		Value: value,
8566 		Time:  time.Now().UTC(),
8567 	})
8568 }
8569
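// Illustrative sketch (not from the original source): how NextDelay grows a
// reschedule backoff. With delay_function = "exponential" the next delay is
// double the previous event's delay; "fibonacci" sums the previous two. The
// policy values are hypothetical.
//
//	// policy: Delay = 30s, DelayFunction = "exponential", MaxDelay = 5m
//	// prior reschedule event delays: 30s, then 1m
//	d := a.NextDelay() // 2m: double the most recent event's delay
//	_ = d
//	// once doubling exceeds MaxDelay the result is clamped to 5m, and if more
//	// than the clamped delay has passed since the last event it resets to 30s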
8570 // RanSuccessfully returns whether the client has run the allocation and all
8571 // tasks finished successfully. Critically this function returns whether the
8572 // allocation has run to completion and not just that the alloc has converged to
8573 // its desired state. That is to say that a batch allocation must have finished
8574 // with exit code 0 on all task groups. This doesn't really have meaning on a
8575 // non-batch allocation because a service and system allocation should not
8576 // finish.
8577 func (a *Allocation) RanSuccessfully() bool {
8578 	// Handle the case where the client hasn't started the allocation.
8579 	if len(a.TaskStates) == 0 {
8580 		return false
8581 	}
8582
8583 	// Check to see if all the tasks finished successfully in the allocation
8584 	allSuccess := true
8585 	for _, state := range a.TaskStates {
8586 		allSuccess = allSuccess && state.Successful()
8587 	}
8588
8589 	return allSuccess
8590 }
8591
8592 // ShouldMigrate returns if the allocation needs data migration
8593 func (a *Allocation) ShouldMigrate() bool {
8594 	if a.PreviousAllocation == "" {
8595 		return false
8596 	}
8597
8598 	if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict {
8599 		return false
8600 	}
8601
8602 	tg := a.Job.LookupTaskGroup(a.TaskGroup)
8603
8604 	// If the task group is nil or the ephemeral disk block isn't present then
8605 	// we won't migrate
8606 	if tg == nil || tg.EphemeralDisk == nil {
8607 		return false
8608 	}
8609
8610 	// We won't migrate any data if the user hasn't enabled migration or the
8611 	// disk is not marked as sticky
8612 	if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky {
8613 		return false
8614 	}
8615
8616 	return true
8617 }
8618
8619 // SetEventDisplayMessages populates the display message if it's not already set,
8620 // a temporary fix to handle old allocations that don't have it.
8621 // This method will be removed in a future release.
8622 func (a *Allocation) SetEventDisplayMessages() {
8623 	setDisplayMsg(a.TaskStates)
8624 }
8625
8626 // COMPAT(0.11): Remove in 0.11
8627 // ComparableResources returns the resources on the allocation
8628 // handling upgrade paths. After 0.11 calls to this should be replaced with:
8629 // alloc.AllocatedResources.Comparable()
8630 func (a *Allocation) ComparableResources() *ComparableResources {
8631 	// Alloc already has 0.9+ behavior
8632 	if a.AllocatedResources != nil {
8633 		return a.AllocatedResources.Comparable()
8634 	}
8635
8636 	var resources *Resources
8637 	if a.Resources != nil {
8638 		resources = a.Resources
8639 	} else if a.TaskResources != nil {
8640 		resources = new(Resources)
8641 		resources.Add(a.SharedResources)
8642 		for _, taskResource := range a.TaskResources {
8643 			resources.Add(taskResource)
8644 		}
8645 	}
8646
8647 	// Upgrade path
8648 	return &ComparableResources{
8649 		Flattened: AllocatedTaskResources{
8650 			Cpu: AllocatedCpuResources{
8651 				CpuShares: int64(resources.CPU),
8652 			},
8653 			Memory: AllocatedMemoryResources{
8654 				MemoryMB: int64(resources.MemoryMB),
8655 			},
8656 			Networks: resources.Networks,
8657 		},
8658 		Shared: AllocatedSharedResources{
8659 			DiskMB: int64(resources.DiskMB),
8660 		},
8661 	}
8662 }
8663
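// Illustrative sketch (not from the original source): the migration decision
// in ShouldMigrate above requires a previous allocation plus a sticky,
// migrate-enabled ephemeral disk on the task group. The identifiers tg and a
// stand for a hypothetical task group and its replacement allocation.
//
//	tg.EphemeralDisk = &EphemeralDisk{Sticky: true, Migrate: true, SizeMB: 300}
//	a.PreviousAllocation = "prev-alloc-id" // set by the scheduler on replacement
//	ok := a.ShouldMigrate()                // true while the alloc is not stopped or evicted
//	_ = ok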
8664 // LookupTask by name from the Allocation. Returns nil if the Job is not set, the
8665 // TaskGroup does not exist, or the task name cannot be found.
8666 func (a *Allocation) LookupTask(name string) *Task {
8667 	if a.Job == nil {
8668 		return nil
8669 	}
8670
8671 	tg := a.Job.LookupTaskGroup(a.TaskGroup)
8672 	if tg == nil {
8673 		return nil
8674 	}
8675
8676 	return tg.LookupTask(name)
8677 }
8678
8679 // Stub returns a list stub for the allocation
8680 func (a *Allocation) Stub() *AllocListStub {
8681 	return &AllocListStub{
8682 		ID:                    a.ID,
8683 		EvalID:                a.EvalID,
8684 		Name:                  a.Name,
8685 		Namespace:             a.Namespace,
8686 		NodeID:                a.NodeID,
8687 		NodeName:              a.NodeName,
8688 		JobID:                 a.JobID,
8689 		JobType:               a.Job.Type,
8690 		JobVersion:            a.Job.Version,
8691 		TaskGroup:             a.TaskGroup,
8692 		DesiredStatus:         a.DesiredStatus,
8693 		DesiredDescription:    a.DesiredDescription,
8694 		ClientStatus:          a.ClientStatus,
8695 		ClientDescription:     a.ClientDescription,
8696 		DesiredTransition:     a.DesiredTransition,
8697 		TaskStates:            a.TaskStates,
8698 		DeploymentStatus:      a.DeploymentStatus,
8699 		FollowupEvalID:        a.FollowupEvalID,
8700 		RescheduleTracker:     a.RescheduleTracker,
8701 		PreemptedAllocations:  a.PreemptedAllocations,
8702 		PreemptedByAllocation: a.PreemptedByAllocation,
8703 		CreateIndex:           a.CreateIndex,
8704 		ModifyIndex:           a.ModifyIndex,
8705 		CreateTime:            a.CreateTime,
8706 		ModifyTime:            a.ModifyTime,
8707 	}
8708 }
8709
8710 // AllocationDiff converts an Allocation type to an AllocationDiff type.
8711 // If at any time, modifications are made to AllocationDiff so that an
8712 // Allocation can no longer be safely converted to AllocationDiff,
8713 // this method should be changed accordingly.
8714 func (a *Allocation) AllocationDiff() *AllocationDiff {
8715 	return (*AllocationDiff)(a)
8716 }
8717
8718 // AllocationDiff is another named type for Allocation (to use the same fields),
8719 // which is used to represent the delta for an Allocation. If you need a method
8720 // defined on the allocation diff, define it here; methods on Allocation are not promoted to this named type.
8721 type AllocationDiff Allocation
8722
8723 // AllocListStub is used to return a subset of alloc information
8724 type AllocListStub struct {
8725 	ID                    string
8726 	EvalID                string
8727 	Name                  string
8728 	Namespace             string
8729 	NodeID                string
8730 	NodeName              string
8731 	JobID                 string
8732 	JobType               string
8733 	JobVersion            uint64
8734 	TaskGroup             string
8735 	DesiredStatus         string
8736 	DesiredDescription    string
8737 	ClientStatus          string
8738 	ClientDescription     string
8739 	DesiredTransition     DesiredTransition
8740 	TaskStates            map[string]*TaskState
8741 	DeploymentStatus      *AllocDeploymentStatus
8742 	FollowupEvalID        string
8743 	RescheduleTracker     *RescheduleTracker
8744 	PreemptedAllocations  []string
8745 	PreemptedByAllocation string
8746 	CreateIndex           uint64
8747 	ModifyIndex           uint64
8748 	CreateTime            int64
8749 	ModifyTime            int64
8750 }
8751
8752 // SetEventDisplayMessages populates the display message if it's not already set,
8753 // a temporary fix to handle old allocations that don't have it.
8754 // This method will be removed in a future release.
8755 func (a *AllocListStub) SetEventDisplayMessages() {
8756 	setDisplayMsg(a.TaskStates)
8757 }
8758
8759 func setDisplayMsg(taskStates map[string]*TaskState) {
8760 	if taskStates != nil {
8761 		for _, taskState := range taskStates {
8762 			for _, event := range taskState.Events {
8763 				event.PopulateEventDisplayMessage()
8764 			}
8765 		}
8766 	}
8767 }
8768
8769 // AllocMetric is used to track various metrics while attempting
8770 // to make an allocation. These are used to debug a job, or to better
8771 // understand the pressure within the system.
8772 type AllocMetric struct { 8773 // NodesEvaluated is the number of nodes that were evaluated 8774 NodesEvaluated int 8775 8776 // NodesFiltered is the number of nodes filtered due to a constraint 8777 NodesFiltered int 8778 8779 // NodesAvailable is the number of nodes available for evaluation per DC. 8780 NodesAvailable map[string]int 8781 8782 // ClassFiltered is the number of nodes filtered by class 8783 ClassFiltered map[string]int 8784 8785 // ConstraintFiltered is the number of failures caused by constraint 8786 ConstraintFiltered map[string]int 8787 8788 // NodesExhausted is the number of nodes skipped due to being 8789 // exhausted of at least one resource 8790 NodesExhausted int 8791 8792 // ClassExhausted is the number of nodes exhausted by class 8793 ClassExhausted map[string]int 8794 8795 // DimensionExhausted provides the count by dimension or reason 8796 DimensionExhausted map[string]int 8797 8798 // QuotaExhausted provides the exhausted dimensions 8799 QuotaExhausted []string 8800 8801 // Scores is the scores of the final few nodes remaining 8802 // for placement. The top score is typically selected. 8803 // Deprecated: Replaced by ScoreMetaData in Nomad 0.9 8804 Scores map[string]float64 8805 8806 // ScoreMetaData is a slice of top scoring nodes displayed in the CLI 8807 ScoreMetaData []*NodeScoreMeta 8808 8809 // nodeScoreMeta is used to keep scores for a single node id. It is cleared out after 8810 // we receive normalized score during the last step of the scoring stack. 8811 nodeScoreMeta *NodeScoreMeta 8812 8813 // topScores is used to maintain a heap of the top K nodes with 8814 // the highest normalized score 8815 topScores *kheap.ScoreHeap 8816 8817 // AllocationTime is a measure of how long the allocation 8818 // attempt took. This can affect performance and SLAs. 8819 AllocationTime time.Duration 8820 8821 // CoalescedFailures indicates the number of other 8822 // allocations that were coalesced into this failed allocation. 8823 // This is to prevent creating many failed allocations for a 8824 // single task group. 
8825 CoalescedFailures int 8826 } 8827 8828 func (a *AllocMetric) Copy() *AllocMetric { 8829 if a == nil { 8830 return nil 8831 } 8832 na := new(AllocMetric) 8833 *na = *a 8834 na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable) 8835 na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered) 8836 na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered) 8837 na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted) 8838 na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted) 8839 na.QuotaExhausted = helper.CopySliceString(na.QuotaExhausted) 8840 na.Scores = helper.CopyMapStringFloat64(na.Scores) 8841 na.ScoreMetaData = CopySliceNodeScoreMeta(na.ScoreMetaData) 8842 return na 8843 } 8844 8845 func (a *AllocMetric) EvaluateNode() { 8846 a.NodesEvaluated += 1 8847 } 8848 8849 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 8850 a.NodesFiltered += 1 8851 if node != nil && node.NodeClass != "" { 8852 if a.ClassFiltered == nil { 8853 a.ClassFiltered = make(map[string]int) 8854 } 8855 a.ClassFiltered[node.NodeClass] += 1 8856 } 8857 if constraint != "" { 8858 if a.ConstraintFiltered == nil { 8859 a.ConstraintFiltered = make(map[string]int) 8860 } 8861 a.ConstraintFiltered[constraint] += 1 8862 } 8863 } 8864 8865 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 8866 a.NodesExhausted += 1 8867 if node != nil && node.NodeClass != "" { 8868 if a.ClassExhausted == nil { 8869 a.ClassExhausted = make(map[string]int) 8870 } 8871 a.ClassExhausted[node.NodeClass] += 1 8872 } 8873 if dimension != "" { 8874 if a.DimensionExhausted == nil { 8875 a.DimensionExhausted = make(map[string]int) 8876 } 8877 a.DimensionExhausted[dimension] += 1 8878 } 8879 } 8880 8881 func (a *AllocMetric) ExhaustQuota(dimensions []string) { 8882 if a.QuotaExhausted == nil { 8883 a.QuotaExhausted = make([]string, 0, len(dimensions)) 8884 } 8885 8886 a.QuotaExhausted = append(a.QuotaExhausted, dimensions...) 
8887 }
8888
8889 // ScoreNode is used to gather the top K scoring nodes in a heap
8890 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) {
8891 	// Create nodeScoreMeta lazily if it's the first time or if it's a new node
8892 	if a.nodeScoreMeta == nil || a.nodeScoreMeta.NodeID != node.ID {
8893 		a.nodeScoreMeta = &NodeScoreMeta{
8894 			NodeID: node.ID,
8895 			Scores: make(map[string]float64),
8896 		}
8897 	}
8898 	if name == NormScorerName {
8899 		a.nodeScoreMeta.NormScore = score
8900 		// Once we have the normalized score we can push to the heap
8901 		// that tracks top K by normalized score
8902
8903 		// Create the heap if it's not there already
8904 		if a.topScores == nil {
8905 			a.topScores = kheap.NewScoreHeap(MaxRetainedNodeScores)
8906 		}
8907 		heap.Push(a.topScores, a.nodeScoreMeta)
8908
8909 		// Clear out this entry because it's now in the heap
8910 		a.nodeScoreMeta = nil
8911 	} else {
8912 		a.nodeScoreMeta.Scores[name] = score
8913 	}
8914 }
8915
8916 // PopulateScoreMetaData populates ScoreMetaData, the scoring metadata for the
8917 // top K nodes, by popping elements from the heap of top normalized scores
8918 // maintained during scoring
8919 func (a *AllocMetric) PopulateScoreMetaData() {
8920 	if a.topScores == nil {
8921 		return
8922 	}
8923
8924 	if a.ScoreMetaData == nil {
8925 		a.ScoreMetaData = make([]*NodeScoreMeta, a.topScores.Len())
8926 	}
8927 	heapItems := a.topScores.GetItemsReverse()
8928 	for i, item := range heapItems {
8929 		a.ScoreMetaData[i] = item.(*NodeScoreMeta)
8930 	}
8931 }
8932
8933 // NodeScoreMeta captures scoring meta data derived from
8934 // different scoring factors.
8935 type NodeScoreMeta struct {
8936 	NodeID    string
8937 	Scores    map[string]float64
8938 	NormScore float64
8939 }
8940
8941 func (s *NodeScoreMeta) Copy() *NodeScoreMeta {
8942 	if s == nil {
8943 		return nil
8944 	}
8945 	ns := new(NodeScoreMeta)
8946 	*ns = *s
8947 	return ns
8948 }
8949
8950 func (s *NodeScoreMeta) String() string {
8951 	return fmt.Sprintf("%s %f %v", s.NodeID, s.NormScore, s.Scores)
8952 }
8953
8954 func (s *NodeScoreMeta) Score() float64 {
8955 	return s.NormScore
8956 }
8957
8958 func (s *NodeScoreMeta) Data() interface{} {
8959 	return s
8960 }
8961
8962 // AllocDeploymentStatus captures the status of the allocation as part of the
8963 // deployment. This can include things like if the allocation has been marked as
8964 // healthy.
8965 type AllocDeploymentStatus struct {
8966 	// Healthy marks whether the allocation has been marked healthy or unhealthy
8967 	// as part of a deployment. It can be unset if it has neither been marked
8968 	// healthy nor unhealthy.
8969 	Healthy *bool
8970
8971 	// Timestamp is the time at which the health status was set.
8972 	Timestamp time.Time
8973
8974 	// Canary marks whether the allocation is a canary or not. A canary that has
8975 	// been promoted will have this field set to false.
8976 	Canary bool
8977
8978 	// ModifyIndex is the raft index in which the deployment status was last
8979 	// changed.
8980 	ModifyIndex uint64
8981 }
8982
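// Illustrative sketch (not from the original source): per-scorer scores for a
// node accumulate in nodeScoreMeta until the normalized score arrives, which
// pushes the entry onto the top-K heap. Assumes a *Node from the evaluated
// set; the non-normalized scorer names are real Nomad scorers but the values
// are hypothetical.
//
//	m := &AllocMetric{}
//	m.ScoreNode(node, "binpack", 0.8)            // buffered for this node
//	m.ScoreNode(node, "job-anti-affinity", -0.2) // buffered for this node
//	m.ScoreNode(node, NormScorerName, 0.3)       // pushed to the heap, buffer cleared
//	m.PopulateScoreMetaData()                    // heap drained into ScoreMetaData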
8983 // HasHealth returns true if the allocation has its health set.
8984 func (a *AllocDeploymentStatus) HasHealth() bool {
8985 	return a != nil && a.Healthy != nil
8986 }
8987
8988 // IsHealthy returns if the allocation is marked as healthy as part of a
8989 // deployment
8990 func (a *AllocDeploymentStatus) IsHealthy() bool {
8991 	if a == nil {
8992 		return false
8993 	}
8994
8995 	return a.Healthy != nil && *a.Healthy
8996 }
8997
8998 // IsUnhealthy returns if the allocation is marked as unhealthy as part of a
8999 // deployment
9000 func (a *AllocDeploymentStatus) IsUnhealthy() bool {
9001 	if a == nil {
9002 		return false
9003 	}
9004
9005 	return a.Healthy != nil && !*a.Healthy
9006 }
9007
9008 // IsCanary returns if the allocation is marked as a canary
9009 func (a *AllocDeploymentStatus) IsCanary() bool {
9010 	if a == nil {
9011 		return false
9012 	}
9013
9014 	return a.Canary
9015 }
9016
9017 func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus {
9018 	if a == nil {
9019 		return nil
9020 	}
9021
9022 	c := new(AllocDeploymentStatus)
9023 	*c = *a
9024
9025 	if a.Healthy != nil {
9026 		c.Healthy = helper.BoolToPtr(*a.Healthy)
9027 	}
9028
9029 	return c
9030 }
9031
9032 const (
9033 	EvalStatusBlocked   = "blocked"
9034 	EvalStatusPending   = "pending"
9035 	EvalStatusComplete  = "complete"
9036 	EvalStatusFailed    = "failed"
9037 	EvalStatusCancelled = "canceled"
9038 )
9039
9040 const (
9041 	EvalTriggerJobRegister       = "job-register"
9042 	EvalTriggerJobDeregister     = "job-deregister"
9043 	EvalTriggerPeriodicJob       = "periodic-job"
9044 	EvalTriggerNodeDrain         = "node-drain"
9045 	EvalTriggerNodeUpdate        = "node-update"
9046 	EvalTriggerAllocStop         = "alloc-stop"
9047 	EvalTriggerScheduled         = "scheduled"
9048 	EvalTriggerRollingUpdate     = "rolling-update"
9049 	EvalTriggerDeploymentWatcher = "deployment-watcher"
9050 	EvalTriggerFailedFollowUp    = "failed-follow-up"
9051 	EvalTriggerMaxPlans          = "max-plan-attempts"
9052 	EvalTriggerRetryFailedAlloc  = "alloc-failure"
9053 	EvalTriggerQueuedAllocs      = "queued-allocs"
9054 	EvalTriggerPreemption        = "preemption"
9055 	EvalTriggerScaling           = "job-scaling"
9056 )
9057
9058 const (
9059 	// CoreJobEvalGC is used for the garbage collection of evaluations
9060 	// and allocations. We periodically scan evaluations in a terminal state,
9061 	// in which all the corresponding allocations are also terminal. We
9062 	// delete these out of the system to bound the state.
9063 	CoreJobEvalGC = "eval-gc"
9064
9065 	// CoreJobNodeGC is used for the garbage collection of failed nodes.
9066 	// We periodically scan nodes in a terminal state, and if they have no
9067 	// corresponding allocations we delete these out of the system.
9068 	CoreJobNodeGC = "node-gc"
9069
9070 	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
9071 	// periodically scan garbage collectible jobs and check if both their
9072 	// evaluations and allocations are terminal. If so, we delete these out of
9073 	// the system.
9074 	CoreJobJobGC = "job-gc"
9075
9076 	// CoreJobDeploymentGC is used for the garbage collection of eligible
9077 	// deployments. We periodically scan garbage collectible deployments and
9078 	// check if they are terminal. If so, we delete these out of the system.
9079 	CoreJobDeploymentGC = "deployment-gc"
9080
9081 	// CoreJobCSIVolumeClaimGC is used for the garbage collection of CSI
9082 	// volume claims. We periodically scan volumes to see if no allocs are
9083 	// claiming them. If so, we unclaim the volume.
9084 	CoreJobCSIVolumeClaimGC = "csi-volume-claim-gc"
9085
9086 	// CoreJobCSIPluginGC is used for the garbage collection of CSI plugins.
9087 	// We periodically scan plugins to see if they have no associated volumes
9088 	// or allocs running them. If so, we delete the plugin.
9089 	CoreJobCSIPluginGC = "csi-plugin-gc"
9090
9091 	// CoreJobForceGC is used to force garbage collection of all GCable objects.
9092 	CoreJobForceGC = "force-gc"
9093 )
9094
9095 // Evaluation is used anytime we need to apply business logic as a result
9096 // of a change to our desired state (job specification) or the emergent state
9097 // (registered nodes). When the inputs change, we need to "evaluate" them,
9098 // potentially taking action (allocation of work) or doing nothing if the state
9099 // of the world does not require it.
9100 type Evaluation struct {
9101 	// msgpack omit empty fields during serialization
9102 	_struct bool `codec:",omitempty"` // nolint: structcheck
9103
9104 	// ID is a randomly generated UUID used for this evaluation. This
9105 	// is assigned upon the creation of the evaluation.
9106 	ID string
9107
9108 	// Namespace is the namespace the evaluation is created in
9109 	Namespace string
9110
9111 	// Priority is used to control scheduling importance and if this job
9112 	// can preempt other jobs.
9113 	Priority int
9114
9115 	// Type is used to control which schedulers are available to handle
9116 	// this evaluation.
9117 	Type string
9118
9119 	// TriggeredBy is used to give some insight into why this Eval
9120 	// was created. (Job change, node failure, alloc failure, etc).
9121 	TriggeredBy string
9122
9123 	// JobID is the job this evaluation is scoped to. Evaluations cannot
9124 	// be run in parallel for a given JobID, so we serialize on this.
9125 	JobID string
9126
9127 	// JobModifyIndex is the modify index of the job at the time
9128 	// the evaluation was created
9129 	JobModifyIndex uint64
9130
9131 	// NodeID is the node that was affected, triggering the evaluation.
9132 	NodeID string
9133
9134 	// NodeModifyIndex is the modify index of the node at the time
9135 	// the evaluation was created
9136 	NodeModifyIndex uint64
9137
9138 	// DeploymentID is the ID of the deployment that triggered the evaluation.
9139 	DeploymentID string
9140
9141 	// Status of the evaluation
9142 	Status string
9143
9144 	// StatusDescription is meant to provide more human-useful information
9145 	StatusDescription string
9146
9147 	// Wait is a minimum wait time for running the eval. This is used to
9148 	// support a rolling upgrade in versions prior to 0.7.0
9149 	// Deprecated
9150 	Wait time.Duration
9151
9152 	// WaitUntil is the time when this eval should be run. This is used to
9153 	// support delayed rescheduling of failed allocations
9154 	WaitUntil time.Time
9155
9156 	// NextEval is the evaluation ID for the eval created to do a followup.
9157 	// This is used to support rolling upgrades and failed-follow-up evals, where
9158 	// we need a chain of evaluations.
9159 	NextEval string
9160
9161 	// PreviousEval is the evaluation ID for the eval that created this one to do a followup.
9162 	// This is used to support rolling upgrades and failed-follow-up evals, where
9163 	// we need a chain of evaluations.
9164 	PreviousEval string
9165
9166 	// BlockedEval is the evaluation ID for a created blocked eval. A
9167 	// blocked eval will be created if all allocations could not be placed due
9168 	// to constraints or lacking resources.
9169 	BlockedEval string
9170
9171 	// FailedTGAllocs are task groups which have allocations that could not be
9172 	// made, but the metrics are persisted so that the user can use the feedback
9173 	// to determine the cause.
9174 	FailedTGAllocs map[string]*AllocMetric
9175
9176 	// ClassEligibility tracks computed node classes that have been explicitly
9177 	// marked as eligible or ineligible.
9178 	ClassEligibility map[string]bool
9179
9180 	// QuotaLimitReached marks whether a quota limit was reached for the
9181 	// evaluation.
9182 	QuotaLimitReached string
9183
9184 	// EscapedComputedClass marks whether the job has constraints that are not
9185 	// captured by computed node classes.
9186 	EscapedComputedClass bool
9187
9188 	// AnnotatePlan triggers the scheduler to provide additional annotations
9189 	// during the evaluation. This should not be set during normal operations.
9190 	AnnotatePlan bool
9191
9192 	// QueuedAllocations is the number of unplaced allocations at the time the
9193 	// evaluation was processed. The map is keyed by Task Group names.
9194 	QueuedAllocations map[string]int
9195
9196 	// LeaderACL provides the ACL token to use when issuing RPCs back to the
9197 	// leader. This will be a valid management token as long as the leader is
9198 	// active. This should not ever be exposed via the API.
9199 	LeaderACL string
9200
9201 	// SnapshotIndex is the Raft index of the snapshot used to process the
9202 	// evaluation. The index will either be set when it has gone through the
9203 	// scheduler or if a blocked evaluation is being created. The index is set
9204 	// in this case so we can determine if an early unblocking is required since
9205 	// capacity has changed since the evaluation was created. This can result in
9206 	// the SnapshotIndex being less than the CreateIndex.
9207 	SnapshotIndex uint64
9208
9209 	// Raft Indexes
9210 	CreateIndex uint64
9211 	ModifyIndex uint64
9212
9213 	CreateTime int64
9214 	ModifyTime int64
9215 }
9216
9217 // TerminalStatus returns if the current status is terminal and
9218 // will no longer transition.
9219 func (e *Evaluation) TerminalStatus() bool {
9220 	switch e.Status {
9221 	case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled:
9222 		return true
9223 	default:
9224 		return false
9225 	}
9226 }
9227
9228 func (e *Evaluation) GoString() string {
9229 	return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace)
9230 }
9231
9232 func (e *Evaluation) Copy() *Evaluation {
9233 	if e == nil {
9234 		return nil
9235 	}
9236 	ne := new(Evaluation)
9237 	*ne = *e
9238
9239 	// Copy ClassEligibility
9240 	if e.ClassEligibility != nil {
9241 		classes := make(map[string]bool, len(e.ClassEligibility))
9242 		for class, elig := range e.ClassEligibility {
9243 			classes[class] = elig
9244 		}
9245 		ne.ClassEligibility = classes
9246 	}
9247
9248 	// Copy FailedTGAllocs
9249 	if e.FailedTGAllocs != nil {
9250 		failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs))
9251 		for tg, metric := range e.FailedTGAllocs {
9252 			failedTGs[tg] = metric.Copy()
9253 		}
9254 		ne.FailedTGAllocs = failedTGs
9255 	}
9256
9257 	// Copy queued allocations
9258 	if e.QueuedAllocations != nil {
9259 		queuedAllocations := make(map[string]int, len(e.QueuedAllocations))
9260 		for tg, num := range e.QueuedAllocations {
9261 			queuedAllocations[tg] = num
9262 		}
9263 		ne.QueuedAllocations = queuedAllocations
9264 	}
9265
9266 	return ne
9267 }
9268
9269 // ShouldEnqueue checks if a given evaluation should be enqueued into the
9270 // eval_broker
9271 func (e *Evaluation) ShouldEnqueue() bool {
9272 	switch e.Status {
9273 	case EvalStatusPending:
9274 		return true
9275 	case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled:
9276 		return false
9277 	default:
9278 		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
9279 	}
9280 }
9281
9282 // ShouldBlock checks if a given evaluation should be entered into the blocked
9283 // eval tracker.
9284 func (e *Evaluation) ShouldBlock() bool {
9285 	switch e.Status {
9286 	case EvalStatusBlocked:
9287 		return true
9288 	case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled:
9289 		return false
9290 	default:
9291 		panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status))
9292 	}
9293 }
9294
9295 // MakePlan is used to make a plan from the given evaluation
9296 // for a given Job
9297 func (e *Evaluation) MakePlan(j *Job) *Plan {
9298 	p := &Plan{
9299 		EvalID:          e.ID,
9300 		Priority:        e.Priority,
9301 		Job:             j,
9302 		NodeUpdate:      make(map[string][]*Allocation),
9303 		NodeAllocation:  make(map[string][]*Allocation),
9304 		NodePreemptions: make(map[string][]*Allocation),
9305 	}
9306 	if j != nil {
9307 		p.AllAtOnce = j.AllAtOnce
9308 	}
9309 	return p
9310 }
9311
9312 // NextRollingEval creates an evaluation to follow up this eval for rolling updates
9313 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation {
9314 	now := time.Now().UTC().UnixNano()
9315 	return &Evaluation{
9316 		ID:             uuid.Generate(),
9317 		Namespace:      e.Namespace,
9318 		Priority:       e.Priority,
9319 		Type:           e.Type,
9320 		TriggeredBy:    EvalTriggerRollingUpdate,
9321 		JobID:          e.JobID,
9322 		JobModifyIndex: e.JobModifyIndex,
9323 		Status:         EvalStatusPending,
9324 		Wait:           wait,
9325 		PreviousEval:   e.ID,
9326 		CreateTime:     now,
9327 		ModifyTime:     now,
9328 	}
9329 }
9330
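// Illustrative sketch (not from the original source): follow-up evals link
// back to their parent through PreviousEval, forming a chain the scheduler can
// walk. The wait duration is hypothetical.
//
//	next := eval.NextRollingEval(30 * time.Second)
//	// next.TriggeredBy == EvalTriggerRollingUpdate
//	// next.PreviousEval == eval.ID
//	ok := next.ShouldEnqueue() // true: it is created in the pending state
//	_ = ok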
9331 // CreateBlockedEval creates a blocked evaluation to follow up this eval to place any
9332 // failed allocations. It takes the classes marked explicitly eligible or
9333 // ineligible, whether the job has escaped computed node classes and whether the
9334 // quota limit was reached.
9335 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool,
9336 	escaped bool, quotaReached string) *Evaluation {
9337 	now := time.Now().UTC().UnixNano()
9338 	return &Evaluation{
9339 		ID:                   uuid.Generate(),
9340 		Namespace:            e.Namespace,
9341 		Priority:             e.Priority,
9342 		Type:                 e.Type,
9343 		TriggeredBy:          EvalTriggerQueuedAllocs,
9344 		JobID:                e.JobID,
9345 		JobModifyIndex:       e.JobModifyIndex,
9346 		Status:               EvalStatusBlocked,
9347 		PreviousEval:         e.ID,
9348 		ClassEligibility:     classEligibility,
9349 		EscapedComputedClass: escaped,
9350 		QuotaLimitReached:    quotaReached,
9351 		CreateTime:           now,
9352 		ModifyTime:           now,
9353 	}
9354 }
9355
9356 // CreateFailedFollowUpEval creates a follow up evaluation when the current one
9357 // has been marked as failed because it has hit the delivery limit and will not
9358 // be retried by the eval_broker. Callers should copy the created eval's ID
9359 // into the old eval's NextEval field.
9360 func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation {
9361 	now := time.Now().UTC().UnixNano()
9362 	return &Evaluation{
9363 		ID:             uuid.Generate(),
9364 		Namespace:      e.Namespace,
9365 		Priority:       e.Priority,
9366 		Type:           e.Type,
9367 		TriggeredBy:    EvalTriggerFailedFollowUp,
9368 		JobID:          e.JobID,
9369 		JobModifyIndex: e.JobModifyIndex,
9370 		Status:         EvalStatusPending,
9371 		Wait:           wait,
9372 		PreviousEval:   e.ID,
9373 		CreateTime:     now,
9374 		ModifyTime:     now,
9375 	}
9376 }
9377
9378 // UpdateModifyTime takes into account that clocks on different servers may be
9379 // slightly out of sync. Even in case of a leader change, this method will
9380 // guarantee that ModifyTime will always be after CreateTime.
9381 func (e *Evaluation) UpdateModifyTime() {
9382 	now := time.Now().UTC().UnixNano()
9383 	if now <= e.CreateTime {
9384 		e.ModifyTime = e.CreateTime + 1
9385 	} else {
9386 		e.ModifyTime = now
9387 	}
9388 }
9389
9390 // Plan is used to submit a commit plan for task allocations. These
9391 // are submitted to the leader which verifies that resources have
9392 // not been overcommitted before admitting the plan.
9393 type Plan struct {
9394 	// msgpack omit empty fields during serialization
9395 	_struct bool `codec:",omitempty"` // nolint: structcheck
9396
9397 	// EvalID is the evaluation ID this plan is associated with
9398 	EvalID string
9399
9400 	// EvalToken is used to prevent a split-brain processing of
9401 	// an evaluation. There should only be a single scheduler running
9402 	// an Eval at a time, but this could be violated after a leadership
9403 	// transition. This unique token is used to reject plans that are
9404 	// being submitted from a different leader.
9405 	EvalToken string
9406
9407 	// Priority is the priority of the upstream job
9408 	Priority int
9409
9410 	// AllAtOnce is used to control if incremental scheduling of task groups
9411 	// is allowed or if we must do a gang scheduling of the entire job.
9412 	// If this is false, a plan may be partially applied. Otherwise, the
9413 	// entire plan must be able to make progress.
9414 	AllAtOnce bool
9415
9416 	// Job is the parent job of all the allocations in the Plan.
9417 	// Since a Plan only involves a single Job, we can reduce the size
9418 	// of the plan by only including it once.
9419 	Job *Job
9420
9421 	// NodeUpdate contains all the allocations for each node. For each node,
9422 	// this is a list of the allocations to update to either stop or evict.
9423 	NodeUpdate map[string][]*Allocation
9424
9425 	// NodeAllocation contains all the allocations for each node.

// UpdateModifyTime takes into account that clocks on different servers may be
// slightly out of sync. Even in case of a leader change, this method will
// guarantee that ModifyTime will always be after CreateTime.
func (e *Evaluation) UpdateModifyTime() {
	now := time.Now().UTC().UnixNano()
	if now <= e.CreateTime {
		e.ModifyTime = e.CreateTime + 1
	} else {
		e.ModifyTime = now
	}
}

// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// msgpack omit empty fields during serialization
	_struct bool `codec:",omitempty"` // nolint: structcheck

	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// Job is the parent job of all the allocations in the Plan.
	// Since a Plan only involves a single Job, we can reduce the size
	// of the plan by only including it once.
	Job *Job

	// NodeUpdate contains all the allocations for each node. For each node,
	// this is a list of the allocations to update to either stop or evict.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	NodeAllocation map[string][]*Allocation

	// Annotations contains annotations by the scheduler to be used by operators
	// to understand the decisions made by the scheduler.
	Annotations *PlanAnnotations

	// Deployment is the deployment created or updated by the scheduler that
	// should be applied by the planner.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// NodePreemptions is a map from node id to a set of allocations from other
	// lower priority jobs that are preempted. Preempted allocations are marked
	// as evicted.
	NodePreemptions map[string][]*Allocation

	// SnapshotIndex is the Raft index of the snapshot used to create the
	// Plan. The leader will wait to evaluate the plan until its StateStore
	// has reached at least this index.
	SnapshotIndex uint64
}

// AppendStoppedAlloc marks an allocation to be stopped. The clientStatus of the
// allocation may be optionally set by passing in a non-empty value.
func (p *Plan) AppendStoppedAlloc(alloc *Allocation, desiredDesc, clientStatus string) {
	newAlloc := new(Allocation)
	*newAlloc = *alloc

	// If the job is not set in the plan we are deregistering a job so we
	// extract the job from the allocation.
	if p.Job == nil && newAlloc.Job != nil {
		p.Job = newAlloc.Job
	}

	// Normalize the job
	newAlloc.Job = nil

	// Strip the resources as it can be rebuilt.
	newAlloc.Resources = nil

	newAlloc.DesiredStatus = AllocDesiredStatusStop
	newAlloc.DesiredDescription = desiredDesc

	if clientStatus != "" {
		newAlloc.ClientStatus = clientStatus
	}

	newAlloc.AppendState(AllocStateFieldClientStatus, clientStatus)

	node := alloc.NodeID
	existing := p.NodeUpdate[node]
	p.NodeUpdate[node] = append(existing, newAlloc)
}

// AppendPreemptedAlloc is used to append an allocation that's being preempted to the plan.
// To minimize the size of the plan, this only sets a minimal set of fields in the allocation.
func (p *Plan) AppendPreemptedAlloc(alloc *Allocation, preemptingAllocID string) {
	newAlloc := &Allocation{}
	newAlloc.ID = alloc.ID
	newAlloc.JobID = alloc.JobID
	newAlloc.Namespace = alloc.Namespace
	newAlloc.DesiredStatus = AllocDesiredStatusEvict
	newAlloc.PreemptedByAllocation = preemptingAllocID

	desiredDesc := fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocID)
	newAlloc.DesiredDescription = desiredDesc

	// TaskResources are needed by the plan applier to check if allocations fit
	// after removing preempted allocations
	if alloc.AllocatedResources != nil {
		newAlloc.AllocatedResources = alloc.AllocatedResources
	} else {
		// COMPAT Remove in version 0.11
		newAlloc.TaskResources = alloc.TaskResources
		newAlloc.SharedResources = alloc.SharedResources
	}

	// Append this alloc to slice for this node
	node := alloc.NodeID
	existing := p.NodePreemptions[node]
	p.NodePreemptions[node] = append(existing, newAlloc)
}

func (p *Plan) PopUpdate(alloc *Allocation) {
	existing := p.NodeUpdate[alloc.NodeID]
	n := len(existing)
	if n > 0 && existing[n-1].ID == alloc.ID {
		existing = existing[:n-1]
		if len(existing) > 0 {
			p.NodeUpdate[alloc.NodeID] = existing
		} else {
			delete(p.NodeUpdate, alloc.NodeID)
		}
	}
}

func (p *Plan) AppendAlloc(alloc *Allocation) {
	node := alloc.NodeID
	existing := p.NodeAllocation[node]

	// Normalize the job
	alloc.Job = nil

	p.NodeAllocation[node] = append(existing, alloc)
}

// IsNoOp checks if this plan would do nothing
func (p *Plan) IsNoOp() bool {
	return len(p.NodeUpdate) == 0 &&
		len(p.NodeAllocation) == 0 &&
		p.Deployment == nil &&
		len(p.DeploymentUpdates) == 0
}

// NormalizeAllocations normalizes allocations to remove fields that can
// be fetched from the MemDB instead of sending over the wire
func (p *Plan) NormalizeAllocations() {
	for _, allocs := range p.NodeUpdate {
		for i, alloc := range allocs {
			allocs[i] = &Allocation{
				ID:                 alloc.ID,
				DesiredDescription: alloc.DesiredDescription,
				ClientStatus:       alloc.ClientStatus,
			}
		}
	}

	for _, allocs := range p.NodePreemptions {
		for i, alloc := range allocs {
			allocs[i] = &Allocation{
				ID:                    alloc.ID,
				PreemptedByAllocation: alloc.PreemptedByAllocation,
			}
		}
	}
}
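
// Illustrative sketch (not part of the original source): after
// NormalizeAllocations, a stopped allocation in NodeUpdate carries only the
// fields that cannot be rehydrated from MemDB; alloc is assumed in scope:
//
//	plan.AppendStoppedAlloc(alloc, "node drained", AllocClientStatusComplete)
//	plan.NormalizeAllocations()
//	stub := plan.NodeUpdate[alloc.NodeID][0]
//	// stub.ID, stub.DesiredDescription, and stub.ClientStatus are set;
//	// every other field is left at its zero value.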

// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	NodeAllocation map[string][]*Allocation

	// Deployment is the deployment that was committed.
	Deployment *Deployment

	// DeploymentUpdates is the set of deployment updates that were committed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// NodePreemptions is a map from node id to a set of allocations from other
	// lower priority jobs that are preempted. Preempted allocations are marked
	// as stopped.
	NodePreemptions map[string][]*Allocation

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// over committed) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}

// IsNoOp checks if this plan result would do nothing
func (p *PlanResult) IsNoOp() bool {
	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 &&
		len(p.DeploymentUpdates) == 0 && p.Deployment == nil
}

// FullCommit is used to check if all the allocations in a plan
// were committed as part of the result. Returns whether there was
// a match, and the number of expected and actual allocations.
func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
	expected := 0
	actual := 0
	for name, allocList := range plan.NodeAllocation {
		didAlloc := p.NodeAllocation[name]
		expected += len(allocList)
		actual += len(didAlloc)
	}
	return actual == expected, expected, actual
}
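
// Illustrative sketch (not part of the original source): a worker can use
// FullCommit to detect a partially applied plan and refresh its state before
// replanning; result and plan are assumed in scope:
//
//	fullCommit, expected, actual := result.FullCommit(plan)
//	if !fullCommit {
//		// e.g. wait for the local state store to reach result.RefreshIndex,
//		// then replan the remaining expected-actual placements.
//	}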

// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators.
type PlanAnnotations struct {
	// DesiredTGUpdates is the set of desired updates per task group.
	DesiredTGUpdates map[string]*DesiredUpdates

	// PreemptedAllocs is the set of allocations to be preempted to make the placement successful.
	PreemptedAllocs []*AllocListStub
}

// DesiredUpdates is the set of changes the scheduler would like to make given
// sufficient resources and cluster capacity.
type DesiredUpdates struct {
	Ignore            uint64
	Place             uint64
	Migrate           uint64
	Stop              uint64
	InPlaceUpdate     uint64
	DestructiveUpdate uint64
	Canary            uint64
	Preemptions       uint64
}

func (d *DesiredUpdates) GoString() string {
	return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
		d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
}

// MsgpackHandle is a shared handle for encoding/decoding of structs
var MsgpackHandle = func() *codec.MsgpackHandle {
	h := &codec.MsgpackHandle{}
	h.RawToString = true

	// Maintain the binary format used prior to upgrading to the latest ugorji
	h.BasicHandle.TimeNotBuiltin = true

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))

	// only review struct codec tags
	h.TypeInfos = codec.NewTypeInfos([]string{"codec"})

	return h
}()

var (
	// JsonHandle and JsonHandlePretty are the codec handles to JSON encode
	// structs. The pretty handle will add indents for easier human consumption.
	JsonHandle = &codec.JsonHandle{
		HTMLCharsAsIs: true,
	}
	JsonHandlePretty = &codec.JsonHandle{
		HTMLCharsAsIs: true,
		Indent:        4,
	}
)

// Decode is used to decode a MsgPack encoded object
func Decode(buf []byte, out interface{}) error {
	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
}

// Encode is used to encode a MsgPack object with type prefix
func Encode(t MessageType, msg interface{}) ([]byte, error) {
	var buf bytes.Buffer
	buf.WriteByte(uint8(t))
	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
	return buf.Bytes(), err
}
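
// Illustrative round trip (not part of the original source): Encode prefixes
// the msgpack payload with a single MessageType byte, so the matching Decode
// call must skip that first byte; req is assumed in scope:
//
//	buf, err := Encode(NodeRegisterRequestType, req)
//	if err == nil {
//		msgType := MessageType(buf[0]) // the FSM dispatches on this byte
//		var out NodeRegisterRequest
//		err = Decode(buf[1:], &out)
//	}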

// KeyringResponse is a unified key response and can be used for install,
// remove, use, as well as listing key queries.
type KeyringResponse struct {
	Messages map[string]string
	Keys     map[string]int
	NumNodes int
}

// KeyringRequest is the request object for serf key operations.
type KeyringRequest struct {
	Key string
}

// RecoverableError wraps an error and marks whether it is recoverable and
// could be retried, or whether it is fatal.
type RecoverableError struct {
	Err         string
	Recoverable bool
}

// NewRecoverableError is used to wrap an error and mark it as recoverable or
// not.
func NewRecoverableError(e error, recoverable bool) error {
	if e == nil {
		return nil
	}

	return &RecoverableError{
		Err:         e.Error(),
		Recoverable: recoverable,
	}
}

// WrapRecoverable wraps an existing error in a new RecoverableError with a new
// message. If the error was recoverable before, the returned error is as well;
// otherwise it is unrecoverable.
func WrapRecoverable(msg string, err error) error {
	return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)}
}

func (r *RecoverableError) Error() string {
	return r.Err
}

func (r *RecoverableError) IsRecoverable() bool {
	return r.Recoverable
}

func (r *RecoverableError) IsUnrecoverable() bool {
	return !r.Recoverable
}

// Recoverable is an interface for errors to implement to indicate whether or
// not they are fatal or recoverable.
type Recoverable interface {
	error
	IsRecoverable() bool
}

// IsRecoverable returns true if error is a RecoverableError with
// Recoverable=true. Otherwise false is returned.
func IsRecoverable(e error) bool {
	if re, ok := e.(Recoverable); ok {
		return re.IsRecoverable()
	}
	return false
}
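
// Illustrative sketch (not part of the original source): a retry loop can use
// the Recoverable contract to decide between retrying and failing fast; the
// transient error here is an assumed example:
//
//	err := NewRecoverableError(errors.New("connection reset"), true)
//	if IsRecoverable(err) {
//		// back off and retry the RPC
//	} else {
//		// surface the error to the caller
//	}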

// WrappedServerError wraps an error and satisfies
// both the Recoverable and the ServerSideError interfaces
type WrappedServerError struct {
	Err error
}

// NewWrappedServerError is used to create a wrapped server side error
func NewWrappedServerError(e error) error {
	return &WrappedServerError{
		Err: e,
	}
}

func (r *WrappedServerError) IsRecoverable() bool {
	return IsRecoverable(r.Err)
}

func (r *WrappedServerError) Error() string {
	return r.Err.Error()
}

func (r *WrappedServerError) IsServerSide() bool {
	return true
}

// ServerSideError is an interface for errors to implement to indicate
// errors occurring after the request makes it to a server
type ServerSideError interface {
	error
	IsServerSide() bool
}

// IsServerSide returns true if error is a wrapped
// server side error
func IsServerSide(e error) bool {
	if se, ok := e.(ServerSideError); ok {
		return se.IsServerSide()
	}
	return false
}

// ACLPolicy is used to represent an ACL policy
type ACLPolicy struct {
	Name        string      // Unique name
	Description string      // Human readable
	Rules       string      // HCL or JSON format
	RulesJSON   *acl.Policy // Generated from Rules on read
	Hash        []byte
	CreateIndex uint64
	ModifyIndex uint64
}

// SetHash is used to compute and set the hash of the ACL policy
func (a *ACLPolicy) SetHash() []byte {
	// Initialize a 256bit Blake2 hash (32 bytes)
	hash, err := blake2b.New256(nil)
	if err != nil {
		panic(err)
	}

	// Write all the user set fields
	hash.Write([]byte(a.Name))
	hash.Write([]byte(a.Description))
	hash.Write([]byte(a.Rules))

	// Finalize the hash
	hashVal := hash.Sum(nil)

	// Set and return the hash
	a.Hash = hashVal
	return hashVal
}

func (a *ACLPolicy) Stub() *ACLPolicyListStub {
	return &ACLPolicyListStub{
		Name:        a.Name,
		Description: a.Description,
		Hash:        a.Hash,
		CreateIndex: a.CreateIndex,
		ModifyIndex: a.ModifyIndex,
	}
}

func (a *ACLPolicy) Validate() error {
	var mErr multierror.Error
	if !validPolicyName.MatchString(a.Name) {
		err := fmt.Errorf("invalid name '%s'", a.Name)
		mErr.Errors = append(mErr.Errors, err)
	}
	if _, err := acl.Parse(a.Rules); err != nil {
		err = fmt.Errorf("failed to parse rules: %v", err)
		mErr.Errors = append(mErr.Errors, err)
	}
	if len(a.Description) > maxPolicyDescriptionLength {
		err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength)
		mErr.Errors = append(mErr.Errors, err)
	}
	return mErr.ErrorOrNil()
}
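
// Illustrative sketch (not part of the original source): an upsert path would
// typically validate a policy and then stamp its hash before writing it
// through Raft; the rules string is an assumed minimal HCL example:
//
//	p := &ACLPolicy{
//		Name:        "readonly",
//		Description: "Read-only access",
//		Rules:       `namespace "default" { policy = "read" }`,
//	}
//	if err := p.Validate(); err == nil {
//		p.SetHash()
//	}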

// ACLPolicyListStub is used for listing ACL policies
type ACLPolicyListStub struct {
	Name        string
	Description string
	Hash        []byte
	CreateIndex uint64
	ModifyIndex uint64
}

// ACLPolicyListRequest is used to request a list of policies
type ACLPolicyListRequest struct {
	QueryOptions
}

// ACLPolicySpecificRequest is used to query a specific policy
type ACLPolicySpecificRequest struct {
	Name string
	QueryOptions
}

// ACLPolicySetRequest is used to query a set of policies
type ACLPolicySetRequest struct {
	Names []string
	QueryOptions
}

// ACLPolicyListResponse is used for a list request
type ACLPolicyListResponse struct {
	Policies []*ACLPolicyListStub
	QueryMeta
}

// SingleACLPolicyResponse is used to return a single policy
type SingleACLPolicyResponse struct {
	Policy *ACLPolicy
	QueryMeta
}

// ACLPolicySetResponse is used to return a set of policies
type ACLPolicySetResponse struct {
	Policies map[string]*ACLPolicy
	QueryMeta
}

// ACLPolicyDeleteRequest is used to delete a set of policies
type ACLPolicyDeleteRequest struct {
	Names []string
	WriteRequest
}

// ACLPolicyUpsertRequest is used to upsert a set of policies
type ACLPolicyUpsertRequest struct {
	Policies []*ACLPolicy
	WriteRequest
}

// ACLToken represents a client token which is used to authenticate
type ACLToken struct {
	AccessorID  string    // Public Accessor ID (UUID)
	SecretID    string    // Secret ID, private (UUID)
	Name        string    // Human friendly name
	Type        string    // Client or Management
	Policies    []string  // Policies this token is tied to
	Global      bool      // Global or Region local
	Hash        []byte
	CreateTime  time.Time // Time of creation
	CreateIndex uint64
	ModifyIndex uint64
}

var (
	// AnonymousACLToken is used when no SecretID is provided, and the
	// request is made anonymously.
	AnonymousACLToken = &ACLToken{
		AccessorID: "anonymous",
		Name:       "Anonymous Token",
		Type:       ACLClientToken,
		Policies:   []string{"anonymous"},
		Global:     false,
	}
)

type ACLTokenListStub struct {
	AccessorID  string
	Name        string
	Type        string
	Policies    []string
	Global      bool
	Hash        []byte
	CreateTime  time.Time
	CreateIndex uint64
	ModifyIndex uint64
}

// SetHash is used to compute and set the hash of the ACL token
func (a *ACLToken) SetHash() []byte {
	// Initialize a 256bit Blake2 hash (32 bytes)
	hash, err := blake2b.New256(nil)
	if err != nil {
		panic(err)
	}

	// Write all the user set fields
	hash.Write([]byte(a.Name))
	hash.Write([]byte(a.Type))
	for _, policyName := range a.Policies {
		hash.Write([]byte(policyName))
	}
	if a.Global {
		hash.Write([]byte("global"))
	} else {
		hash.Write([]byte("local"))
	}

	// Finalize the hash
	hashVal := hash.Sum(nil)

	// Set and return the hash
	a.Hash = hashVal
	return hashVal
}

func (a *ACLToken) Stub() *ACLTokenListStub {
	return &ACLTokenListStub{
		AccessorID:  a.AccessorID,
		Name:        a.Name,
		Type:        a.Type,
		Policies:    a.Policies,
		Global:      a.Global,
		Hash:        a.Hash,
		CreateTime:  a.CreateTime,
		CreateIndex: a.CreateIndex,
		ModifyIndex: a.ModifyIndex,
	}
}

// Validate is used to sanity check a token
func (a *ACLToken) Validate() error {
	var mErr multierror.Error
	if len(a.Name) > maxTokenNameLength {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("token name too long"))
	}
	switch a.Type {
	case ACLClientToken:
		if len(a.Policies) == 0 {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("client token missing policies"))
		}
	case ACLManagementToken:
		if len(a.Policies) != 0 {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("management token cannot be associated with policies"))
		}
	default:
		mErr.Errors = append(mErr.Errors, fmt.Errorf("token type must be client or management"))
	}
	return mErr.ErrorOrNil()
}

// PolicySubset checks if a given set of policies is a subset of the token's
// associated policies
func (a *ACLToken) PolicySubset(policies []string) bool {
	// Hot-path the management tokens, superset of all policies.
	if a.Type == ACLManagementToken {
		return true
	}
	associatedPolicies := make(map[string]struct{}, len(a.Policies))
	for _, policy := range a.Policies {
		associatedPolicies[policy] = struct{}{}
	}
	for _, policy := range policies {
		if _, ok := associatedPolicies[policy]; !ok {
			return false
		}
	}
	return true
}
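
// Illustrative sketch (not part of the original source): PolicySubset answers
// "does this token carry at least these policies?"; the policy names below
// are assumed examples:
//
//	token := &ACLToken{Type: ACLClientToken, Policies: []string{"readonly", "deploy"}}
//	token.PolicySubset([]string{"readonly"})          // true
//	token.PolicySubset([]string{"readonly", "admin"}) // false: "admin" is not attached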

// ACLTokenListRequest is used to request a list of tokens
type ACLTokenListRequest struct {
	GlobalOnly bool
	QueryOptions
}

// ACLTokenSpecificRequest is used to query a specific token
type ACLTokenSpecificRequest struct {
	AccessorID string
	QueryOptions
}

// ACLTokenSetRequest is used to query a set of tokens
type ACLTokenSetRequest struct {
	AccessorIDS []string
	QueryOptions
}

// ACLTokenListResponse is used for a list request
type ACLTokenListResponse struct {
	Tokens []*ACLTokenListStub
	QueryMeta
}

// SingleACLTokenResponse is used to return a single token
type SingleACLTokenResponse struct {
	Token *ACLToken
	QueryMeta
}

// ACLTokenSetResponse is used to return a set of tokens
type ACLTokenSetResponse struct {
	Tokens map[string]*ACLToken // Keyed by Accessor ID
	QueryMeta
}

// ResolveACLTokenRequest is used to resolve a specific token
type ResolveACLTokenRequest struct {
	SecretID string
	QueryOptions
}

// ResolveACLTokenResponse is used to resolve a single token
type ResolveACLTokenResponse struct {
	Token *ACLToken
	QueryMeta
}

// ACLTokenDeleteRequest is used to delete a set of tokens
type ACLTokenDeleteRequest struct {
	AccessorIDs []string
	WriteRequest
}

// ACLTokenBootstrapRequest is used to bootstrap ACLs
type ACLTokenBootstrapRequest struct {
	Token      *ACLToken // Not client specifiable
	ResetIndex uint64    // Reset index is used to clear the bootstrap token
	WriteRequest
}

// ACLTokenUpsertRequest is used to upsert a set of tokens
type ACLTokenUpsertRequest struct {
	Tokens []*ACLToken
	WriteRequest
}

// ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest
type ACLTokenUpsertResponse struct {
	Tokens []*ACLToken
	WriteMeta
}