github.com/uchennaokeke444/nomad@v0.11.8/nomad/structs/structs.go

package structs

import (
	"bytes"
	"container/heap"
	"crypto/md5"
	"crypto/sha1"
	"crypto/sha256"
	"crypto/sha512"
	"encoding/base32"
	"encoding/base64"
	"encoding/hex"
	"errors"
	"fmt"
	"math"
	"net"
	"os"
	"path/filepath"
	"reflect"
	"regexp"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/hashicorp/cronexpr"
	"github.com/hashicorp/go-msgpack/codec"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/go-version"
	"github.com/mitchellh/copystructure"
	"golang.org/x/crypto/blake2b"

	"github.com/hashicorp/nomad/acl"
	"github.com/hashicorp/nomad/command/agent/pprof"
	"github.com/hashicorp/nomad/helper"
	"github.com/hashicorp/nomad/helper/args"
	"github.com/hashicorp/nomad/helper/constraints/semver"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/hashicorp/nomad/lib/kheap"
	psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
)

var (
	// validPolicyName is used to validate a policy name
	validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$")

	// b32 is a lowercase base32 encoding for use in URL friendly service hashes
	b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567"))
)

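// Editor's note: the function below is an illustrative sketch, not part of
// the upstream file. It shows the kind of lowercase, URL-friendly string b32
// produces when fed a digest; the input and the choice of MD5 here are
// hypothetical, as the hash Nomad feeds into b32 varies by call site.
func exampleB32ServiceHash() string {
	sum := md5.Sum([]byte("web-frontend-cache")) // hypothetical input
	return b32.EncodeToString(sum[:])            // lowercase base32 of the digest
}
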
type MessageType uint8

// note: new raft message types need to be added to the end of this
// list of contents
const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
	ReconcileJobSummariesRequestType
	VaultAccessorRegisterRequestType
	VaultAccessorDeregisterRequestType
	ApplyPlanResultsRequestType
	DeploymentStatusUpdateRequestType
	DeploymentPromoteRequestType
	DeploymentAllocHealthRequestType
	DeploymentDeleteRequestType
	JobStabilityRequestType
	ACLPolicyUpsertRequestType
	ACLPolicyDeleteRequestType
	ACLTokenUpsertRequestType
	ACLTokenDeleteRequestType
	ACLTokenBootstrapRequestType
	AutopilotRequestType
	UpsertNodeEventsType
	JobBatchDeregisterRequestType
	AllocUpdateDesiredTransitionRequestType
	NodeUpdateEligibilityRequestType
	BatchNodeUpdateDrainRequestType
	SchedulerConfigRequestType
	NodeBatchDeregisterRequestType
	ClusterMetadataRequestType
	ServiceIdentityAccessorRegisterRequestType
	ServiceIdentityAccessorDeregisterRequestType
	CSIVolumeRegisterRequestType
	CSIVolumeDeregisterRequestType
	CSIVolumeClaimRequestType
	ScalingEventRegisterRequestType
	CSIVolumeClaimBatchRequestType
	CSIPluginDeleteRequestType
)

const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128

	// ApiMajorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed in a way
	// that would break clients for sane client versioning.
	ApiMajorVersion = 1

	// ApiMinorVersion is returned as part of the Status.Version request.
	// It should be incremented anytime the APIs are changed to allow
	// for sane client versioning. Minor changes should be compatible
	// within the major version.
	ApiMinorVersion = 1

	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"

	GetterModeAny  = "any"
	GetterModeFile = "file"
	GetterModeDir  = "dir"

	// maxPolicyDescriptionLength limits a policy description length
	maxPolicyDescriptionLength = 256

	// maxTokenNameLength limits an ACL token name length
	maxTokenNameLength = 256

	// ACLClientToken and ACLManagementToken are the only types of tokens
	ACLClientToken     = "client"
	ACLManagementToken = "management"

	// DefaultNamespace is the default namespace.
	DefaultNamespace            = "default"
	DefaultNamespaceDescription = "Default shared namespace"

	// JitterFraction is the limit to the amount of jitter we apply
	// to a user specified MaxQueryTime. We divide the specified time by
	// the fraction. So 16 == 6.25% limit of jitter. This jitter is also
	// applied to RPCHoldTimeout.
	JitterFraction = 16

	// MaxRetainedNodeEvents is the maximum number of node events that will be
	// retained for a single node
	MaxRetainedNodeEvents = 10

	// MaxRetainedNodeScores is the number of top scoring nodes for which we
	// retain scoring metadata
	MaxRetainedNodeScores = 5

	// Normalized scorer name
	NormScorerName = "normalized-score"
)

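// Editor's note: an illustrative sketch, not part of the upstream file. It
// spells out the arithmetic in the JitterFraction comment: the random jitter
// added to a user-supplied MaxQueryTime is bounded by
// MaxQueryTime/JitterFraction, so a 16-minute query gets at most one minute
// (6.25%) of jitter.
func exampleJitterBound(maxQueryTime time.Duration) time.Duration {
	return maxQueryTime / JitterFraction
}
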
// Context defines the scope in which a search for a Nomad object operates,
// and is also used to query the matching index value for this context.
type Context string

const (
	Allocs      Context = "allocs"
	Deployments Context = "deployment"
	Evals       Context = "evals"
	Jobs        Context = "jobs"
	Nodes       Context = "nodes"
	Namespaces  Context = "namespaces"
	Quotas      Context = "quotas"
	All         Context = "all"
	Plugins     Context = "plugins"
	Volumes     Context = "volumes"
)

// NamespacedID is a tuple of an ID and a namespace
type NamespacedID struct {
	ID        string
	Namespace string
}

// NewNamespacedID returns a new namespaced ID given the ID and namespace
func NewNamespacedID(id, ns string) NamespacedID {
	return NamespacedID{
		ID:        id,
		Namespace: ns,
	}
}

func (n NamespacedID) String() string {
	return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID)
}

// RPCInfo is used to describe common information about a query
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
	IsForwarded() bool
	SetForwarded()
}

// InternalRpcInfo allows adding internal RPC metadata to an RPC. This struct
// should NOT be replicated in the API package as it is internal only.
type InternalRpcInfo struct {
	// Forwarded marks whether the RPC has been forwarded.
	Forwarded bool
}

// IsForwarded returns whether the RPC is forwarded from another server.
func (i *InternalRpcInfo) IsForwarded() bool {
	return i.Forwarded
}

// SetForwarded marks that the RPC is being forwarded from another server.
func (i *InternalRpcInfo) SetForwarded() {
	i.Forwarded = true
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// Namespace is the target namespace for the query.
	//
	// Since handlers do not have a default value set they should access
	// the Namespace via the RequestNamespace method.
	//
	// Requests accessing specific namespaced objects must check ACLs
	// against the namespace of the object, not the namespace in the
	// request.
	Namespace string

	// If set, wait until query exceeds given index. Must be provided
	// with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
	AllowStale bool

	// If set, used as prefix for resource list searches
	Prefix string

	// AuthToken is the secret portion of the ACL token used for the request
	AuthToken string

	InternalRpcInfo
}

func (q QueryOptions) RequestRegion() string {
	return q.Region
}

// RequestNamespace returns the request's namespace or the default namespace if
// no explicit namespace was sent.
//
// Requests accessing specific namespaced objects must check ACLs against the
// namespace of the object, not the namespace in the request.
func (q QueryOptions) RequestNamespace() string {
	if q.Namespace == "" {
		return DefaultNamespace
	}
	return q.Namespace
}

// QueryOptions only applies to reads, so IsRead is always true.
func (q QueryOptions) IsRead() bool {
	return true
}

func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}

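// Editor's note: an illustrative sketch, not part of the upstream file,
// showing the namespace fallback a handler relies on when it calls
// RequestNamespace instead of reading the Namespace field directly.
func exampleRequestNamespace() {
	q := QueryOptions{Region: "global"}
	fmt.Println(q.RequestNamespace()) // "default": empty falls back to DefaultNamespace

	q.Namespace = "prod"              // hypothetical namespace
	fmt.Println(q.RequestNamespace()) // "prod"
}
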
// AgentPprofRequest is used to request a pprof report for a given node.
type AgentPprofRequest struct {
	// ReqType specifies the profile to use
	ReqType pprof.ReqType

	// Profile specifies the runtime/pprof profile to lookup and generate.
	Profile string

	// Seconds is the number of seconds to capture a profile
	Seconds int

	// Debug specifies if the pprof profile should include debug output
	Debug int

	// GC specifies if the profile should call runtime.GC() before
	// running its profile. This is only used for "heap" profiles
	GC int

	// NodeID is the node we want to track the logs of
	NodeID string

	// ServerID is the server we want to track the logs of
	ServerID string

	QueryOptions
}

// AgentPprofResponse is used to return a generated pprof profile
type AgentPprofResponse struct {
	// AgentID is the ID of the agent that fulfilled the request
	AgentID string

	// Payload is the generated pprof profile
	Payload []byte

	// HTTPHeaders are a set of key value pairs to be applied as
	// HTTP headers for a specific runtime profile
	HTTPHeaders map[string]string
}

type WriteRequest struct {
	// The target region for this write
	Region string

	// Namespace is the target namespace for the write.
	//
	// Since RPC handlers do not have a default value set they should
	// access the Namespace via the RequestNamespace method.
	//
	// Requests accessing specific namespaced objects must check ACLs
	// against the namespace of the object, not the namespace in the
	// request.
	Namespace string

	// AuthToken is the secret portion of the ACL token used for the request
	AuthToken string

	InternalRpcInfo
}

func (w WriteRequest) RequestRegion() string {
	// The target region for this request
	return w.Region
}

// RequestNamespace returns the request's namespace or the default namespace if
// no explicit namespace was sent.
//
// Requests accessing specific namespaced objects must check ACLs against the
// namespace of the object, not the namespace in the request.
func (w WriteRequest) RequestNamespace() string {
	if w.Namespace == "" {
		return DefaultNamespace
	}
	return w.Namespace
}

// WriteRequest only applies to writes, so IsRead is always false.
func (w WriteRequest) IsRead() bool {
	return false
}

func (w WriteRequest) AllowStaleRead() bool {
	return false
}

// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}

// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node      *Node
	NodeEvent *NodeEvent
	WriteRequest
}

// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeBatchDeregisterRequest is used for Node.BatchDeregister endpoint
// to deregister a batch of nodes from being schedulable entities.
type NodeBatchDeregisterRequest struct {
	NodeIDs []string
	WriteRequest
}

// NodeServerInfo is used in NodeUpdateResponse to return Nomad server
// information used in RPC server lists.
type NodeServerInfo struct {
	// RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to
	// be contacted at for RPCs.
	RPCAdvertiseAddr string

	// RPCMajorVersion is the major version number the Nomad Server
	// supports
	RPCMajorVersion int32

	// RPCMinorVersion is the minor version number the Nomad Server
	// supports
	RPCMinorVersion int32

	// Datacenter is the datacenter that a Nomad server belongs to
	Datacenter string
}

// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID    string
	Status    string
	NodeEvent *NodeEvent
	UpdatedAt int64
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain strategy
type NodeUpdateDrainRequest struct {
	NodeID        string
	DrainStrategy *DrainStrategy

	// COMPAT Remove in version 0.10
	// As part of Nomad 0.8 we have deprecated the drain boolean in favor of a
	// drain strategy but we need to handle the upgrade path where the Raft log
	// contains drain updates with just the drain boolean being manipulated.
	Drain bool

	// MarkEligible marks the node as eligible if removing the drain strategy.
	MarkEligible bool

	// NodeEvent is the event added to the node
	NodeEvent *NodeEvent

	// UpdatedAt represents server time of receiving request
	UpdatedAt int64

	WriteRequest
}

// BatchNodeUpdateDrainRequest is used for updating the drain strategy for a
// batch of nodes
type BatchNodeUpdateDrainRequest struct {
	// Updates is a mapping of nodes to their updated drain strategy
	Updates map[string]*DrainUpdate

	// NodeEvents is a mapping of the node to the event to add to the node
	NodeEvents map[string]*NodeEvent

	// UpdatedAt represents server time of receiving request
	UpdatedAt int64

	WriteRequest
}

// DrainUpdate is used to update the drain of a node
type DrainUpdate struct {
	// DrainStrategy is the new strategy for the node
	DrainStrategy *DrainStrategy

	// MarkEligible marks the node as eligible if removing the drain strategy.
	MarkEligible bool
}

// NodeUpdateEligibilityRequest is used for updating the scheduling eligibility
type NodeUpdateEligibilityRequest struct {
	NodeID      string
	Eligibility string

	// NodeEvent is the event added to the node
	NodeEvent *NodeEvent

	// UpdatedAt represents server time of receiving request
	UpdatedAt int64

	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	NodeID   string
	SecretID string
	QueryOptions
}

// SearchResponse is used to return matches and information about whether
// the match list is truncated specific to each type of context.
type SearchResponse struct {
	// Map of context types to ids which match a specified prefix
	Matches map[Context][]string

	// Truncations indicates whether the matches for a particular context have
	// been truncated
	Truncations map[Context]bool

	QueryMeta
}

// SearchRequest is used to parameterize a request, and returns a
// list of matches made up of jobs, allocations, evaluations, and/or nodes,
// along with whether or not the information returned is truncated.
type SearchRequest struct {
	// Prefix is what ids are matched to. I.e, if the given prefix were
	// "a", potential matches might be "abcd" or "aabb"
	Prefix string

	// Context is the type that can be matched against. A context can be a
	// job, node, evaluation, allocation, or empty (indicating every context
	// should be matched).
	Context Context

	QueryOptions
}

// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job

	// If EnforceIndex is set then the job will only be registered if the passed
	// JobModifyIndex matches the current Jobs index. If the index is zero, the
	// register only occurs if the job is new.
	EnforceIndex   bool
	JobModifyIndex uint64

	// PolicyOverride is set when the user is attempting to override any policies
	PolicyOverride bool

	WriteRequest
}

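// Editor's note: an illustrative sketch, not part of the upstream file. It
// shows a check-and-set registration: with EnforceIndex set, the register
// succeeds only if the job's current index still equals lastSeenIndex, and
// a zero index demands that the job be new.
func exampleCheckAndSetRegister(job *Job, lastSeenIndex uint64) *JobRegisterRequest {
	return &JobRegisterRequest{
		Job:            job,
		EnforceIndex:   true,
		JobModifyIndex: lastSeenIndex,
	}
}
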
// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string

	// Purge controls whether the deregister purges the job from the system or
	// whether the job is just marked as stopped and will be removed by the
	// garbage collector
	Purge bool

	WriteRequest
}

// JobBatchDeregisterRequest is used to batch deregister jobs and upsert
// evaluations.
type JobBatchDeregisterRequest struct {
	// Jobs is the set of jobs to deregister
	Jobs map[NamespacedID]*JobDeregisterOptions

	// Evals is the set of evaluations to create.
	Evals []*Evaluation

	WriteRequest
}

// JobDeregisterOptions configures how a job is deregistered.
type JobDeregisterOptions struct {
	// Purge controls whether the deregister purges the job from the system or
	// whether the job is just marked as stopped and will be removed by the
	// garbage collector
	Purge bool
}

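// Editor's note: an illustrative sketch, not part of the upstream file.
// NamespacedID is a plain comparable struct, which is what lets it key the
// Jobs map in JobBatchDeregisterRequest; the job ID here is hypothetical.
func exampleBatchDeregister() *JobBatchDeregisterRequest {
	return &JobBatchDeregisterRequest{
		Jobs: map[NamespacedID]*JobDeregisterOptions{
			NewNamespacedID("nightly-import", DefaultNamespace): {Purge: true},
		},
	}
}
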
// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	JobID       string
	EvalOptions EvalOptions
	WriteRequest
}

// EvalOptions is used to encapsulate options when forcing a job evaluation
type EvalOptions struct {
	ForceReschedule bool
}

// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
	JobID string
	All   bool
	QueryOptions
}

// JobListRequest is used to parameterize a list request
type JobListRequest struct {
	QueryOptions
}

// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
	Job  *Job
	Diff bool // Toggles an annotated diff
	// PolicyOverride is set when the user is attempting to override any policies
	PolicyOverride bool
	WriteRequest
}

// JobScaleRequest is used for the Job.Scale endpoint to scale one of the
// scaling targets in a job
type JobScaleRequest struct {
	Namespace string
	JobID     string
	Target    map[string]string
	Count     *int64
	Message   string
	Error     bool
	Meta      map[string]interface{}
	// PolicyOverride is set when the user is attempting to override any policies
	PolicyOverride bool
	WriteRequest
}

// JobSummaryRequest is used when we just need to get a specific job summary
type JobSummaryRequest struct {
	JobID string
	QueryOptions
}

// JobScaleStatusRequest is used to get the scale status for a job
type JobScaleStatusRequest struct {
	JobID string
	QueryOptions
}

// JobDispatchRequest is used to dispatch a job based on a parameterized job
type JobDispatchRequest struct {
	JobID   string
	Payload []byte
	Meta    map[string]string
	WriteRequest
}

// JobValidateRequest is used to validate a job
type JobValidateRequest struct {
	Job *Job
	WriteRequest
}

// JobRevertRequest is used to revert a job to a prior version.
type JobRevertRequest struct {
	// JobID is the ID of the job being reverted
	JobID string

	// JobVersion is the version to revert to.
	JobVersion uint64

	// EnforcePriorVersion if set will enforce that the job is at the given
	// version before reverting.
	EnforcePriorVersion *uint64

	// ConsulToken is the Consul token that proves the submitter of the job revert
	// has access to the Service Identity policies associated with the job's
	// Consul Connect enabled services. This field is only used to transfer the
	// token and is not stored after the Job revert.
	ConsulToken string

	// VaultToken is the Vault token that proves the submitter of the job revert
	// has access to any Vault policies specified in the targeted job version. This
	// field is only used to transfer the token and is not stored after the Job
	// revert.
	VaultToken string

	WriteRequest
}

// JobStabilityRequest is used to mark a job as stable.
type JobStabilityRequest struct {
	// Job to set the stability on
	JobID      string
	JobVersion uint64

	// Set the stability
	Stable bool
	WriteRequest
}

// JobStabilityResponse is the response when marking a job as stable.
type JobStabilityResponse struct {
	WriteMeta
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
	QueryOptions
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalDeleteRequest is used for deleting an evaluation.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers       []string
	Timeout          time.Duration
	SchedulerVersion uint16
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// ApplyPlanResultsRequest is used by the planner to apply a Raft transaction
// committing the result of a plan.
type ApplyPlanResultsRequest struct {
	// AllocUpdateRequest holds the allocation updates to be made by the
	// scheduler.
	AllocUpdateRequest

	// Deployment is the deployment created or updated as a result of a
	// scheduling event.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// EvalID is the eval ID of the plan being applied. The modify index of the
	// evaluation is updated as part of applying the plan to ensure that subsequent
	// scheduling events for the same job will wait for the index that last produced
	// state changes. This is necessary for blocked evaluations since they can be
	// processed many times, potentially making state updates, without the state of
	// the evaluation itself being updated.
	EvalID string

	// COMPAT 0.11
	// NodePreemptions is a slice of allocations from other lower priority jobs
	// that are preempted. Preempted allocations are marked as evicted.
	// Deprecated: Replaced with AllocsPreempted which contains only the diff
	NodePreemptions []*Allocation

	// AllocsPreempted is a slice of allocation diffs from other lower priority jobs
	// that are preempted. Preempted allocations are marked as evicted.
	AllocsPreempted []*AllocationDiff

	// PreemptionEvals is a slice of follow up evals for jobs whose allocations
	// have been preempted to place allocs in this plan
	PreemptionEvals []*Evaluation
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// COMPAT 0.11
	// Alloc is the list of new allocations to assign
	// Deprecated: Replaced with two separate slices, one containing stopped allocations
	// and another containing updated allocations
	Alloc []*Allocation

	// Allocations to stop. Contains only the diff, not the entire allocation
	AllocsStopped []*AllocationDiff

	// New or updated allocations
	AllocsUpdated []*Allocation

	// Evals is the list of new evaluations to create
	// Evals are valid only when used in the Raft RPC
	Evals []*Evaluation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	WriteRequest
}

// AllocUpdateDesiredTransitionRequest is used to submit changes to allocations
// desired transition state.
type AllocUpdateDesiredTransitionRequest struct {
	// Allocs is the mapping of allocation ids to their desired state
	// transition
	Allocs map[string]*DesiredTransition

	// Evals is the set of evaluations to create
	Evals []*Evaluation

	WriteRequest
}

// AllocStopRequest is used to stop and reschedule a running Allocation.
type AllocStopRequest struct {
	AllocID string

	WriteRequest
}

// AllocStopResponse is the response to an `AllocStopRequest`
type AllocStopResponse struct {
	// EvalID is the id of the follow up evaluation for the rescheduled alloc.
	EvalID string

	WriteMeta
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// AllocSignalRequest is used to signal a specific allocation
type AllocSignalRequest struct {
	AllocID string
	Task    string
	Signal  string
	QueryOptions
}

// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}

// AllocRestartRequest is used to restart a specific allocation's tasks.
type AllocRestartRequest struct {
	AllocID  string
	TaskName string

	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}

// ServerMembersResponse has the list of servers in a cluster
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
}

// ServerMember holds information about a Nomad server agent in a cluster
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}

// ClusterMetadata is used to store per-cluster metadata.
type ClusterMetadata struct {
	ClusterID  string
	CreateTime int64
}

// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	QueryOptions
}

// VaultAccessorsRequest is used to operate on a set of Vault accessors
type VaultAccessorsRequest struct {
	Accessors []*VaultAccessor
}

// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
	AllocID     string
	Task        string
	NodeID      string
	Accessor    string
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}

// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token
	Tasks map[string]string

	// Error stores any error that occurred. Errors are stored here so we can
	// communicate whether it is retryable
	Error *RecoverableError

	QueryMeta
}

// GenericRequest is used for requests where no specific information is
// needed.
type GenericRequest struct {
	QueryOptions
}

// DeploymentListRequest is used to list the deployments
type DeploymentListRequest struct {
	QueryOptions
}

// DeploymentDeleteRequest is used for deleting deployments.
type DeploymentDeleteRequest struct {
	Deployments []string
	WriteRequest
}

// DeploymentStatusUpdateRequest is used to update the status of a deployment as
// well as optionally creating an evaluation atomically.
type DeploymentStatusUpdateRequest struct {
	// Eval, if set, is used to create an evaluation at the same time as
	// updating the status of a deployment.
	Eval *Evaluation

	// DeploymentUpdate is a status update to apply to the given
	// deployment.
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job
}

// DeploymentAllocHealthRequest is used to set the health of a set of
// allocations as part of a deployment.
type DeploymentAllocHealthRequest struct {
	DeploymentID string

	// Marks these allocations as healthy, allowing further allocations
	// to be rolled.
	HealthyAllocationIDs []string

	// Any unhealthy allocations fail the deployment
	UnhealthyAllocationIDs []string

	WriteRequest
}

// ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft
type ApplyDeploymentAllocHealthRequest struct {
	DeploymentAllocHealthRequest

	// Timestamp is the timestamp to use when setting the allocations health.
	Timestamp time.Time

	// An optional field to update the status of a deployment
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}

// DeploymentPromoteRequest is used to promote task groups in a deployment
type DeploymentPromoteRequest struct {
	DeploymentID string

	// All is to promote all task groups
	All bool

	// Groups is used to set the promotion status per task group
	Groups []string

	WriteRequest
}

// ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft
type ApplyDeploymentPromoteRequest struct {
	DeploymentPromoteRequest

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}

// DeploymentPauseRequest is used to pause a deployment
type DeploymentPauseRequest struct {
	DeploymentID string

	// Pause sets the pause status
	Pause bool

	WriteRequest
}

// DeploymentSpecificRequest is used to make a request specific to a particular
// deployment
type DeploymentSpecificRequest struct {
	DeploymentID string
	QueryOptions
}

// DeploymentFailRequest is used to fail a particular deployment
type DeploymentFailRequest struct {
	DeploymentID string
	WriteRequest
}

// ScalingPolicySpecificRequest is used when we just need to specify a target scaling policy
type ScalingPolicySpecificRequest struct {
	ID string
	QueryOptions
}

// SingleScalingPolicyResponse is used to return a single scaling policy
type SingleScalingPolicyResponse struct {
	Policy *ScalingPolicy
	QueryMeta
}

// ScalingPolicyListRequest is used to parameterize a scaling policy list request
type ScalingPolicyListRequest struct {
	QueryOptions
}

// ScalingPolicyListResponse is used for a list request
type ScalingPolicyListResponse struct {
	Policies []*ScalingPolicyListStub
	QueryMeta
}

// SingleDeploymentResponse is used to respond with a single deployment
type SingleDeploymentResponse struct {
	Deployment *Deployment
	QueryMeta
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	VolumeEvalID    string
	VolumeEvalIndex uint64
	QueryMeta
}

// JobBatchDeregisterResponse is used to respond to a batch job deregistration
type JobBatchDeregisterResponse struct {
	// JobEvals maps the job to its created evaluation
	JobEvals map[NamespacedID]string
	QueryMeta
}

// JobValidateResponse is the response from a validate request
type JobValidateResponse struct {
	// DriverConfigValidated indicates whether the agent validated the driver
	// config
	DriverConfigValidated bool

	// ValidationErrors is a list of validation errors
	ValidationErrors []string

	// Error is a string version of any error that may have occurred
	Error string

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string
}

// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader. If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response. This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// NodeEligibilityUpdateResponse is used to respond to a node eligibility update
type NodeEligibilityUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// NodeClientAllocsResponse is used to return alloc metadata for a single node
type NodeClientAllocsResponse struct {
	Allocs map[string]uint64

	// MigrateTokens are used when ACLs are enabled to allow cross node,
	// authenticated access to sticky volumes
	MigrateTokens map[string]string

	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
	JobSummary *JobSummary
	QueryMeta
}

// JobScaleStatusResponse is used to return the scale status for a job
type JobScaleStatusResponse struct {
	JobScaleStatus *JobScaleStatus
	QueryMeta
}

type JobScaleStatus struct {
	JobID          string
	JobCreateIndex uint64
	JobModifyIndex uint64
	JobStopped     bool
	TaskGroups     map[string]*TaskGroupScaleStatus
}

// TaskGroupScaleStatus is used to return the scale status for a given task group
type TaskGroupScaleStatus struct {
	Desired   int
	Placed    int
	Running   int
	Healthy   int
	Unhealthy int
	Events    []*ScalingEvent
}

type JobDispatchResponse struct {
	DispatchedJobID string
	EvalID          string
	EvalCreateIndex uint64
	JobCreateIndex  uint64
	WriteMeta
}

// JobListResponse is used for a list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// JobVersionsRequest is used to get a job's versions
type JobVersionsRequest struct {
	JobID string
	Diffs bool
	QueryOptions
}

// JobVersionsResponse is used for a job get versions request
type JobVersionsResponse struct {
	Versions []*Job
	Diffs    []*JobDiff
	QueryMeta
}

// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	// NextPeriodicLaunch is the time at which the job would next be launched
	// if submitted.
	NextPeriodicLaunch time.Time

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	WriteMeta
}

// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string

	// WaitIndex is the Raft index the worker should wait until invoking the
	// scheduler.
	WaitIndex uint64

	QueryMeta
}

// GetWaitIndex is used to retrieve the Raft index in which state should be at
// or beyond before invoking the scheduler.
func (e *EvalDequeueResponse) GetWaitIndex() uint64 {
	// Prefer the wait index sent. This will be populated on all responses from
	// 0.7.0 and above
	if e.WaitIndex != 0 {
		return e.WaitIndex
	} else if e.Eval != nil {
		return e.Eval.ModifyIndex
	}

	// This should never happen
	return 1
}

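// Editor's note: an illustrative sketch, not part of the upstream file,
// showing the fallback GetWaitIndex performs for pre-0.7.0 servers that
// never populate WaitIndex.
func exampleWaitIndexFallback() uint64 {
	resp := &EvalDequeueResponse{Eval: &Evaluation{ModifyIndex: 42}}
	return resp.GetWaitIndex() // 42: WaitIndex is unset, so the eval's ModifyIndex wins
}
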
// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}

// AllocListResponse is used for a list request
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// DeploymentListResponse is used for a list request
type DeploymentListResponse struct {
	Deployments []*Deployment
	QueryMeta
}

// EvalListResponse is used for a list request
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}

// DeploymentUpdateResponse is used to respond to a deployment change. The
// response will include the modify index of the deployment as well as details
// of any triggered evaluation.
type DeploymentUpdateResponse struct {
	EvalID                string
	EvalCreateIndex       uint64
	DeploymentModifyIndex uint64

	// RevertedJobVersion is the version the job was reverted to. If unset, the
	// job wasn't reverted.
	RevertedJobVersion *uint64

	WriteMeta
}

// NodeConnQueryResponse is used to respond to a query of whether a server has
// a connection to a specific Node
type NodeConnQueryResponse struct {
	// Connected indicates whether a connection to the Client exists
	Connected bool

	// Established marks the time at which the connection was established
	Established time.Time

	QueryMeta
}

// EmitNodeEventsRequest is a request to update the node events source
// with a new client-side event
type EmitNodeEventsRequest struct {
	// NodeEvents are a map where the key is a node id, and value is a list of
	// events for that node
	NodeEvents map[string][]*NodeEvent

	WriteRequest
}

// EmitNodeEventsResponse is a response to the client about the status of
// the node event source update.
type EmitNodeEventsResponse struct {
	WriteMeta
}

const (
	NodeEventSubsystemDrain     = "Drain"
	NodeEventSubsystemDriver    = "Driver"
	NodeEventSubsystemHeartbeat = "Heartbeat"
	NodeEventSubsystemCluster   = "Cluster"
	NodeEventSubsystemStorage   = "Storage"
)

// NodeEvent is a single unit representing a node's state change
type NodeEvent struct {
	Message     string
	Subsystem   string
	Details     map[string]string
	Timestamp   time.Time
	CreateIndex uint64
}

func (ne *NodeEvent) String() string {
	var details []string
	for k, v := range ne.Details {
		details = append(details, fmt.Sprintf("%s: %s", k, v))
	}

	return fmt.Sprintf("Message: %s, Subsystem: %s, Details: %s, Timestamp: %s", ne.Message, ne.Subsystem, strings.Join(details, ","), ne.Timestamp.String())
}

func (ne *NodeEvent) Copy() *NodeEvent {
	c := new(NodeEvent)
	*c = *ne
	c.Details = helper.CopyMapStringString(ne.Details)
	return c
}

// NewNodeEvent generates a new node event storing the current time as the
// timestamp
func NewNodeEvent() *NodeEvent {
	return &NodeEvent{Timestamp: time.Now()}
}

// SetMessage is used to set the message on the node event
func (ne *NodeEvent) SetMessage(msg string) *NodeEvent {
	ne.Message = msg
	return ne
}

// SetSubsystem is used to set the subsystem on the node event
func (ne *NodeEvent) SetSubsystem(sys string) *NodeEvent {
	ne.Subsystem = sys
	return ne
}

// SetTimestamp is used to set the timestamp on the node event
func (ne *NodeEvent) SetTimestamp(ts time.Time) *NodeEvent {
	ne.Timestamp = ts
	return ne
}

// AddDetail is used to add a detail to the node event
func (ne *NodeEvent) AddDetail(k, v string) *NodeEvent {
	if ne.Details == nil {
		ne.Details = make(map[string]string, 1)
	}
	ne.Details[k] = v
	return ne
}

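// Editor's note: an illustrative sketch, not part of the upstream file.
// Because every setter returns the event, construction chains fluently;
// the message and detail values here are hypothetical.
func exampleNodeEvent() *NodeEvent {
	return NewNodeEvent().
		SetSubsystem(NodeEventSubsystemDrain).
		SetMessage("Node drain complete").
		AddDetail("duration", "5m")
}
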
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)

// ShouldDrainNode checks if a given node status should trigger an
// evaluation. Some states don't require any further action.
func ShouldDrainNode(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady:
		return false
	case NodeStatusDown:
		return true
	default:
		panic(fmt.Sprintf("unhandled node status %s", status))
	}
}

// ValidNodeStatus is used to check if a node status is valid
func ValidNodeStatus(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
		return true
	default:
		return false
	}
}

const (
	// NodeSchedulingEligible and Ineligible marks the node as eligible or not,
	// respectively, for receiving allocations. This is orthogonal to the node
	// status being ready.
	NodeSchedulingEligible   = "eligible"
	NodeSchedulingIneligible = "ineligible"
)

// DrainSpec describes a Node's desired drain behavior.
type DrainSpec struct {
	// Deadline is the duration after StartTime when the remaining
	// allocations on a draining Node should be told to stop.
	Deadline time.Duration

	// IgnoreSystemJobs allows system jobs to remain on the node even though it
	// has been marked for draining.
	IgnoreSystemJobs bool
}

// DrainStrategy describes a Node's drain behavior.
type DrainStrategy struct {
	// DrainSpec is the user declared drain specification
	DrainSpec

	// ForceDeadline is the deadline time for the drain after which drains will
	// be forced
	ForceDeadline time.Time

	// StartedAt is the time the drain process started
	StartedAt time.Time
}

func (d *DrainStrategy) Copy() *DrainStrategy {
	if d == nil {
		return nil
	}

	nd := new(DrainStrategy)
	*nd = *d
	return nd
}

// DeadlineTime returns whether the drain strategy allows an infinite duration,
// and otherwise the deadline time. A force drain is captured by the deadline
// time being in the past.
func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) {
	// Treat the nil case as a force drain so during an upgrade where a node may
	// not have a drain strategy but has Drain set to true, it is treated as a
	// force to mimic old behavior.
	if d == nil {
		return false, time.Time{}
	}

	ns := d.Deadline.Nanoseconds()
	switch {
	case ns < 0: // Force
		return false, time.Time{}
	case ns == 0: // Infinite
		return true, time.Time{}
	default:
		return false, d.ForceDeadline
	}
}

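// Editor's note: an illustrative sketch, not part of the upstream file,
// walking through the three Deadline cases DeadlineTime distinguishes.
func exampleDeadlineCases() {
	force := &DrainStrategy{DrainSpec: DrainSpec{Deadline: -1 * time.Second}}
	infinite, _ := force.DeadlineTime() // false with a zero time: drain immediately

	endless := &DrainStrategy{DrainSpec: DrainSpec{Deadline: 0}}
	infinite, _ = endless.DeadlineTime() // true: no deadline at all

	timed := &DrainStrategy{
		DrainSpec:     DrainSpec{Deadline: time.Hour},
		ForceDeadline: time.Now().Add(time.Hour),
	}
	infinite, deadline := timed.DeadlineTime() // false with ForceDeadline as the cutoff

	_, _ = infinite, deadline
}
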
func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
	if d == nil && o == nil {
		return true
	} else if o != nil && d == nil {
		return false
	} else if d != nil && o == nil {
		return false
	}

	// Compare values
	if d.ForceDeadline != o.ForceDeadline {
		return false
	} else if d.Deadline != o.Deadline {
		return false
	} else if d.IgnoreSystemJobs != o.IgnoreSystemJobs {
		return false
	}

	return true
}

// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
	TLSEnabled bool

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// NodeResources captures the available resources on the client.
	NodeResources *NodeResources

	// ReservedResources captures the set resources on the client that are
	// reserved from scheduling.
	ReservedResources *NodeReservedResources

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	// COMPAT(0.10): Remove in 0.10
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// COMPAT: Remove in Nomad 0.9
	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained. Superseded by DrainStrategy in Nomad
	// 0.8 but kept for backward compat.
	Drain bool

	// DrainStrategy determines the node's draining behavior. Will be nil
	// when Drain=false.
	DrainStrategy *DrainStrategy

	// SchedulingEligibility determines whether this node will receive new
	// placements.
	SchedulingEligibility string

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human-friendly information
	StatusDescription string

	// StatusUpdatedAt is the timestamp at which the state of the node was
	// updated
	StatusUpdatedAt int64

	// Events is the most recent set of events generated for the node,
	// retaining only MaxRetainedNodeEvents number at a time
	Events []*NodeEvent

	// Drivers is a map of driver names to current driver information
	Drivers map[string]*DriverInfo

	// CSIControllerPlugins is a map of plugin names to current CSI Plugin info
	CSIControllerPlugins map[string]*CSIInfo
	// CSINodePlugins is a map of plugin names to current CSI Plugin info
	CSINodePlugins map[string]*CSIInfo

	// HostVolumes is a map of host volume names to their configuration
	HostVolumes map[string]*ClientHostVolumeConfig

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// Ready returns true if the node is ready for running allocations
func (n *Node) Ready() bool {
	// Drain is checked directly to support pre-0.8 Node data
	return n.Status == NodeStatusReady && !n.Drain && n.SchedulingEligibility == NodeSchedulingEligible
}

func (n *Node) Canonicalize() {
	if n == nil {
		return
	}

	// COMPAT Remove in 0.10
	// In v0.8.0 we introduced scheduling eligibility, so we need to set it for
	// upgrading nodes
	if n.SchedulingEligibility == "" {
		if n.Drain {
			n.SchedulingEligibility = NodeSchedulingIneligible
		} else {
			n.SchedulingEligibility = NodeSchedulingEligible
		}
	}
}

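// Editor's note: an illustrative sketch, not part of the upstream file,
// showing the upgrade path Canonicalize handles: a node written before
// scheduling eligibility existed derives it from the legacy Drain flag.
func exampleCanonicalizeUpgrade() string {
	n := &Node{Drain: true} // pre-0.8 node with no SchedulingEligibility
	n.Canonicalize()
	return n.SchedulingEligibility // NodeSchedulingIneligible
}
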
func (n *Node) Copy() *Node {
	if n == nil {
		return nil
	}
	nn := new(Node)
	*nn = *n
	nn.Attributes = helper.CopyMapStringString(nn.Attributes)
	nn.Resources = nn.Resources.Copy()
	nn.Reserved = nn.Reserved.Copy()
	nn.NodeResources = nn.NodeResources.Copy()
	nn.ReservedResources = nn.ReservedResources.Copy()
	nn.Links = helper.CopyMapStringString(nn.Links)
	nn.Meta = helper.CopyMapStringString(nn.Meta)
	nn.Events = copyNodeEvents(n.Events)
	nn.DrainStrategy = nn.DrainStrategy.Copy()
	nn.CSIControllerPlugins = copyNodeCSI(nn.CSIControllerPlugins)
	nn.CSINodePlugins = copyNodeCSI(nn.CSINodePlugins)
	nn.Drivers = copyNodeDrivers(n.Drivers)
	nn.HostVolumes = copyNodeHostVolumes(n.HostVolumes)
	return nn
}

// copyNodeEvents is a helper to copy a list of NodeEvent's
func copyNodeEvents(events []*NodeEvent) []*NodeEvent {
	l := len(events)
	if l == 0 {
		return nil
	}

	c := make([]*NodeEvent, l)
	for i, event := range events {
		c[i] = event.Copy()
	}
	return c
}

// copyNodeCSI is a helper to copy a map of CSIInfo
func copyNodeCSI(plugins map[string]*CSIInfo) map[string]*CSIInfo {
	l := len(plugins)
	if l == 0 {
		return nil
	}

	c := make(map[string]*CSIInfo, l)
	for plugin, info := range plugins {
		c[plugin] = info.Copy()
	}

	return c
}

// copyNodeDrivers is a helper to copy a map of DriverInfo
func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo {
	l := len(drivers)
	if l == 0 {
		return nil
	}

	c := make(map[string]*DriverInfo, l)
	for driver, info := range drivers {
		c[driver] = info.Copy()
	}
	return c
}

// copyNodeHostVolumes is a helper to copy a map of string to Volume
func copyNodeHostVolumes(volumes map[string]*ClientHostVolumeConfig) map[string]*ClientHostVolumeConfig {
	l := len(volumes)
	if l == 0 {
		return nil
	}

	c := make(map[string]*ClientHostVolumeConfig, l)
	for volume, v := range volumes {
		c[volume] = v.Copy()
	}

	return c
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
func (n *Node) TerminalStatus() bool {
	switch n.Status {
	case NodeStatusDown:
		return true
	default:
		return false
	}
}

// COMPAT(0.11): Remove in 0.11
// ComparableReservedResources returns the reserved resources on the node
// handling upgrade paths. Reserved networks must be handled separately. After
// 0.11 calls to this should be replaced with:
// node.ReservedResources.Comparable()
func (n *Node) ComparableReservedResources() *ComparableResources {
	// See if we can no-op
	if n.Reserved == nil && n.ReservedResources == nil {
		return nil
	}

	// Node already has 0.9+ behavior
	if n.ReservedResources != nil {
		return n.ReservedResources.Comparable()
	}

	// Upgrade path
	return &ComparableResources{
		Flattened: AllocatedTaskResources{
			Cpu: AllocatedCpuResources{
				CpuShares: int64(n.Reserved.CPU),
			},
			Memory: AllocatedMemoryResources{
				MemoryMB: int64(n.Reserved.MemoryMB),
			},
		},
		Shared: AllocatedSharedResources{
			DiskMB: int64(n.Reserved.DiskMB),
		},
	}
}

// COMPAT(0.11): Remove in 0.11
// ComparableResources returns the resources on the node
// handling upgrade paths. Networking must be handled separately. After 0.11
1905 // calls to this should be replaced with: node.NodeResources.Comparable()
1906 func (n *Node) ComparableResources() *ComparableResources {
1907 // Node already has 0.9+ behavior
1908 if n.NodeResources != nil {
1909 return n.NodeResources.Comparable()
1910 }
1911 
1912 // Upgrade path
1913 return &ComparableResources{
1914 Flattened: AllocatedTaskResources{
1915 Cpu: AllocatedCpuResources{
1916 CpuShares: int64(n.Resources.CPU),
1917 },
1918 Memory: AllocatedMemoryResources{
1919 MemoryMB: int64(n.Resources.MemoryMB),
1920 },
1921 },
1922 Shared: AllocatedSharedResources{
1923 DiskMB: int64(n.Resources.DiskMB),
1924 },
1925 }
1926 }
1927 
1928 // Stub returns a summarized version of the node
1929 func (n *Node) Stub() *NodeListStub {
1930 
1931 addr, _, _ := net.SplitHostPort(n.HTTPAddr)
1932 
1933 return &NodeListStub{
1934 Address: addr,
1935 ID: n.ID,
1936 Datacenter: n.Datacenter,
1937 Name: n.Name,
1938 NodeClass: n.NodeClass,
1939 Version: n.Attributes["nomad.version"],
1940 Drain: n.Drain,
1941 SchedulingEligibility: n.SchedulingEligibility,
1942 Status: n.Status,
1943 StatusDescription: n.StatusDescription,
1944 Drivers: n.Drivers,
1945 HostVolumes: n.HostVolumes,
1946 CreateIndex: n.CreateIndex,
1947 ModifyIndex: n.ModifyIndex,
1948 }
1949 }
1950 
1951 // NodeListStub is used to return a subset of node information
1952 // for the node list
1953 type NodeListStub struct {
1954 Address string
1955 ID string
1956 Datacenter string
1957 Name string
1958 NodeClass string
1959 Version string
1960 Drain bool
1961 SchedulingEligibility string
1962 Status string
1963 StatusDescription string
1964 Drivers map[string]*DriverInfo
1965 HostVolumes map[string]*ClientHostVolumeConfig
1966 CreateIndex uint64
1967 ModifyIndex uint64
1968 }
1969 
1970 // Resources is used to define the resources available
1971 // on a client
1972 type Resources struct {
1973 CPU int
1974 MemoryMB int
1975 DiskMB int
1976 IOPS int // COMPAT(0.10): Only being used to issue warnings
1977 Networks Networks
1978 Devices ResourceDevices
1979 }
1980 
1981 const (
1982 BytesInMegabyte = 1024 * 1024
1983 )
1984 
1985 // DefaultResources is a small resources object that contains the
1986 // default resource requests that we will provide to an object.
1987 // --- THIS FUNCTION IS REPLICATED IN api/resources.go and should
1988 // be kept in sync.
1989 func DefaultResources() *Resources {
1990 return &Resources{
1991 CPU: 100,
1992 MemoryMB: 300,
1993 }
1994 }
1995 
1996 // MinResources is a small resources object that contains the
1997 // absolute minimum resources that we will provide to an object.
1998 // This should not be confused with the defaults which are
1999 // provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN
2000 // api/resources.go and should be kept in sync.
2001 func MinResources() *Resources {
2002 return &Resources{
2003 CPU: 20,
2004 MemoryMB: 10,
2005 }
2006 }
2007 
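// Example (illustrative sketch): the defaults above deliberately sit above
// the minimums, so a resource block left at its defaults always passes
// MeetsMinResources:
//
//	def := DefaultResources() // CPU: 100, MemoryMB: 300
//	min := MinResources()     // CPU: 20, MemoryMB: 10
//	// def.CPU >= min.CPU && def.MemoryMB >= min.MemoryMB, so
//	// def.MeetsMinResources() == nil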
2008 // DiskInBytes returns the amount of disk resources in bytes.
2009 func (r *Resources) DiskInBytes() int64 {
2010 return int64(r.DiskMB) * BytesInMegabyte
2011 }
2012 
2013 func (r *Resources) Validate() error {
2014 var mErr multierror.Error
2015 if err := r.MeetsMinResources(); err != nil {
2016 mErr.Errors = append(mErr.Errors, err)
2017 }
2018 
2019 // Ensure the task isn't asking for disk resources
2020 if r.DiskMB > 0 {
2021 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level."))
2022 }
2023 
2024 for i, d := range r.Devices {
2025 if err := d.Validate(); err != nil {
2026 mErr.Errors = append(mErr.Errors, fmt.Errorf("device %d failed validation: %v", i+1, err))
2027 }
2028 }
2029 
2030 return mErr.ErrorOrNil()
2031 }
2032 
2033 // Merge merges this resource with another resource.
2034 // COMPAT(0.10): Remove in 0.10
2035 func (r *Resources) Merge(other *Resources) {
2036 if other.CPU != 0 {
2037 r.CPU = other.CPU
2038 }
2039 if other.MemoryMB != 0 {
2040 r.MemoryMB = other.MemoryMB
2041 }
2042 if other.DiskMB != 0 {
2043 r.DiskMB = other.DiskMB
2044 }
2045 if len(other.Networks) != 0 {
2046 r.Networks = other.Networks
2047 }
2048 if len(other.Devices) != 0 {
2049 r.Devices = other.Devices
2050 }
2051 }
2052 
2053 // COMPAT(0.10): Remove in 0.10
2054 func (r *Resources) Equals(o *Resources) bool {
2055 if r == o {
2056 return true
2057 }
2058 if r == nil || o == nil {
2059 return false
2060 }
2061 return r.CPU == o.CPU &&
2062 r.MemoryMB == o.MemoryMB &&
2063 r.DiskMB == o.DiskMB &&
2064 r.IOPS == o.IOPS &&
2065 r.Networks.Equals(&o.Networks) &&
2066 r.Devices.Equals(&o.Devices)
2067 }
2068 
2069 // COMPAT(0.10): Remove in 0.10
2070 // ResourceDevices are part of Resources
2071 type ResourceDevices []*RequestedDevice
2072 
2073 // COMPAT(0.10): Remove in 0.10
2074 // Equals compares ResourceDevices as a set keyed by Name
2075 func (d *ResourceDevices) Equals(o *ResourceDevices) bool {
2076 if d == o {
2077 return true
2078 }
2079 if d == nil || o == nil {
2080 return false
2081 }
2082 if len(*d) != len(*o) {
2083 return false
2084 }
2085 m := make(map[string]*RequestedDevice, len(*d))
2086 for _, e := range *d {
2087 m[e.Name] = e
2088 }
2089 for _, oe := range *o {
2090 de, ok := m[oe.Name]
2091 if !ok || !de.Equals(oe) {
2092 return false
2093 }
2094 }
2095 return true
2096 }
2097 
2098 // COMPAT(0.10): Remove in 0.10
2099 func (r *Resources) Canonicalize() {
2100 // Ensure that empty and nil slices are treated the same to avoid scheduling
2101 // problems since we use reflect.DeepEqual.
2102 if len(r.Networks) == 0 {
2103 r.Networks = nil
2104 }
2105 if len(r.Devices) == 0 {
2106 r.Devices = nil
2107 }
2108 
2109 for _, n := range r.Networks {
2110 n.Canonicalize()
2111 }
2112 }
2113 
2114 // MeetsMinResources returns an error if the resources specified are less than
2115 // the minimum allowed.
2116 // This is based on the minimums defined in the Resources type 2117 // COMPAT(0.10): Remove in 0.10 2118 func (r *Resources) MeetsMinResources() error { 2119 var mErr multierror.Error 2120 minResources := MinResources() 2121 if r.CPU < minResources.CPU { 2122 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU)) 2123 } 2124 if r.MemoryMB < minResources.MemoryMB { 2125 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB)) 2126 } 2127 for i, n := range r.Networks { 2128 if err := n.MeetsMinResources(); err != nil { 2129 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 2130 } 2131 } 2132 2133 return mErr.ErrorOrNil() 2134 } 2135 2136 // Copy returns a deep copy of the resources 2137 func (r *Resources) Copy() *Resources { 2138 if r == nil { 2139 return nil 2140 } 2141 newR := new(Resources) 2142 *newR = *r 2143 2144 // Copy the network objects 2145 newR.Networks = r.Networks.Copy() 2146 2147 // Copy the devices 2148 if r.Devices != nil { 2149 n := len(r.Devices) 2150 newR.Devices = make([]*RequestedDevice, n) 2151 for i := 0; i < n; i++ { 2152 newR.Devices[i] = r.Devices[i].Copy() 2153 } 2154 } 2155 2156 return newR 2157 } 2158 2159 // NetIndex finds the matching net index using device name 2160 // COMPAT(0.10): Remove in 0.10 2161 func (r *Resources) NetIndex(n *NetworkResource) int { 2162 return r.Networks.NetIndex(n) 2163 } 2164 2165 // Superset checks if one set of resources is a superset 2166 // of another. This ignores network resources, and the NetworkIndex 2167 // should be used for that. 2168 // COMPAT(0.10): Remove in 0.10 2169 func (r *Resources) Superset(other *Resources) (bool, string) { 2170 if r.CPU < other.CPU { 2171 return false, "cpu" 2172 } 2173 if r.MemoryMB < other.MemoryMB { 2174 return false, "memory" 2175 } 2176 if r.DiskMB < other.DiskMB { 2177 return false, "disk" 2178 } 2179 return true, "" 2180 } 2181 2182 // Add adds the resources of the delta to this, potentially 2183 // returning an error if not possible. 
2184 // COMPAT(0.10): Remove in 0.10
2185 func (r *Resources) Add(delta *Resources) error {
2186 if delta == nil {
2187 return nil
2188 }
2189 r.CPU += delta.CPU
2190 r.MemoryMB += delta.MemoryMB
2191 r.DiskMB += delta.DiskMB
2192 
2193 for _, n := range delta.Networks {
2194 // Find the matching interface by IP or CIDR
2195 idx := r.NetIndex(n)
2196 if idx == -1 {
2197 r.Networks = append(r.Networks, n.Copy())
2198 } else {
2199 r.Networks[idx].Add(n)
2200 }
2201 }
2202 return nil
2203 }
2204 
2205 // COMPAT(0.10): Remove in 0.10
2206 func (r *Resources) GoString() string {
2207 return fmt.Sprintf("*%#v", *r)
2208 }
2209 
2210 type Port struct {
2211 Label string
2212 Value int
2213 To int
2214 }
2215 
2216 // NetworkResource is used to represent available network
2217 // resources
2218 type NetworkResource struct {
2219 Mode string // Mode of the network
2220 Device string // Name of the device
2221 CIDR string // CIDR block of addresses
2222 IP string // Host IP address
2223 MBits int // Throughput
2224 ReservedPorts []Port // Host Reserved ports
2225 DynamicPorts []Port // Host Dynamically assigned ports
2226 }
2227 
2228 func (nr *NetworkResource) Equals(other *NetworkResource) bool {
2229 if nr.Mode != other.Mode {
2230 return false
2231 }
2232 
2233 if nr.Device != other.Device {
2234 return false
2235 }
2236 
2237 if nr.CIDR != other.CIDR {
2238 return false
2239 }
2240 
2241 if nr.IP != other.IP {
2242 return false
2243 }
2244 
2245 if nr.MBits != other.MBits {
2246 return false
2247 }
2248 
2249 if len(nr.ReservedPorts) != len(other.ReservedPorts) {
2250 return false
2251 }
2252 
2253 for i, port := range nr.ReservedPorts {
2254 if len(other.ReservedPorts) <= i {
2255 return false
2256 }
2257 if port != other.ReservedPorts[i] {
2258 return false
2259 }
2260 }
2261 
2262 if len(nr.DynamicPorts) != len(other.DynamicPorts) {
2263 return false
2264 }
2265 for i, port := range nr.DynamicPorts {
2266 if len(other.DynamicPorts) <= i {
2267 return false
2268 }
2269 if port != other.DynamicPorts[i] {
2270 return false
2271 }
2272 }
2273 
2274 return true
2275 }
2276 
2277 func (n *NetworkResource) Canonicalize() {
2278 // Ensure that empty and nil slices are treated the same to avoid scheduling
2279 // problems since we use reflect.DeepEqual.
2280 if len(n.ReservedPorts) == 0 {
2281 n.ReservedPorts = nil
2282 }
2283 if len(n.DynamicPorts) == 0 {
2284 n.DynamicPorts = nil
2285 }
2286 }
2287 
2288 // MeetsMinResources returns an error if the resources specified are less than
2289 // the minimum allowed.
2290 func (n *NetworkResource) MeetsMinResources() error {
2291 var mErr multierror.Error
2292 if n.MBits < 1 {
2293 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits))
2294 }
2295 return mErr.ErrorOrNil()
2296 }
2297 
2298 // Copy returns a deep copy of the network resource
2299 func (n *NetworkResource) Copy() *NetworkResource {
2300 if n == nil {
2301 return nil
2302 }
2303 newR := new(NetworkResource)
2304 *newR = *n
2305 if n.ReservedPorts != nil {
2306 newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
2307 copy(newR.ReservedPorts, n.ReservedPorts)
2308 }
2309 if n.DynamicPorts != nil {
2310 newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
2311 copy(newR.DynamicPorts, n.DynamicPorts)
2312 }
2313 return newR
2314 }
2315 
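// Example (illustrative sketch): Equals above compares ReservedPorts and
// DynamicPorts index-wise, so two resources holding the same ports in a
// different order do not compare equal:
//
//	a := &NetworkResource{ReservedPorts: []Port{{Label: "http", Value: 80}, {Label: "https", Value: 443}}}
//	b := &NetworkResource{ReservedPorts: []Port{{Label: "https", Value: 443}, {Label: "http", Value: 80}}}
//	// a.Equals(b) == false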
2316 // Add adds the resources of the delta to this. Unlike Resources.Add above,
2317 // it cannot fail and therefore returns nothing.
2318 func (n *NetworkResource) Add(delta *NetworkResource) {
2319 if len(delta.ReservedPorts) > 0 {
2320 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
2321 }
2322 n.MBits += delta.MBits
2323 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
2324 }
2325 
2326 func (n *NetworkResource) GoString() string {
2327 return fmt.Sprintf("*%#v", *n)
2328 }
2329 
2330 // PortLabels returns a map of port labels to their assigned host ports.
2331 func (n *NetworkResource) PortLabels() map[string]int {
2332 num := len(n.ReservedPorts) + len(n.DynamicPorts)
2333 labelValues := make(map[string]int, num)
2334 for _, port := range n.ReservedPorts {
2335 labelValues[port.Label] = port.Value
2336 }
2337 for _, port := range n.DynamicPorts {
2338 labelValues[port.Label] = port.Value
2339 }
2340 return labelValues
2341 }
2342 
2343 // PortForService returns the Connect port for the given service. Returns false if
2344 // no port was found for a service with that name.
2345 func (n *NetworkResource) PortForService(serviceName string) (Port, bool) {
2346 label := fmt.Sprintf("%s-%s", ConnectProxyPrefix, serviceName)
2347 for _, port := range n.ReservedPorts {
2348 if port.Label == label {
2349 return port, true
2350 }
2351 }
2352 for _, port := range n.DynamicPorts {
2353 if port.Label == label {
2354 return port, true
2355 }
2356 }
2357 
2358 return Port{}, false
2359 }
2360 
2361 // Networks defined for a task on the Resources struct.
2362 type Networks []*NetworkResource
2363 
2364 func (ns Networks) Copy() Networks {
2365 if len(ns) == 0 {
2366 return nil
2367 }
2368 
2369 out := make([]*NetworkResource, len(ns))
2370 for i := range ns {
2371 out[i] = ns[i].Copy()
2372 }
2373 return out
2374 }
2375 
2376 // Port returns the IP and port assignment for the given label, or empty values.
2377 func (ns Networks) Port(label string) (string, int) {
2378 for _, n := range ns {
2379 for _, p := range n.ReservedPorts {
2380 if p.Label == label {
2381 return n.IP, p.Value
2382 }
2383 }
2384 for _, p := range n.DynamicPorts {
2385 if p.Label == label {
2386 return n.IP, p.Value
2387 }
2388 }
2389 }
2390 return "", 0
2391 }
2392 
2393 func (ns Networks) NetIndex(n *NetworkResource) int {
2394 for idx, net := range ns {
2395 if net.Device == n.Device {
2396 return idx
2397 }
2398 }
2399 return -1
2400 }
2401 
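// Example (illustrative sketch): Networks.Port scans reserved then dynamic
// ports across all networks and returns the owning network's IP with the
// port value:
//
//	ns := Networks{{
//		IP:           "10.0.0.1",
//		DynamicPorts: []Port{{Label: "http", Value: 22222}},
//	}}
//	ip, port := ns.Port("http") // "10.0.0.1", 22222
//	ip, port = ns.Port("db")    // "", 0 for an unknown label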
2402 // RequestedDevice is used to request a device for a task.
2403 type RequestedDevice struct {
2404 // Name is the request name. The possible values are as follows:
2405 // * <type>: A single value only specifies the type of request.
2406 // * <vendor>/<type>: A single slash delimiter assumes the vendor and type of device is specified.
2407 // * <vendor>/<type>/<name>: Two slash delimiters assume vendor, type and specific model are specified.
2408 //
2409 // Examples are as follows:
2410 // * "gpu"
2411 // * "nvidia/gpu"
2412 // * "nvidia/gpu/GTX2080Ti"
2413 Name string
2414 
2415 // Count is the number of requested devices
2416 Count uint64
2417 
2418 // Constraints are a set of constraints to apply when selecting the device
2419 // to use.
2420 Constraints Constraints
2421 
2422 // Affinities are a set of affinities to apply when selecting the device
2423 // to use.
2424 Affinities Affinities
2425 }
2426 
2427 func (r *RequestedDevice) Equals(o *RequestedDevice) bool {
2428 if r == o {
2429 return true
2430 }
2431 if r == nil || o == nil {
2432 return false
2433 }
2434 return r.Name == o.Name &&
2435 r.Count == o.Count &&
2436 r.Constraints.Equals(&o.Constraints) &&
2437 r.Affinities.Equals(&o.Affinities)
2438 }
2439 
2440 func (r *RequestedDevice) Copy() *RequestedDevice {
2441 if r == nil {
2442 return nil
2443 }
2444 
2445 nr := *r
2446 nr.Constraints = CopySliceConstraints(nr.Constraints)
2447 nr.Affinities = CopySliceAffinities(nr.Affinities)
2448 
2449 return &nr
2450 }
2451 
2452 func (r *RequestedDevice) ID() *DeviceIdTuple {
2453 if r == nil || r.Name == "" {
2454 return nil
2455 }
2456 
2457 parts := strings.SplitN(r.Name, "/", 3)
2458 switch len(parts) {
2459 case 1:
2460 return &DeviceIdTuple{
2461 Type: parts[0],
2462 }
2463 case 2:
2464 return &DeviceIdTuple{
2465 Vendor: parts[0],
2466 Type: parts[1],
2467 }
2468 default:
2469 return &DeviceIdTuple{
2470 Vendor: parts[0],
2471 Type: parts[1],
2472 Name: parts[2],
2473 }
2474 }
2475 }
2476 
2477 func (r *RequestedDevice) Validate() error {
2478 if r == nil {
2479 return nil
2480 }
2481 
2482 var mErr multierror.Error
2483 if r.Name == "" {
2484 multierror.Append(&mErr, errors.New("device name must be given as one of the following: type, vendor/type, or vendor/type/name"))
2485 }
2486 
2487 for idx, constr := range r.Constraints {
2488 // Ensure that the constraint doesn't use an operand we do not allow
2489 switch constr.Operand {
2490 case ConstraintDistinctHosts, ConstraintDistinctProperty:
2491 outer := fmt.Errorf("Constraint %d validation failed: using unsupported operand %q", idx+1, constr.Operand)
2492 multierror.Append(&mErr, outer)
2493 default:
2494 if err := constr.Validate(); err != nil {
2495 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
2496 multierror.Append(&mErr, outer)
2497 }
2498 }
2499 }
2500 for idx, affinity := range r.Affinities {
2501 if err := affinity.Validate(); err != nil {
2502 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
2503 multierror.Append(&mErr, outer)
2504 }
2505 }
2506 
2507 return mErr.ErrorOrNil()
2508 }
2509 
2510 // NodeResources is used to define the resources available on a client node.
2511 type NodeResources struct {
2512 Cpu NodeCpuResources
2513 Memory NodeMemoryResources
2514 Disk NodeDiskResources
2515 Networks Networks
2516 Devices []*NodeDeviceResource
2517 }
2518 
2519 func (n *NodeResources) Copy() *NodeResources {
2520 if n == nil {
2521 return nil
2522 }
2523 
2524 newN := new(NodeResources)
2525 *newN = *n
2526 
2527 // Copy the networks
2528 newN.Networks = n.Networks.Copy()
2529 
2530 // Copy the devices
2531 if n.Devices != nil {
2532 devices := len(n.Devices)
2533 newN.Devices = make([]*NodeDeviceResource, devices)
2534 for i := 0; i < devices; i++ {
2535 newN.Devices[i] = n.Devices[i].Copy()
2536 }
2537 }
2538 
2539 return newN
2540 }
2541 
2542 // Comparable returns a comparable version of the node's resources. This
2543 // conversion can be lossy so care must be taken when using it.
2544 func (n *NodeResources) Comparable() *ComparableResources { 2545 if n == nil { 2546 return nil 2547 } 2548 2549 c := &ComparableResources{ 2550 Flattened: AllocatedTaskResources{ 2551 Cpu: AllocatedCpuResources{ 2552 CpuShares: n.Cpu.CpuShares, 2553 }, 2554 Memory: AllocatedMemoryResources{ 2555 MemoryMB: n.Memory.MemoryMB, 2556 }, 2557 Networks: n.Networks, 2558 }, 2559 Shared: AllocatedSharedResources{ 2560 DiskMB: n.Disk.DiskMB, 2561 }, 2562 } 2563 return c 2564 } 2565 2566 func (n *NodeResources) Merge(o *NodeResources) { 2567 if o == nil { 2568 return 2569 } 2570 2571 n.Cpu.Merge(&o.Cpu) 2572 n.Memory.Merge(&o.Memory) 2573 n.Disk.Merge(&o.Disk) 2574 2575 if len(o.Networks) != 0 { 2576 n.Networks = o.Networks 2577 } 2578 2579 if len(o.Devices) != 0 { 2580 n.Devices = o.Devices 2581 } 2582 } 2583 2584 func (n *NodeResources) Equals(o *NodeResources) bool { 2585 if o == nil && n == nil { 2586 return true 2587 } else if o == nil { 2588 return false 2589 } else if n == nil { 2590 return false 2591 } 2592 2593 if !n.Cpu.Equals(&o.Cpu) { 2594 return false 2595 } 2596 if !n.Memory.Equals(&o.Memory) { 2597 return false 2598 } 2599 if !n.Disk.Equals(&o.Disk) { 2600 return false 2601 } 2602 if !n.Networks.Equals(&o.Networks) { 2603 return false 2604 } 2605 2606 // Check the devices 2607 if !DevicesEquals(n.Devices, o.Devices) { 2608 return false 2609 } 2610 2611 return true 2612 } 2613 2614 // Equals equates Networks as a set 2615 func (ns *Networks) Equals(o *Networks) bool { 2616 if ns == o { 2617 return true 2618 } 2619 if ns == nil || o == nil { 2620 return false 2621 } 2622 if len(*ns) != len(*o) { 2623 return false 2624 } 2625 SETEQUALS: 2626 for _, ne := range *ns { 2627 for _, oe := range *o { 2628 if ne.Equals(oe) { 2629 continue SETEQUALS 2630 } 2631 } 2632 return false 2633 } 2634 return true 2635 } 2636 2637 // DevicesEquals returns true if the two device arrays are set equal 2638 func DevicesEquals(d1, d2 []*NodeDeviceResource) bool { 2639 if len(d1) != len(d2) { 2640 return false 2641 } 2642 idMap := make(map[DeviceIdTuple]*NodeDeviceResource, len(d1)) 2643 for _, d := range d1 { 2644 idMap[*d.ID()] = d 2645 } 2646 for _, otherD := range d2 { 2647 if d, ok := idMap[*otherD.ID()]; !ok || !d.Equals(otherD) { 2648 return false 2649 } 2650 } 2651 2652 return true 2653 } 2654 2655 // NodeCpuResources captures the CPU resources of the node. 2656 type NodeCpuResources struct { 2657 // CpuShares is the CPU shares available. This is calculated by number of 2658 // cores multiplied by the core frequency. 
2659 CpuShares int64 2660 } 2661 2662 func (n *NodeCpuResources) Merge(o *NodeCpuResources) { 2663 if o == nil { 2664 return 2665 } 2666 2667 if o.CpuShares != 0 { 2668 n.CpuShares = o.CpuShares 2669 } 2670 } 2671 2672 func (n *NodeCpuResources) Equals(o *NodeCpuResources) bool { 2673 if o == nil && n == nil { 2674 return true 2675 } else if o == nil { 2676 return false 2677 } else if n == nil { 2678 return false 2679 } 2680 2681 if n.CpuShares != o.CpuShares { 2682 return false 2683 } 2684 2685 return true 2686 } 2687 2688 // NodeMemoryResources captures the memory resources of the node 2689 type NodeMemoryResources struct { 2690 // MemoryMB is the total available memory on the node 2691 MemoryMB int64 2692 } 2693 2694 func (n *NodeMemoryResources) Merge(o *NodeMemoryResources) { 2695 if o == nil { 2696 return 2697 } 2698 2699 if o.MemoryMB != 0 { 2700 n.MemoryMB = o.MemoryMB 2701 } 2702 } 2703 2704 func (n *NodeMemoryResources) Equals(o *NodeMemoryResources) bool { 2705 if o == nil && n == nil { 2706 return true 2707 } else if o == nil { 2708 return false 2709 } else if n == nil { 2710 return false 2711 } 2712 2713 if n.MemoryMB != o.MemoryMB { 2714 return false 2715 } 2716 2717 return true 2718 } 2719 2720 // NodeDiskResources captures the disk resources of the node 2721 type NodeDiskResources struct { 2722 // DiskMB is the total available disk space on the node 2723 DiskMB int64 2724 } 2725 2726 func (n *NodeDiskResources) Merge(o *NodeDiskResources) { 2727 if o == nil { 2728 return 2729 } 2730 if o.DiskMB != 0 { 2731 n.DiskMB = o.DiskMB 2732 } 2733 } 2734 2735 func (n *NodeDiskResources) Equals(o *NodeDiskResources) bool { 2736 if o == nil && n == nil { 2737 return true 2738 } else if o == nil { 2739 return false 2740 } else if n == nil { 2741 return false 2742 } 2743 2744 if n.DiskMB != o.DiskMB { 2745 return false 2746 } 2747 2748 return true 2749 } 2750 2751 // DeviceIdTuple is the tuple that identifies a device 2752 type DeviceIdTuple struct { 2753 Vendor string 2754 Type string 2755 Name string 2756 } 2757 2758 func (d *DeviceIdTuple) String() string { 2759 if d == nil { 2760 return "" 2761 } 2762 2763 return fmt.Sprintf("%s/%s/%s", d.Vendor, d.Type, d.Name) 2764 } 2765 2766 // Matches returns if this Device ID is a superset of the passed ID. 2767 func (id *DeviceIdTuple) Matches(other *DeviceIdTuple) bool { 2768 if other == nil { 2769 return false 2770 } 2771 2772 if other.Name != "" && other.Name != id.Name { 2773 return false 2774 } 2775 2776 if other.Vendor != "" && other.Vendor != id.Vendor { 2777 return false 2778 } 2779 2780 if other.Type != "" && other.Type != id.Type { 2781 return false 2782 } 2783 2784 return true 2785 } 2786 2787 // Equals returns if this Device ID is the same as the passed ID. 2788 func (id *DeviceIdTuple) Equals(o *DeviceIdTuple) bool { 2789 if id == nil && o == nil { 2790 return true 2791 } else if id == nil || o == nil { 2792 return false 2793 } 2794 2795 return o.Vendor == id.Vendor && o.Type == id.Type && o.Name == id.Name 2796 } 2797 2798 // NodeDeviceResource captures a set of devices sharing a common 2799 // vendor/type/device_name tuple. 
2800 type NodeDeviceResource struct {
2801 Vendor string
2802 Type string
2803 Name string
2804 Instances []*NodeDevice
2805 Attributes map[string]*psstructs.Attribute
2806 }
2807 
2808 func (n *NodeDeviceResource) ID() *DeviceIdTuple {
2809 if n == nil {
2810 return nil
2811 }
2812 
2813 return &DeviceIdTuple{
2814 Vendor: n.Vendor,
2815 Type: n.Type,
2816 Name: n.Name,
2817 }
2818 }
2819 
2820 func (n *NodeDeviceResource) Copy() *NodeDeviceResource {
2821 if n == nil {
2822 return nil
2823 }
2824 
2825 // Copy the primitives
2826 nn := *n
2827 
2828 // Copy the device instances
2829 if l := len(nn.Instances); l != 0 {
2830 nn.Instances = make([]*NodeDevice, 0, l)
2831 for _, d := range n.Instances {
2832 nn.Instances = append(nn.Instances, d.Copy())
2833 }
2834 }
2835 
2836 // Copy the Attributes
2837 nn.Attributes = psstructs.CopyMapStringAttribute(nn.Attributes)
2838 
2839 return &nn
2840 }
2841 
2842 func (n *NodeDeviceResource) Equals(o *NodeDeviceResource) bool {
2843 if o == nil && n == nil {
2844 return true
2845 } else if o == nil {
2846 return false
2847 } else if n == nil {
2848 return false
2849 }
2850 
2851 if n.Vendor != o.Vendor {
2852 return false
2853 } else if n.Type != o.Type {
2854 return false
2855 } else if n.Name != o.Name {
2856 return false
2857 }
2858 
2859 // Check the attributes
2860 if len(n.Attributes) != len(o.Attributes) {
2861 return false
2862 }
2863 for k, v := range n.Attributes {
2864 if otherV, ok := o.Attributes[k]; !ok || v != otherV {
2865 return false
2866 }
2867 }
2868 
2869 // Check the instances
2870 if len(n.Instances) != len(o.Instances) {
2871 return false
2872 }
2873 idMap := make(map[string]*NodeDevice, len(n.Instances))
2874 for _, d := range n.Instances {
2875 idMap[d.ID] = d
2876 }
2877 for _, otherD := range o.Instances {
2878 if d, ok := idMap[otherD.ID]; !ok || !d.Equals(otherD) {
2879 return false
2880 }
2881 }
2882 
2883 return true
2884 }
2885 
2886 // NodeDevice is an instance of a particular device.
2887 type NodeDevice struct {
2888 // ID is the ID of the device.
2889 ID string
2890 
2891 // Healthy captures whether the device is healthy.
2892 Healthy bool
2893 
2894 // HealthDescription is used to provide a human readable description of why
2895 // the device may be unhealthy.
2896 HealthDescription string
2897 
2898 // Locality stores HW locality information for the node to optionally be
2899 // used when making placement decisions.
2900 Locality *NodeDeviceLocality
2901 }
2902 
2903 func (n *NodeDevice) Equals(o *NodeDevice) bool {
2904 if o == nil && n == nil {
2905 return true
2906 } else if o == nil {
2907 return false
2908 } else if n == nil {
2909 return false
2910 }
2911 
2912 if n.ID != o.ID {
2913 return false
2914 } else if n.Healthy != o.Healthy {
2915 return false
2916 } else if n.HealthDescription != o.HealthDescription {
2917 return false
2918 } else if !n.Locality.Equals(o.Locality) {
2919 return false
2920 }
2921 
2922 return true
2923 }
2924 
2925 func (n *NodeDevice) Copy() *NodeDevice {
2926 if n == nil {
2927 return nil
2928 }
2929 
2930 // Copy the primitives
2931 nn := *n
2932 
2933 // Copy the locality
2934 nn.Locality = nn.Locality.Copy()
2935 
2936 return &nn
2937 }
2938 
2939 // NodeDeviceLocality stores information about the device's hardware locality on
2940 // the node.
2941 type NodeDeviceLocality struct {
2942 // PciBusID is the PCI Bus ID for the device.
2943 PciBusID string
2944 }
2945 
2946 func (n *NodeDeviceLocality) Equals(o *NodeDeviceLocality) bool {
2947 if o == nil && n == nil {
2948 return true
2949 } else if o == nil {
2950 return false
2951 } else if n == nil {
2952 return false
2953 }
2954 
2955 if n.PciBusID != o.PciBusID {
2956 return false
2957 }
2958 
2959 return true
2960 }
2961 
2962 func (n *NodeDeviceLocality) Copy() *NodeDeviceLocality {
2963 if n == nil {
2964 return nil
2965 }
2966 
2967 // Copy the primitives
2968 nn := *n
2969 return &nn
2970 }
2971 
2972 // NodeReservedResources is used to capture the resources on a client node that
2973 // should be reserved and not made available to jobs.
2974 type NodeReservedResources struct {
2975 Cpu NodeReservedCpuResources
2976 Memory NodeReservedMemoryResources
2977 Disk NodeReservedDiskResources
2978 Networks NodeReservedNetworkResources
2979 }
2980 
2981 func (n *NodeReservedResources) Copy() *NodeReservedResources {
2982 if n == nil {
2983 return nil
2984 }
2985 newN := new(NodeReservedResources)
2986 *newN = *n
2987 return newN
2988 }
2989 
2990 // Comparable returns a comparable version of the node's reserved resources. The
2991 // returned resources don't contain any network information. This conversion
2992 // can be lossy so care must be taken when using it.
2993 func (n *NodeReservedResources) Comparable() *ComparableResources {
2994 if n == nil {
2995 return nil
2996 }
2997 
2998 c := &ComparableResources{
2999 Flattened: AllocatedTaskResources{
3000 Cpu: AllocatedCpuResources{
3001 CpuShares: n.Cpu.CpuShares,
3002 },
3003 Memory: AllocatedMemoryResources{
3004 MemoryMB: n.Memory.MemoryMB,
3005 },
3006 },
3007 Shared: AllocatedSharedResources{
3008 DiskMB: n.Disk.DiskMB,
3009 },
3010 }
3011 return c
3012 }
3013 
3014 // NodeReservedCpuResources captures the reserved CPU resources of the node.
3015 type NodeReservedCpuResources struct {
3016 CpuShares int64
3017 }
3018 
3019 // NodeReservedMemoryResources captures the reserved memory resources of the node.
3020 type NodeReservedMemoryResources struct {
3021 MemoryMB int64
3022 }
3023 
3024 // NodeReservedDiskResources captures the reserved disk resources of the node.
3025 type NodeReservedDiskResources struct {
3026 DiskMB int64
3027 }
3028 
3029 // NodeReservedNetworkResources captures the reserved network resources of the node.
3030 type NodeReservedNetworkResources struct {
3031 // ReservedHostPorts is the set of ports reserved on all host network
3032 // interfaces. Its format is a comma-separated list of integers or integer
3033 // ranges. (80,443,1000-2000,2005)
3034 ReservedHostPorts string
3035 }
3036 
3037 // ParseReservedHostPorts returns the reserved host ports.
3038 func (n *NodeReservedNetworkResources) ParseReservedHostPorts() ([]uint64, error) {
3039 return ParsePortRanges(n.ReservedHostPorts)
3040 }
3041 
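// Example (illustrative sketch, assuming ParsePortRanges expands each range
// into its individual ports):
//
//	n := &NodeReservedNetworkResources{ReservedHostPorts: "80,443,8000-8002"}
//	ports, err := n.ParseReservedHostPorts()
//	// under that assumption: ports == []uint64{80, 443, 8000, 8001, 8002}, err == nil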
3042 // AllocatedResources is the set of resources to be used by an allocation.
3043 type AllocatedResources struct {
3044 // Tasks is a mapping of task name to the resources for the task.
3045 Tasks map[string]*AllocatedTaskResources
3046 TaskLifecycles map[string]*TaskLifecycleConfig
3047 
3048 // Shared is the set of resources that are shared by all tasks in the group.
3049 Shared AllocatedSharedResources
3050 }
3051 
3052 func (a *AllocatedResources) Copy() *AllocatedResources {
3053 if a == nil {
3054 return nil
3055 }
3056 
3057 out := AllocatedResources{
3058 Shared: a.Shared.Copy(),
3059 }
3060 
3061 if a.Tasks != nil {
3062 out.Tasks = make(map[string]*AllocatedTaskResources, len(a.Tasks))
3063 for task, resource := range a.Tasks {
3064 out.Tasks[task] = resource.Copy()
3065 }
3066 }
3067 if a.TaskLifecycles != nil {
3068 out.TaskLifecycles = make(map[string]*TaskLifecycleConfig, len(a.TaskLifecycles))
3069 for task, lifecycle := range a.TaskLifecycles {
3070 out.TaskLifecycles[task] = lifecycle.Copy()
3071 }
3072 
3073 }
3074 
3075 return &out
3076 }
3077 
3078 // Comparable returns a comparable version of the allocation's allocated
3079 // resources. This conversion can be lossy so care must be taken when using it.
3080 func (a *AllocatedResources) Comparable() *ComparableResources {
3081 if a == nil {
3082 return nil
3083 }
3084 
3085 c := &ComparableResources{
3086 Shared: a.Shared,
3087 }
3088 
3089 prestartSidecarTasks := &AllocatedTaskResources{}
3090 prestartEphemeralTasks := &AllocatedTaskResources{}
3091 main := &AllocatedTaskResources{}
3092 
3093 for taskName, r := range a.Tasks {
3094 lc := a.TaskLifecycles[taskName]
3095 if lc == nil {
3096 main.Add(r)
3097 } else if lc.Hook == TaskLifecycleHookPrestart {
3098 if lc.Sidecar {
3099 prestartSidecarTasks.Add(r)
3100 } else {
3101 prestartEphemeralTasks.Add(r)
3102 }
3103 }
3104 }
3105 
3106 // Flattened is the prestart sidecar tasks plus the max of the main and prestart ephemeral tasks
3107 prestartEphemeralTasks.Max(main)
3108 prestartSidecarTasks.Add(prestartEphemeralTasks)
3109 c.Flattened.Add(prestartSidecarTasks)
3110 
3111 // Add network resources that are at the task group level
3112 for _, network := range a.Shared.Networks {
3113 c.Flattened.Add(&AllocatedTaskResources{
3114 Networks: []*NetworkResource{network},
3115 })
3116 }
3117 
3118 return c
3119 }
3120 
3121 // OldTaskResources returns the pre-0.9.0 map of task resources
3122 func (a *AllocatedResources) OldTaskResources() map[string]*Resources {
3123 m := make(map[string]*Resources, len(a.Tasks))
3124 for name, res := range a.Tasks {
3125 m[name] = &Resources{
3126 CPU: int(res.Cpu.CpuShares),
3127 MemoryMB: int(res.Memory.MemoryMB),
3128 Networks: res.Networks,
3129 }
3130 }
3131 
3132 return m
3133 }
3134 
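// Example (illustrative sketch, worked from the Comparable logic above):
// flattened usage is the prestart sidecar tasks plus the maximum of the main
// and prestart ephemeral tasks. With a 500 MHz main task, a 200 MHz prestart
// ephemeral task, and a 100 MHz prestart sidecar, the flattened CPU is
// 100 + max(500, 200) = 600 MHz, since the ephemeral task releases its share
// before the main task needs its full amount.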
3135 // AllocatedTaskResources are the set of resources allocated to a task.
3136 type AllocatedTaskResources struct {
3137 Cpu AllocatedCpuResources
3138 Memory AllocatedMemoryResources
3139 Networks Networks
3140 Devices []*AllocatedDeviceResource
3141 }
3142 
3143 func (a *AllocatedTaskResources) Copy() *AllocatedTaskResources {
3144 if a == nil {
3145 return nil
3146 }
3147 newA := new(AllocatedTaskResources)
3148 *newA = *a
3149 
3150 // Copy the networks
3151 newA.Networks = a.Networks.Copy()
3152 
3153 // Copy the devices
3154 if a.Devices != nil {
3155 n := len(a.Devices)
3156 newA.Devices = make([]*AllocatedDeviceResource, n)
3157 for i := 0; i < n; i++ {
3158 newA.Devices[i] = a.Devices[i].Copy()
3159 }
3160 }
3161 
3162 return newA
3163 }
3164 
3165 // NetIndex finds the matching net index using device name
3166 func (a *AllocatedTaskResources) NetIndex(n *NetworkResource) int {
3167 return a.Networks.NetIndex(n)
3168 }
3169 
3170 func (a *AllocatedTaskResources) Add(delta *AllocatedTaskResources) {
3171 if delta == nil {
3172 return
3173 }
3174 
3175 a.Cpu.Add(&delta.Cpu)
3176 a.Memory.Add(&delta.Memory)
3177 
3178 for _, n := range delta.Networks {
3179 // Find the matching interface by IP or CIDR
3180 idx := a.NetIndex(n)
3181 if idx == -1 {
3182 a.Networks = append(a.Networks, n.Copy())
3183 } else {
3184 a.Networks[idx].Add(n)
3185 }
3186 }
3187 
3188 for _, d := range delta.Devices {
3189 // Find the matching device
3190 idx := AllocatedDevices(a.Devices).Index(d)
3191 if idx == -1 {
3192 a.Devices = append(a.Devices, d.Copy())
3193 } else {
3194 a.Devices[idx].Add(d)
3195 }
3196 }
3197 }
3198 
3199 func (a *AllocatedTaskResources) Max(other *AllocatedTaskResources) {
3200 if other == nil {
3201 return
3202 }
3203 
3204 a.Cpu.Max(&other.Cpu)
3205 a.Memory.Max(&other.Memory)
3206 
3207 for _, n := range other.Networks {
3208 // Find the matching interface by IP or CIDR
3209 idx := a.NetIndex(n)
3210 if idx == -1 {
3211 a.Networks = append(a.Networks, n.Copy())
3212 } else {
3213 a.Networks[idx].Add(n)
3214 }
3215 }
3216 
3217 for _, d := range other.Devices {
3218 // Find the matching device
3219 idx := AllocatedDevices(a.Devices).Index(d)
3220 if idx == -1 {
3221 a.Devices = append(a.Devices, d.Copy())
3222 } else {
3223 a.Devices[idx].Add(d)
3224 }
3225 }
3226 }
3227 
3228 // Comparable turns AllocatedTaskResources into ComparableResources
3229 // as a helper step in preemption
3230 func (a *AllocatedTaskResources) Comparable() *ComparableResources {
3231 ret := &ComparableResources{
3232 Flattened: AllocatedTaskResources{
3233 Cpu: AllocatedCpuResources{
3234 CpuShares: a.Cpu.CpuShares,
3235 },
3236 Memory: AllocatedMemoryResources{
3237 MemoryMB: a.Memory.MemoryMB,
3238 },
3239 },
3240 }
3241 if len(a.Networks) > 0 {
3242 for _, net := range a.Networks {
3243 ret.Flattened.Networks = append(ret.Flattened.Networks, net)
3244 }
3245 }
3246 return ret
3247 }
3248 
3249 // Subtract only subtracts CPU and Memory resources. Network utilization
3250 // is managed separately in NetworkIndex
3251 func (a *AllocatedTaskResources) Subtract(delta *AllocatedTaskResources) {
3252 if delta == nil {
3253 return
3254 }
3255 
3256 a.Cpu.Subtract(&delta.Cpu)
3257 a.Memory.Subtract(&delta.Memory)
3258 }
3259 
3260 // AllocatedSharedResources are the set of resources allocated to a task group.
3261 type AllocatedSharedResources struct { 3262 Networks Networks 3263 DiskMB int64 3264 } 3265 3266 func (a AllocatedSharedResources) Copy() AllocatedSharedResources { 3267 return AllocatedSharedResources{ 3268 Networks: a.Networks.Copy(), 3269 DiskMB: a.DiskMB, 3270 } 3271 } 3272 3273 func (a *AllocatedSharedResources) Add(delta *AllocatedSharedResources) { 3274 if delta == nil { 3275 return 3276 } 3277 a.Networks = append(a.Networks, delta.Networks...) 3278 a.DiskMB += delta.DiskMB 3279 3280 } 3281 3282 func (a *AllocatedSharedResources) Subtract(delta *AllocatedSharedResources) { 3283 if delta == nil { 3284 return 3285 } 3286 3287 diff := map[*NetworkResource]bool{} 3288 for _, n := range delta.Networks { 3289 diff[n] = true 3290 } 3291 var nets Networks 3292 for _, n := range a.Networks { 3293 if _, ok := diff[n]; !ok { 3294 nets = append(nets, n) 3295 } 3296 } 3297 a.Networks = nets 3298 a.DiskMB -= delta.DiskMB 3299 } 3300 3301 // AllocatedCpuResources captures the allocated CPU resources. 3302 type AllocatedCpuResources struct { 3303 CpuShares int64 3304 } 3305 3306 func (a *AllocatedCpuResources) Add(delta *AllocatedCpuResources) { 3307 if delta == nil { 3308 return 3309 } 3310 3311 a.CpuShares += delta.CpuShares 3312 } 3313 3314 func (a *AllocatedCpuResources) Subtract(delta *AllocatedCpuResources) { 3315 if delta == nil { 3316 return 3317 } 3318 3319 a.CpuShares -= delta.CpuShares 3320 } 3321 3322 func (a *AllocatedCpuResources) Max(other *AllocatedCpuResources) { 3323 if other == nil { 3324 return 3325 } 3326 3327 if other.CpuShares > a.CpuShares { 3328 a.CpuShares = other.CpuShares 3329 } 3330 } 3331 3332 // AllocatedMemoryResources captures the allocated memory resources. 3333 type AllocatedMemoryResources struct { 3334 MemoryMB int64 3335 } 3336 3337 func (a *AllocatedMemoryResources) Add(delta *AllocatedMemoryResources) { 3338 if delta == nil { 3339 return 3340 } 3341 3342 a.MemoryMB += delta.MemoryMB 3343 } 3344 3345 func (a *AllocatedMemoryResources) Subtract(delta *AllocatedMemoryResources) { 3346 if delta == nil { 3347 return 3348 } 3349 3350 a.MemoryMB -= delta.MemoryMB 3351 } 3352 3353 func (a *AllocatedMemoryResources) Max(other *AllocatedMemoryResources) { 3354 if other == nil { 3355 return 3356 } 3357 3358 if other.MemoryMB > a.MemoryMB { 3359 a.MemoryMB = other.MemoryMB 3360 } 3361 } 3362 3363 type AllocatedDevices []*AllocatedDeviceResource 3364 3365 // Index finds the matching index using the passed device. If not found, -1 is 3366 // returned. 3367 func (a AllocatedDevices) Index(d *AllocatedDeviceResource) int { 3368 if d == nil { 3369 return -1 3370 } 3371 3372 for i, o := range a { 3373 if o.ID().Equals(d.ID()) { 3374 return i 3375 } 3376 } 3377 3378 return -1 3379 } 3380 3381 // AllocatedDeviceResource captures a set of allocated devices. 3382 type AllocatedDeviceResource struct { 3383 // Vendor, Type, and Name are used to select the plugin to request the 3384 // device IDs from. 3385 Vendor string 3386 Type string 3387 Name string 3388 3389 // DeviceIDs is the set of allocated devices 3390 DeviceIDs []string 3391 } 3392 3393 func (a *AllocatedDeviceResource) ID() *DeviceIdTuple { 3394 if a == nil { 3395 return nil 3396 } 3397 3398 return &DeviceIdTuple{ 3399 Vendor: a.Vendor, 3400 Type: a.Type, 3401 Name: a.Name, 3402 } 3403 } 3404 3405 func (a *AllocatedDeviceResource) Add(delta *AllocatedDeviceResource) { 3406 if delta == nil { 3407 return 3408 } 3409 3410 a.DeviceIDs = append(a.DeviceIDs, delta.DeviceIDs...) 
3411 }
3412 
3413 func (a *AllocatedDeviceResource) Copy() *AllocatedDeviceResource {
3414 if a == nil {
3415 return a
3416 }
3417 
3418 na := *a
3419 
3420 // Copy the devices
3421 na.DeviceIDs = make([]string, len(a.DeviceIDs))
3422 for i, id := range a.DeviceIDs {
3423 na.DeviceIDs[i] = id
3424 }
3425 
3426 return &na
3427 }
3428 
3429 // ComparableResources is the set of resources allocated to a task group but
3430 // not keyed by Task, making it easier to compare.
3431 type ComparableResources struct {
3432 Flattened AllocatedTaskResources
3433 Shared AllocatedSharedResources
3434 }
3435 
3436 func (c *ComparableResources) Add(delta *ComparableResources) {
3437 if delta == nil {
3438 return
3439 }
3440 
3441 c.Flattened.Add(&delta.Flattened)
3442 c.Shared.Add(&delta.Shared)
3443 }
3444 
3445 func (c *ComparableResources) Subtract(delta *ComparableResources) {
3446 if delta == nil {
3447 return
3448 }
3449 
3450 c.Flattened.Subtract(&delta.Flattened)
3451 c.Shared.Subtract(&delta.Shared)
3452 }
3453 
3454 func (c *ComparableResources) Copy() *ComparableResources {
3455 if c == nil {
3456 return nil
3457 }
3458 newR := new(ComparableResources)
3459 *newR = *c
3460 return newR
3461 }
3462 
3463 // Superset checks if one set of resources is a superset of another. This
3464 // ignores network resources, and the NetworkIndex should be used for that.
3465 func (c *ComparableResources) Superset(other *ComparableResources) (bool, string) {
3466 if c.Flattened.Cpu.CpuShares < other.Flattened.Cpu.CpuShares {
3467 return false, "cpu"
3468 }
3469 if c.Flattened.Memory.MemoryMB < other.Flattened.Memory.MemoryMB {
3470 return false, "memory"
3471 }
3472 if c.Shared.DiskMB < other.Shared.DiskMB {
3473 return false, "disk"
3474 }
3475 return true, ""
3476 }
3477 
3478 // NetIndex finds the matching net index using device name
3479 func (c *ComparableResources) NetIndex(n *NetworkResource) int {
3480 return c.Flattened.Networks.NetIndex(n)
3481 }
3482 
3483 const (
3484 // JobTypeCore is reserved for internal system tasks and is
3485 // always handled by the CoreScheduler.
3486 JobTypeCore = "_core"
3487 JobTypeService = "service"
3488 JobTypeBatch = "batch"
3489 JobTypeSystem = "system"
3490 )
3491 
3492 const (
3493 JobStatusPending = "pending" // Pending means the job is waiting on scheduling
3494 JobStatusRunning = "running" // Running means the job has non-terminal allocations
3495 JobStatusDead = "dead" // Dead means all evaluations and allocations are terminal
3496 )
3497 
3498 const (
3499 // JobMinPriority is the minimum allowed priority
3500 JobMinPriority = 1
3501 
3502 // JobDefaultPriority is the default priority if not
3503 // specified.
3504 JobDefaultPriority = 50
3505 
3506 // JobMaxPriority is the maximum allowed priority
3507 JobMaxPriority = 100
3508 
3509 // Ensure CoreJobPriority is higher than any user
3510 // specified job so that it gets priority. This is important
3511 // for the system to remain healthy.
3512 CoreJobPriority = JobMaxPriority * 2
3513 
3514 // JobTrackedVersions is the number of historic job versions that are
3515 // kept.
3516 JobTrackedVersions = 6
3517 
3518 // JobTrackedScalingEvents is the number of scaling events that are
3519 // kept for a single task group.
3520 JobTrackedScalingEvents = 20
3521 )
3522 
3523 // Job is the scope of a scheduling request to Nomad. It is the largest
3524 // scoped object, and is a named collection of task groups. Each task group
3525 // is further composed of tasks. A task group (TG) is, however, the unit
3526 // of scheduling.
3527 type Job struct { 3528 // Stop marks whether the user has stopped the job. A stopped job will 3529 // have all created allocations stopped and acts as a way to stop a job 3530 // without purging it from the system. This allows existing allocs to be 3531 // queried and the job to be inspected as it is being killed. 3532 Stop bool 3533 3534 // Region is the Nomad region that handles scheduling this job 3535 Region string 3536 3537 // Namespace is the namespace the job is submitted into. 3538 Namespace string 3539 3540 // ID is a unique identifier for the job per region. It can be 3541 // specified hierarchically like LineOfBiz/OrgName/Team/Project 3542 ID string 3543 3544 // ParentID is the unique identifier of the job that spawned this job. 3545 ParentID string 3546 3547 // Name is the logical name of the job used to refer to it. This is unique 3548 // per region, but not unique globally. 3549 Name string 3550 3551 // Type is used to control various behaviors about the job. Most jobs 3552 // are service jobs, meaning they are expected to be long lived. 3553 // Some jobs are batch oriented meaning they run and then terminate. 3554 // This can be extended in the future to support custom schedulers. 3555 Type string 3556 3557 // Priority is used to control scheduling importance and if this job 3558 // can preempt other jobs. 3559 Priority int 3560 3561 // AllAtOnce is used to control if incremental scheduling of task groups 3562 // is allowed or if we must do a gang scheduling of the entire job. This 3563 // can slow down larger jobs if resources are not available. 3564 AllAtOnce bool 3565 3566 // Datacenters contains all the datacenters this job is allowed to span 3567 Datacenters []string 3568 3569 // Constraints can be specified at a job level and apply to 3570 // all the task groups and tasks. 3571 Constraints []*Constraint 3572 3573 // Affinities can be specified at the job level to express 3574 // scheduling preferences that apply to all groups and tasks 3575 Affinities []*Affinity 3576 3577 // Spread can be specified at the job level to express spreading 3578 // allocations across a desired attribute, such as datacenter 3579 Spreads []*Spread 3580 3581 // TaskGroups are the collections of task groups that this job needs 3582 // to run. Each task group is an atomic unit of scheduling and placement. 3583 TaskGroups []*TaskGroup 3584 3585 // See agent.ApiJobToStructJob 3586 // Update provides defaults for the TaskGroup Update stanzas 3587 Update UpdateStrategy 3588 3589 // Periodic is used to define the interval the job is run at. 3590 Periodic *PeriodicConfig 3591 3592 // ParameterizedJob is used to specify the job as a parameterized job 3593 // for dispatching. 3594 ParameterizedJob *ParameterizedJobConfig 3595 3596 // Dispatched is used to identify if the Job has been dispatched from a 3597 // parameterized job. 3598 Dispatched bool 3599 3600 // Payload is the payload supplied when the job was dispatched. 3601 Payload []byte 3602 3603 // Meta is used to associate arbitrary metadata with this 3604 // job. This is opaque to Nomad. 3605 Meta map[string]string 3606 3607 // ConsulToken is the Consul token that proves the submitter of the job has 3608 // access to the Service Identity policies associated with the job's 3609 // Consul Connect enabled services. This field is only used to transfer the 3610 // token and is not stored after Job submission. 
3611 ConsulToken string
3612 
3613 // VaultToken is the Vault token that proves the submitter of the job has
3614 // access to the specified Vault policies. This field is only used to
3615 // transfer the token and is not stored after Job submission.
3616 VaultToken string
3617 
3618 // Job status
3619 Status string
3620 
3621 // StatusDescription is meant to provide more human-readable information
3622 StatusDescription string
3623 
3624 // Stable marks a job as stable. Stability is only defined on "service" and
3625 // "system" jobs. The stability of a job will be set automatically as part
3626 // of a deployment and can be manually set via APIs. This field is updated
3627 // when the status of a corresponding deployment transitions to Failed
3628 // or Successful. This field is not meaningful for jobs that don't have an
3629 // update stanza.
3630 Stable bool
3631 
3632 // Version is a monotonically increasing version number that is incremented
3633 // on each job register.
3634 Version uint64
3635 
3636 // SubmitTime is the time at which the job was submitted as a UnixNano in
3637 // UTC
3638 SubmitTime int64
3639 
3640 // Raft Indexes
3641 CreateIndex uint64
3642 ModifyIndex uint64
3643 JobModifyIndex uint64
3644 }
3645 
3646 // NamespacedID returns the namespaced id useful for logging
3647 func (j *Job) NamespacedID() *NamespacedID {
3648 return &NamespacedID{
3649 ID: j.ID,
3650 Namespace: j.Namespace,
3651 }
3652 }
3653 
3654 // Canonicalize is used to canonicalize fields in the Job. This should be called
3655 // when registering a Job. A set of warnings are returned if the job was changed
3656 // in any way that the user should be made aware of.
3657 func (j *Job) Canonicalize() (warnings error) {
3658 if j == nil {
3659 return nil
3660 }
3661 
3662 var mErr multierror.Error
3663 // Ensure that empty and nil maps are treated the same to avoid scheduling
3664 // problems since we use reflect.DeepEqual.
3665 if len(j.Meta) == 0 {
3666 j.Meta = nil
3667 }
3668 
3669 // Ensure the job is in a namespace.
3670 if j.Namespace == "" {
3671 j.Namespace = DefaultNamespace
3672 }
3673 
3674 for _, tg := range j.TaskGroups {
3675 tg.Canonicalize(j)
3676 }
3677 
3678 if j.ParameterizedJob != nil {
3679 j.ParameterizedJob.Canonicalize()
3680 }
3681 
3682 if j.Periodic != nil {
3683 j.Periodic.Canonicalize()
3684 }
3685 
3686 return mErr.ErrorOrNil()
3687 }
3688 
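// Example (illustrative sketch): registering a job without a namespace
// canonicalizes it into the default namespace:
//
//	j := &Job{ID: "example"}
//	_ = j.Canonicalize()
//	// j.Namespace == DefaultNamespace ("default")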
3689 // Copy returns a deep copy of the Job. It is expected that callers use recover.
3690 // This method can panic if the deep copy fails, as it uses reflection.
3691 func (j *Job) Copy() *Job {
3692 if j == nil {
3693 return nil
3694 }
3695 nj := new(Job)
3696 *nj = *j
3697 nj.Datacenters = helper.CopySliceString(nj.Datacenters)
3698 nj.Constraints = CopySliceConstraints(nj.Constraints)
3699 nj.Affinities = CopySliceAffinities(nj.Affinities)
3700 
3701 if j.TaskGroups != nil {
3702 tgs := make([]*TaskGroup, len(nj.TaskGroups))
3703 for i, tg := range nj.TaskGroups {
3704 tgs[i] = tg.Copy()
3705 }
3706 nj.TaskGroups = tgs
3707 }
3708 
3709 nj.Periodic = nj.Periodic.Copy()
3710 nj.Meta = helper.CopyMapStringString(nj.Meta)
3711 nj.ParameterizedJob = nj.ParameterizedJob.Copy()
3712 return nj
3713 }
3714 
3715 // Validate is used to sanity check a job input
3716 func (j *Job) Validate() error {
3717 var mErr multierror.Error
3718 
3719 if j.Region == "" {
3720 mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
3721 }
3722 if j.ID == "" {
3723 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
3724 } else if strings.Contains(j.ID, " ") {
3725 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
3726 }
3727 if j.Name == "" {
3728 mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
3729 }
3730 if j.Namespace == "" {
3731 mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace"))
3732 }
3733 switch j.Type {
3734 case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem:
3735 case "":
3736 mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
3737 default:
3738 mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type))
3739 }
3740 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
3741 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
3742 }
3743 if len(j.Datacenters) == 0 {
3744 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
3745 } else {
3746 for _, v := range j.Datacenters {
3747 if v == "" {
3748 mErr.Errors = append(mErr.Errors, errors.New("Job datacenter must be non-empty string"))
3749 }
3750 }
3751 }
3752 if len(j.TaskGroups) == 0 {
3753 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
3754 }
3755 for idx, constr := range j.Constraints {
3756 if err := constr.Validate(); err != nil {
3757 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
3758 mErr.Errors = append(mErr.Errors, outer)
3759 }
3760 }
3761 if j.Type == JobTypeSystem {
3762 if j.Affinities != nil {
3763 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza"))
3764 }
3765 } else {
3766 for idx, affinity := range j.Affinities {
3767 if err := affinity.Validate(); err != nil {
3768 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
3769 mErr.Errors = append(mErr.Errors, outer)
3770 }
3771 }
3772 }
3773 
3774 if j.Type == JobTypeSystem {
3775 if j.Spreads != nil {
3776 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza"))
3777 }
3778 } else {
3779 for idx, spread := range j.Spreads {
3780 if err := spread.Validate(); err != nil {
3781 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err)
3782 mErr.Errors = append(mErr.Errors, outer)
3783 }
3784 }
3785 }
3786 
3787 // Check for duplicate task groups
3788 taskGroups := make(map[string]int)
3789 for idx, tg := range j.TaskGroups {
3790 if tg.Name == "" {
3791 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
3792 } else if existing, ok := taskGroups[tg.Name]; ok {
3793 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
3794 } else {
3795 taskGroups[tg.Name] = idx
3796 }
3797 
3798 if tg.ShutdownDelay != nil && *tg.ShutdownDelay < 0 {
3799 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must not be negative"))
3800 }
3801 
3802 if tg.StopAfterClientDisconnect != nil && *tg.StopAfterClientDisconnect < 0 {
3803 mErr.Errors = append(mErr.Errors, errors.New("StopAfterClientDisconnect must not be negative"))
3804 }
3805 
3806 if j.Type == JobTypeSystem && tg.Count > 1 {
3807 mErr.Errors = append(mErr.Errors,
3808 fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler",
3809 tg.Name, tg.Count))
3810 }
3811 }
3812 
3813 // Validate the task group
3814 for _, tg := range j.TaskGroups {
3815 if err := tg.Validate(j); err != nil {
3816 outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err)
3817 mErr.Errors = append(mErr.Errors, outer)
3818 }
3819 }
3820 
3821 // Validate periodic is only used with batch jobs.
3822 if j.IsPeriodic() && j.Periodic.Enabled {
3823 if j.Type != JobTypeBatch {
3824 mErr.Errors = append(mErr.Errors,
3825 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch))
3826 }
3827 
3828 if err := j.Periodic.Validate(); err != nil {
3829 mErr.Errors = append(mErr.Errors, err)
3830 }
3831 }
3832 
3833 if j.IsParameterized() {
3834 if j.Type != JobTypeBatch {
3835 mErr.Errors = append(mErr.Errors,
3836 fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch))
3837 }
3838 
3839 if err := j.ParameterizedJob.Validate(); err != nil {
3840 mErr.Errors = append(mErr.Errors, err)
3841 }
3842 }
3843 
3844 return mErr.ErrorOrNil()
3845 }
3846 
3847 // Warnings returns a list of warnings that may be from dubious settings or
3848 // deprecation warnings.
3849 func (j *Job) Warnings() error {
3850 var mErr multierror.Error
3851 
3852 // Check the groups
3853 ap := 0
3854 for _, tg := range j.TaskGroups {
3855 if err := tg.Warnings(j); err != nil {
3856 outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err)
3857 mErr.Errors = append(mErr.Errors, outer)
3858 }
3859 if tg.Update != nil && tg.Update.AutoPromote {
3860 ap += 1
3861 }
3862 }
3863 
3864 // Check AutoPromote, should be all or none
3865 if ap > 0 && ap < len(j.TaskGroups) {
3866 err := fmt.Errorf("auto_promote must be true for all groups to enable automatic promotion")
3867 mErr.Errors = append(mErr.Errors, err)
3868 }
3869 
3870 return mErr.ErrorOrNil()
3871 }
3872 
3873 // LookupTaskGroup finds a task group by name
3874 func (j *Job) LookupTaskGroup(name string) *TaskGroup {
3875 for _, tg := range j.TaskGroups {
3876 if tg.Name == name {
3877 return tg
3878 }
3879 }
3880 return nil
3881 }
3882 
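// Example (illustrative sketch): with overlapping meta keys, the deepest
// scope wins when CombinedTaskMeta (below) merges them:
//
//	job meta:   {"env": "prod", "team": "core"}
//	group meta: {"env": "staging"}
//	task meta:  {"env": "dev"}
//	// CombinedTaskMeta(group, task) == {"env": "dev", "team": "core"}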
3883 // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined
3884 // meta data for the task. When joining Job, Group and Task Meta, the precedence
3885 // is by deepest scope (Task > Group > Job).
3886 func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string {
3887 group := j.LookupTaskGroup(groupName)
3888 if group == nil {
3889 return j.Meta
3890 }
3891 
3892 var meta map[string]string
3893 
3894 task := group.LookupTask(taskName)
3895 if task != nil {
3896 meta = helper.CopyMapStringString(task.Meta)
3897 }
3898 
3899 if meta == nil {
3900 meta = make(map[string]string, len(group.Meta)+len(j.Meta))
3901 }
3902 
3903 // Add the group specific meta
3904 for k, v := range group.Meta {
3905 if _, ok := meta[k]; !ok {
3906 meta[k] = v
3907 }
3908 }
3909 
3910 // Add the job specific meta
3911 for k, v := range j.Meta {
3912 if _, ok := meta[k]; !ok {
3913 meta[k] = v
3914 }
3915 }
3916 
3917 return meta
3918 }
3919 
3920 // Stopped returns if a job is stopped.
3921 func (j *Job) Stopped() bool {
3922 return j == nil || j.Stop
3923 }
3924 
3925 // HasUpdateStrategy returns if any task group in the job has an update strategy
3926 func (j *Job) HasUpdateStrategy() bool {
3927 for _, tg := range j.TaskGroups {
3928 if !tg.Update.IsEmpty() {
3929 return true
3930 }
3931 }
3932 
3933 return false
3934 }
3935 
3936 // Stub is used to return a summary of the job
3937 func (j *Job) Stub(summary *JobSummary) *JobListStub {
3938 return &JobListStub{
3939 ID: j.ID,
3940 ParentID: j.ParentID,
3941 Name: j.Name,
3942 Datacenters: j.Datacenters,
3943 Type: j.Type,
3944 Priority: j.Priority,
3945 Periodic: j.IsPeriodic(),
3946 ParameterizedJob: j.IsParameterized(),
3947 Stop: j.Stop,
3948 Status: j.Status,
3949 StatusDescription: j.StatusDescription,
3950 CreateIndex: j.CreateIndex,
3951 ModifyIndex: j.ModifyIndex,
3952 JobModifyIndex: j.JobModifyIndex,
3953 SubmitTime: j.SubmitTime,
3954 JobSummary: summary,
3955 }
3956 }
3957 
3958 // IsPeriodic returns whether a job is periodic.
3959 func (j *Job) IsPeriodic() bool {
3960 return j.Periodic != nil
3961 }
3962 
3963 // IsPeriodicActive returns whether the job is an active periodic job that will
3964 // create child jobs
3965 func (j *Job) IsPeriodicActive() bool {
3966 return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized()
3967 }
3968 
3969 // IsParameterized returns whether a job is a parameterized job.
3970 func (j *Job) IsParameterized() bool {
3971 return j.ParameterizedJob != nil && !j.Dispatched
3972 }
3973 
3974 // VaultPolicies returns the set of Vault policies per task group, per task
3975 func (j *Job) VaultPolicies() map[string]map[string]*Vault {
3976 policies := make(map[string]map[string]*Vault, len(j.TaskGroups))
3977 
3978 for _, tg := range j.TaskGroups {
3979 tgPolicies := make(map[string]*Vault, len(tg.Tasks))
3980 
3981 for _, task := range tg.Tasks {
3982 if task.Vault == nil {
3983 continue
3984 }
3985 
3986 tgPolicies[task.Name] = task.Vault
3987 }
3988 
3989 if len(tgPolicies) != 0 {
3990 policies[tg.Name] = tgPolicies
3991 }
3992 }
3993 
3994 return policies
3995 }
3996 
3997 // ConnectTasks returns the set of Consul Connect enabled tasks that will
3998 // require a Service Identity token, if Consul ACLs are enabled.
3999 //
4000 // This method is meaningful only after the Job has passed through the job
4001 // submission Mutator functions.
4002 //
4003 // task group -> []task
4004 func (j *Job) ConnectTasks() map[string][]string {
4005 m := make(map[string][]string)
4006 for _, tg := range j.TaskGroups {
4007 for _, task := range tg.Tasks {
4008 if task.Kind.IsConnectProxy() {
4009 // todo(shoenig): when we support native, probably need to check
4010 // an additional TBD TaskKind as well.
4011 m[tg.Name] = append(m[tg.Name], task.Name)
4012 }
4013 }
4014 }
4015 return m
4016 }
4017
4018 // RequiredSignals returns a mapping of task groups to tasks to their required
4019 // set of signals
4020 func (j *Job) RequiredSignals() map[string]map[string][]string {
4021 signals := make(map[string]map[string][]string)
4022
4023 for _, tg := range j.TaskGroups {
4024 for _, task := range tg.Tasks {
4025 // Use this local one as a set
4026 taskSignals := make(map[string]struct{})
4027
4028 // Check if the Vault change mode uses signals
4029 if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal {
4030 taskSignals[task.Vault.ChangeSignal] = struct{}{}
4031 }
4032
4033 // If a user has specified a KillSignal, add it to required signals
4034 if task.KillSignal != "" {
4035 taskSignals[task.KillSignal] = struct{}{}
4036 }
4037
4038 // Check if any template change mode uses signals
4039 for _, t := range task.Templates {
4040 if t.ChangeMode != TemplateChangeModeSignal {
4041 continue
4042 }
4043
4044 taskSignals[t.ChangeSignal] = struct{}{}
4045 }
4046
4047 // Flatten and sort the signals
4048 l := len(taskSignals)
4049 if l == 0 {
4050 continue
4051 }
4052
4053 flat := make([]string, 0, l)
4054 for sig := range taskSignals {
4055 flat = append(flat, sig)
4056 }
4057
4058 sort.Strings(flat)
4059 tgSignals, ok := signals[tg.Name]
4060 if !ok {
4061 tgSignals = make(map[string][]string)
4062 signals[tg.Name] = tgSignals
4063 }
4064 tgSignals[task.Name] = flat
4065 }
4066
4067 }
4068
4069 return signals
4070 }
4071
4072 // SpecChanged determines if the functional specification has changed between
4073 // two job versions.
4074 func (j *Job) SpecChanged(new *Job) bool {
4075 if j == nil {
4076 return new != nil
4077 }
4078
4079 // Create a copy of the new job
4080 c := new.Copy()
4081
4082 // Update the new job so we can do a reflect
4083 c.Status = j.Status
4084 c.StatusDescription = j.StatusDescription
4085 c.Stable = j.Stable
4086 c.Version = j.Version
4087 c.CreateIndex = j.CreateIndex
4088 c.ModifyIndex = j.ModifyIndex
4089 c.JobModifyIndex = j.JobModifyIndex
4090 c.SubmitTime = j.SubmitTime
4091
4092 // cgbaker: FINISH: probably need some consideration of scaling policy ID here
4093
4094 // Deep equals the jobs
4095 return !reflect.DeepEqual(j, c)
4096 }
4097
4098 func (j *Job) SetSubmitTime() {
4099 j.SubmitTime = time.Now().UTC().UnixNano()
4100 }
4101
4102 // JobListStub is used to return a subset of job information
4103 // for the job list
4104 type JobListStub struct {
4105 ID string
4106 ParentID string
4107 Name string
4108 Datacenters []string
4109 Type string
4110 Priority int
4111 Periodic bool
4112 ParameterizedJob bool
4113 Stop bool
4114 Status string
4115 StatusDescription string
4116 JobSummary *JobSummary
4117 CreateIndex uint64
4118 ModifyIndex uint64
4119 JobModifyIndex uint64
4120 SubmitTime int64
4121 }
4122
4123 // JobSummary summarizes the state of the allocations of a job
4124 type JobSummary struct {
4125 // JobID is the ID of the job the summary is for
4126 JobID string
4127
4128 // Namespace is the namespace of the job and its summary
4129 Namespace string
4130
4131 // Summary contains the summary per task group for the Job
4132 Summary map[string]TaskGroupSummary
4133
4134 // Children contains a summary for the children of this job.
4135 Children *JobChildrenSummary
4136
4137 // Raft Indexes
4138 CreateIndex uint64
4139 ModifyIndex uint64
4140 }
4141
4142 // Copy returns a new copy of JobSummary
4143 func (js *JobSummary) Copy() *JobSummary {
4144 newJobSummary := new(JobSummary)
4145 *newJobSummary = *js
4146 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary))
4147 for k, v := range js.Summary {
4148 newTGSummary[k] = v
4149 }
4150 newJobSummary.Summary = newTGSummary
4151 newJobSummary.Children = newJobSummary.Children.Copy()
4152 return newJobSummary
4153 }
4154
4155 // JobChildrenSummary contains the summary of children job statuses
4156 type JobChildrenSummary struct {
4157 Pending int64
4158 Running int64
4159 Dead int64
4160 }
4161
4162 // Copy returns a new copy of a JobChildrenSummary
4163 func (jc *JobChildrenSummary) Copy() *JobChildrenSummary {
4164 if jc == nil {
4165 return nil
4166 }
4167
4168 njc := new(JobChildrenSummary)
4169 *njc = *jc
4170 return njc
4171 }
4172
4173 // TaskGroupSummary summarizes the state of all the allocations of a
4174 // particular TaskGroup
4175 type TaskGroupSummary struct {
4176 Queued int
4177 Complete int
4178 Failed int
4179 Running int
4180 Starting int
4181 Lost int
4182 }
4183
4184 const (
4185 // Checks uses any registered health check state in combination with task
4186 // states to determine if an allocation is healthy.
4187 UpdateStrategyHealthCheck_Checks = "checks"
4188
4189 // TaskStates uses the task states of an allocation to determine if the
4190 // allocation is healthy.
4191 UpdateStrategyHealthCheck_TaskStates = "task_states"
4192
4193 // Manual allows the operator to manually signal to Nomad when an
4194 // allocation is healthy. This allows more advanced health checking that is
4195 // outside of the scope of Nomad.
4196 UpdateStrategyHealthCheck_Manual = "manual"
4197 )
4198
4199 var (
4200 // DefaultUpdateStrategy provides a baseline that can be used to upgrade
4201 // jobs with the old policy or for populating field defaults.
4202 DefaultUpdateStrategy = &UpdateStrategy{
4203 Stagger: 30 * time.Second,
4204 MaxParallel: 1,
4205 HealthCheck: UpdateStrategyHealthCheck_Checks,
4206 MinHealthyTime: 10 * time.Second,
4207 HealthyDeadline: 5 * time.Minute,
4208 ProgressDeadline: 10 * time.Minute,
4209 AutoRevert: false,
4210 AutoPromote: false,
4211 Canary: 0,
4212 }
4213 )
4214
4215 // UpdateStrategy is used to modify how updates are done
4216 type UpdateStrategy struct {
4217 // Stagger is used to determine the rate at which allocations are migrated
4218 // due to down or draining nodes.
4219 Stagger time.Duration
4220
4221 // MaxParallel is how many updates can be done in parallel
4222 MaxParallel int
4223
4224 // HealthCheck specifies the mechanism in which allocations are marked
4225 // healthy or unhealthy as part of a deployment.
4226 HealthCheck string
4227
4228 // MinHealthyTime is the minimum time an allocation must be in the healthy
4229 // state before it is marked as healthy, unblocking more allocations to be
4230 // rolled.
4231 MinHealthyTime time.Duration
4232
4233 // HealthyDeadline is the time in which an allocation must be marked as
4234 // healthy before it is automatically transitioned to unhealthy. This time
4235 // period doesn't count against the MinHealthyTime.
4236 HealthyDeadline time.Duration
4237
4238 // ProgressDeadline is the time in which an allocation as part of the
4239 // deployment must transition to healthy. If no allocation becomes healthy
4240 // after the deadline, the deployment is marked as failed. If the deadline
4241 // is zero, the first failure causes the deployment to fail.
4242 ProgressDeadline time.Duration
4243
4244 // AutoRevert declares that if a deployment fails because of unhealthy
4245 // allocations, there should be an attempt to auto-revert the job to a
4246 // stable version.
4247 AutoRevert bool
4248
4249 // AutoPromote declares that the deployment should be promoted when all canaries are
4250 // healthy
4251 AutoPromote bool
4252
4253 // Canary is the number of canaries to deploy when a change to the task
4254 // group is detected.
4255 Canary int
4256 }
4257
4258 func (u *UpdateStrategy) Copy() *UpdateStrategy {
4259 if u == nil {
4260 return nil
4261 }
4262
4263 copy := new(UpdateStrategy)
4264 *copy = *u
4265 return copy
4266 }
4267
4268 func (u *UpdateStrategy) Validate() error {
4269 if u == nil {
4270 return nil
4271 }
4272
4273 var mErr multierror.Error
4274 switch u.HealthCheck {
4275 case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual:
4276 default:
4277 multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck))
4278 }
4279
4280 if u.MaxParallel < 0 {
4281 multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than zero: %d < 0", u.MaxParallel))
4282 }
4283 if u.Canary < 0 {
4284 multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary))
4285 }
4286 if u.Canary == 0 && u.AutoPromote {
4287 multierror.Append(&mErr, fmt.Errorf("Auto Promote requires a Canary count greater than zero"))
4288 }
4289 if u.MinHealthyTime < 0 {
4290 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime))
4291 }
4292 if u.HealthyDeadline <= 0 {
4293 multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline))
4294 }
4295 if u.ProgressDeadline < 0 {
4296 multierror.Append(&mErr, fmt.Errorf("Progress deadline must be zero or greater: %v", u.ProgressDeadline))
4297 }
4298 if u.MinHealthyTime >= u.HealthyDeadline {
4299 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v >= %v", u.MinHealthyTime, u.HealthyDeadline))
4300 }
4301 if u.ProgressDeadline != 0 && u.HealthyDeadline >= u.ProgressDeadline {
4302 multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be less than progress deadline: %v >= %v", u.HealthyDeadline, u.ProgressDeadline))
4303 }
4304 if u.Stagger <= 0 {
4305 multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger))
4306 }
4307
4308 return mErr.ErrorOrNil()
4309 }
4310
4311 func (u *UpdateStrategy) IsEmpty() bool {
4312 if u == nil {
4313 return true
4314 }
4315
4316 return u.MaxParallel == 0
4317 }
4318
4319 // TODO(alexdadgar): Remove once no longer used by the scheduler.
4320 // Rolling returns if a rolling strategy should be used
4321 func (u *UpdateStrategy) Rolling() bool {
4322 return u.Stagger > 0 && u.MaxParallel > 0
4323 }
4324
4325 const (
4326 // PeriodicSpecCron is used for a cron spec.
4327 PeriodicSpecCron = "cron"
4328
4329 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma
4330 // separated list of unix timestamps at which to launch.
4331 PeriodicSpecTest = "_internal_test"
4332 )
4333
4334 // PeriodicConfig defines the interval at which a job should be run.
4335 type PeriodicConfig struct {
4336 // Enabled determines if the job should be run periodically.
4337 Enabled bool
4338
4339 // Spec specifies the interval the job should be run as. It is parsed based
4340 // on the SpecType.
4341 Spec string
4342
4343 // SpecType defines the format of the spec.
4344 SpecType string
4345
4346 // ProhibitOverlap enforces that spawned jobs do not run in parallel.
4347 ProhibitOverlap bool
4348
4349 // TimeZone is the user specified string that determines the time zone to
4350 // launch against. The time zone must come from the IANA Time Zone
4351 // database, such as "America/New_York".
4352 // Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
4353 // Reference: https://www.iana.org/time-zones
4354 TimeZone string
4355
4356 // location is the time zone to evaluate the launch time against
4357 location *time.Location
4358 }
4359
4360 func (p *PeriodicConfig) Copy() *PeriodicConfig {
4361 if p == nil {
4362 return nil
4363 }
4364 np := new(PeriodicConfig)
4365 *np = *p
4366 return np
4367 }
4368
4369 func (p *PeriodicConfig) Validate() error {
4370 if !p.Enabled {
4371 return nil
4372 }
4373
4374 var mErr multierror.Error
4375 if p.Spec == "" {
4376 multierror.Append(&mErr, fmt.Errorf("Must specify a spec"))
4377 }
4378
4379 // Check if we got a valid time zone
4380 if p.TimeZone != "" {
4381 if _, err := time.LoadLocation(p.TimeZone); err != nil {
4382 multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err))
4383 }
4384 }
4385
4386 switch p.SpecType {
4387 case PeriodicSpecCron:
4388 // Validate the cron spec
4389 if _, err := cronexpr.Parse(p.Spec); err != nil {
4390 multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err))
4391 }
4392 case PeriodicSpecTest:
4393 // No-op
4394 default:
4395 multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType))
4396 }
4397
4398 return mErr.ErrorOrNil()
4399 }
4400
4401 func (p *PeriodicConfig) Canonicalize() {
4402 // Load the location; fall back to UTC if the time zone fails to load
4403 l, err := time.LoadLocation(p.TimeZone)
4404 if err != nil {
4405 p.location = time.UTC
4406 } else {
4407 p.location = l
4408 }
4409 }
4410
4411 // CronParseNext is a helper that parses the next time for the given expression
4412 // but captures any panic that may occur in the underlying library.
4413 func CronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) {
4414 defer func() {
4415 if recover() != nil {
4416 t = time.Time{}
4417 err = fmt.Errorf("failed parsing cron expression: %q", spec)
4418 }
4419 }()
4420
4421 return e.Next(fromTime), nil
4422 }
4423
4424 // Next returns the closest time instant matching the spec that is after the
4425 // passed time. If no matching instance exists, the zero value of time.Time is
4426 // returned. The `time.Location` of the returned value matches that of the
4427 // passed time.
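//
// A minimal usage sketch (editor's addition; the cron spec below is an
// assumed example, not taken from this source):
//
//	p := &PeriodicConfig{
//		Enabled:  true,
//		SpecType: PeriodicSpecCron,
//		Spec:     "0 */6 * * *", // on the hour, every six hours
//	}
//	p.Canonicalize()
//	next, err := p.Next(time.Now().In(p.GetLocation()))
//	// err is non-nil only if the cron expression fails to parse;
//	// otherwise next is the first matching instant after now.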
4428 func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) {
4429 switch p.SpecType {
4430 case PeriodicSpecCron:
4431 e, err := cronexpr.Parse(p.Spec)
4432 if err != nil {
4433 return time.Time{}, fmt.Errorf("failed parsing cron expression: %q: %v", p.Spec, err)
4434 }
4435 return CronParseNext(e, fromTime, p.Spec)
4436 case PeriodicSpecTest:
4437 split := strings.Split(p.Spec, ",")
4438 if len(split) == 1 && split[0] == "" {
4439 return time.Time{}, nil
4440 }
4441
4442 // Parse the times
4443 times := make([]time.Time, len(split))
4444 for i, s := range split {
4445 unix, err := strconv.Atoi(s)
4446 if err != nil {
4447 return time.Time{}, nil
4448 }
4449
4450 times[i] = time.Unix(int64(unix), 0)
4451 }
4452
4453 // Find the next match
4454 for _, next := range times {
4455 if fromTime.Before(next) {
4456 return next, nil
4457 }
4458 }
4459 }
4460
4461 return time.Time{}, nil
4462 }
4463
4464 // GetLocation returns the location to use for determining the time zone to run
4465 // the periodic job against.
4466 func (p *PeriodicConfig) GetLocation() *time.Location {
4467 // Jobs pre 0.5.5 will not have this
4468 if p.location != nil {
4469 return p.location
4470 }
4471
4472 return time.UTC
4473 }
4474
4475 const (
4476 // PeriodicLaunchSuffix is the string appended to the periodic job's ID
4477 // when launching derived instances of it.
4478 PeriodicLaunchSuffix = "/periodic-"
4479 )
4480
4481 // PeriodicLaunch tracks the last launch time of a periodic job.
4482 type PeriodicLaunch struct {
4483 ID string // ID of the periodic job.
4484 Namespace string // Namespace of the periodic job
4485 Launch time.Time // The last launch time.
4486
4487 // Raft Indexes
4488 CreateIndex uint64
4489 ModifyIndex uint64
4490 }
4491
4492 const (
4493 DispatchPayloadForbidden = "forbidden"
4494 DispatchPayloadOptional = "optional"
4495 DispatchPayloadRequired = "required"
4496
4497 // DispatchLaunchSuffix is the string appended to the parameterized job's ID
4498 // when dispatching instances of it.
4499 DispatchLaunchSuffix = "/dispatch-"
4500 )
4501
4502 // ParameterizedJobConfig is used to configure the parameterized job
4503 type ParameterizedJobConfig struct {
4504 // Payload configures the payload requirements
4505 Payload string
4506
4507 // MetaRequired is the set of metadata keys that must be specified by the dispatcher
4508 MetaRequired []string
4509
4510 // MetaOptional is the set of metadata keys that may be specified by the dispatcher
4511 MetaOptional []string
4512 }
4513
4514 func (d *ParameterizedJobConfig) Validate() error {
4515 var mErr multierror.Error
4516 switch d.Payload {
4517 case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden:
4518 default:
4519 multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload))
4520 }
4521
4522 // Check that the meta configurations are disjoint sets
4523 disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional)
4524 if !disjoint {
4525 multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending))
4526 }
4527
4528 return mErr.ErrorOrNil()
4529 }
4530
4531 func (d *ParameterizedJobConfig) Canonicalize() {
4532 if d.Payload == "" {
4533 d.Payload = DispatchPayloadOptional
4534 }
4535 }
4536
4537 func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig {
4538 if d == nil {
4539 return nil
4540 }
4541 nd := new(ParameterizedJobConfig)
4542 *nd = *d
4543 nd.MetaOptional = helper.CopySliceString(nd.MetaOptional)
4544 nd.MetaRequired = helper.CopySliceString(nd.MetaRequired)
4545 return nd
4546 }
4547
4548 // DispatchedID returns an ID appropriate for a job dispatched against a
4549 // particular parameterized job
4550 func DispatchedID(templateID string, t time.Time) string {
4551 u := uuid.Generate()[:8]
4552 return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u)
4553 }
4554
4555 // DispatchPayloadConfig configures how a task gets its input from a job dispatch
4556 type DispatchPayloadConfig struct {
4557 // File specifies a relative path to where the input data should be written
4558 File string
4559 }
4560
4561 func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig {
4562 if d == nil {
4563 return nil
4564 }
4565 nd := new(DispatchPayloadConfig)
4566 *nd = *d
4567 return nd
4568 }
4569
4570 func (d *DispatchPayloadConfig) Validate() error {
4571 // Verify the destination doesn't escape
4572 escaped, err := PathEscapesAllocDir("task/local/", d.File)
4573 if err != nil {
4574 return fmt.Errorf("invalid destination path: %v", err)
4575 } else if escaped {
4576 return fmt.Errorf("destination escapes allocation directory")
4577 }
4578
4579 return nil
4580 }
4581
4582 const (
4583 TaskLifecycleHookPrestart = "prestart"
4584 )
4585
4586 type TaskLifecycleConfig struct {
4587 Hook string
4588 Sidecar bool
4589 }
4590
4591 func (d *TaskLifecycleConfig) Copy() *TaskLifecycleConfig {
4592 if d == nil {
4593 return nil
4594 }
4595 nd := new(TaskLifecycleConfig)
4596 *nd = *d
4597 return nd
4598 }
4599
4600 func (d *TaskLifecycleConfig) Validate() error {
4601 if d == nil {
4602 return nil
4603 }
4604
4605 switch d.Hook {
4606 case TaskLifecycleHookPrestart:
4607 case "":
4608 return fmt.Errorf("no lifecycle hook provided")
4609 default:
4610 return fmt.Errorf("invalid hook: %v", d.Hook)
4611 }
4612
4613 return nil
4614 }
4615
4616 var (
4617 // These default restart policies need to be in sync with
4618 // Canonicalize in api/tasks.go
4619
4620 DefaultServiceJobRestartPolicy = RestartPolicy{
4621 Delay: 15 * time.Second,
4622 Attempts: 2,
4623 Interval: 30 * time.Minute,
4624 Mode: RestartPolicyModeFail,
4625 }
4626 DefaultBatchJobRestartPolicy = RestartPolicy{
4627 Delay: 15 * time.Second,
4628 Attempts: 3,
4629 Interval: 24 * time.Hour,
4630 Mode: RestartPolicyModeFail,
4631 }
4632 )
4633
4634 var (
4635 // These default reschedule policies need to be in sync with
4636 // NewDefaultReschedulePolicy in api/tasks.go
4637
4638 DefaultServiceJobReschedulePolicy = ReschedulePolicy{
4639 Delay: 30 * time.Second,
4640 DelayFunction: "exponential",
4641 MaxDelay: 1 * time.Hour,
4642 Unlimited: true,
4643 }
4644 DefaultBatchJobReschedulePolicy = ReschedulePolicy{
4645 Attempts: 1,
4646 Interval: 24 * time.Hour,
4647 Delay: 5 * time.Second,
4648 DelayFunction: "constant",
4649 }
4650 )
4651
4652 const (
4653 // RestartPolicyModeDelay causes an artificial delay until the next interval is
4654 // reached when the specified attempts have been reached in the interval.
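// For example (editor's illustration): with Attempts = 2 and
// Interval = 30m, a third failure inside the window delays the next
// restart until the 30 minute interval has elapsed instead of failing
// the allocation.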
4655 RestartPolicyModeDelay = "delay"
4656
4657 // RestartPolicyModeFail causes a job to fail if the specified number of
4658 // attempts are reached within an interval.
4659 RestartPolicyModeFail = "fail"
4660
4661 // RestartPolicyMinInterval is the minimum interval that is accepted for a
4662 // restart policy.
4663 RestartPolicyMinInterval = 5 * time.Second
4664
4665 // ReasonWithinPolicy describes restart events that are within policy
4666 ReasonWithinPolicy = "Restart within policy"
4667 )
4668
4669 // JobScalingEvents contains the scaling events for a given job
4670 type JobScalingEvents struct {
4671 Namespace string
4672 JobID string
4673
4674 // ScalingEvents is indexed by target; currently, this is just the task group.
4675 // Each indexed slice is sorted from newest to oldest event and should
4676 // contain fewer than JobTrackedScalingEvents entries.
4677 ScalingEvents map[string][]*ScalingEvent
4678
4679 // Raft index
4680 ModifyIndex uint64
4681 }
4682
4683 // NewScalingEvent returns a new ScalingEvent stamped with the current time.
4684 func NewScalingEvent(message string) *ScalingEvent {
4685 return &ScalingEvent{
4686 Time: time.Now().Unix(),
4687 Message: message,
4688 }
4689 }
4690
4691 // ScalingEvent describes a scaling event against a Job
4692 type ScalingEvent struct {
4693 // Time is the Unix timestamp, in seconds, of the scaling event
4694 Time int64
4695
4696 // Count is the new scaling count, if provided
4697 Count *int64
4698
4699 // Message is the message describing a scaling event
4700 Message string
4701
4702 // Error indicates an error state for this scaling event
4703 Error bool
4704
4705 // Meta is a map of metadata returned during a scaling event
4706 Meta map[string]interface{}
4707
4708 // EvalID is the ID for an evaluation if one was created as part of a scaling event
4709 EvalID *string
4710
4711 // Raft index
4712 CreateIndex uint64
4713 }
4714
4715 func (e *ScalingEvent) SetError(error bool) *ScalingEvent {
4716 e.Error = error
4717 return e
4718 }
4719
4720 func (e *ScalingEvent) SetMeta(meta map[string]interface{}) *ScalingEvent {
4721 e.Meta = meta
4722 return e
4723 }
4724
4725 func (e *ScalingEvent) SetEvalID(evalID string) *ScalingEvent {
4726 e.EvalID = &evalID
4727 return e
4728 }
4729
4730 // ScalingEventRequest is used by the Job.Scale endpoint
4731 // to register scaling events
4732 type ScalingEventRequest struct {
4733 Namespace string
4734 JobID string
4735 TaskGroup string
4736
4737 ScalingEvent *ScalingEvent
4738 }
4739
4740 // ScalingPolicy specifies the scaling policy for a scaling target
4741 type ScalingPolicy struct {
4742 // ID is a generated UUID used for looking up the scaling policy
4743 ID string
4744
4745 // Target contains information about the target of the scaling policy, like job and group
4746 Target map[string]string
4747
4748 // Policy is an opaque description of the scaling policy, passed to the autoscaler
4749 Policy map[string]interface{}
4750
4751 // Min is the minimum allowable scaling count for this target
4752 Min int64
4753
4754 // Max is the maximum allowable scaling count for this target
4755 Max int64
4756
4757 // Enabled indicates whether this policy has been enabled/disabled
4758 Enabled bool
4759
4760 CreateIndex uint64
4761 ModifyIndex uint64
4762 }
4763
4764 const (
4765 ScalingTargetNamespace = "Namespace"
4766 ScalingTargetJob = "Job"
4767 ScalingTargetGroup = "Group"
4768 )
4769
4770 // Diff indicates whether the specification for a given scaling policy has changed
4771 func (p *ScalingPolicy) Diff(p2 *ScalingPolicy) bool {
4772 copy := *p2
4773 copy.ID = p.ID
4774 copy.CreateIndex = p.CreateIndex
4775 copy.ModifyIndex = p.ModifyIndex
4776 return !reflect.DeepEqual(*p, copy)
4777 }
4778
4779 func (p *ScalingPolicy) TargetTaskGroup(job *Job, tg *TaskGroup) *ScalingPolicy {
4780 p.Target = map[string]string{
4781 ScalingTargetNamespace: job.Namespace,
4782 ScalingTargetJob: job.ID,
4783 ScalingTargetGroup: tg.Name,
4784 }
4785 return p
4786 }
4787
4788 func (p *ScalingPolicy) Stub() *ScalingPolicyListStub {
4789 stub := &ScalingPolicyListStub{
4790 ID: p.ID,
4791 Target: make(map[string]string),
4792 Enabled: p.Enabled,
4793 CreateIndex: p.CreateIndex,
4794 ModifyIndex: p.ModifyIndex,
4795 }
4796 for k, v := range p.Target {
4797 stub.Target[k] = v
4798 }
4799 return stub
4800 }
4801
4802 // GetScalingPolicies returns a slice of all scaling policies for this job
4803 func (j *Job) GetScalingPolicies() []*ScalingPolicy {
4804 ret := make([]*ScalingPolicy, 0)
4805
4806 for _, tg := range j.TaskGroups {
4807 if tg.Scaling != nil {
4808 ret = append(ret, tg.Scaling)
4809 }
4810 }
4811
4812 return ret
4813 }
4814
4815 // ScalingPolicyListStub is used to return a subset of scaling policy information
4816 // for the scaling policy list
4817 type ScalingPolicyListStub struct {
4818 ID string
4819 Enabled bool
4820 Target map[string]string
4821 CreateIndex uint64
4822 ModifyIndex uint64
4823 }
4824
4825 // RestartPolicy configures how Tasks are restarted when they crash or fail.
4826 type RestartPolicy struct {
4827 // Attempts is the number of restarts that will occur in an interval.
4828 Attempts int
4829
4830 // Interval is a duration in which we can limit the number of restarts
4831 // within.
4832 Interval time.Duration
4833
4834 // Delay is the time between a failure and a restart.
4835 Delay time.Duration
4836
4837 // Mode controls what happens when the task restarts more than Attempts times
4838 // in an interval.
4839 Mode string
4840 }
4841
4842 func (r *RestartPolicy) Copy() *RestartPolicy {
4843 if r == nil {
4844 return nil
4845 }
4846 nrp := new(RestartPolicy)
4847 *nrp = *r
4848 return nrp
4849 }
4850
4851 func (r *RestartPolicy) Validate() error {
4852 var mErr multierror.Error
4853 switch r.Mode {
4854 case RestartPolicyModeDelay, RestartPolicyModeFail:
4855 default:
4856 multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode))
4857 }
4858
4859 // Check for ambiguous/confusing settings
4860 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail {
4861 multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts))
4862 }
4863
4864 if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() {
4865 multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval))
4866 }
4867 if time.Duration(r.Attempts)*r.Delay > r.Interval {
4868 multierror.Append(&mErr,
4869 fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay))
4870 }
4871 return mErr.ErrorOrNil()
4872 }
4873
4874 func NewRestartPolicy(jobType string) *RestartPolicy {
4875 switch jobType {
4876 case JobTypeService, JobTypeSystem:
4877 rp := DefaultServiceJobRestartPolicy
4878 return &rp
4879 case JobTypeBatch:
4880 rp := DefaultBatchJobRestartPolicy
4881 return &rp
4882 }
4883 return nil
4884 }
4885
4886 const ReschedulePolicyMinInterval = 15 * time.Second
4887 const ReschedulePolicyMinDelay = 5 * time.Second
4888
4889 var RescheduleDelayFunctions = [...]string{"constant", "exponential", "fibonacci"}
4890
4891 // ReschedulePolicy configures how Tasks are rescheduled when they crash or fail.
4892 type ReschedulePolicy struct {
4893 // Attempts limits the number of rescheduling attempts that can occur in an interval.
4894 Attempts int
4895
4896 // Interval is a duration in which we can limit the number of reschedule attempts.
4897 Interval time.Duration
4898
4899 // Delay is a minimum duration to wait between reschedule attempts.
4900 // The delay function determines how much subsequent reschedule attempts are delayed by.
4901 Delay time.Duration
4902
4903 // DelayFunction determines how the delay progressively changes on subsequent reschedule
4904 // attempts. Valid values are "exponential", "constant", and "fibonacci".
4905 DelayFunction string
4906
4907 // MaxDelay is an upper bound on the delay.
4908 MaxDelay time.Duration
4909
4910 // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set
4911 // between reschedule attempts.
4912 Unlimited bool
4913 }
4914
4915 func (r *ReschedulePolicy) Copy() *ReschedulePolicy {
4916 if r == nil {
4917 return nil
4918 }
4919 nrp := new(ReschedulePolicy)
4920 *nrp = *r
4921 return nrp
4922 }
4923
4924 func (r *ReschedulePolicy) Enabled() bool {
4925 enabled := r != nil && (r.Attempts > 0 || r.Unlimited)
4926 return enabled
4927 }
4928
4929 // Validate uses different criteria to validate the reschedule policy:
4930 // Delay must be a minimum of 5 seconds;
4931 // the delay ceiling is ignored if the delay function is "constant";
4932 // the number of possible attempts is validated, given the interval, delay and delay function.
4933 func (r *ReschedulePolicy) Validate() error {
4934 if !r.Enabled() {
4935 return nil
4936 }
4937 var mErr multierror.Error
4938 // Check for ambiguous/confusing settings
4939 if r.Attempts > 0 {
4940 if r.Interval <= 0 {
4941 multierror.Append(&mErr, fmt.Errorf("Interval must be a non-zero value if Attempts > 0"))
4942 }
4943 if r.Unlimited {
4944 multierror.Append(&mErr, fmt.Errorf("Reschedule Policy with Attempts = %v, Interval = %v, "+
4945 "and Unlimited = %v is ambiguous", r.Attempts, r.Interval, r.Unlimited))
4946 multierror.Append(&mErr, errors.New("If Attempts > 0, Unlimited cannot also be set to true"))
4947 }
4948 }
4949
4950 delayPreCheck := true
4951 // Delay should be bigger than the default
4952 if r.Delay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() {
4953 multierror.Append(&mErr, fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay))
4954 delayPreCheck = false
4955 }
4956
4957 // Must use a valid delay function
4958 if !isValidDelayFunction(r.DelayFunction) {
4959 multierror.Append(&mErr, fmt.Errorf("Invalid delay function %q, must be one of %q", r.DelayFunction, RescheduleDelayFunctions))
4960 delayPreCheck = false
4961 }
4962
4963 // Validate MaxDelay if not using a constant delay
4964 if r.DelayFunction != "constant" {
4965 if r.MaxDelay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() {
4966 multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.MaxDelay))
4967 delayPreCheck = false
4968 }
4969 if r.MaxDelay < r.Delay {
4970 multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than Delay %v (got %v)", r.Delay, r.MaxDelay))
4971 delayPreCheck = false
4972 }
4973
4974 }
4975
4976 // Validate Interval and other delay parameters if attempts are limited
4977 if !r.Unlimited {
4978 if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() {
4979 multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval))
4980 }
4981 if !delayPreCheck {
4982 // We can't cross validate the rest of the delay params if delayPreCheck fails, so return early
4983 return mErr.ErrorOrNil()
4984 }
4985 crossValidationErr := r.validateDelayParams()
4986 if crossValidationErr != nil {
4987 multierror.Append(&mErr, crossValidationErr)
4988 }
4989 }
4990 return mErr.ErrorOrNil()
4991 }
4992
4993 func isValidDelayFunction(delayFunc string) bool {
4994 for _, value := range RescheduleDelayFunctions {
4995 if value == delayFunc {
4996 return true
4997 }
4998 }
4999 return false
5000 }
5001
5002 func (r *ReschedulePolicy) validateDelayParams() error {
5003 ok, possibleAttempts, recommendedInterval := r.viableAttempts()
5004 if ok {
5005 return nil
5006 }
5007 var mErr multierror.Error
5008 if r.DelayFunction == "constant" {
5009 multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and "+
5010 "delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction))
5011 } else {
5012 multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+
5013 "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay))
5014 }
5015 multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts))
5016 return mErr.ErrorOrNil()
5017 }
5018
5019 func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) {
5020 var possibleAttempts int
5021 var recommendedInterval time.Duration
5022 valid := true
5023 switch r.DelayFunction {
5024 case "constant":
5025 recommendedInterval = time.Duration(r.Attempts) * r.Delay
5026 if r.Interval < recommendedInterval {
5027 possibleAttempts = int(r.Interval / r.Delay)
5028 valid = false
5029 }
5030 case "exponential":
5031 for i := 0; i < r.Attempts; i++ {
5032 nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay
5033 if nextDelay > r.MaxDelay {
5034 nextDelay = r.MaxDelay
5035 recommendedInterval += nextDelay
5036 } else {
5037 recommendedInterval = nextDelay
5038 }
5039 if recommendedInterval < r.Interval {
5040 possibleAttempts++
5041 }
5042 }
5043 if possibleAttempts < r.Attempts {
5044 valid = false
5045 }
5046 case "fibonacci":
5047 var slots []time.Duration
5048 slots = append(slots, r.Delay)
5049 slots = append(slots, r.Delay)
5050 reachedCeiling := false
5051 for i := 2; i < r.Attempts; i++ {
5052 var nextDelay time.Duration
5053 if reachedCeiling {
5054 // switch to linear growth once the delay ceiling is reached
5055 nextDelay = slots[i-1] + r.MaxDelay
5056 } else {
5057 nextDelay = slots[i-1] + slots[i-2]
5058 if nextDelay > r.MaxDelay {
5059 nextDelay = r.MaxDelay
5060 reachedCeiling = true
5061 }
5062 }
5063 slots = append(slots, nextDelay)
5064 }
5065 recommendedInterval = slots[len(slots)-1]
5066 if r.Interval < recommendedInterval {
5067 valid = false
5068 // calculate possible attempts
5069 for i := 0; i < len(slots); i++ {
5070 if slots[i] > r.Interval {
5071 possibleAttempts = i
5072 break
5073 }
5074 }
5075 }
5076 default:
5077 return false, 0, 0
5078 }
5079 if possibleAttempts < 0 { // can happen if delay is bigger than interval
5080 possibleAttempts = 0
5081 }
5082 return valid, possibleAttempts, recommendedInterval
5083 }
5084
5085 func NewReschedulePolicy(jobType string) *ReschedulePolicy {
5086 switch jobType {
5087 case JobTypeService:
5088 rp := DefaultServiceJobReschedulePolicy
5089 return &rp
5090 case JobTypeBatch:
5091 rp := DefaultBatchJobReschedulePolicy
5092 return &rp
5093 }
5094 return nil
5095 }
5096
5097 const (
5098 MigrateStrategyHealthChecks = "checks"
5099 MigrateStrategyHealthStates = "task_states"
5100 )
5101
5102 type MigrateStrategy struct {
5103 MaxParallel int
5104 HealthCheck string
5105 MinHealthyTime time.Duration
5106 HealthyDeadline time.Duration
5107 }
5108
5109 // DefaultMigrateStrategy is used for backwards compat with pre-0.8 Allocations
5110 // that lack an update strategy.
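//
// A small sketch of the resulting defaults (editor's addition, mirroring the
// struct literal below):
//
//	m := DefaultMigrateStrategy()
//	// m.MaxParallel == 1
//	// m.HealthCheck == MigrateStrategyHealthChecks
//	// m.MinHealthyTime == 10*time.Second
//	// m.HealthyDeadline == 5*time.Minute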
5111 //
5112 // This function should match its counterpart in api/tasks.go
5113 func DefaultMigrateStrategy() *MigrateStrategy {
5114 return &MigrateStrategy{
5115 MaxParallel: 1,
5116 HealthCheck: MigrateStrategyHealthChecks,
5117 MinHealthyTime: 10 * time.Second,
5118 HealthyDeadline: 5 * time.Minute,
5119 }
5120 }
5121
5122 func (m *MigrateStrategy) Validate() error {
5123 var mErr multierror.Error
5124
5125 if m.MaxParallel < 0 {
5126 multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel))
5127 }
5128
5129 switch m.HealthCheck {
5130 case MigrateStrategyHealthChecks, MigrateStrategyHealthStates:
5131 // ok
5132 case "":
5133 if m.MaxParallel > 0 {
5134 multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck"))
5135 }
5136 default:
5137 multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck))
5138 }
5139
5140 if m.MinHealthyTime < 0 {
5141 multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime))
5142 }
5143
5144 if m.HealthyDeadline < 0 {
5145 multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline))
5146 }
5147
5148 if m.MinHealthyTime > m.HealthyDeadline {
5149 multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline"))
5150 }
5151
5152 return mErr.ErrorOrNil()
5153 }
5154
5155 // TaskGroup is an atomic unit of placement. Each task group belongs to
5156 // a job and may contain any number of tasks. A task group supports running
5157 // many replicas using the same configuration.
5158 type TaskGroup struct {
5159 // Name of the task group
5160 Name string
5161
5162 // Count is the number of replicas of this task group that should
5163 // be scheduled.
5164 Count int
5165
5166 // Update is used to control the update strategy for this task group
5167 Update *UpdateStrategy
5168
5169 // Migrate is used to control the migration strategy for this task group
5170 Migrate *MigrateStrategy
5171
5172 // Constraints can be specified at a task group level and apply to
5173 // all the tasks contained.
5174 Constraints []*Constraint
5175
5176 // Scaling is the autoscaling policy for the TaskGroup
5177 Scaling *ScalingPolicy
5178
5179 // RestartPolicy of a TaskGroup
5180 RestartPolicy *RestartPolicy
5181
5182 // Tasks are the collection of tasks that this task group needs to run
5183 Tasks []*Task
5184
5185 // EphemeralDisk is the disk resources that the task group requests
5186 EphemeralDisk *EphemeralDisk
5187
5188 // Meta is used to associate arbitrary metadata with this
5189 // task group. This is opaque to Nomad.
5190 Meta map[string]string
5191
5192 // ReschedulePolicy is used to configure how the scheduler should
5193 // retry failed allocations.
5194 ReschedulePolicy *ReschedulePolicy
5195
5196 // Affinities can be specified at the task group level to express
5197 // scheduling preferences.
5198 Affinities []*Affinity
5199
5200 // Spread can be specified at the task group level to express spreading
5201 // allocations across a desired attribute, such as datacenter
5202 Spreads []*Spread
5203
5204 // Networks are the network configuration for the task group. This can be
5205 // overridden in the task.
5206 Networks Networks
5207
5208 // Services this group provides
5209 Services []*Service
5210
5211 // Volumes is a map of volumes that have been requested by the task group.
5212 Volumes map[string]*VolumeRequest
5213
5214 // ShutdownDelay is the amount of time to wait between deregistering
5215 // group services in consul and stopping tasks.
5216 ShutdownDelay *time.Duration
5217
5218 // StopAfterClientDisconnect, if set, configures the client to stop the task group
5219 // after this duration since the last known good heartbeat
5220 StopAfterClientDisconnect *time.Duration
5221 }
5222
5223 func (tg *TaskGroup) Copy() *TaskGroup {
5224 if tg == nil {
5225 return nil
5226 }
5227 ntg := new(TaskGroup)
5228 *ntg = *tg
5229 ntg.Update = ntg.Update.Copy()
5230 ntg.Constraints = CopySliceConstraints(ntg.Constraints)
5231 ntg.RestartPolicy = ntg.RestartPolicy.Copy()
5232 ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy()
5233 ntg.Affinities = CopySliceAffinities(ntg.Affinities)
5234 ntg.Spreads = CopySliceSpreads(ntg.Spreads)
5235 ntg.Volumes = CopyMapVolumeRequest(ntg.Volumes)
5236 ntg.Scaling = CopyScalingPolicy(ntg.Scaling)
5237
5238 // Copy the network objects
5239 if tg.Networks != nil {
5240 n := len(tg.Networks)
5241 ntg.Networks = make([]*NetworkResource, n)
5242 for i := 0; i < n; i++ {
5243 ntg.Networks[i] = tg.Networks[i].Copy()
5244 }
5245 }
5246
5247 if tg.Tasks != nil {
5248 tasks := make([]*Task, len(ntg.Tasks))
5249 for i, t := range ntg.Tasks {
5250 tasks[i] = t.Copy()
5251 }
5252 ntg.Tasks = tasks
5253 }
5254
5255 ntg.Meta = helper.CopyMapStringString(ntg.Meta)
5256
5257 if tg.EphemeralDisk != nil {
5258 ntg.EphemeralDisk = tg.EphemeralDisk.Copy()
5259 }
5260
5261 if tg.Services != nil {
5262 ntg.Services = make([]*Service, len(tg.Services))
5263 for i, s := range tg.Services {
5264 ntg.Services[i] = s.Copy()
5265 }
5266 }
5267
5268 if tg.ShutdownDelay != nil {
5269 ntg.ShutdownDelay = tg.ShutdownDelay
5270 }
5271
5272 if tg.StopAfterClientDisconnect != nil {
5273 ntg.StopAfterClientDisconnect = tg.StopAfterClientDisconnect
5274 }
5275
5276 return ntg
5277 }
5278
5279 // Canonicalize is used to canonicalize fields in the TaskGroup.
5280 func (tg *TaskGroup) Canonicalize(job *Job) {
5281 // Ensure that an empty and nil map are treated the same to avoid scheduling
5282 // problems since we use reflect DeepEquals.
5283 if len(tg.Meta) == 0 {
5284 tg.Meta = nil
5285 }
5286
5287 // Set the default restart policy.
5288 if tg.RestartPolicy == nil {
5289 tg.RestartPolicy = NewRestartPolicy(job.Type)
5290 }
5291
5292 if tg.ReschedulePolicy == nil {
5293 tg.ReschedulePolicy = NewReschedulePolicy(job.Type)
5294 }
5295
5296 // Canonicalize Migrate for service jobs
5297 if job.Type == JobTypeService && tg.Migrate == nil {
5298 tg.Migrate = DefaultMigrateStrategy()
5299 }
5300
5301 // Set a default ephemeral disk object if the user has not requested for one
5302 if tg.EphemeralDisk == nil {
5303 tg.EphemeralDisk = DefaultEphemeralDisk()
5304 }
5305
5306 for _, service := range tg.Services {
5307 service.Canonicalize(job.Name, tg.Name, "group")
5308 }
5309
5310 for _, network := range tg.Networks {
5311 network.Canonicalize()
5312 }
5313
5314 for _, task := range tg.Tasks {
5315 task.Canonicalize(job, tg)
5316 }
5317 }
5318
5319 // Validate is used to sanity check a task group
5320 func (tg *TaskGroup) Validate(j *Job) error {
5321 var mErr multierror.Error
5322 if tg.Name == "" {
5323 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
5324 }
5325 if tg.Count < 0 {
5326 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative"))
5327 }
5328 if len(tg.Tasks) == 0 {
5329 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
5330 }
5331 for idx, constr := range tg.Constraints {
5332 if err := constr.Validate(); err != nil {
5333 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
5334 mErr.Errors = append(mErr.Errors, outer)
5335 }
5336 }
5337 if j.Type == JobTypeSystem {
5338 if tg.Affinities != nil {
5339 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza"))
5340 }
5341 } else {
5342 for idx, affinity := range tg.Affinities {
5343 if err := affinity.Validate(); err != nil {
5344 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
5345 mErr.Errors = append(mErr.Errors, outer)
5346 }
5347 }
5348 }
5349
5350 if tg.RestartPolicy != nil {
5351 if err := tg.RestartPolicy.Validate(); err != nil {
5352 mErr.Errors = append(mErr.Errors, err)
5353 }
5354 } else {
5355 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
5356 }
5357
5358 if j.Type == JobTypeSystem {
5359 if tg.Spreads != nil {
5360 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza"))
5361 }
5362 } else {
5363 for idx, spread := range tg.Spreads {
5364 if err := spread.Validate(); err != nil {
5365 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err)
5366 mErr.Errors = append(mErr.Errors, outer)
5367 }
5368 }
5369 }
5370
5371 if j.Type == JobTypeSystem {
5372 if tg.ReschedulePolicy != nil {
5373 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy"))
5374 }
5375 } else {
5376 if tg.ReschedulePolicy != nil {
5377 if err := tg.ReschedulePolicy.Validate(); err != nil {
5378 mErr.Errors = append(mErr.Errors, err)
5379 }
5380 } else {
5381 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name))
5382 }
5383 }
5384
5385 if tg.EphemeralDisk != nil {
5386 if err := tg.EphemeralDisk.Validate(); err != nil {
5387 mErr.Errors = append(mErr.Errors, err)
5388 }
5389 } else {
5390 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name))
5391 }
5392
5393 // Validate the update strategy
5394 if u := tg.Update; u != nil {
5395 switch j.Type {
5396 case JobTypeService, JobTypeSystem:
5397 default:
5398 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type))
5399 }
5400 if err := u.Validate(); err != nil {
5401 mErr.Errors = append(mErr.Errors, err)
5402 }
5403 }
5404
5405 // Validate the migration strategy
5406 switch j.Type {
5407 case JobTypeService:
5408 if tg.Migrate != nil {
5409 if err := tg.Migrate.Validate(); err != nil {
5410 mErr.Errors = append(mErr.Errors, err)
5411 }
5412 }
5413 default:
5414 if tg.Migrate != nil {
5415 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow migrate block", j.Type))
5416 }
5417 }
5418
5419 // Check that there is only one leader task if any
5420 tasks := make(map[string]int)
5421 leaderTasks := 0
5422 for idx, task := range tg.Tasks {
5423 if task.Name == "" {
5424 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
5425 } else if existing, ok := tasks[task.Name]; ok {
5426 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
5427 } else {
5428 tasks[task.Name] = idx
5429 }
5430
5431 if task.Leader {
5432 leaderTasks++
5433 }
5434 }
5435
5436 if leaderTasks > 1 {
5437 mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader"))
5438 }
5439
5440 // Validate the Host Volumes
5441 for name, decl := range tg.Volumes {
5442 if !(decl.Type == VolumeTypeHost ||
5443 decl.Type == VolumeTypeCSI) {
5444 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has unrecognised type %s", name, decl.Type))
5445 continue
5446 }
5447
5448 if decl.Source == "" {
5449 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has an empty source", name))
5450 }
5451 }
5452
5453 // Validate task group and task network resources
5454 if err := tg.validateNetworks(); err != nil {
5455 outer := fmt.Errorf("Task group network validation failed: %v", err)
5456 mErr.Errors = append(mErr.Errors, outer)
5457 }
5458
5459 // Validate task group and task services
5460 if err := tg.validateServices(); err != nil {
5461 outer := fmt.Errorf("Task group service validation failed: %v", err)
5462 mErr.Errors = append(mErr.Errors, outer)
5463 }
5464
5465 // Validate the scaling policy
5466 if err := tg.validateScalingPolicy(); err != nil {
5467 outer := fmt.Errorf("Task group scaling policy validation failed: %v", err)
5468 mErr.Errors = append(mErr.Errors, outer)
5469 }
5470
5471 // Validate the tasks
5472 for _, task := range tg.Tasks {
5473 // Validate the task does not reference undefined volume mounts
5474 for i, mnt := range task.VolumeMounts {
5475 if mnt.Volume == "" {
5476 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing an empty volume", task.Name, i))
5477 continue
5478 }
5479
5480 if _, ok := tg.Volumes[mnt.Volume]; !ok {
5481 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing undefined volume %s", task.Name, i, mnt.Volume))
5482 continue
5483 }
5484 }
5485
5486 if err := task.Validate(tg.EphemeralDisk, j.Type, tg.Services); err != nil {
5487 outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err)
5488 mErr.Errors = append(mErr.Errors, outer)
5489 }
5490 }
5491 return mErr.ErrorOrNil()
5492 }
5493
5494 func (tg *TaskGroup) validateNetworks() error {
5495 var mErr multierror.Error
5496 portLabels := make(map[string]string)
5497 staticPorts := make(map[int]string)
5498 mappedPorts := make(map[int]string)
5499
5500 for _, net := range tg.Networks {
5501 for _, port := range append(net.ReservedPorts, net.DynamicPorts...) {
5502 if other, ok := portLabels[port.Label]; ok {
5503 mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other))
5504 } else {
5505 portLabels[port.Label] = "taskgroup network"
5506 }
5507
5508 if port.Value != 0 {
5509 // static port
5510 if other, ok := staticPorts[port.Value]; ok {
5511 err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
5512 mErr.Errors = append(mErr.Errors, err)
5513 } else {
5514 staticPorts[port.Value] = fmt.Sprintf("taskgroup network:%s", port.Label)
5515 }
5516 }
5517
5518 if port.To > 0 {
5519 if other, ok := mappedPorts[port.To]; ok {
5520 err := fmt.Errorf("Port mapped to %d already in use by %s", port.To, other)
5521 mErr.Errors = append(mErr.Errors, err)
5522 } else {
5523 mappedPorts[port.To] = fmt.Sprintf("taskgroup network:%s", port.Label)
5524 }
5525 } else if port.To < -1 {
5526 err := fmt.Errorf("Port %q cannot be mapped to negative value %d", port.Label, port.To)
5527 mErr.Errors = append(mErr.Errors, err)
5528 }
5529 }
5530 }
5531 // Check task networks for duplicate port labels and duplicated static or mapped ports
5532 for _, task := range tg.Tasks {
5533 if task.Resources == nil {
5534 continue
5535 }
5536
5537 for _, net := range task.Resources.Networks {
5538 for _, port := range append(net.ReservedPorts, net.DynamicPorts...) {
5539 if other, ok := portLabels[port.Label]; ok {
5540 mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other))
5541 }
5542
5543 if port.Value != 0 {
5544 if other, ok := staticPorts[port.Value]; ok {
5545 err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
5546 mErr.Errors = append(mErr.Errors, err)
5547 } else {
5548 staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label)
5549 }
5550 }
5551
5552 if port.To != 0 {
5553 if other, ok := mappedPorts[port.To]; ok {
5554 err := fmt.Errorf("Port mapped to %d already in use by %s", port.To, other)
5555 mErr.Errors = append(mErr.Errors, err)
5556 } else {
5557 mappedPorts[port.To] = fmt.Sprintf("%s:%s", task.Name, port.Label)
5558 }
5559 }
5560 }
5561 }
5562 }
5563 return mErr.ErrorOrNil()
5564 }
5565
5566 // validateServices runs Service.Validate() on group-level services,
5567 // checks that group services do not conflict with task services and that
5568 // group service checks that refer to tasks only refer to tasks that exist.
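//
// A sketch of a configuration this rejects (editor's addition; all names
// below are hypothetical):
//
//	tg.Services = []*Service{{
//		Name:      "web",
//		PortLabel: "http",
//		Checks: []*ServiceCheck{{
//			Name:     "healthz",
//			Type:     ServiceCheckScript,
//			TaskName: "no-such-task", // not defined in tg.Tasks => error
//		}},
//	}}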
5569 func (tg *TaskGroup) validateServices() error {
5570 var mErr multierror.Error
5571 knownTasks := make(map[string]struct{})
5572 knownServices := make(map[string]struct{})
5573
5574 // Create a map of known tasks and their services so we can compare
5575 // vs the group-level services and checks
5576 for _, task := range tg.Tasks {
5577 knownTasks[task.Name] = struct{}{}
5578 if task.Services == nil {
5579 continue
5580 }
5581 for _, service := range task.Services {
5582 if _, ok := knownServices[service.Name+service.PortLabel]; ok {
5583 mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name))
5584 }
5585 for _, check := range service.Checks {
5586 if check.TaskName != "" {
5587 mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %s is invalid: only task group service checks can be assigned tasks", check.Name))
5588 }
5589 }
5590 knownServices[service.Name+service.PortLabel] = struct{}{}
5591 }
5592 }
5593 for i, service := range tg.Services {
5594 if err := service.Validate(); err != nil {
5595 outer := fmt.Errorf("Service[%d] %s validation failed: %s", i, service.Name, err)
5596 mErr.Errors = append(mErr.Errors, outer)
5597 // we continue here to avoid the risk of crashing on null-pointer
5598 // access in a later step, accepting that we might miss out on
5599 // error messages to provide the user.
5600 continue
5601 }
5602 if _, ok := knownServices[service.Name+service.PortLabel]; ok {
5603 mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name))
5604 }
5605 knownServices[service.Name+service.PortLabel] = struct{}{}
5606 for _, check := range service.Checks {
5607 if check.TaskName != "" {
5608 if check.Type != ServiceCheckScript && check.Type != ServiceCheckGRPC {
5609 mErr.Errors = append(mErr.Errors,
5610 fmt.Errorf("Check %s invalid: only script and gRPC checks should have tasks", check.Name))
5611 }
5612 if _, ok := knownTasks[check.TaskName]; !ok {
5613 mErr.Errors = append(mErr.Errors,
5614 fmt.Errorf("Check %s invalid: refers to non-existent task %s", check.Name, check.TaskName))
5615 }
5616 }
5617 }
5618 }
5619 return mErr.ErrorOrNil()
5620 }
5621
5622 // validateScalingPolicy ensures that the scaling policy has consistent
5623 // min and max, not in conflict with the task group count
5624 func (tg *TaskGroup) validateScalingPolicy() error {
5625 if tg.Scaling == nil {
5626 return nil
5627 }
5628
5629 var mErr multierror.Error
5630
5631 if tg.Scaling.Min > tg.Scaling.Max {
5632 mErr.Errors = append(mErr.Errors,
5633 fmt.Errorf("Scaling policy invalid: maximum count must not be less than minimum count"))
5634 }
5635
5636 if int64(tg.Count) < tg.Scaling.Min {
5637 mErr.Errors = append(mErr.Errors,
5638 fmt.Errorf("Scaling policy invalid: task group count must not be less than minimum count in scaling policy"))
5639 }
5640
5641 if tg.Scaling.Max < int64(tg.Count) {
5642 mErr.Errors = append(mErr.Errors,
5643 fmt.Errorf("Scaling policy invalid: task group count must not be greater than maximum count in scaling policy"))
5644 }
5645
5646 return mErr.ErrorOrNil()
5647 }
5648
5649 // Warnings returns a list of warnings from dubious settings or
5650 // deprecated fields.
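//
// For example (editor's sketch; the literal below is hypothetical), an update
// stanza whose MaxParallel exceeds the group count validates but warns, since
// a destructive change would replace every allocation at once:
//
//	tg := &TaskGroup{
//		Name:   "web",
//		Count:  3,
//		Update: &UpdateStrategy{MaxParallel: 5},
//	}
//	err := tg.Warnings(job) // non-nil warning multierror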
5651 func (tg *TaskGroup) Warnings(j *Job) error {
5652 var mErr multierror.Error
5653
5654 // Validate the update strategy
5655 if u := tg.Update; u != nil {
5656 // Check the counts are appropriate
5657 if u.MaxParallel > tg.Count {
5658 mErr.Errors = append(mErr.Errors,
5659 fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+
5660 "A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count))
5661 }
5662 }
5663
5664 for _, t := range tg.Tasks {
5665 if err := t.Warnings(); err != nil {
5666 err = multierror.Prefix(err, fmt.Sprintf("Task %q:", t.Name))
5667 mErr.Errors = append(mErr.Errors, err)
5668 }
5669 }
5670
5671 return mErr.ErrorOrNil()
5672 }
5673
5674 // LookupTask finds a task by name
5675 func (tg *TaskGroup) LookupTask(name string) *Task {
5676 for _, t := range tg.Tasks {
5677 if t.Name == name {
5678 return t
5679 }
5680 }
5681 return nil
5682 }
5683
5684 func (tg *TaskGroup) UsesConnect() bool {
5685 for _, service := range tg.Services {
5686 if service.Connect != nil {
5687 if service.Connect.Native || service.Connect.SidecarService != nil {
5688 return true
5689 }
5690 }
5691 }
5692 return false
5693 }
5694
5695 func (tg *TaskGroup) GoString() string {
5696 return fmt.Sprintf("*%#v", *tg)
5697 }
5698
5699 // CheckRestart describes if and when a task should be restarted based on
5700 // failing health checks.
5701 type CheckRestart struct {
5702 Limit int // Restart task after this many unhealthy intervals
5703 Grace time.Duration // Grace time to give tasks after starting to get healthy
5704 IgnoreWarnings bool // If true treat checks in `warning` as passing
5705 }
5706
5707 func (c *CheckRestart) Copy() *CheckRestart {
5708 if c == nil {
5709 return nil
5710 }
5711
5712 nc := new(CheckRestart)
5713 *nc = *c
5714 return nc
5715 }
5716
5717 func (c *CheckRestart) Equals(o *CheckRestart) bool {
5718 if c == nil || o == nil {
5719 return c == o
5720 }
5721
5722 if c.Limit != o.Limit {
5723 return false
5724 }
5725
5726 if c.Grace != o.Grace {
5727 return false
5728 }
5729
5730 if c.IgnoreWarnings != o.IgnoreWarnings {
5731 return false
5732 }
5733
5734 return true
5735 }
5736
5737 func (c *CheckRestart) Validate() error {
5738 if c == nil {
5739 return nil
5740 }
5741
5742 var mErr multierror.Error
5743 if c.Limit < 0 {
5744 mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit))
5745 }
5746
5747 if c.Grace < 0 {
5748 mErr.Errors = append(mErr.Errors, fmt.Errorf("grace period must be greater than or equal to 0 but found %v", c.Grace))
5749 }
5750
5751 return mErr.ErrorOrNil()
5752 }
5753
5754 const (
5755 // DefaultKillTimeout is the default timeout between signaling a task it
5756 // will be killed and killing it.
5757 DefaultKillTimeout = 5 * time.Second
5758 )
5759
5760 // LogConfig provides configuration for log rotation
5761 type LogConfig struct {
5762 MaxFiles int
5763 MaxFileSizeMB int
5764 }
5765
5766 func (l *LogConfig) Copy() *LogConfig {
5767 if l == nil {
5768 return nil
5769 }
5770 return &LogConfig{
5771 MaxFiles: l.MaxFiles,
5772 MaxFileSizeMB: l.MaxFileSizeMB,
5773 }
5774 }
5775
5776 // DefaultLogConfig returns the default LogConfig values.
5777 func DefaultLogConfig() *LogConfig {
5778 return &LogConfig{
5779 MaxFiles: 10,
5780 MaxFileSizeMB: 10,
5781 }
5782 }
5783
5784 // Validate returns an error if the log config values are less than
5785 // the minimum allowed.
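//
// A minimal sketch (editor's addition): a zero-valued LogConfig fails on
// both minimums, while the defaults pass.
//
//	err := (&LogConfig{}).Validate()    // non-nil: MaxFiles and MaxFileSizeMB < 1
//	err = DefaultLogConfig().Validate() // nil: 10 files of 10MB each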
5786 func (l *LogConfig) Validate() error {
5787 var mErr multierror.Error
5788 if l.MaxFiles < 1 {
5789 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
5790 }
5791 if l.MaxFileSizeMB < 1 {
5792 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
5793 }
5794 return mErr.ErrorOrNil()
5795 }
5796
5797 // Task is a single process, typically executed as part of a task group.
5798 type Task struct {
5799 // Name of the task
5800 Name string
5801
5802 // Driver is used to control which driver is used
5803 Driver string
5804
5805 // User is used to determine which user will run the task. It defaults to
5806 // the same user the Nomad client is being run as.
5807 User string
5808
5809 // Config is provided to the driver to initialize
5810 Config map[string]interface{}
5811
5812 // Map of environment variables to be used by the driver
5813 Env map[string]string
5814
5815 // List of service definitions exposed by the Task
5816 Services []*Service
5817
5818 // Vault is used to define the set of Vault policies that this task should
5819 // have access to.
5820 Vault *Vault
5821
5822 // Templates are the set of templates to be rendered for the task.
5823 Templates []*Template
5824
5825 // Constraints can be specified at a task level and apply only to
5826 // the particular task.
5827 Constraints []*Constraint
5828
5829 // Affinities can be specified at the task level to express
5830 // scheduling preferences
5831 Affinities []*Affinity
5832
5833 // Resources is the resources needed by this task
5834 Resources *Resources
5835
5836 // RestartPolicy of a TaskGroup
5837 RestartPolicy *RestartPolicy
5838
5839 // DispatchPayload configures how the task retrieves its input from a dispatch
5840 DispatchPayload *DispatchPayloadConfig
5841
5842 Lifecycle *TaskLifecycleConfig
5843
5844 // Meta is used to associate arbitrary metadata with this
5845 // task. This is opaque to Nomad.
5846 Meta map[string]string
5847
5848 // KillTimeout is the time between signaling a task that it will be
5849 // killed and killing it.
5850 KillTimeout time.Duration
5851
5852 // LogConfig provides configuration for log rotation
5853 LogConfig *LogConfig
5854
5855 // Artifacts is a list of artifacts to download and extract before running
5856 // the task.
5857 Artifacts []*TaskArtifact
5858
5859 // Leader marks the task as the leader within the group. When the leader
5860 // task exits, other tasks will be gracefully terminated.
5861 Leader bool
5862
5863 // ShutdownDelay is the duration of the delay between deregistering a
5864 // task from Consul and sending it a signal to shutdown. See #2441
5865 ShutdownDelay time.Duration
5866
5867 // VolumeMounts is a list of Volume name <-> mount configurations that will be
5868 // attached to this task.
5869 VolumeMounts []*VolumeMount
5870
5871
5872
5873 // KillSignal is the kill signal to use for the task. This is an optional
5874 // specification and defaults to SIGINT
5875 KillSignal string
5876
5877 // Used internally to manage tasks according to their TaskKind. Initial use case
5878 // is for Consul Connect
5879 Kind TaskKind
5880
5881 // CSIPluginConfig is used to configure the plugin supervisor for the task.
5882 CSIPluginConfig *TaskCSIPluginConfig
5883 }
5884
5885 // UsesConnect is for conveniently detecting if the Task is able to make use
5886 // of Consul Connect features. This will be indicated in the TaskKind of the
This will be indicated in the TaskKind of the 5887 // Task, which exports known types of Tasks. 5888 // 5889 // Currently only Consul Connect Proxy tasks are known. 5890 // (Consul Connect Native tasks will be supported soon). 5891 func (t *Task) UsesConnect() bool { 5892 // todo(shoenig): native tasks 5893 switch { 5894 case t.Kind.IsConnectProxy(): 5895 return true 5896 default: 5897 return false 5898 } 5899 } 5900 5901 func (t *Task) Copy() *Task { 5902 if t == nil { 5903 return nil 5904 } 5905 nt := new(Task) 5906 *nt = *t 5907 nt.Env = helper.CopyMapStringString(nt.Env) 5908 5909 if t.Services != nil { 5910 services := make([]*Service, len(nt.Services)) 5911 for i, s := range nt.Services { 5912 services[i] = s.Copy() 5913 } 5914 nt.Services = services 5915 } 5916 5917 nt.Constraints = CopySliceConstraints(nt.Constraints) 5918 nt.Affinities = CopySliceAffinities(nt.Affinities) 5919 nt.VolumeMounts = CopySliceVolumeMount(nt.VolumeMounts) 5920 nt.CSIPluginConfig = nt.CSIPluginConfig.Copy() 5921 5922 nt.Vault = nt.Vault.Copy() 5923 nt.Resources = nt.Resources.Copy() 5924 nt.LogConfig = nt.LogConfig.Copy() 5925 nt.Meta = helper.CopyMapStringString(nt.Meta) 5926 nt.DispatchPayload = nt.DispatchPayload.Copy() 5927 nt.Lifecycle = nt.Lifecycle.Copy() 5928 5929 if t.Artifacts != nil { 5930 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 5931 for _, a := range nt.Artifacts { 5932 artifacts = append(artifacts, a.Copy()) 5933 } 5934 nt.Artifacts = artifacts 5935 } 5936 5937 if i, err := copystructure.Copy(nt.Config); err != nil { 5938 panic(err.Error()) 5939 } else { 5940 nt.Config = i.(map[string]interface{}) 5941 } 5942 5943 if t.Templates != nil { 5944 templates := make([]*Template, len(t.Templates)) 5945 for i, tmpl := range nt.Templates { 5946 templates[i] = tmpl.Copy() 5947 } 5948 nt.Templates = templates 5949 } 5950 5951 return nt 5952 } 5953 5954 // Canonicalize canonicalizes fields in the task. 5955 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 5956 // Ensure that an empty and nil map are treated the same to avoid scheduling 5957 // problems since we use reflect DeepEquals. 5958 if len(t.Meta) == 0 { 5959 t.Meta = nil 5960 } 5961 if len(t.Config) == 0 { 5962 t.Config = nil 5963 } 5964 if len(t.Env) == 0 { 5965 t.Env = nil 5966 } 5967 5968 for _, service := range t.Services { 5969 service.Canonicalize(job.Name, tg.Name, t.Name) 5970 } 5971 5972 // If Resources are nil initialize them to defaults, otherwise canonicalize 5973 if t.Resources == nil { 5974 t.Resources = DefaultResources() 5975 } else { 5976 t.Resources.Canonicalize() 5977 } 5978 5979 if t.RestartPolicy == nil { 5980 t.RestartPolicy = tg.RestartPolicy 5981 } 5982 5983 // Set the default timeout if it is not specified. 5984 if t.KillTimeout == 0 { 5985 t.KillTimeout = DefaultKillTimeout 5986 } 5987 5988 if t.Vault != nil { 5989 t.Vault.Canonicalize() 5990 } 5991 5992 for _, template := range t.Templates { 5993 template.Canonicalize() 5994 } 5995 } 5996 5997 func (t *Task) GoString() string { 5998 return fmt.Sprintf("*%#v", *t) 5999 } 6000 6001 // Validate is used to sanity check a task 6002 func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string, tgServices []*Service) error { 6003 var mErr multierror.Error 6004 if t.Name == "" { 6005 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 6006 } 6007 if strings.ContainsAny(t.Name, `/\`) { 6008 // We enforce this so that when creating the directory on disk it will 6009 // not have any slashes. 
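// (Editor's note) The task name is used as a directory name under the
// allocation directory (roughly <alloc_dir>/<task_name>/...; path shown for
// illustration only), so a slash in the name would change the on-disk layout.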
6010 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 6011 } 6012 if t.Driver == "" { 6013 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 6014 } 6015 if t.KillTimeout < 0 { 6016 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 6017 } 6018 if t.ShutdownDelay < 0 { 6019 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 6020 } 6021 6022 // Validate the resources. 6023 if t.Resources == nil { 6024 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 6025 } else if err := t.Resources.Validate(); err != nil { 6026 mErr.Errors = append(mErr.Errors, err) 6027 } 6028 6029 // Validate the log config 6030 if t.LogConfig == nil { 6031 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 6032 } else if err := t.LogConfig.Validate(); err != nil { 6033 mErr.Errors = append(mErr.Errors, err) 6034 } 6035 6036 for idx, constr := range t.Constraints { 6037 if err := constr.Validate(); err != nil { 6038 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 6039 mErr.Errors = append(mErr.Errors, outer) 6040 } 6041 6042 switch constr.Operand { 6043 case ConstraintDistinctHosts, ConstraintDistinctProperty: 6044 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand) 6045 mErr.Errors = append(mErr.Errors, outer) 6046 } 6047 } 6048 6049 if jobType == JobTypeSystem { 6050 if t.Affinities != nil { 6051 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 6052 } 6053 } else { 6054 for idx, affinity := range t.Affinities { 6055 if err := affinity.Validate(); err != nil { 6056 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 6057 mErr.Errors = append(mErr.Errors, outer) 6058 } 6059 } 6060 } 6061 6062 // Validate Services 6063 if err := validateServices(t); err != nil { 6064 mErr.Errors = append(mErr.Errors, err) 6065 } 6066 6067 if t.LogConfig != nil && ephemeralDisk != nil { 6068 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 6069 if ephemeralDisk.SizeMB <= logUsage { 6070 mErr.Errors = append(mErr.Errors, 6071 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 6072 logUsage, ephemeralDisk.SizeMB)) 6073 } 6074 } 6075 6076 for idx, artifact := range t.Artifacts { 6077 if err := artifact.Validate(); err != nil { 6078 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 6079 mErr.Errors = append(mErr.Errors, outer) 6080 } 6081 } 6082 6083 if t.Vault != nil { 6084 if err := t.Vault.Validate(); err != nil { 6085 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 6086 } 6087 } 6088 6089 destinations := make(map[string]int, len(t.Templates)) 6090 for idx, tmpl := range t.Templates { 6091 if err := tmpl.Validate(); err != nil { 6092 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 6093 mErr.Errors = append(mErr.Errors, outer) 6094 } 6095 6096 if other, ok := destinations[tmpl.DestPath]; ok { 6097 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 6098 mErr.Errors = append(mErr.Errors, outer) 6099 } else { 6100 destinations[tmpl.DestPath] = idx + 1 6101 } 6102 } 6103 6104 // Validate the dispatch payload block if there 6105 if t.DispatchPayload != nil { 6106 if err := t.DispatchPayload.Validate(); err != nil { 6107 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err)) 
6108 } 6109 } 6110 6111 // Validate the Lifecycle block if there 6112 if t.Lifecycle != nil { 6113 if err := t.Lifecycle.Validate(); err != nil { 6114 mErr.Errors = append(mErr.Errors, fmt.Errorf("Lifecycle validation failed: %v", err)) 6115 } 6116 6117 } 6118 6119 // Validation for TaskKind field which is used for Consul Connect integration 6120 if t.Kind.IsConnectProxy() { 6121 // This task is a Connect proxy so it should not have service stanzas 6122 if len(t.Services) > 0 { 6123 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have a service stanza")) 6124 } 6125 if t.Leader { 6126 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have leader set")) 6127 } 6128 6129 // Ensure the proxy task has a corresponding service entry 6130 serviceErr := ValidateConnectProxyService(t.Kind.Value(), tgServices) 6131 if serviceErr != nil { 6132 mErr.Errors = append(mErr.Errors, serviceErr) 6133 } 6134 } 6135 6136 // Validation for volumes 6137 for idx, vm := range t.VolumeMounts { 6138 if !MountPropagationModeIsValid(vm.PropagationMode) { 6139 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume Mount (%d) has an invalid propagation mode: \"%s\"", idx, vm.PropagationMode)) 6140 } 6141 } 6142 6143 // Validate CSI Plugin Config 6144 if t.CSIPluginConfig != nil { 6145 if t.CSIPluginConfig.ID == "" { 6146 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig must have a non-empty PluginID")) 6147 } 6148 6149 if !CSIPluginTypeIsValid(t.CSIPluginConfig.Type) { 6150 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig PluginType must be one of 'node', 'controller', or 'monolith', got: \"%s\"", t.CSIPluginConfig.Type)) 6151 } 6152 6153 // TODO: Investigate validation of the PluginMountDir. Not much we can do apart from check IsAbs until after we understand its execution environment though :( 6154 } 6155 6156 return mErr.ErrorOrNil() 6157 } 6158 6159 // validateServices takes a task and validates the services within it are valid 6160 // and reference ports that exist. 6161 func validateServices(t *Task) error { 6162 var mErr multierror.Error 6163 6164 // Ensure that services don't ask for nonexistent ports and their names are 6165 // unique. 
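// (Editor's note) servicePorts maps a port label to the set of service
// names referencing it; e.g. if services "web" and "metrics" both use port
// label "http", then servicePorts["http"] holds {"web", "metrics"}.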
6166 servicePorts := make(map[string]map[string]struct{}) 6167 addServicePort := func(label, service string) { 6168 if _, ok := servicePorts[label]; !ok { 6169 servicePorts[label] = map[string]struct{}{} 6170 } 6171 servicePorts[label][service] = struct{}{} 6172 } 6173 knownServices := make(map[string]struct{}) 6174 for i, service := range t.Services { 6175 if err := service.Validate(); err != nil { 6176 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 6177 mErr.Errors = append(mErr.Errors, outer) 6178 } 6179 6180 // Ensure that services with the same name are not being registered for 6181 // the same port 6182 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 6183 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 6184 } 6185 knownServices[service.Name+service.PortLabel] = struct{}{} 6186 6187 if service.PortLabel != "" { 6188 if service.AddressMode == "driver" { 6189 // Numeric port labels are valid for address_mode=driver 6190 _, err := strconv.Atoi(service.PortLabel) 6191 if err != nil { 6192 // Not a numeric port label, add it to list to check 6193 addServicePort(service.PortLabel, service.Name) 6194 } 6195 } else { 6196 addServicePort(service.PortLabel, service.Name) 6197 } 6198 } 6199 6200 // Ensure that check names are unique and have valid ports 6201 knownChecks := make(map[string]struct{}) 6202 for _, check := range service.Checks { 6203 if _, ok := knownChecks[check.Name]; ok { 6204 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 6205 } 6206 knownChecks[check.Name] = struct{}{} 6207 6208 if !check.RequiresPort() { 6209 // No need to continue validating check if it doesn't need a port 6210 continue 6211 } 6212 6213 effectivePort := check.PortLabel 6214 if effectivePort == "" { 6215 // Inherits from service 6216 effectivePort = service.PortLabel 6217 } 6218 6219 if effectivePort == "" { 6220 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is missing a port", check.Name)) 6221 continue 6222 } 6223 6224 isNumeric := false 6225 portNumber, err := strconv.Atoi(effectivePort) 6226 if err == nil { 6227 isNumeric = true 6228 } 6229 6230 // Numeric ports are fine for address_mode = "driver" 6231 if check.AddressMode == "driver" && isNumeric { 6232 if portNumber <= 0 { 6233 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber)) 6234 } 6235 continue 6236 } 6237 6238 if isNumeric { 6239 mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, check.Name, portNumber)) 6240 continue 6241 } 6242 6243 // PortLabel must exist, report errors by its parent service 6244 addServicePort(effectivePort, service.Name) 6245 } 6246 } 6247 6248 // Get the set of port labels. 6249 portLabels := make(map[string]struct{}) 6250 if t.Resources != nil { 6251 for _, network := range t.Resources.Networks { 6252 ports := network.PortLabels() 6253 for portLabel := range ports { 6254 portLabels[portLabel] = struct{}{} 6255 } 6256 } 6257 } 6258 6259 // Iterate over a sorted list of keys to make error listings stable 6260 keys := make([]string, 0, len(servicePorts)) 6261 for p := range servicePorts { 6262 keys = append(keys, p) 6263 } 6264 sort.Strings(keys) 6265 6266 // Ensure all ports referenced in services exist. 
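// (Editor's note) A label passes this check only if it is declared by one
// of the task's resource networks collected above; e.g. a service with
// PortLabel "http" needs a network port labeled "http".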
6267 	for _, servicePort := range keys {
6268 		services := servicePorts[servicePort]
6269 		_, ok := portLabels[servicePort]
6270 		if !ok {
6271 			names := make([]string, 0, len(services))
6272 			for name := range services {
6273 				names = append(names, name)
6274 			}
6275
6276 			// Keep order deterministic
6277 			sort.Strings(names)
6278 			joined := strings.Join(names, ", ")
6279 			err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined)
6280 			mErr.Errors = append(mErr.Errors, err)
6281 		}
6282 	}
6283
6285 	return mErr.ErrorOrNil()
6286 }
6287
6288 func (t *Task) Warnings() error {
6289 	var mErr multierror.Error
6290
6291 	// Validate the resources
6292 	if t.Resources != nil && t.Resources.IOPS != 0 {
6293 		mErr.Errors = append(mErr.Errors, fmt.Errorf("IOPS has been deprecated as of Nomad 0.9.0. Please remove IOPS from resource stanza."))
6294 	}
6295
6296 	for idx, tmpl := range t.Templates {
6297 		if err := tmpl.Warnings(); err != nil {
6298 			err = multierror.Prefix(err, fmt.Sprintf("Template[%d]", idx))
6299 			mErr.Errors = append(mErr.Errors, err)
6300 		}
6301 	}
6302
6303 	return mErr.ErrorOrNil()
6304 }
6305
6306 // TaskKind identifies the special kinds of tasks using the following format:
6307 // `<kind_name>(:<identifier>)`. The TaskKind can optionally include an identifier that
6308 // is opaque to the Task. This identifier can be used to relate the task to some
6309 // other entity based on the kind.
6310 //
6311 // For example, a task may have the TaskKind of `connect-proxy:service` where
6312 // 'connect-proxy' is the kind name and 'service' is the identifier that relates the
6313 // task to the name of the service it proxies.
6314 type TaskKind string
6315
6316 func NewTaskKind(name, identifier string) TaskKind {
6317 	return TaskKind(fmt.Sprintf("%s:%s", name, identifier))
6318 }
6319
6320 // Name returns the kind name portion of the TaskKind
6321 func (k TaskKind) Name() string {
6322 	return strings.Split(string(k), ":")[0]
6323 }
6324
6325 // Value returns the identifier of the TaskKind or an empty string if it doesn't
6326 // include one.
6327 func (k TaskKind) Value() string {
6328 	if s := strings.SplitN(string(k), ":", 2); len(s) > 1 {
6329 		return s[1]
6330 	}
6331 	return ""
6332 }
6333
6334 // IsConnectProxy returns true if the TaskKind is connect-proxy
6335 func (k TaskKind) IsConnectProxy() bool {
6336 	return strings.HasPrefix(string(k), ConnectProxyPrefix+":") && len(k) > len(ConnectProxyPrefix)+1
6337 }
6338
6339 func (k TaskKind) IsConnectNative() bool {
6340 	return strings.HasPrefix(string(k), ConnectNativePrefix+":") && len(k) > len(ConnectNativePrefix)+1
6341 }
6342
6343 const (
6344 	// ConnectProxyPrefix is the prefix used for fields referencing a Consul Connect
6345 	// Proxy
6346 	ConnectProxyPrefix = "connect-proxy"
6347
6348 	// ConnectNativePrefix is the prefix used for fields referencing a Connect
6349 	// Native Task
6350 	ConnectNativePrefix = "connect-native"
6351 )
6352
6353 // ValidateConnectProxyService checks that the service that is being
6354 // proxied by this task exists in the task group and contains
6355 // valid Connect config.
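//
// (Editor's note) A short illustration of the TaskKind encoding documented
// above, using only identifiers defined in this file:
//
//	k := NewTaskKind(ConnectProxyPrefix, "web") // "connect-proxy:web"
//	k.Name()           // "connect-proxy"
//	k.Value()          // "web"
//	k.IsConnectProxy() // true
//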
6356 func ValidateConnectProxyService(serviceName string, tgServices []*Service) error {
6357 	found := false
6358 	names := make([]string, 0, len(tgServices))
6359 	for _, svc := range tgServices {
6360 		if svc.Connect == nil || svc.Connect.SidecarService == nil {
6361 			continue
6362 		}
6363
6364 		if svc.Name == serviceName {
6365 			found = true
6366 			break
6367 		}
6368
6369 		// Build up list of mismatched Connect service names for error
6370 		// reporting.
6371 		names = append(names, svc.Name)
6372 	}
6373
6374 	if !found {
6375 		if len(names) == 0 {
6376 			return fmt.Errorf("No Connect services in task group with Connect proxy (%q)", serviceName)
6377 		} else {
6378 			return fmt.Errorf("Connect proxy service name (%q) not found in Connect services from task group: %s", serviceName, names)
6379 		}
6380 	}
6381
6382 	return nil
6383 }
6384
6385 const (
6386 	// TemplateChangeModeNoop marks that no action should be taken if the
6387 	// template is re-rendered
6388 	TemplateChangeModeNoop = "noop"
6389
6390 	// TemplateChangeModeSignal marks that the task should be signaled if the
6391 	// template is re-rendered
6392 	TemplateChangeModeSignal = "signal"
6393
6394 	// TemplateChangeModeRestart marks that the task should be restarted if the
6395 	// template is re-rendered
6396 	TemplateChangeModeRestart = "restart"
6397 )
6398
6399 var (
6400 	// TemplateChangeModeInvalidError is the error for when an invalid change
6401 	// mode is given
6402 	TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart")
6403 )
6404
6405 // Template represents a template configuration to be rendered for a given task
6406 type Template struct {
6407 	// SourcePath is the path to the template to be rendered
6408 	SourcePath string
6409
6410 	// DestPath is the path to where the template should be rendered
6411 	DestPath string
6412
6413 	// EmbeddedTmpl stores the raw template. This is useful for smaller templates
6414 	// where they are embedded in the job file rather than sent as an artifact
6415 	EmbeddedTmpl string
6416
6417 	// ChangeMode indicates what should be done if the template is re-rendered
6418 	ChangeMode string
6419
6420 	// ChangeSignal is the signal that should be sent if the change mode
6421 	// requires it.
6422 	ChangeSignal string
6423
6424 	// Splay is used to avoid coordinated restarts of processes by applying a
6425 	// random wait between 0 and the given splay value before signalling the
6426 	// application of a change
6427 	Splay time.Duration
6428
6429 	// Perms is the permission the file should be written out with.
6430 	Perms string
6431
6432 	// LeftDelim and RightDelim are optional configurations to control what
6433 	// delimiter is utilized when parsing the template.
6434 	LeftDelim  string
6435 	RightDelim string
6436
6437 	// Envvars enables exposing the template as environment variables
6438 	// instead of as a file. The template must be of the form:
6439 	//
6440 	// VAR_NAME_1={{ key service/my-key }}
6441 	// VAR_NAME_2=raw string and {{ env "attr.kernel.name" }}
6442 	//
6443 	// Lines will be split on the initial "=" with the first part being the
6444 	// key name and the second part the value.
6445 	// Empty lines and lines starting with # will be ignored, but to avoid
6446 	// escaping issues #s within lines will not be treated as comments.
6447 	Envvars bool
6448
6449 	// VaultGrace is the grace duration between lease renewal and reacquiring a
6450 	// secret. If the lease of a secret is less than the grace, a new secret is
6451 	// acquired.
6452 	// COMPAT(0.12) VaultGrace has been ignored by Vault since Vault v0.5.
6453 	VaultGrace time.Duration
6454 }
6455
6456 // DefaultTemplate returns a default template.
6457 func DefaultTemplate() *Template {
6458 	return &Template{
6459 		ChangeMode: TemplateChangeModeRestart,
6460 		Splay:      5 * time.Second,
6461 		Perms:      "0644",
6462 	}
6463 }
6464
6465 func (t *Template) Copy() *Template {
6466 	if t == nil {
6467 		return nil
6468 	}
6469 	copy := new(Template)
6470 	*copy = *t
6471 	return copy
6472 }
6473
6474 func (t *Template) Canonicalize() {
6475 	if t.ChangeSignal != "" {
6476 		t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
6477 	}
6478 }
6479
6480 func (t *Template) Validate() error {
6481 	var mErr multierror.Error
6482
6483 	// Verify we have something to render
6484 	if t.SourcePath == "" && t.EmbeddedTmpl == "" {
6485 		multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template"))
6486 	}
6487
6488 	// Verify we can render somewhere
6489 	if t.DestPath == "" {
6490 		multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template"))
6491 	}
6492
6493 	// Verify the destination doesn't escape
6494 	escaped, err := PathEscapesAllocDir("task", t.DestPath)
6495 	if err != nil {
6496 		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
6497 	} else if escaped {
6498 		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
6499 	}
6500
6501 	// Verify a proper change mode
6502 	switch t.ChangeMode {
6503 	case TemplateChangeModeNoop, TemplateChangeModeRestart:
6504 	case TemplateChangeModeSignal:
6505 		if t.ChangeSignal == "" {
6506 			multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal"))
6507 		}
6508 		if t.Envvars {
6509 			multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates"))
6510 		}
6511 	default:
6512 		multierror.Append(&mErr, TemplateChangeModeInvalidError)
6513 	}
6514
6515 	// Verify the splay is positive
6516 	if t.Splay < 0 {
6517 		multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value"))
6518 	}
6519
6520 	// Verify the permissions
6521 	if t.Perms != "" {
6522 		if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil {
6523 			multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err))
6524 		}
6525 	}
6526
6527 	return mErr.ErrorOrNil()
6528 }
6529
6530 func (t *Template) Warnings() error {
6531 	var mErr multierror.Error
6532
6533 	// Deprecation notice for vault_grace
6534 	if t.VaultGrace != 0 {
6535 		mErr.Errors = append(mErr.Errors, fmt.Errorf("VaultGrace has been deprecated as of Nomad 0.11 and ignored since Vault 0.5. Please remove VaultGrace / vault_grace from template stanza."))
6536 	}
6537
6538 	return mErr.ErrorOrNil()
6539 }
6540
6541 // AllocStateField is the field of an AllocState that changed.
6542 type AllocStateField uint8
6543
6544 const (
6545 	AllocStateFieldClientStatus AllocStateField = iota
6546 )
6547 // AllocState records a single event that changes the state of the whole allocation
6548 type AllocState struct {
6549 	Field AllocStateField
6550 	Value string
6551 	Time  time.Time
6552 }
6553
6554 // Set of possible states for a task.
6555 const (
6556 	TaskStatePending = "pending" // The task is waiting to be run.
6557 	TaskStateRunning = "running" // The task is currently running.
6558 	TaskStateDead    = "dead"    // Terminal state of task.
6559 )
6560
6561 // TaskState tracks the current state of a task and events that caused state
6562 // transitions.
6563 type TaskState struct {
6564 	// The current state of the task.
6565 	State string
6566
6567 	// Failed marks a task as having failed
6568 	Failed bool
6569
6570 	// Restarts is the number of times the task has restarted
6571 	Restarts uint64
6572
6573 	// LastRestart is the time the task last restarted. It is updated each time the
6574 	// task restarts
6575 	LastRestart time.Time
6576
6577 	// StartedAt is the time the task was started. It is updated each time the
6578 	// task starts
6579 	StartedAt time.Time
6580
6581 	// FinishedAt is the time at which the task transitioned to dead and will
6582 	// not be started again.
6583 	FinishedAt time.Time
6584
6585 	// Series of task events that transition the state of the task.
6586 	Events []*TaskEvent
6587 }
6588
6589 // NewTaskState returns a TaskState initialized in the Pending state.
6590 func NewTaskState() *TaskState {
6591 	return &TaskState{
6592 		State: TaskStatePending,
6593 	}
6594 }
6595
6596 // Canonicalize ensures the TaskState has a State set. It should default to
6597 // Pending.
6598 func (ts *TaskState) Canonicalize() {
6599 	if ts.State == "" {
6600 		ts.State = TaskStatePending
6601 	}
6602 }
6603
6604 func (ts *TaskState) Copy() *TaskState {
6605 	if ts == nil {
6606 		return nil
6607 	}
6608 	copy := new(TaskState)
6609 	*copy = *ts
6610
6611 	if ts.Events != nil {
6612 		copy.Events = make([]*TaskEvent, len(ts.Events))
6613 		for i, e := range ts.Events {
6614 			copy.Events[i] = e.Copy()
6615 		}
6616 	}
6617 	return copy
6618 }
6619
6620 // Successful returns whether a task finished successfully. This doesn't really
6621 // have meaning on a non-batch allocation because a service and system
6622 // allocation should not finish.
6623 func (ts *TaskState) Successful() bool {
6624 	return ts.State == TaskStateDead && !ts.Failed
6625 }
6626
6627 const (
6628 	// TaskSetupFailure indicates that the task could not be started due to a
6629 	// setup failure.
6630 	TaskSetupFailure = "Setup Failure"
6631
6632 	// TaskDriverFailure indicates that the task could not be started due to a
6633 	// failure in the driver. TaskDriverFailure is considered Recoverable.
6634 	TaskDriverFailure = "Driver Failure"
6635
6636 	// TaskReceived signals that the task has been pulled by the client at the
6637 	// given timestamp.
6638 	TaskReceived = "Received"
6639
6640 	// TaskFailedValidation indicates the task was invalid and as such was not run.
6641 	// TaskFailedValidation is not considered Recoverable.
6642 	TaskFailedValidation = "Failed Validation"
6643
6644 	// TaskStarted signals that the task was started and its timestamp can be
6645 	// used to determine the running length of the task.
6646 	TaskStarted = "Started"
6647
6648 	// TaskTerminated indicates that the task was started and exited.
6649 	TaskTerminated = "Terminated"
6650
6651 	// TaskKilling indicates a kill signal has been sent to the task.
6652 	TaskKilling = "Killing"
6653
6654 	// TaskKilled indicates a user has killed the task.
6655 	TaskKilled = "Killed"
6656
6657 	// TaskRestarting indicates that task terminated and is being restarted.
6658 	TaskRestarting = "Restarting"
6659
6660 	// TaskNotRestarting indicates that the task has failed and is not being
6661 	// restarted because it has exceeded its restart policy.
6662 	TaskNotRestarting = "Not Restarting"
6663
6664 	// TaskRestartSignal indicates that the task has been signalled to be
6665 	// restarted
6666 	TaskRestartSignal = "Restart Signaled"
6667
6668 	// TaskSignaling indicates that the task is being signalled.
6669 	TaskSignaling = "Signaling"
6670
6671 	// TaskDownloadingArtifacts means the task is downloading the artifacts
6672 	// specified in the task.
6673 	TaskDownloadingArtifacts = "Downloading Artifacts"
6674
6675 	// TaskArtifactDownloadFailed indicates that downloading the artifacts
6676 	// failed.
6677 	TaskArtifactDownloadFailed = "Failed Artifact Download"
6678
6679 	// TaskBuildingTaskDir indicates that the task directory/chroot is being
6680 	// built.
6681 	TaskBuildingTaskDir = "Building Task Directory"
6682
6683 	// TaskSetup indicates the task runner is setting up the task environment
6684 	TaskSetup = "Task Setup"
6685
6686 	// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
6687 	// exceeded the requested disk resources.
6688 	TaskDiskExceeded = "Disk Resources Exceeded"
6689
6690 	// TaskSiblingFailed indicates that a sibling task in the task group has
6691 	// failed.
6692 	TaskSiblingFailed = "Sibling Task Failed"
6693
6694 	// TaskDriverMessage is an informational event message emitted by
6695 	// drivers such as when they're performing a long running action like
6696 	// downloading an image.
6697 	TaskDriverMessage = "Driver"
6698
6699 	// TaskLeaderDead indicates that the leader task within the task group has finished.
6700 	TaskLeaderDead = "Leader Task Dead"
6701
6702 	// TaskHookFailed indicates that one of the hooks for a task failed.
6703 	TaskHookFailed = "Task hook failed"
6704
6705 	// TaskRestoreFailed indicates Nomad was unable to reattach to a
6706 	// restored task.
6707 	TaskRestoreFailed = "Failed Restoring Task"
6708
6709 	// TaskPluginUnhealthy indicates that a plugin managed by Nomad became unhealthy
6710 	TaskPluginUnhealthy = "Plugin became unhealthy"
6711
6712 	// TaskPluginHealthy indicates that a plugin managed by Nomad became healthy
6713 	TaskPluginHealthy = "Plugin became healthy"
6714 )
6715
6716 // TaskEvent is an event that affects the state of a task and contains meta-data
6717 // appropriate to the event's type.
6718 type TaskEvent struct {
6719 	Type string
6720 	Time int64 // Unix Nanosecond timestamp
6721
6722 	Message string // A possible message explaining the termination of the task.
6723
6724 	// DisplayMessage is a human friendly message about the event
6725 	DisplayMessage string
6726
6727 	// Details is a map with annotated info about the event
6728 	Details map[string]string
6729
6730 	// DEPRECATION NOTICE: The following fields are deprecated and will be removed
6731 	// in a future release. Field values are available in the Details map.
6732
6733 	// FailsTask marks whether this event fails the task.
6734 	// Deprecated, use Details["fails_task"] to access this.
6735 	FailsTask bool
6736
6737 	// Restart fields.
6738 	// Deprecated, use Details["restart_reason"] to access this.
6739 	RestartReason string
6740
6741 	// Setup Failure fields.
6742 	// Deprecated, use Details["setup_error"] to access this.
6743 	SetupError string
6744
6745 	// Driver Failure fields.
6746 	// Deprecated, use Details["driver_error"] to access this.
6747 	DriverError string // A driver error occurred while starting the task.
6748
6749 	// Task Terminated Fields.
6750
6751 	// Deprecated, use Details["exit_code"] to access this.
6752 	ExitCode int // The exit code of the task.
6753
6754 	// Deprecated, use Details["signal"] to access this.
6755 	Signal int // The signal that terminated the task.
6756
6757 	// Killing fields
6758 	// Deprecated, use Details["kill_timeout"] to access this.
6759 	KillTimeout time.Duration
6760
6761 	// Task Killed Fields.
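// (Editor's note) As with the other deprecated fields above, prefer the
// Details map when reading these values, e.g. ev.Details["kill_error"]
// rather than ev.KillError.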
6762 // Deprecated, use Details["kill_error"] to access this. 6763 KillError string // Error killing the task. 6764 6765 // KillReason is the reason the task was killed 6766 // Deprecated, use Details["kill_reason"] to access this. 6767 KillReason string 6768 6769 // TaskRestarting fields. 6770 // Deprecated, use Details["start_delay"] to access this. 6771 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 6772 6773 // Artifact Download fields 6774 // Deprecated, use Details["download_error"] to access this. 6775 DownloadError string // Error downloading artifacts 6776 6777 // Validation fields 6778 // Deprecated, use Details["validation_error"] to access this. 6779 ValidationError string // Validation error 6780 6781 // The maximum allowed task disk size. 6782 // Deprecated, use Details["disk_limit"] to access this. 6783 DiskLimit int64 6784 6785 // Name of the sibling task that caused termination of the task that 6786 // the TaskEvent refers to. 6787 // Deprecated, use Details["failed_sibling"] to access this. 6788 FailedSibling string 6789 6790 // VaultError is the error from token renewal 6791 // Deprecated, use Details["vault_renewal_error"] to access this. 6792 VaultError string 6793 6794 // TaskSignalReason indicates the reason the task is being signalled. 6795 // Deprecated, use Details["task_signal_reason"] to access this. 6796 TaskSignalReason string 6797 6798 // TaskSignal is the signal that was sent to the task 6799 // Deprecated, use Details["task_signal"] to access this. 6800 TaskSignal string 6801 6802 // DriverMessage indicates a driver action being taken. 6803 // Deprecated, use Details["driver_message"] to access this. 6804 DriverMessage string 6805 6806 // GenericSource is the source of a message. 6807 // Deprecated, is redundant with event type. 6808 GenericSource string 6809 } 6810 6811 func (event *TaskEvent) PopulateEventDisplayMessage() { 6812 // Build up the description based on the event type. 6813 if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why? 6814 return 6815 } 6816 6817 if event.DisplayMessage != "" { 6818 return 6819 } 6820 6821 var desc string 6822 switch event.Type { 6823 case TaskSetup: 6824 desc = event.Message 6825 case TaskStarted: 6826 desc = "Task started by client" 6827 case TaskReceived: 6828 desc = "Task received by client" 6829 case TaskFailedValidation: 6830 if event.ValidationError != "" { 6831 desc = event.ValidationError 6832 } else { 6833 desc = "Validation of task failed" 6834 } 6835 case TaskSetupFailure: 6836 if event.SetupError != "" { 6837 desc = event.SetupError 6838 } else { 6839 desc = "Task setup failed" 6840 } 6841 case TaskDriverFailure: 6842 if event.DriverError != "" { 6843 desc = event.DriverError 6844 } else { 6845 desc = "Failed to start task" 6846 } 6847 case TaskDownloadingArtifacts: 6848 desc = "Client is downloading artifacts" 6849 case TaskArtifactDownloadFailed: 6850 if event.DownloadError != "" { 6851 desc = event.DownloadError 6852 } else { 6853 desc = "Failed to download artifacts" 6854 } 6855 case TaskKilling: 6856 if event.KillReason != "" { 6857 desc = event.KillReason 6858 } else if event.KillTimeout != 0 { 6859 desc = fmt.Sprintf("Sent interrupt. 
Waiting %v before force killing", event.KillTimeout) 6860 } else { 6861 desc = "Sent interrupt" 6862 } 6863 case TaskKilled: 6864 if event.KillError != "" { 6865 desc = event.KillError 6866 } else { 6867 desc = "Task successfully killed" 6868 } 6869 case TaskTerminated: 6870 var parts []string 6871 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 6872 6873 if event.Signal != 0 { 6874 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 6875 } 6876 6877 if event.Message != "" { 6878 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 6879 } 6880 desc = strings.Join(parts, ", ") 6881 case TaskRestarting: 6882 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 6883 if event.RestartReason != "" && event.RestartReason != ReasonWithinPolicy { 6884 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 6885 } else { 6886 desc = in 6887 } 6888 case TaskNotRestarting: 6889 if event.RestartReason != "" { 6890 desc = event.RestartReason 6891 } else { 6892 desc = "Task exceeded restart policy" 6893 } 6894 case TaskSiblingFailed: 6895 if event.FailedSibling != "" { 6896 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 6897 } else { 6898 desc = "Task's sibling failed" 6899 } 6900 case TaskSignaling: 6901 sig := event.TaskSignal 6902 reason := event.TaskSignalReason 6903 6904 if sig == "" && reason == "" { 6905 desc = "Task being sent a signal" 6906 } else if sig == "" { 6907 desc = reason 6908 } else if reason == "" { 6909 desc = fmt.Sprintf("Task being sent signal %v", sig) 6910 } else { 6911 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 6912 } 6913 case TaskRestartSignal: 6914 if event.RestartReason != "" { 6915 desc = event.RestartReason 6916 } else { 6917 desc = "Task signaled to restart" 6918 } 6919 case TaskDriverMessage: 6920 desc = event.DriverMessage 6921 case TaskLeaderDead: 6922 desc = "Leader Task in Group dead" 6923 default: 6924 desc = event.Message 6925 } 6926 6927 event.DisplayMessage = desc 6928 } 6929 6930 func (te *TaskEvent) GoString() string { 6931 return fmt.Sprintf("%v - %v", te.Time, te.Type) 6932 } 6933 6934 // SetDisplayMessage sets the display message of TaskEvent 6935 func (te *TaskEvent) SetDisplayMessage(msg string) *TaskEvent { 6936 te.DisplayMessage = msg 6937 return te 6938 } 6939 6940 // SetMessage sets the message of TaskEvent 6941 func (te *TaskEvent) SetMessage(msg string) *TaskEvent { 6942 te.Message = msg 6943 te.Details["message"] = msg 6944 return te 6945 } 6946 6947 func (te *TaskEvent) Copy() *TaskEvent { 6948 if te == nil { 6949 return nil 6950 } 6951 copy := new(TaskEvent) 6952 *copy = *te 6953 return copy 6954 } 6955 6956 func NewTaskEvent(event string) *TaskEvent { 6957 return &TaskEvent{ 6958 Type: event, 6959 Time: time.Now().UnixNano(), 6960 Details: make(map[string]string), 6961 } 6962 } 6963 6964 // SetSetupError is used to store an error that occurred while setting up the 6965 // task 6966 func (e *TaskEvent) SetSetupError(err error) *TaskEvent { 6967 if err != nil { 6968 e.SetupError = err.Error() 6969 e.Details["setup_error"] = err.Error() 6970 } 6971 return e 6972 } 6973 6974 func (e *TaskEvent) SetFailsTask() *TaskEvent { 6975 e.FailsTask = true 6976 e.Details["fails_task"] = "true" 6977 return e 6978 } 6979 6980 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 6981 if err != nil { 6982 e.DriverError = err.Error() 6983 e.Details["driver_error"] = err.Error() 6984 } 6985 return e 6986 } 6987 6988 func (e *TaskEvent) SetExitCode(c 
int) *TaskEvent { 6989 e.ExitCode = c 6990 e.Details["exit_code"] = fmt.Sprintf("%d", c) 6991 return e 6992 } 6993 6994 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 6995 e.Signal = s 6996 e.Details["signal"] = fmt.Sprintf("%d", s) 6997 return e 6998 } 6999 7000 func (e *TaskEvent) SetSignalText(s string) *TaskEvent { 7001 e.Details["signal"] = s 7002 return e 7003 } 7004 7005 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 7006 if err != nil { 7007 e.Message = err.Error() 7008 e.Details["exit_message"] = err.Error() 7009 } 7010 return e 7011 } 7012 7013 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 7014 if err != nil { 7015 e.KillError = err.Error() 7016 e.Details["kill_error"] = err.Error() 7017 } 7018 return e 7019 } 7020 7021 func (e *TaskEvent) SetKillReason(r string) *TaskEvent { 7022 e.KillReason = r 7023 e.Details["kill_reason"] = r 7024 return e 7025 } 7026 7027 func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent { 7028 e.StartDelay = int64(delay) 7029 e.Details["start_delay"] = fmt.Sprintf("%d", delay) 7030 return e 7031 } 7032 7033 func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent { 7034 e.RestartReason = reason 7035 e.Details["restart_reason"] = reason 7036 return e 7037 } 7038 7039 func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent { 7040 e.TaskSignalReason = r 7041 e.Details["task_signal_reason"] = r 7042 return e 7043 } 7044 7045 func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent { 7046 e.TaskSignal = s.String() 7047 e.Details["task_signal"] = s.String() 7048 return e 7049 } 7050 7051 func (e *TaskEvent) SetDownloadError(err error) *TaskEvent { 7052 if err != nil { 7053 e.DownloadError = err.Error() 7054 e.Details["download_error"] = err.Error() 7055 } 7056 return e 7057 } 7058 7059 func (e *TaskEvent) SetValidationError(err error) *TaskEvent { 7060 if err != nil { 7061 e.ValidationError = err.Error() 7062 e.Details["validation_error"] = err.Error() 7063 } 7064 return e 7065 } 7066 7067 func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { 7068 e.KillTimeout = timeout 7069 e.Details["kill_timeout"] = timeout.String() 7070 return e 7071 } 7072 7073 func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent { 7074 e.DiskLimit = limit 7075 e.Details["disk_limit"] = fmt.Sprintf("%d", limit) 7076 return e 7077 } 7078 7079 func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent { 7080 e.FailedSibling = sibling 7081 e.Details["failed_sibling"] = sibling 7082 return e 7083 } 7084 7085 func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent { 7086 if err != nil { 7087 e.VaultError = err.Error() 7088 e.Details["vault_renewal_error"] = err.Error() 7089 } 7090 return e 7091 } 7092 7093 func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent { 7094 e.DriverMessage = m 7095 e.Details["driver_message"] = m 7096 return e 7097 } 7098 7099 func (e *TaskEvent) SetOOMKilled(oom bool) *TaskEvent { 7100 e.Details["oom_killed"] = strconv.FormatBool(oom) 7101 return e 7102 } 7103 7104 // TaskArtifact is an artifact to download before running the task. 7105 type TaskArtifact struct { 7106 // GetterSource is the source to download an artifact using go-getter 7107 GetterSource string 7108 7109 // GetterOptions are options to use when downloading the artifact using 7110 // go-getter. 7111 GetterOptions map[string]string 7112 7113 // GetterMode is the go-getter.ClientMode for fetching resources. 7114 // Defaults to "any" but can be set to "file" or "dir". 
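// e.g. GetterMode = GetterModeFile fetches a single file rather than a
// directory tree (editor's example, for clarity).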
7115 	GetterMode string
7116
7117 	// RelativeDest is the download destination given relative to the task's
7118 	// directory.
7119 	RelativeDest string
7120 }
7121
7122 func (ta *TaskArtifact) Copy() *TaskArtifact {
7123 	if ta == nil {
7124 		return nil
7125 	}
7126 	nta := new(TaskArtifact)
7127 	*nta = *ta
7128 	nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions)
7129 	return nta
7130 }
7131
7132 func (ta *TaskArtifact) GoString() string {
7133 	return fmt.Sprintf("%+v", ta)
7134 }
7135
7136 // Hash creates a unique identifier for a TaskArtifact as the same GetterSource
7137 // may be specified multiple times with different destinations.
7138 func (ta *TaskArtifact) Hash() string {
7139 	hash, err := blake2b.New256(nil)
7140 	if err != nil {
7141 		panic(err)
7142 	}
7143
7144 	hash.Write([]byte(ta.GetterSource))
7145
7146 	// Must iterate over keys in a consistent order
7147 	keys := make([]string, 0, len(ta.GetterOptions))
7148 	for k := range ta.GetterOptions {
7149 		keys = append(keys, k)
7150 	}
7151 	sort.Strings(keys)
7152 	for _, k := range keys {
7153 		hash.Write([]byte(k))
7154 		hash.Write([]byte(ta.GetterOptions[k]))
7155 	}
7156
7157 	hash.Write([]byte(ta.GetterMode))
7158 	hash.Write([]byte(ta.RelativeDest))
7159 	return base64.RawStdEncoding.EncodeToString(hash.Sum(nil))
7160 }
7161
7162 // PathEscapesAllocDir returns whether the given path escapes the allocation
7163 // directory. The prefix allows adding a prefix if the path will be joined, for
7164 // example a "task/local" prefix may be provided if the path will be joined
7165 // against that prefix.
7166 func PathEscapesAllocDir(prefix, path string) (bool, error) {
7167 	// Verify the destination doesn't escape the tasks directory
7168 	alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/"))
7169 	if err != nil {
7170 		return false, err
7171 	}
7172 	abs, err := filepath.Abs(filepath.Join(alloc, prefix, path))
7173 	if err != nil {
7174 		return false, err
7175 	}
7176 	rel, err := filepath.Rel(alloc, abs)
7177 	if err != nil {
7178 		return false, err
7179 	}
7180
7181 	return strings.HasPrefix(rel, ".."), nil
7182 }
7183
7184 func (ta *TaskArtifact) Validate() error {
7185 	// Verify the source
7186 	var mErr multierror.Error
7187 	if ta.GetterSource == "" {
7188 		mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified"))
7189 	}
7190
7191 	switch ta.GetterMode {
7192 	case "":
7193 		// Default to any
7194 		ta.GetterMode = GetterModeAny
7195 	case GetterModeAny, GetterModeFile, GetterModeDir:
7196 		// Ok
7197 	default:
7198 		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s",
7199 			ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir))
7200 	}
7201
7202 	escaped, err := PathEscapesAllocDir("task", ta.RelativeDest)
7203 	if err != nil {
7204 		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
7205 	} else if escaped {
7206 		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
7207 	}
7208
7209 	if err := ta.validateChecksum(); err != nil {
7210 		mErr.Errors = append(mErr.Errors, err)
7211 	}
7212
7213 	return mErr.ErrorOrNil()
7214 }
7215
7216 func (ta *TaskArtifact) validateChecksum() error {
7217 	check, ok := ta.GetterOptions["checksum"]
7218 	if !ok {
7219 		return nil
7220 	}
7221
7222 	// Job struct validation occurs before interpolation resolution can be effective.
7223 	// Skip checking if the checksum contains a variable reference; artifact fetching
7224 	// will eventually fail if the checksum is indeed invalid.
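// (Editor's note) The accepted format, enforced below, is "type:value", e.g.
// checksum = "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
// (32 hex chars for md5, 40 for sha1, 64 for sha256, 128 for sha512).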
7225 	if args.ContainsEnv(check) {
7226 		return nil
7227 	}
7228
7229 	check = strings.TrimSpace(check)
7230 	if check == "" {
7231 		return fmt.Errorf("checksum value cannot be empty")
7232 	}
7233
7234 	parts := strings.Split(check, ":")
7235 	if l := len(parts); l != 2 {
7236 		return fmt.Errorf(`checksum must be given as "type:value"; got %q`, check)
7237 	}
7238
7239 	checksumVal := parts[1]
7240 	checksumBytes, err := hex.DecodeString(checksumVal)
7241 	if err != nil {
7242 		return fmt.Errorf("invalid checksum: %v", err)
7243 	}
7244
7245 	checksumType := parts[0]
7246 	expectedLength := 0
7247 	switch checksumType {
7248 	case "md5":
7249 		expectedLength = md5.Size
7250 	case "sha1":
7251 		expectedLength = sha1.Size
7252 	case "sha256":
7253 		expectedLength = sha256.Size
7254 	case "sha512":
7255 		expectedLength = sha512.Size
7256 	default:
7257 		return fmt.Errorf("unsupported checksum type: %s", checksumType)
7258 	}
7259
7260 	if len(checksumBytes) != expectedLength {
7261 		return fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal)
7262 	}
7263
7264 	return nil
7265 }
7266
7267 const (
7268 	ConstraintDistinctProperty  = "distinct_property"
7269 	ConstraintDistinctHosts     = "distinct_hosts"
7270 	ConstraintRegex             = "regexp"
7271 	ConstraintVersion           = "version"
7272 	ConstraintSemver            = "semver"
7273 	ConstraintSetContains       = "set_contains"
7274 	ConstraintSetContainsAll    = "set_contains_all"
7275 	ConstraintSetContainsAny    = "set_contains_any"
7276 	ConstraintAttributeIsSet    = "is_set"
7277 	ConstraintAttributeIsNotSet = "is_not_set"
7278 )
7279
7280 // Constraints are used to restrict placement options.
7281 type Constraint struct {
7282 	LTarget string // Left-hand target
7283 	RTarget string // Right-hand target
7284 	Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near
7285 	str     string // Memoized string
7286 }
7287
7288 // Equals checks whether two constraints are equal
7289 func (c *Constraint) Equals(o *Constraint) bool {
7290 	return c == o ||
7291 		c.LTarget == o.LTarget &&
7292 			c.RTarget == o.RTarget &&
7293 			c.Operand == o.Operand
7294 }
7295
7296 func (c *Constraint) Equal(o *Constraint) bool {
7297 	return c.Equals(o)
7298 }
7299
7300 func (c *Constraint) Copy() *Constraint {
7301 	if c == nil {
7302 		return nil
7303 	}
7304 	nc := new(Constraint)
7305 	*nc = *c
7306 	return nc
7307 }
7308
7309 func (c *Constraint) String() string {
7310 	if c.str != "" {
7311 		return c.str
7312 	}
7313 	c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget)
7314 	return c.str
7315 }
7316
7317 func (c *Constraint) Validate() error {
7318 	var mErr multierror.Error
7319 	if c.Operand == "" {
7320 		mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand"))
7321 	}
7322
7323 	// requireLtarget specifies whether the constraint requires an LTarget to be
7324 	// provided.
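// (Editor's note) Of the operands handled below, only distinct_hosts clears
// this flag: it implicitly targets the placement host, so no LTarget is
// needed.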
7325 requireLtarget := true 7326 7327 // Perform additional validation based on operand 7328 switch c.Operand { 7329 case ConstraintDistinctHosts: 7330 requireLtarget = false 7331 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 7332 if c.RTarget == "" { 7333 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget")) 7334 } 7335 case ConstraintRegex: 7336 if _, err := regexp.Compile(c.RTarget); err != nil { 7337 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 7338 } 7339 case ConstraintVersion: 7340 if _, err := version.NewConstraint(c.RTarget); err != nil { 7341 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 7342 } 7343 case ConstraintSemver: 7344 if _, err := semver.NewConstraint(c.RTarget); err != nil { 7345 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver constraint is invalid: %v", err)) 7346 } 7347 case ConstraintDistinctProperty: 7348 // If a count is set, make sure it is convertible to a uint64 7349 if c.RTarget != "" { 7350 count, err := strconv.ParseUint(c.RTarget, 10, 64) 7351 if err != nil { 7352 mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err)) 7353 } else if count < 1 { 7354 mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count)) 7355 } 7356 } 7357 case ConstraintAttributeIsSet, ConstraintAttributeIsNotSet: 7358 if c.RTarget != "" { 7359 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q does not support an RTarget", c.Operand)) 7360 } 7361 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 7362 if c.RTarget == "" { 7363 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand)) 7364 } 7365 default: 7366 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand)) 7367 } 7368 7369 // Ensure we have an LTarget for the constraints that need one 7370 if requireLtarget && c.LTarget == "" { 7371 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint")) 7372 } 7373 7374 return mErr.ErrorOrNil() 7375 } 7376 7377 type Constraints []*Constraint 7378 7379 // Equals compares Constraints as a set 7380 func (xs *Constraints) Equals(ys *Constraints) bool { 7381 if xs == ys { 7382 return true 7383 } 7384 if xs == nil || ys == nil { 7385 return false 7386 } 7387 if len(*xs) != len(*ys) { 7388 return false 7389 } 7390 SETEQUALS: 7391 for _, x := range *xs { 7392 for _, y := range *ys { 7393 if x.Equals(y) { 7394 continue SETEQUALS 7395 } 7396 } 7397 return false 7398 } 7399 return true 7400 } 7401 7402 // Affinity is used to score placement options based on a weight 7403 type Affinity struct { 7404 LTarget string // Left-hand target 7405 RTarget string // Right-hand target 7406 Operand string // Affinity operand (<=, <, =, !=, >, >=), set_contains_all, set_contains_any 7407 Weight int8 // Weight applied to nodes that match the affinity. 
Can be negative
7408 	str     string // Memoized string
7409 }
7410
7411 // Equals checks whether two affinities are equal
7412 func (a *Affinity) Equals(o *Affinity) bool {
7413 	return a == o ||
7414 		a.LTarget == o.LTarget &&
7415 			a.RTarget == o.RTarget &&
7416 			a.Operand == o.Operand &&
7417 			a.Weight == o.Weight
7418 }
7419
7420 func (a *Affinity) Equal(o *Affinity) bool {
7421 	return a.Equals(o)
7422 }
7423
7424 func (a *Affinity) Copy() *Affinity {
7425 	if a == nil {
7426 		return nil
7427 	}
7428 	na := new(Affinity)
7429 	*na = *a
7430 	return na
7431 }
7432
7433 func (a *Affinity) String() string {
7434 	if a.str != "" {
7435 		return a.str
7436 	}
7437 	a.str = fmt.Sprintf("%s %s %s %v", a.LTarget, a.Operand, a.RTarget, a.Weight)
7438 	return a.str
7439 }
7440
7441 func (a *Affinity) Validate() error {
7442 	var mErr multierror.Error
7443 	if a.Operand == "" {
7444 		mErr.Errors = append(mErr.Errors, errors.New("Missing affinity operand"))
7445 	}
7446
7447 	// Perform additional validation based on operand
7448 	switch a.Operand {
7449 	case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains:
7450 		if a.RTarget == "" {
7451 			mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains operators require an RTarget"))
7452 		}
7453 	case ConstraintRegex:
7454 		if _, err := regexp.Compile(a.RTarget); err != nil {
7455 			mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err))
7456 		}
7457 	case ConstraintVersion:
7458 		if _, err := version.NewConstraint(a.RTarget); err != nil {
7459 			mErr.Errors = append(mErr.Errors, fmt.Errorf("Version affinity is invalid: %v", err))
7460 		}
7461 	case ConstraintSemver:
7462 		if _, err := semver.NewConstraint(a.RTarget); err != nil {
7463 			mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver affinity is invalid: %v", err))
7464 		}
7465 	case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=":
7466 		if a.RTarget == "" {
7467 			mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", a.Operand))
7468 		}
7469 	default:
7470 		mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown affinity operator %q", a.Operand))
7471 	}
7472
7473 	// Ensure we have an LTarget
7474 	if a.LTarget == "" {
7475 		mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required"))
7476 	}
7477
7478 	// Ensure that weight is between -100 and 100, and not zero
7479 	if a.Weight == 0 {
7480 		mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight cannot be zero"))
7481 	}
7482
7483 	if a.Weight > 100 || a.Weight < -100 {
7484 		mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight must be within the range [-100,100]"))
7485 	}
7486
7487 	return mErr.ErrorOrNil()
7488 }
7489
7490 // Spread is used to specify desired distribution of allocations according to weight
7491 type Spread struct {
7492 	// Attribute is the node attribute used as the spread criteria
7493 	Attribute string
7494
7495 	// Weight is the relative weight of this spread, useful when there are multiple
7496 	// spreads and affinities
7497 	Weight int8
7498
7499 	// SpreadTarget is used to describe desired percentages for each attribute value
7500 	SpreadTarget []*SpreadTarget
7501
7502 	// Memoized string representation
7503 	str string
7504 }
7505
7506 type Affinities []*Affinity
7507
7508 // Equals compares Affinities as a set
7509 func (xs *Affinities) Equals(ys *Affinities) bool {
7510 	if xs == ys {
7511 		return true
7512 	}
7513 	if xs == nil || ys == nil {
7514 		return false
7515 	}
7516 	if len(*xs) != len(*ys) {
7517 		return false
7518 	}
7519 SETEQUALS:
7520 	for _, x := range *xs
{
7521 		for _, y := range *ys {
7522 			if x.Equals(y) {
7523 				continue SETEQUALS
7524 			}
7525 		}
7526 		return false
7527 	}
7528 	return true
7529 }
7530
7531 func (s *Spread) Copy() *Spread {
7532 	if s == nil {
7533 		return nil
7534 	}
7535 	ns := new(Spread)
7536 	*ns = *s
7537
7538 	ns.SpreadTarget = CopySliceSpreadTarget(s.SpreadTarget)
7539 	return ns
7540 }
7541
7542 func (s *Spread) String() string {
7543 	if s.str != "" {
7544 		return s.str
7545 	}
7546 	s.str = fmt.Sprintf("%s %s %v", s.Attribute, s.SpreadTarget, s.Weight)
7547 	return s.str
7548 }
7549
7550 func (s *Spread) Validate() error {
7551 	var mErr multierror.Error
7552 	if s.Attribute == "" {
7553 		mErr.Errors = append(mErr.Errors, errors.New("Missing spread attribute"))
7554 	}
7555 	if s.Weight <= 0 || s.Weight > 100 {
7556 		mErr.Errors = append(mErr.Errors, errors.New("Spread stanza must have a positive weight from 1 to 100"))
7557 	}
7558 	seen := make(map[string]struct{})
7559 	sumPercent := uint32(0)
7560
7561 	for _, target := range s.SpreadTarget {
7562 		// Make sure there are no duplicates
7563 		_, ok := seen[target.Value]
7564 		if !ok {
7565 			seen[target.Value] = struct{}{}
7566 		} else {
7567 			mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target value %q already defined", target.Value))
7568 		}
7569 		if target.Percent < 0 || target.Percent > 100 {
7570 			mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target percentage for value %q must be between 0 and 100", target.Value))
7571 		}
7572 		sumPercent += uint32(target.Percent)
7573 	}
7574 	if sumPercent > 100 {
7575 		mErr.Errors = append(mErr.Errors, fmt.Errorf("Sum of spread target percentages must not be greater than 100%%; got %d%%", sumPercent))
7576 	}
7577 	return mErr.ErrorOrNil()
7578 }
7579
7580 // SpreadTarget is used to specify desired percentages for each attribute value
7581 type SpreadTarget struct {
7582 	// Value is a single attribute value, like "dc1"
7583 	Value string
7584
7585 	// Percent is the desired percentage of allocs
7586 	Percent uint8
7587
7588 	// Memoized string representation
7589 	str string
7590 }
7591
7592 func (s *SpreadTarget) Copy() *SpreadTarget {
7593 	if s == nil {
7594 		return nil
7595 	}
7596
7597 	ns := new(SpreadTarget)
7598 	*ns = *s
7599 	return ns
7600 }
7601
7602 func (s *SpreadTarget) String() string {
7603 	if s.str != "" {
7604 		return s.str
7605 	}
7606 	s.str = fmt.Sprintf("%q %v%%", s.Value, s.Percent)
7607 	return s.str
7608 }
7609
7610 // EphemeralDisk is an ephemeral disk object
7611 type EphemeralDisk struct {
7612 	// Sticky indicates whether the allocation is sticky to a node
7613 	Sticky bool
7614
7615 	// SizeMB is the size of the local disk
7616 	SizeMB int
7617
7618 	// Migrate determines if Nomad client should migrate the allocation dir for
7619 	// sticky allocations
7620 	Migrate bool
7621 }
7622
7623 // DefaultEphemeralDisk returns an EphemeralDisk with default configurations
7624 func DefaultEphemeralDisk() *EphemeralDisk {
7625 	return &EphemeralDisk{
7626 		SizeMB: 300,
7627 	}
7628 }
7629
7630 // Validate validates EphemeralDisk
7631 func (d *EphemeralDisk) Validate() error {
7632 	if d.SizeMB < 10 {
7633 		return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB)
7634 	}
7635 	return nil
7636 }
7637
7638 // Copy copies the EphemeralDisk struct and returns a new one
7639 func (d *EphemeralDisk) Copy() *EphemeralDisk {
7640 	ld := new(EphemeralDisk)
7641 	*ld = *d
7642 	return ld
7643 }
7644
7645 var (
7646 	// VaultUnrecoverableError matches unrecoverable errors returned by a Vault
7647 	// server
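// (Editor's note: i.e. error bodies containing "Code: 400", "Code: 403", or
// "Code: 404", per the regexp below.)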
7648 	VaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`)
7649 )
7650
7651 const (
7652 	// VaultChangeModeNoop takes no action when a new token is retrieved.
7653 	VaultChangeModeNoop = "noop"
7654
7655 	// VaultChangeModeSignal signals the task when a new token is retrieved.
7656 	VaultChangeModeSignal = "signal"
7657
7658 	// VaultChangeModeRestart restarts the task when a new token is retrieved.
7659 	VaultChangeModeRestart = "restart"
7660 )
7661
7662 // Vault stores the set of permissions a task needs access to from Vault.
7663 type Vault struct {
7664 	// Policies is the set of policies that the task needs access to
7665 	Policies []string
7666
7667 	// Env marks whether the Vault Token should be exposed as an environment
7668 	// variable
7669 	Env bool
7670
7671 	// ChangeMode is used to configure the task's behavior when the Vault
7672 	// token changes because the original token could not be renewed in time.
7673 	ChangeMode string
7674
7675 	// ChangeSignal is the signal sent to the task when a new token is
7676 	// retrieved. This is only valid when using the signal change mode.
7677 	ChangeSignal string
7678 }
7679
7680 func DefaultVaultBlock() *Vault {
7681 	return &Vault{
7682 		Env:        true,
7683 		ChangeMode: VaultChangeModeRestart,
7684 	}
7685 }
7686
7687 // Copy returns a copy of this Vault block.
7688 func (v *Vault) Copy() *Vault {
7689 	if v == nil {
7690 		return nil
7691 	}
7692
7693 	nv := new(Vault)
7694 	*nv = *v
7695 	return nv
7696 }
7697
7698 func (v *Vault) Canonicalize() {
7699 	if v.ChangeSignal != "" {
7700 		v.ChangeSignal = strings.ToUpper(v.ChangeSignal)
7701 	}
7702 }
7703
7704 // Validate returns an error if the Vault block is invalid.
7705 func (v *Vault) Validate() error {
7706 	if v == nil {
7707 		return nil
7708 	}
7709
7710 	var mErr multierror.Error
7711 	if len(v.Policies) == 0 {
7712 		multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty"))
7713 	}
7714
7715 	for _, p := range v.Policies {
7716 		if p == "root" {
7717 			multierror.Append(&mErr, fmt.Errorf("Cannot specify \"root\" policy"))
7718 		}
7719 	}
7720
7721 	switch v.ChangeMode {
7722 	case VaultChangeModeSignal:
7723 		if v.ChangeSignal == "" {
7724 			multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal))
7725 		}
7726 	case VaultChangeModeNoop, VaultChangeModeRestart:
7727 	default:
7728 		multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode))
7729 	}
7730
7731 	return mErr.ErrorOrNil()
7732 }
7733
7734 const (
7735 	// DeploymentStatuses are the various states a deployment can be in
7736 	DeploymentStatusRunning    = "running"
7737 	DeploymentStatusPaused     = "paused"
7738 	DeploymentStatusFailed     = "failed"
7739 	DeploymentStatusSuccessful = "successful"
7740 	DeploymentStatusCancelled  = "cancelled"
7741
7742 	// TODO Statuses and Descriptions do not match 1:1 and we sometimes use the Description as a status flag
7743
7744 	// DeploymentStatusDescriptions are the various descriptions of the states a
7745 	// deployment can be in.
7746 DeploymentStatusDescriptionRunning = "Deployment is running" 7747 DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires manual promotion" 7748 DeploymentStatusDescriptionRunningAutoPromotion = "Deployment is running pending automatic promotion" 7749 DeploymentStatusDescriptionPaused = "Deployment is paused" 7750 DeploymentStatusDescriptionSuccessful = "Deployment completed successfully" 7751 DeploymentStatusDescriptionStoppedJob = "Cancelled because job is stopped" 7752 DeploymentStatusDescriptionNewerJob = "Cancelled due to newer version of job" 7753 DeploymentStatusDescriptionFailedAllocations = "Failed due to unhealthy allocations" 7754 DeploymentStatusDescriptionProgressDeadline = "Failed due to progress deadline" 7755 DeploymentStatusDescriptionFailedByUser = "Deployment marked as failed" 7756 ) 7757 7758 // DeploymentStatusDescriptionRollback is used to get the status description of 7759 // a deployment when rolling back to an older job. 7760 func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string { 7761 return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion) 7762 } 7763 7764 // DeploymentStatusDescriptionRollbackNoop is used to get the status description of 7765 // a deployment when rolling back is not possible because it has the same specification 7766 func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string { 7767 return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion) 7768 } 7769 7770 // DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of 7771 // a deployment when there is no target to rollback to but autorevert is desired. 7772 func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string { 7773 return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription) 7774 } 7775 7776 // Deployment is the object that represents a job deployment which is used to 7777 // transition a job between versions. 7778 type Deployment struct { 7779 // ID is a generated UUID for the deployment 7780 ID string 7781 7782 // Namespace is the namespace the deployment is created in 7783 Namespace string 7784 7785 // JobID is the job the deployment is created for 7786 JobID string 7787 7788 // JobVersion is the version of the job that the deployment is tracking 7789 JobVersion uint64 7790 7791 // JobModifyIndex is the ModifyIndex of the job which the deployment is 7792 // tracking. 7793 JobModifyIndex uint64 7794 7795 // JobSpecModifyIndex is the JobModifyIndex of the job which the 7796 // deployment is tracking. 7797 JobSpecModifyIndex uint64 7798 7799 // JobCreateIndex is the create index of the job which the deployment is 7800 // tracking. It is needed so that if the job gets stopped and rerun we can 7801 // present the correct list of deployments for the job and not old ones. 7802 JobCreateIndex uint64 7803 7804 // TaskGroups is the set of task groups affected by the deployment and their 7805 // current deployment status. 7806 TaskGroups map[string]*DeploymentState 7807 7808 // The status of the deployment 7809 Status string 7810 7811 // StatusDescription allows a human readable description of the deployment 7812 // status. 7813 StatusDescription string 7814 7815 CreateIndex uint64 7816 ModifyIndex uint64 7817 } 7818 7819 // NewDeployment creates a new deployment given the job.
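// A minimal caller-side sketch (hypothetical job value):
//
//	job := &Job{ID: "web", Namespace: DefaultNamespace, Version: 3}
//	d := NewDeployment(job)
//	// d.Status == DeploymentStatusRunning; d.TaskGroups starts as an
//	// empty, pre-sized map that the scheduler fills with one
//	// DeploymentState per task group.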
7820 func NewDeployment(job *Job) *Deployment { 7821 return &Deployment{ 7822 ID: uuid.Generate(), 7823 Namespace: job.Namespace, 7824 JobID: job.ID, 7825 JobVersion: job.Version, 7826 JobModifyIndex: job.ModifyIndex, 7827 JobSpecModifyIndex: job.JobModifyIndex, 7828 JobCreateIndex: job.CreateIndex, 7829 Status: DeploymentStatusRunning, 7830 StatusDescription: DeploymentStatusDescriptionRunning, 7831 TaskGroups: make(map[string]*DeploymentState, len(job.TaskGroups)), 7832 } 7833 } 7834 7835 func (d *Deployment) Copy() *Deployment { 7836 if d == nil { 7837 return nil 7838 } 7839 7840 c := &Deployment{} 7841 *c = *d 7842 7843 c.TaskGroups = nil 7844 if d.TaskGroups != nil { 7845 c.TaskGroups = make(map[string]*DeploymentState, len(d.TaskGroups)) 7846 for tg, s := range d.TaskGroups { 7847 c.TaskGroups[tg] = s.Copy() 7848 } 7849 } 7850 7851 return c 7852 } 7853 7854 // Active returns whether the deployment is still active (running or paused) as opposed to terminal. 7855 func (d *Deployment) Active() bool { 7856 switch d.Status { 7857 case DeploymentStatusRunning, DeploymentStatusPaused: 7858 return true 7859 default: 7860 return false 7861 } 7862 } 7863 7864 // GetID is a helper for getting the ID when the object may be nil 7865 func (d *Deployment) GetID() string { 7866 if d == nil { 7867 return "" 7868 } 7869 return d.ID 7870 } 7871 7872 // HasPlacedCanaries returns whether the deployment has placed canaries 7873 func (d *Deployment) HasPlacedCanaries() bool { 7874 if d == nil || len(d.TaskGroups) == 0 { 7875 return false 7876 } 7877 for _, group := range d.TaskGroups { 7878 if len(group.PlacedCanaries) != 0 { 7879 return true 7880 } 7881 } 7882 return false 7883 } 7884 7885 // RequiresPromotion returns whether the deployment requires promotion to 7886 // continue 7887 func (d *Deployment) RequiresPromotion() bool { 7888 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 7889 return false 7890 } 7891 for _, group := range d.TaskGroups { 7892 if group.DesiredCanaries > 0 && !group.Promoted { 7893 return true 7894 } 7895 } 7896 return false 7897 } 7898 7899 // HasAutoPromote determines if all task groups are marked auto_promote 7900 func (d *Deployment) HasAutoPromote() bool { 7901 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 7902 return false 7903 } 7904 for _, group := range d.TaskGroups { 7905 if !group.AutoPromote { 7906 return false 7907 } 7908 } 7909 return true 7910 } 7911 7912 func (d *Deployment) GoString() string { 7913 base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription) 7914 for group, state := range d.TaskGroups { 7915 base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state) 7916 } 7917 return base 7918 } 7919 7920 // DeploymentState tracks the state of a deployment for a given task group. 7921 type DeploymentState struct { 7922 // AutoRevert marks whether the task group has indicated the job should be 7923 // reverted on failure 7924 AutoRevert bool 7925 7926 // AutoPromote marks promotion triggered automatically by healthy canaries 7927 // copied from TaskGroup UpdateStrategy in scheduler.reconcile 7928 AutoPromote bool 7929 7930 // ProgressDeadline is the deadline by which an allocation must transition 7931 // to healthy before the deployment is considered failed. 7932 ProgressDeadline time.Duration 7933 7934 // RequireProgressBy is the time by which an allocation must transition 7935 // to healthy before the deployment is considered failed.
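// For example (illustrative; the scheduler maintains this value): with a
// ProgressDeadline of 10 minutes, a deployment whose latest placement
// happened at time t0 would be expected to have RequireProgressBy near
// t0.Add(10 * time.Minute).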
7936 RequireProgressBy time.Time 7937 7938 // Promoted marks whether the canaries have been promoted 7939 Promoted bool 7940 7941 // PlacedCanaries is the set of placed canary allocations 7942 PlacedCanaries []string 7943 7944 // DesiredCanaries is the number of canaries that should be created. 7945 DesiredCanaries int 7946 7947 // DesiredTotal is the total number of allocations that should be created as 7948 // part of the deployment. 7949 DesiredTotal int 7950 7951 // PlacedAllocs is the number of allocations that have been placed 7952 PlacedAllocs int 7953 7954 // HealthyAllocs is the number of allocations that have been marked healthy. 7955 HealthyAllocs int 7956 7957 // UnhealthyAllocs are allocations that have been marked as unhealthy. 7958 UnhealthyAllocs int 7959 } 7960 7961 func (d *DeploymentState) GoString() string { 7962 base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal) 7963 base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries) 7964 base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries) 7965 base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted) 7966 base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs) 7967 base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs) 7968 base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs) 7969 base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert) 7970 base += fmt.Sprintf("\n\tAutoPromote: %v", d.AutoPromote) 7971 return base 7972 } 7973 7974 func (d *DeploymentState) Copy() *DeploymentState { 7975 c := &DeploymentState{} 7976 *c = *d 7977 c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries) 7978 return c 7979 } 7980 7981 // DeploymentStatusUpdate is used to update the status of a given deployment 7982 type DeploymentStatusUpdate struct { 7983 // DeploymentID is the ID of the deployment to update 7984 DeploymentID string 7985 7986 // Status is the new status of the deployment. 7987 Status string 7988 7989 // StatusDescription is the new status description of the deployment. 
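// A sketch of a typical update (hypothetical deployment d):
//
//	u := &DeploymentStatusUpdate{
//		DeploymentID:      d.ID,
//		Status:            DeploymentStatusFailed,
//		StatusDescription: DeploymentStatusDescriptionProgressDeadline,
//	}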
7990 StatusDescription string 7991 } 7992 7993 // RescheduleTracker encapsulates previous reschedule events 7994 type RescheduleTracker struct { 7995 Events []*RescheduleEvent 7996 } 7997 7998 func (rt *RescheduleTracker) Copy() *RescheduleTracker { 7999 if rt == nil { 8000 return nil 8001 } 8002 nt := &RescheduleTracker{} 8003 *nt = *rt 8004 rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events)) 8005 for _, tracker := range rt.Events { 8006 rescheduleEvents = append(rescheduleEvents, tracker.Copy()) 8007 } 8008 nt.Events = rescheduleEvents 8009 return nt 8010 } 8011 8012 // RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation 8013 type RescheduleEvent struct { 8014 // RescheduleTime is the timestamp of a reschedule attempt 8015 RescheduleTime int64 8016 8017 // PrevAllocID is the ID of the previous allocation being restarted 8018 PrevAllocID string 8019 8020 // PrevNodeID is the node ID of the previous allocation 8021 PrevNodeID string 8022 8023 // Delay is the reschedule delay associated with the attempt 8024 Delay time.Duration 8025 } 8026 8027 func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string, delay time.Duration) *RescheduleEvent { 8028 return &RescheduleEvent{RescheduleTime: rescheduleTime, 8029 PrevAllocID: prevAllocID, 8030 PrevNodeID: prevNodeID, 8031 Delay: delay} 8032 } 8033 8034 func (re *RescheduleEvent) Copy() *RescheduleEvent { 8035 if re == nil { 8036 return nil 8037 } 8038 copy := new(RescheduleEvent) 8039 *copy = *re 8040 return copy 8041 } 8042 8043 // DesiredTransition is used to mark an allocation as having a desired state 8044 // transition. This information can be used by the scheduler to make the 8045 // correct decision. 8046 type DesiredTransition struct { 8047 // Migrate is used to indicate that this allocation should be stopped and 8048 // migrated to another node. 8049 Migrate *bool 8050 8051 // Reschedule is used to indicate that this allocation is eligible to be 8052 // rescheduled. Most allocations are automatically eligible for 8053 // rescheduling, so this field is only required when an allocation is not 8054 // automatically eligible. An example is an allocation that is part of a 8055 // deployment. 8056 Reschedule *bool 8057 8058 // ForceReschedule is used to indicate that this allocation must be rescheduled. 8059 // This field is only used when operators want to force a placement even if 8060 // a failed allocation is not eligible to be rescheduled 8061 ForceReschedule *bool 8062 } 8063 8064 // Merge merges the two desired transitions, preferring the values from the 8065 // passed in object. 8066 func (d *DesiredTransition) Merge(o *DesiredTransition) { 8067 if o.Migrate != nil { 8068 d.Migrate = o.Migrate 8069 } 8070 8071 if o.Reschedule != nil { 8072 d.Reschedule = o.Reschedule 8073 } 8074 8075 if o.ForceReschedule != nil { 8076 d.ForceReschedule = o.ForceReschedule 8077 } 8078 } 8079 8080 // ShouldMigrate returns whether the transition object dictates a migration. 8081 func (d *DesiredTransition) ShouldMigrate() bool { 8082 return d.Migrate != nil && *d.Migrate 8083 } 8084 8085 // ShouldReschedule returns whether the transition object dictates a 8086 // rescheduling. 8087 func (d *DesiredTransition) ShouldReschedule() bool { 8088 return d.Reschedule != nil && *d.Reschedule 8089 } 8090 8091 // ShouldForceReschedule returns whether the transition object dictates a 8092 // forced rescheduling. 
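// For example (hypothetical operator-driven transition on an alloc pointer):
//
//	force := true
//	alloc.DesiredTransition.Merge(&DesiredTransition{ForceReschedule: &force})
//	_ = alloc.DesiredTransition.ShouldForceReschedule() // true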
8093 func (d *DesiredTransition) ShouldForceReschedule() bool { 8094 if d == nil { 8095 return false 8096 } 8097 return d.ForceReschedule != nil && *d.ForceReschedule 8098 } 8099 8100 const ( 8101 AllocDesiredStatusRun = "run" // Allocation should run 8102 AllocDesiredStatusStop = "stop" // Allocation should stop 8103 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 8104 ) 8105 8106 const ( 8107 AllocClientStatusPending = "pending" 8108 AllocClientStatusRunning = "running" 8109 AllocClientStatusComplete = "complete" 8110 AllocClientStatusFailed = "failed" 8111 AllocClientStatusLost = "lost" 8112 ) 8113 8114 // Allocation is used to allocate the placement of a task group to a node. 8115 type Allocation struct { 8116 // msgpack omit empty fields during serialization 8117 _struct bool `codec:",omitempty"` // nolint: structcheck 8118 8119 // ID of the allocation (UUID) 8120 ID string 8121 8122 // Namespace is the namespace the allocation is created in 8123 Namespace string 8124 8125 // ID of the evaluation that generated this allocation 8126 EvalID string 8127 8128 // Name is a logical name of the allocation. 8129 Name string 8130 8131 // NodeID is the node this is being placed on 8132 NodeID string 8133 8134 // NodeName is the name of the node this is being placed on. 8135 NodeName string 8136 8137 // Job is the parent job of the task group being allocated. 8138 // This is copied at allocation time to avoid issues if the job 8139 // definition is updated. 8140 JobID string 8141 Job *Job 8142 8143 // TaskGroup is the name of the task group that should be run 8144 TaskGroup string 8145 8146 // COMPAT(0.11): Remove in 0.11 8147 // Resources is the total set of resources allocated as part 8148 // of this allocation of the task group. Dynamic ports will be set by 8149 // the scheduler. 8150 Resources *Resources 8151 8152 // SharedResources are the resources that are shared by all the tasks in an 8153 // allocation 8154 // Deprecated: use AllocatedResources.Shared instead. 8155 // Keep field to allow us to handle upgrade paths from old versions 8156 SharedResources *Resources 8157 8158 // TaskResources is the set of resources allocated to each 8159 // task. These should sum to the total Resources. Dynamic ports will be 8160 // set by the scheduler. 8161 // Deprecated: use AllocatedResources.Tasks instead. 8162 // Keep field to allow us to handle upgrade paths from old versions 8163 TaskResources map[string]*Resources 8164 8165 // AllocatedResources is the total resources allocated for the task group. 8166 AllocatedResources *AllocatedResources 8167 8168 // Metrics associated with this allocation 8169 Metrics *AllocMetric 8170 8171 // Desired Status of the allocation on the client 8172 DesiredStatus string 8173 8174 // DesiredDescription provides a human-friendly description of the DesiredStatus 8175 DesiredDescription string 8176 8177 // DesiredTransition is used to indicate that a state transition 8178 // is desired for a given reason.
8179 DesiredTransition DesiredTransition 8180 8181 // Status of the allocation on the client 8182 ClientStatus string 8183 8184 // ClientDescription provides a human-friendly description of the ClientStatus 8185 ClientDescription string 8186 8187 // TaskStates stores the state of each task. 8188 TaskStates map[string]*TaskState 8189 8190 // AllocStates track metadata associated with changes to the state of the whole allocation, like becoming lost 8191 AllocStates []*AllocState 8192 8193 // PreviousAllocation is the allocation that this allocation is replacing 8194 PreviousAllocation string 8195 8196 // NextAllocation is the allocation that this allocation is being replaced by 8197 NextAllocation string 8198 8199 // DeploymentID identifies an allocation as being created from a 8200 // particular deployment 8201 DeploymentID string 8202 8203 // DeploymentStatus captures the status of the allocation as part of the 8204 // given deployment 8205 DeploymentStatus *AllocDeploymentStatus 8206 8207 // RescheduleTracker captures details of previous reschedule attempts of the allocation 8208 RescheduleTracker *RescheduleTracker 8209 8210 // FollowupEvalID captures a follow up evaluation created to handle a failed allocation 8211 // that can be rescheduled in the future 8212 FollowupEvalID string 8213 8214 // PreemptedAllocations captures IDs of any allocations that were preempted 8215 // in order to place this allocation 8216 PreemptedAllocations []string 8217 8218 // PreemptedByAllocation tracks the alloc ID of the allocation that caused this allocation 8219 // to stop running because it got preempted 8220 PreemptedByAllocation string 8221 8222 // Raft Indexes 8223 CreateIndex uint64 8224 ModifyIndex uint64 8225 8226 // AllocModifyIndex is not updated when the client updates allocations. This 8227 // lets the client pull only the allocs updated by the server. 8228 AllocModifyIndex uint64 8229 8230 // CreateTime is the time the allocation has finished scheduling and been 8231 // verified by the plan applier. 8232 CreateTime int64 8233 8234 // ModifyTime is the time the allocation was last updated. 8235 ModifyTime int64 8236 } 8237 8238 // Index returns the index of the allocation. If the allocation is from a task 8239 // group with count greater than 1, there will be multiple allocations for it. 8240 func (a *Allocation) Index() uint { 8241 l := len(a.Name) 8242 prefix := len(a.JobID) + len(a.TaskGroup) + 2 8243 if l <= 3 || l <= prefix { 8244 return uint(0) 8245 } 8246 8247 strNum := a.Name[prefix : len(a.Name)-1] 8248 num, _ := strconv.Atoi(strNum) 8249 return uint(num) 8250 } 8251 8252 // Copy provides a copy of the allocation and deep copies the job 8253 func (a *Allocation) Copy() *Allocation { 8254 return a.copyImpl(true) 8255 } 8256 8257 // CopySkipJob provides a copy of the allocation but doesn't deep copy the job 8258 func (a *Allocation) CopySkipJob() *Allocation { 8259 return a.copyImpl(false) 8260 } 8261 8262 // Canonicalize Allocation to ensure fields are initialized to the expectations 8263 // of this version of Nomad. Should be called when restoring persisted 8264 // Allocations or receiving Allocations from Nomad agents potentially on an 8265 // older version of Nomad.
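// A sketch of the upgrade path it performs (illustrative values; assumes
// a.Job is set so the trailing Job.Canonicalize call is safe):
//
//	a := &Allocation{
//		Job:           &Job{},
//		TaskResources: map[string]*Resources{"web": {CPU: 500, MemoryMB: 256}},
//	}
//	a.Canonicalize()
//	// a.AllocatedResources.Tasks["web"].Cpu.CpuShares == 500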
8266 func (a *Allocation) Canonicalize() { 8267 if a.AllocatedResources == nil && a.TaskResources != nil { 8268 ar := AllocatedResources{} 8269 8270 tasks := make(map[string]*AllocatedTaskResources, len(a.TaskResources)) 8271 for name, tr := range a.TaskResources { 8272 atr := AllocatedTaskResources{} 8273 atr.Cpu.CpuShares = int64(tr.CPU) 8274 atr.Memory.MemoryMB = int64(tr.MemoryMB) 8275 atr.Networks = tr.Networks.Copy() 8276 8277 tasks[name] = &atr 8278 } 8279 ar.Tasks = tasks 8280 8281 if a.SharedResources != nil { 8282 ar.Shared.DiskMB = int64(a.SharedResources.DiskMB) 8283 ar.Shared.Networks = a.SharedResources.Networks.Copy() 8284 } 8285 8286 a.AllocatedResources = &ar 8287 } 8288 8289 a.Job.Canonicalize() 8290 } 8291 8292 func (a *Allocation) copyImpl(job bool) *Allocation { 8293 if a == nil { 8294 return nil 8295 } 8296 na := new(Allocation) 8297 *na = *a 8298 8299 if job { 8300 na.Job = na.Job.Copy() 8301 } 8302 8303 na.AllocatedResources = na.AllocatedResources.Copy() 8304 na.Resources = na.Resources.Copy() 8305 na.SharedResources = na.SharedResources.Copy() 8306 8307 if a.TaskResources != nil { 8308 tr := make(map[string]*Resources, len(na.TaskResources)) 8309 for task, resource := range na.TaskResources { 8310 tr[task] = resource.Copy() 8311 } 8312 na.TaskResources = tr 8313 } 8314 8315 na.Metrics = na.Metrics.Copy() 8316 na.DeploymentStatus = na.DeploymentStatus.Copy() 8317 8318 if a.TaskStates != nil { 8319 ts := make(map[string]*TaskState, len(na.TaskStates)) 8320 for task, state := range na.TaskStates { 8321 ts[task] = state.Copy() 8322 } 8323 na.TaskStates = ts 8324 } 8325 8326 na.RescheduleTracker = a.RescheduleTracker.Copy() 8327 na.PreemptedAllocations = helper.CopySliceString(a.PreemptedAllocations) 8328 return na 8329 } 8330 8331 // TerminalStatus returns if the desired or actual status is terminal and 8332 // will no longer transition. 8333 func (a *Allocation) TerminalStatus() bool { 8334 // First check the desired state and if that isn't terminal, check client 8335 // state. 
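// For example, a DesiredStatus of "stop" is terminal even while the client
// still reports "running", and a ClientStatus of "failed" is terminal even
// if DesiredStatus remains "run".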
8336 return a.ServerTerminalStatus() || a.ClientTerminalStatus() 8337 } 8338 8339 // ServerTerminalStatus returns true if the desired state of the allocation is terminal 8340 func (a *Allocation) ServerTerminalStatus() bool { 8341 switch a.DesiredStatus { 8342 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 8343 return true 8344 default: 8345 return false 8346 } 8347 } 8348 8349 // ClientTerminalStatus returns if the client status is terminal and will no longer transition 8350 func (a *Allocation) ClientTerminalStatus() bool { 8351 switch a.ClientStatus { 8352 case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost: 8353 return true 8354 default: 8355 return false 8356 } 8357 } 8358 8359 // ShouldReschedule returns if the allocation is eligible to be rescheduled according 8360 // to its status and ReschedulePolicy given its failure time 8361 func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 8362 // First check the desired state 8363 switch a.DesiredStatus { 8364 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 8365 return false 8366 default: 8367 } 8368 switch a.ClientStatus { 8369 case AllocClientStatusFailed: 8370 return a.RescheduleEligible(reschedulePolicy, failTime) 8371 default: 8372 return false 8373 } 8374 } 8375 8376 // RescheduleEligible returns if the allocation is eligible to be rescheduled according 8377 // to its ReschedulePolicy and the current state of its reschedule trackers 8378 func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 8379 if reschedulePolicy == nil { 8380 return false 8381 } 8382 attempts := reschedulePolicy.Attempts 8383 interval := reschedulePolicy.Interval 8384 enabled := attempts > 0 || reschedulePolicy.Unlimited 8385 if !enabled { 8386 return false 8387 } 8388 if reschedulePolicy.Unlimited { 8389 return true 8390 } 8391 // Early return true if there are no attempts yet and the number of allowed attempts is > 0 8392 if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 { 8393 return true 8394 } 8395 attempted := 0 8396 for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- { 8397 lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime 8398 timeDiff := failTime.UTC().UnixNano() - lastAttempt 8399 if timeDiff < interval.Nanoseconds() { 8400 attempted += 1 8401 } 8402 } 8403 return attempted < attempts 8404 } 8405 8406 // LastEventTime is the time of the last task event in the allocation. 8407 // It is used to determine allocation failure time. 
If the FinishedAt field 8408 // is not set, the alloc's modify time is used 8409 func (a *Allocation) LastEventTime() time.Time { 8410 var lastEventTime time.Time 8411 if a.TaskStates != nil { 8412 for _, s := range a.TaskStates { 8413 if lastEventTime.IsZero() || s.FinishedAt.After(lastEventTime) { 8414 lastEventTime = s.FinishedAt 8415 } 8416 } 8417 } 8418 8419 if lastEventTime.IsZero() { 8420 return time.Unix(0, a.ModifyTime).UTC() 8421 } 8422 return lastEventTime 8423 } 8424 8425 // ReschedulePolicy returns the reschedule policy based on the task group 8426 func (a *Allocation) ReschedulePolicy() *ReschedulePolicy { 8427 tg := a.Job.LookupTaskGroup(a.TaskGroup) 8428 if tg == nil { 8429 return nil 8430 } 8431 return tg.ReschedulePolicy 8432 } 8433 8434 // NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled, 8435 // and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules 8436 func (a *Allocation) NextRescheduleTime() (time.Time, bool) { 8437 failTime := a.LastEventTime() 8438 reschedulePolicy := a.ReschedulePolicy() 8439 if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil { 8440 return time.Time{}, false 8441 } 8442 8443 nextDelay := a.NextDelay() 8444 nextRescheduleTime := failTime.Add(nextDelay) 8445 rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil) 8446 if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil { 8447 // Check for eligibility based on the interval if max attempts is set 8448 attempted := 0 8449 for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- { 8450 lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime 8451 timeDiff := failTime.UTC().UnixNano() - lastAttempt 8452 if timeDiff < reschedulePolicy.Interval.Nanoseconds() { 8453 attempted += 1 8454 } 8455 } 8456 rescheduleEligible = attempted < reschedulePolicy.Attempts && nextDelay < reschedulePolicy.Interval 8457 } 8458 return nextRescheduleTime, rescheduleEligible 8459 } 8460 8461 // ShouldClientStop tests an alloc for StopAfterClientDisconnect configuration 8462 func (a *Allocation) ShouldClientStop() bool { 8463 tg := a.Job.LookupTaskGroup(a.TaskGroup) 8464 if tg == nil || 8465 tg.StopAfterClientDisconnect == nil || 8466 *tg.StopAfterClientDisconnect == 0*time.Nanosecond { 8467 return false 8468 } 8469 return true 8470 } 8471 8472 // WaitClientStop uses the reschedule delay mechanism to block rescheduling until 8473 // StopAfterClientDisconnect's block interval passes 8474 func (a *Allocation) WaitClientStop() time.Time { 8475 tg := a.Job.LookupTaskGroup(a.TaskGroup) 8476 8477 // An alloc can only be marked lost once, so use the first lost transition 8478 var t time.Time 8479 for _, s := range a.AllocStates { 8480 if s.Field == AllocStateFieldClientStatus && 8481 s.Value == AllocClientStatusLost { 8482 t = s.Time 8483 break 8484 } 8485 } 8486 8487 // On the first pass, the alloc hasn't been marked lost yet, and so we start 8488 // counting from now 8489 if t.IsZero() { 8490 t = time.Now().UTC() 8491 } 8492 8493 // Find the max kill timeout 8494 kill := DefaultKillTimeout 8495 for _, t := range tg.Tasks { 8496 if t.KillTimeout > kill { 8497 kill = t.KillTimeout 8498 } 8499 } 8500 8501 return t.Add(*tg.StopAfterClientDisconnect + kill) 8502 } 8503 8504 // NextDelay returns a duration after which the allocation can be 
rescheduled. 8505 // It is calculated according to the delay function and previous reschedule attempts. 8506 func (a *Allocation) NextDelay() time.Duration { 8507 policy := a.ReschedulePolicy() 8508 // Can be nil if the task group was updated to remove its reschedule policy 8509 if policy == nil { 8510 return 0 8511 } 8512 delayDur := policy.Delay 8513 if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 { 8514 return delayDur 8515 } 8516 events := a.RescheduleTracker.Events 8517 switch policy.DelayFunction { 8518 case "exponential": 8519 delayDur = a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1].Delay * 2 8520 case "fibonacci": 8521 if len(events) >= 2 { 8522 fibN1Delay := events[len(events)-1].Delay 8523 fibN2Delay := events[len(events)-2].Delay 8524 // Handle reset of delay ceiling which should cause 8525 // a new series to start 8526 if fibN2Delay == policy.MaxDelay && fibN1Delay == policy.Delay { 8527 delayDur = fibN1Delay 8528 } else { 8529 delayDur = fibN1Delay + fibN2Delay 8530 } 8531 } 8532 default: 8533 return delayDur 8534 } 8535 if policy.MaxDelay > 0 && delayDur > policy.MaxDelay { 8536 delayDur = policy.MaxDelay 8537 // check if delay needs to be reset 8538 8539 lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] 8540 timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime 8541 if timeDiff > delayDur.Nanoseconds() { 8542 delayDur = policy.Delay 8543 } 8544 8545 } 8546 8547 return delayDur 8548 } 8549 8550 // Terminated returns if the allocation is in a terminal state on a client. 8551 func (a *Allocation) Terminated() bool { 8552 if a.ClientStatus == AllocClientStatusFailed || 8553 a.ClientStatus == AllocClientStatusComplete || 8554 a.ClientStatus == AllocClientStatusLost { 8555 return true 8556 } 8557 return false 8558 } 8559 8560 // SetStop updates the allocation in place to a stop DesiredStatus, along with the given ClientStatus and description 8561 func (a *Allocation) SetStop(clientStatus, clientDesc string) { 8562 a.DesiredStatus = AllocDesiredStatusStop 8563 a.ClientStatus = clientStatus 8564 a.ClientDescription = clientDesc 8565 a.AppendState(AllocStateFieldClientStatus, clientStatus) 8566 } 8567 8568 // AppendState creates and appends an AllocState entry recording the time of the state 8569 // transition. Used to mark the transition to lost 8570 func (a *Allocation) AppendState(field AllocStateField, value string) { 8571 a.AllocStates = append(a.AllocStates, &AllocState{ 8572 Field: field, 8573 Value: value, 8574 Time: time.Now().UTC(), 8575 }) 8576 } 8577 8578 // RanSuccessfully returns whether the client has run the allocation and all 8579 // tasks finished successfully. Critically this function returns whether the 8580 // allocation has run to completion and not just that the alloc has converged to 8581 // its desired state. That is to say that a batch allocation must have finished 8582 // with exit code 0 on all tasks. This doesn't really have meaning on a 8583 // non-batch allocation because a service and system allocation should not 8584 // finish. 8585 func (a *Allocation) RanSuccessfully() bool { 8586 // Handle the case the client hasn't started the allocation.
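// An allocation with no task states has not run anything yet, so it cannot
// count as having run successfully.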
8587 if len(a.TaskStates) == 0 { 8588 return false 8589 } 8590 8591 // Check to see if all the tasks finished successfully in the allocation 8592 allSuccess := true 8593 for _, state := range a.TaskStates { 8594 allSuccess = allSuccess && state.Successful() 8595 } 8596 8597 return allSuccess 8598 } 8599 8600 // ShouldMigrate returns if the allocation needs data migration 8601 func (a *Allocation) ShouldMigrate() bool { 8602 if a.PreviousAllocation == "" { 8603 return false 8604 } 8605 8606 if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict { 8607 return false 8608 } 8609 8610 tg := a.Job.LookupTaskGroup(a.TaskGroup) 8611 8612 // if the task group is nil or the ephemeral disk block isn't present then 8613 // we won't migrate 8614 if tg == nil || tg.EphemeralDisk == nil { 8615 return false 8616 } 8617 8618 // We won't migrate any data if the user hasn't enabled migration or the 8619 // disk is not marked as sticky 8620 if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky { 8621 return false 8622 } 8623 8624 return true 8625 } 8626 8627 // SetEventDisplayMessages populates the display message if it's not already set, 8628 // a temporary fix to handle old allocations that don't have it. 8629 // This method will be removed in a future release. 8630 func (a *Allocation) SetEventDisplayMessages() { 8631 setDisplayMsg(a.TaskStates) 8632 } 8633 8634 // COMPAT(0.11): Remove in 0.11 8635 // ComparableResources returns the resources on the allocation 8636 // handling upgrade paths. After 0.11 calls to this should be replaced with: 8637 // alloc.AllocatedResources.Comparable() 8638 func (a *Allocation) ComparableResources() *ComparableResources { 8639 // Alloc already has 0.9+ behavior 8640 if a.AllocatedResources != nil { 8641 return a.AllocatedResources.Comparable() 8642 } 8643 8644 var resources *Resources 8645 if a.Resources != nil { 8646 resources = a.Resources 8647 } else if a.TaskResources != nil { 8648 resources = new(Resources) 8649 resources.Add(a.SharedResources) 8650 for _, taskResource := range a.TaskResources { 8651 resources.Add(taskResource) 8652 } 8653 } 8654 8655 // Upgrade path 8656 return &ComparableResources{ 8657 Flattened: AllocatedTaskResources{ 8658 Cpu: AllocatedCpuResources{ 8659 CpuShares: int64(resources.CPU), 8660 }, 8661 Memory: AllocatedMemoryResources{ 8662 MemoryMB: int64(resources.MemoryMB), 8663 }, 8664 Networks: resources.Networks, 8665 }, 8666 Shared: AllocatedSharedResources{ 8667 DiskMB: int64(resources.DiskMB), 8668 }, 8669 } 8670 } 8671 8672 // LookupTask by name from the Allocation. Returns nil if the Job is not set, the 8673 // TaskGroup does not exist, or the task name cannot be found.
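// For example (hypothetical task name):
//
//	if task := alloc.LookupTask("redis"); task != nil {
//		// task is defined under alloc.TaskGroup in alloc.Job
//	}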
8674 func (a *Allocation) LookupTask(name string) *Task { 8675 if a.Job == nil { 8676 return nil 8677 } 8678 8679 tg := a.Job.LookupTaskGroup(a.TaskGroup) 8680 if tg == nil { 8681 return nil 8682 } 8683 8684 return tg.LookupTask(name) 8685 } 8686 8687 // Stub returns a list stub for the allocation 8688 func (a *Allocation) Stub() *AllocListStub { 8689 return &AllocListStub{ 8690 ID: a.ID, 8691 EvalID: a.EvalID, 8692 Name: a.Name, 8693 Namespace: a.Namespace, 8694 NodeID: a.NodeID, 8695 NodeName: a.NodeName, 8696 JobID: a.JobID, 8697 JobType: a.Job.Type, 8698 JobVersion: a.Job.Version, 8699 TaskGroup: a.TaskGroup, 8700 DesiredStatus: a.DesiredStatus, 8701 DesiredDescription: a.DesiredDescription, 8702 ClientStatus: a.ClientStatus, 8703 ClientDescription: a.ClientDescription, 8704 DesiredTransition: a.DesiredTransition, 8705 TaskStates: a.TaskStates, 8706 DeploymentStatus: a.DeploymentStatus, 8707 FollowupEvalID: a.FollowupEvalID, 8708 RescheduleTracker: a.RescheduleTracker, 8709 PreemptedAllocations: a.PreemptedAllocations, 8710 PreemptedByAllocation: a.PreemptedByAllocation, 8711 CreateIndex: a.CreateIndex, 8712 ModifyIndex: a.ModifyIndex, 8713 CreateTime: a.CreateTime, 8714 ModifyTime: a.ModifyTime, 8715 } 8716 } 8717 8718 // AllocationDiff converts an Allocation type to an AllocationDiff type. 8719 // If, at any time, modifications are made to AllocationDiff so that an 8720 // Allocation can no longer be safely converted to AllocationDiff, 8721 // this method should be changed accordingly. 8722 func (a *Allocation) AllocationDiff() *AllocationDiff { 8723 return (*AllocationDiff)(a) 8724 } 8725 8726 // AllocationDiff is another named type for Allocation (to use the same fields), 8727 // which is used to represent the delta for an Allocation. If you need a method 8728 // defined on the diff, consider whether it belongs on Allocation instead. 8729 type AllocationDiff Allocation 8730 8731 // AllocListStub is used to return a subset of alloc information 8732 type AllocListStub struct { 8733 ID string 8734 EvalID string 8735 Name string 8736 Namespace string 8737 NodeID string 8738 NodeName string 8739 JobID string 8740 JobType string 8741 JobVersion uint64 8742 TaskGroup string 8743 DesiredStatus string 8744 DesiredDescription string 8745 ClientStatus string 8746 ClientDescription string 8747 DesiredTransition DesiredTransition 8748 TaskStates map[string]*TaskState 8749 DeploymentStatus *AllocDeploymentStatus 8750 FollowupEvalID string 8751 RescheduleTracker *RescheduleTracker 8752 PreemptedAllocations []string 8753 PreemptedByAllocation string 8754 CreateIndex uint64 8755 ModifyIndex uint64 8756 CreateTime int64 8757 ModifyTime int64 8758 } 8759 8760 // SetEventDisplayMessages populates the display message if it's not already set, 8761 // a temporary fix to handle old allocations that don't have it. 8762 // This method will be removed in a future release. 8763 func (a *AllocListStub) SetEventDisplayMessages() { 8764 setDisplayMsg(a.TaskStates) 8765 } 8766 8767 func setDisplayMsg(taskStates map[string]*TaskState) { 8768 if taskStates != nil { 8769 for _, taskState := range taskStates { 8770 for _, event := range taskState.Events { 8771 event.PopulateEventDisplayMessage() 8772 } 8773 } 8774 } 8775 } 8776 8777 // AllocMetric is used to track various metrics while attempting 8778 // to make an allocation. These are used to debug a job, or to better 8779 // understand the pressure within the system.
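// Rough lifecycle during a single scheduling pass (illustrative): each
// candidate node bumps NodesEvaluated via EvaluateNode; constraint failures
// are tallied with FilterNode, resource exhaustion with ExhaustedNode, and
// scoring with ScoreNode, which retains only the top MaxRetainedNodeScores
// nodes by normalized score (see the methods below).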
8780 type AllocMetric struct { 8781 // NodesEvaluated is the number of nodes that were evaluated 8782 NodesEvaluated int 8783 8784 // NodesFiltered is the number of nodes filtered due to a constraint 8785 NodesFiltered int 8786 8787 // NodesAvailable is the number of nodes available for evaluation per DC. 8788 NodesAvailable map[string]int 8789 8790 // ClassFiltered is the number of nodes filtered by class 8791 ClassFiltered map[string]int 8792 8793 // ConstraintFiltered is the number of failures caused by constraint 8794 ConstraintFiltered map[string]int 8795 8796 // NodesExhausted is the number of nodes skipped due to being 8797 // exhausted of at least one resource 8798 NodesExhausted int 8799 8800 // ClassExhausted is the number of nodes exhausted by class 8801 ClassExhausted map[string]int 8802 8803 // DimensionExhausted provides the count by dimension or reason 8804 DimensionExhausted map[string]int 8805 8806 // QuotaExhausted provides the exhausted dimensions 8807 QuotaExhausted []string 8808 8809 // Scores is the scores of the final few nodes remaining 8810 // for placement. The top score is typically selected. 8811 // Deprecated: Replaced by ScoreMetaData in Nomad 0.9 8812 Scores map[string]float64 8813 8814 // ScoreMetaData is a slice of top scoring nodes displayed in the CLI 8815 ScoreMetaData []*NodeScoreMeta 8816 8817 // nodeScoreMeta is used to keep scores for a single node id. It is cleared out after 8818 // we receive normalized score during the last step of the scoring stack. 8819 nodeScoreMeta *NodeScoreMeta 8820 8821 // topScores is used to maintain a heap of the top K nodes with 8822 // the highest normalized score 8823 topScores *kheap.ScoreHeap 8824 8825 // AllocationTime is a measure of how long the allocation 8826 // attempt took. This can affect performance and SLAs. 8827 AllocationTime time.Duration 8828 8829 // CoalescedFailures indicates the number of other 8830 // allocations that were coalesced into this failed allocation. 8831 // This is to prevent creating many failed allocations for a 8832 // single task group. 
8833 CoalescedFailures int 8834 } 8835 8836 func (a *AllocMetric) Copy() *AllocMetric { 8837 if a == nil { 8838 return nil 8839 } 8840 na := new(AllocMetric) 8841 *na = *a 8842 na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable) 8843 na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered) 8844 na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered) 8845 na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted) 8846 na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted) 8847 na.QuotaExhausted = helper.CopySliceString(na.QuotaExhausted) 8848 na.Scores = helper.CopyMapStringFloat64(na.Scores) 8849 na.ScoreMetaData = CopySliceNodeScoreMeta(na.ScoreMetaData) 8850 return na 8851 } 8852 8853 func (a *AllocMetric) EvaluateNode() { 8854 a.NodesEvaluated += 1 8855 } 8856 8857 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 8858 a.NodesFiltered += 1 8859 if node != nil && node.NodeClass != "" { 8860 if a.ClassFiltered == nil { 8861 a.ClassFiltered = make(map[string]int) 8862 } 8863 a.ClassFiltered[node.NodeClass] += 1 8864 } 8865 if constraint != "" { 8866 if a.ConstraintFiltered == nil { 8867 a.ConstraintFiltered = make(map[string]int) 8868 } 8869 a.ConstraintFiltered[constraint] += 1 8870 } 8871 } 8872 8873 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 8874 a.NodesExhausted += 1 8875 if node != nil && node.NodeClass != "" { 8876 if a.ClassExhausted == nil { 8877 a.ClassExhausted = make(map[string]int) 8878 } 8879 a.ClassExhausted[node.NodeClass] += 1 8880 } 8881 if dimension != "" { 8882 if a.DimensionExhausted == nil { 8883 a.DimensionExhausted = make(map[string]int) 8884 } 8885 a.DimensionExhausted[dimension] += 1 8886 } 8887 } 8888 8889 func (a *AllocMetric) ExhaustQuota(dimensions []string) { 8890 if a.QuotaExhausted == nil { 8891 a.QuotaExhausted = make([]string, 0, len(dimensions)) 8892 } 8893 8894 a.QuotaExhausted = append(a.QuotaExhausted, dimensions...) 
8895 } 8896 8897 // ScoreNode is used to gather top K scoring nodes in a heap 8898 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 8899 // Create nodeScoreMeta lazily if it's the first time or if it's a new node 8900 if a.nodeScoreMeta == nil || a.nodeScoreMeta.NodeID != node.ID { 8901 a.nodeScoreMeta = &NodeScoreMeta{ 8902 NodeID: node.ID, 8903 Scores: make(map[string]float64), 8904 } 8905 } 8906 if name == NormScorerName { 8907 a.nodeScoreMeta.NormScore = score 8908 // Once we have the normalized score we can push to the heap 8909 // that tracks top K by normalized score 8910 8911 // Create the heap if it's not there already 8912 if a.topScores == nil { 8913 a.topScores = kheap.NewScoreHeap(MaxRetainedNodeScores) 8914 } 8915 heap.Push(a.topScores, a.nodeScoreMeta) 8916 8917 // Clear out this entry because it's now in the heap 8918 a.nodeScoreMeta = nil 8919 } else { 8920 a.nodeScoreMeta.Scores[name] = score 8921 } 8922 } 8923 8924 // PopulateScoreMetaData populates ScoreMetaData, the scoring metadata for the 8925 // top scoring nodes. It is filled by popping elements from a heap of top K 8926 // scores maintained per scorer 8927 func (a *AllocMetric) PopulateScoreMetaData() { 8928 if a.topScores == nil { 8929 return 8930 } 8931 8932 if a.ScoreMetaData == nil { 8933 a.ScoreMetaData = make([]*NodeScoreMeta, a.topScores.Len()) 8934 } 8935 heapItems := a.topScores.GetItemsReverse() 8936 for i, item := range heapItems { 8937 a.ScoreMetaData[i] = item.(*NodeScoreMeta) 8938 } 8939 } 8940 8941 // NodeScoreMeta captures scoring metadata derived from 8942 // different scoring factors. 8943 type NodeScoreMeta struct { 8944 NodeID string 8945 Scores map[string]float64 8946 NormScore float64 8947 } 8948 8949 func (s *NodeScoreMeta) Copy() *NodeScoreMeta { 8950 if s == nil { 8951 return nil 8952 } 8953 ns := new(NodeScoreMeta) 8954 *ns = *s 8955 return ns 8956 } 8957 8958 func (s *NodeScoreMeta) String() string { 8959 return fmt.Sprintf("%s %f %v", s.NodeID, s.NormScore, s.Scores) 8960 } 8961 8962 func (s *NodeScoreMeta) Score() float64 { 8963 return s.NormScore 8964 } 8965 8966 func (s *NodeScoreMeta) Data() interface{} { 8967 return s 8968 } 8969 8970 // AllocDeploymentStatus captures the status of the allocation as part of the 8971 // deployment. This can include things like if the allocation has been marked as 8972 // healthy. 8973 type AllocDeploymentStatus struct { 8974 // Healthy marks whether the allocation has been marked healthy or unhealthy 8975 // as part of a deployment. It can be unset if it has neither been marked 8976 // healthy nor unhealthy. 8977 Healthy *bool 8978 8979 // Timestamp is the time at which the health status was set. 8980 Timestamp time.Time 8981 8982 // Canary marks whether the allocation is a canary or not. A canary that has 8983 // been promoted will have this field set to false. 8984 Canary bool 8985 8986 // ModifyIndex is the raft index in which the deployment status was last 8987 // changed. 8988 ModifyIndex uint64 8989 } 8990 8991 // HasHealth returns true if the allocation has its health set.
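// Healthy is effectively tri-state. For example:
//
//	var s *AllocDeploymentStatus
//	_ = s.HasHealth() // false: a nil receiver is safe and means "unset"
//	h := true
//	_ = (&AllocDeploymentStatus{Healthy: &h}).IsHealthy() // true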
8992 func (a *AllocDeploymentStatus) HasHealth() bool { 8993 return a != nil && a.Healthy != nil 8994 } 8995 8996 // IsHealthy returns if the allocation is marked as healthy as part of a 8997 // deployment 8998 func (a *AllocDeploymentStatus) IsHealthy() bool { 8999 if a == nil { 9000 return false 9001 } 9002 9003 return a.Healthy != nil && *a.Healthy 9004 } 9005 9006 // IsUnhealthy returns if the allocation is marked as unhealthy as part of a 9007 // deployment 9008 func (a *AllocDeploymentStatus) IsUnhealthy() bool { 9009 if a == nil { 9010 return false 9011 } 9012 9013 return a.Healthy != nil && !*a.Healthy 9014 } 9015 9016 // IsCanary returns if the allocation is marked as a canary 9017 func (a *AllocDeploymentStatus) IsCanary() bool { 9018 if a == nil { 9019 return false 9020 } 9021 9022 return a.Canary 9023 } 9024 9025 func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus { 9026 if a == nil { 9027 return nil 9028 } 9029 9030 c := new(AllocDeploymentStatus) 9031 *c = *a 9032 9033 if a.Healthy != nil { 9034 c.Healthy = helper.BoolToPtr(*a.Healthy) 9035 } 9036 9037 return c 9038 } 9039 9040 const ( 9041 EvalStatusBlocked = "blocked" 9042 EvalStatusPending = "pending" 9043 EvalStatusComplete = "complete" 9044 EvalStatusFailed = "failed" 9045 EvalStatusCancelled = "canceled" 9046 ) 9047 9048 const ( 9049 EvalTriggerJobRegister = "job-register" 9050 EvalTriggerJobDeregister = "job-deregister" 9051 EvalTriggerPeriodicJob = "periodic-job" 9052 EvalTriggerNodeDrain = "node-drain" 9053 EvalTriggerNodeUpdate = "node-update" 9054 EvalTriggerAllocStop = "alloc-stop" 9055 EvalTriggerScheduled = "scheduled" 9056 EvalTriggerRollingUpdate = "rolling-update" 9057 EvalTriggerDeploymentWatcher = "deployment-watcher" 9058 EvalTriggerFailedFollowUp = "failed-follow-up" 9059 EvalTriggerMaxPlans = "max-plan-attempts" 9060 EvalTriggerRetryFailedAlloc = "alloc-failure" 9061 EvalTriggerQueuedAllocs = "queued-allocs" 9062 EvalTriggerPreemption = "preemption" 9063 EvalTriggerScaling = "job-scaling" 9064 ) 9065 9066 const ( 9067 // CoreJobEvalGC is used for the garbage collection of evaluations 9068 // and allocations. We periodically scan evaluations in a terminal state, 9069 // in which all the corresponding allocations are also terminal. We 9070 // delete these out of the system to bound the state. 9071 CoreJobEvalGC = "eval-gc" 9072 9073 // CoreJobNodeGC is used for the garbage collection of failed nodes. 9074 // We periodically scan nodes in a terminal state, and if they have no 9075 // corresponding allocations we delete these out of the system. 9076 CoreJobNodeGC = "node-gc" 9077 9078 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 9079 // periodically scan garbage collectible jobs and check if both their 9080 // evaluations and allocations are terminal. If so, we delete these out of 9081 // the system. 9082 CoreJobJobGC = "job-gc" 9083 9084 // CoreJobDeploymentGC is used for the garbage collection of eligible 9085 // deployments. We periodically scan garbage collectible deployments and 9086 // check if they are terminal. If so, we delete these out of the system. 9087 CoreJobDeploymentGC = "deployment-gc" 9088 9089 // CoreJobCSIVolumeClaimGC is used for the garbage collection of CSI 9090 // volume claims. We periodically scan volumes to see if no allocs are 9091 // claiming them. If so, we unclaim the volume. 9092 CoreJobCSIVolumeClaimGC = "csi-volume-claim-gc" 9093 9094 // CoreJobCSIPluginGC is used for the garbage collection of CSI plugins.
9095 // We periodically scan plugins to see if they have no associated volumes 9096 // or allocs running them. If so, we delete the plugin. 9097 CoreJobCSIPluginGC = "csi-plugin-gc" 9098 9099 // CoreJobForceGC is used to force garbage collection of all GCable objects. 9100 CoreJobForceGC = "force-gc" 9101 ) 9102 9103 // Evaluation is used anytime we need to apply business logic as a result 9104 // of a change to our desired state (job specification) or the emergent state 9105 // (registered nodes). When the inputs change, we need to "evaluate" them, 9106 // potentially taking action (allocation of work) or doing nothing if the state 9107 // of the world does not require it. 9108 type Evaluation struct { 9109 // msgpack omit empty fields during serialization 9110 _struct bool `codec:",omitempty"` // nolint: structcheck 9111 9112 // ID is a randomly generated UUID used for this evaluation. This 9113 // is assigned upon the creation of the evaluation. 9114 ID string 9115 9116 // Namespace is the namespace the evaluation is created in 9117 Namespace string 9118 9119 // Priority is used to control scheduling importance and if this job 9120 // can preempt other jobs. 9121 Priority int 9122 9123 // Type is used to control which schedulers are available to handle 9124 // this evaluation. 9125 Type string 9126 9127 // TriggeredBy is used to give some insight into why this Eval 9128 // was created. (Job change, node failure, alloc failure, etc). 9129 TriggeredBy string 9130 9131 // JobID is the job this evaluation is scoped to. Evaluations cannot 9132 // be run in parallel for a given JobID, so we serialize on this. 9133 JobID string 9134 9135 // JobModifyIndex is the modify index of the job at the time 9136 // the evaluation was created 9137 JobModifyIndex uint64 9138 9139 // NodeID is the node that was affected triggering the evaluation. 9140 NodeID string 9141 9142 // NodeModifyIndex is the modify index of the node at the time 9143 // the evaluation was created 9144 NodeModifyIndex uint64 9145 9146 // DeploymentID is the ID of the deployment that triggered the evaluation. 9147 DeploymentID string 9148 9149 // Status of the evaluation 9150 Status string 9151 9152 // StatusDescription provides a human-friendly description of the Status 9153 StatusDescription string 9154 9155 // Wait is a minimum wait time for running the eval. This is used to 9156 // support a rolling upgrade in versions prior to 0.7.0 9157 // Deprecated 9158 Wait time.Duration 9159 9160 // WaitUntil is the time when this eval should be run. This is used to 9161 // support delayed rescheduling of failed allocations 9162 WaitUntil time.Time 9163 9164 // NextEval is the evaluation ID for the eval created to do a followup. 9165 // This is used to support rolling upgrades and failed-follow-up evals, where 9166 // we need a chain of evaluations. 9167 NextEval string 9168 9169 // PreviousEval is the evaluation ID for the eval creating this one to do a followup. 9170 // This is used to support rolling upgrades and failed-follow-up evals, where 9171 // we need a chain of evaluations. 9172 PreviousEval string 9173 9174 // BlockedEval is the evaluation ID for a created blocked eval. A 9175 // blocked eval will be created if all allocations could not be placed due 9176 // to constraints or lacking resources. 9177 BlockedEval string 9178 9179 // FailedTGAllocs are task groups which have allocations that could not be 9180 // made, but the metrics are persisted so that the user can use the feedback 9181 // to determine the cause.
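// The map is keyed by task group name; e.g. FailedTGAllocs["cache"]
// (hypothetical group) would hold the AllocMetric explaining why that
// group's allocations could not be placed.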
9182 FailedTGAllocs map[string]*AllocMetric 9183 9184 // ClassEligibility tracks computed node classes that have been explicitly 9185 // marked as eligible or ineligible. 9186 ClassEligibility map[string]bool 9187 9188 // QuotaLimitReached is the quota limit that was reached for the 9189 // evaluation, if any. 9190 QuotaLimitReached string 9191 9192 // EscapedComputedClass marks whether the job has constraints that are not 9193 // captured by computed node classes. 9194 EscapedComputedClass bool 9195 9196 // AnnotatePlan triggers the scheduler to provide additional annotations 9197 // during the evaluation. This should not be set during normal operations. 9198 AnnotatePlan bool 9199 9200 // QueuedAllocations is the number of unplaced allocations at the time the 9201 // evaluation was processed. The map is keyed by Task Group names. 9202 QueuedAllocations map[string]int 9203 9204 // LeaderACL provides the ACL token to use when issuing RPCs back to the 9205 // leader. This will be a valid management token as long as the leader is 9206 // active. This should not ever be exposed via the API. 9207 LeaderACL string 9208 9209 // SnapshotIndex is the Raft index of the snapshot used to process the 9210 // evaluation. The index will either be set when it has gone through the 9211 // scheduler or if a blocked evaluation is being created. The index is set 9212 // in this case so we can determine if an early unblocking is required since 9213 // capacity has changed since the evaluation was created. This can result in 9214 // the SnapshotIndex being less than the CreateIndex. 9215 SnapshotIndex uint64 9216 9217 // Raft Indexes 9218 CreateIndex uint64 9219 ModifyIndex uint64 9220 9221 CreateTime int64 9222 ModifyTime int64 9223 } 9224 9225 // TerminalStatus returns if the current status is terminal and 9226 // will no longer transition.
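// Note that EvalStatusBlocked is not terminal: a blocked eval can later be
// unblocked by new capacity and proceed to complete or failed.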
9227 func (e *Evaluation) TerminalStatus() bool { 9228 switch e.Status { 9229 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 9230 return true 9231 default: 9232 return false 9233 } 9234 } 9235 9236 func (e *Evaluation) GoString() string { 9237 return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace) 9238 } 9239 9240 func (e *Evaluation) Copy() *Evaluation { 9241 if e == nil { 9242 return nil 9243 } 9244 ne := new(Evaluation) 9245 *ne = *e 9246 9247 // Copy ClassEligibility 9248 if e.ClassEligibility != nil { 9249 classes := make(map[string]bool, len(e.ClassEligibility)) 9250 for class, elig := range e.ClassEligibility { 9251 classes[class] = elig 9252 } 9253 ne.ClassEligibility = classes 9254 } 9255 9256 // Copy FailedTGAllocs 9257 if e.FailedTGAllocs != nil { 9258 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 9259 for tg, metric := range e.FailedTGAllocs { 9260 failedTGs[tg] = metric.Copy() 9261 } 9262 ne.FailedTGAllocs = failedTGs 9263 } 9264 9265 // Copy queued allocations 9266 if e.QueuedAllocations != nil { 9267 queuedAllocations := make(map[string]int, len(e.QueuedAllocations)) 9268 for tg, num := range e.QueuedAllocations { 9269 queuedAllocations[tg] = num 9270 } 9271 ne.QueuedAllocations = queuedAllocations 9272 } 9273 9274 return ne 9275 } 9276 9277 // ShouldEnqueue checks if a given evaluation should be enqueued into the 9278 // eval_broker 9279 func (e *Evaluation) ShouldEnqueue() bool { 9280 switch e.Status { 9281 case EvalStatusPending: 9282 return true 9283 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 9284 return false 9285 default: 9286 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 9287 } 9288 } 9289 9290 // ShouldBlock checks if a given evaluation should be entered into the blocked 9291 // eval tracker. 9292 func (e *Evaluation) ShouldBlock() bool { 9293 switch e.Status { 9294 case EvalStatusBlocked: 9295 return true 9296 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 9297 return false 9298 default: 9299 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 9300 } 9301 } 9302 9303 // MakePlan is used to make a plan from the given evaluation 9304 // for a given Job 9305 func (e *Evaluation) MakePlan(j *Job) *Plan { 9306 p := &Plan{ 9307 EvalID: e.ID, 9308 Priority: e.Priority, 9309 Job: j, 9310 NodeUpdate: make(map[string][]*Allocation), 9311 NodeAllocation: make(map[string][]*Allocation), 9312 NodePreemptions: make(map[string][]*Allocation), 9313 } 9314 if j != nil { 9315 p.AllAtOnce = j.AllAtOnce 9316 } 9317 return p 9318 } 9319 9320 // NextRollingEval creates an evaluation to follow up this eval for rolling updates 9321 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 9322 now := time.Now().UTC().UnixNano() 9323 return &Evaluation{ 9324 ID: uuid.Generate(), 9325 Namespace: e.Namespace, 9326 Priority: e.Priority, 9327 Type: e.Type, 9328 TriggeredBy: EvalTriggerRollingUpdate, 9329 JobID: e.JobID, 9330 JobModifyIndex: e.JobModifyIndex, 9331 Status: EvalStatusPending, 9332 Wait: wait, 9333 PreviousEval: e.ID, 9334 CreateTime: now, 9335 ModifyTime: now, 9336 } 9337 } 9338 9339 // CreateBlockedEval creates a blocked evaluation to follow up this eval to place any 9340 // failed allocations. It takes the classes marked explicitly eligible or 9341 // ineligible, whether the job has escaped computed node classes and whether the 9342 // quota limit was reached.
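// An illustrative call from a scheduler (hypothetical arguments):
//
//	blocked := eval.CreateBlockedEval(map[string]bool{"web-class": true}, false, "")
//	// blocked.Status == EvalStatusBlocked and blocked.PreviousEval == eval.ID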
9343 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, 9344 escaped bool, quotaReached string) *Evaluation { 9345 now := time.Now().UTC().UnixNano() 9346 return &Evaluation{ 9347 ID: uuid.Generate(), 9348 Namespace: e.Namespace, 9349 Priority: e.Priority, 9350 Type: e.Type, 9351 TriggeredBy: EvalTriggerQueuedAllocs, 9352 JobID: e.JobID, 9353 JobModifyIndex: e.JobModifyIndex, 9354 Status: EvalStatusBlocked, 9355 PreviousEval: e.ID, 9356 ClassEligibility: classEligibility, 9357 EscapedComputedClass: escaped, 9358 QuotaLimitReached: quotaReached, 9359 CreateTime: now, 9360 ModifyTime: now, 9361 } 9362 } 9363 9364 // CreateFailedFollowUpEval creates a follow-up evaluation when the current one 9365 // has been marked as failed because it has hit the delivery limit and will not 9366 // be retried by the eval_broker. Callers should copy the created eval's ID 9367 // into the old eval's NextEval field. 9368 func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation { 9369 now := time.Now().UTC().UnixNano() 9370 return &Evaluation{ 9371 ID: uuid.Generate(), 9372 Namespace: e.Namespace, 9373 Priority: e.Priority, 9374 Type: e.Type, 9375 TriggeredBy: EvalTriggerFailedFollowUp, 9376 JobID: e.JobID, 9377 JobModifyIndex: e.JobModifyIndex, 9378 Status: EvalStatusPending, 9379 Wait: wait, 9380 PreviousEval: e.ID, 9381 CreateTime: now, 9382 ModifyTime: now, 9383 } 9384 } 9385 9386 // UpdateModifyTime takes into account that clocks on different servers may be 9387 // slightly out of sync. Even in case of a leader change, this method will 9388 // guarantee that ModifyTime will always be after CreateTime. 9389 func (e *Evaluation) UpdateModifyTime() { 9390 now := time.Now().UTC().UnixNano() 9391 if now <= e.CreateTime { 9392 e.ModifyTime = e.CreateTime + 1 9393 } else { 9394 e.ModifyTime = now 9395 } 9396 } 9397 9398 // Plan is used to submit a commit plan for task allocations. These 9399 // are submitted to the leader which verifies that resources have 9400 // not been overcommitted before admitting the plan. 9401 type Plan struct { 9402 // msgpack omit empty fields during serialization 9403 _struct bool `codec:",omitempty"` // nolint: structcheck 9404 9405 // EvalID is the evaluation ID this plan is associated with 9406 EvalID string 9407 9408 // EvalToken is used to prevent a split-brain processing of 9409 // an evaluation. There should only be a single scheduler running 9410 // an Eval at a time, but this could be violated after a leadership 9411 // transition. This unique token is used to reject plans that are 9412 // being submitted from a different leader. 9413 EvalToken string 9414 9415 // Priority is the priority of the upstream job 9416 Priority int 9417 9418 // AllAtOnce is used to control if incremental scheduling of task groups 9419 // is allowed or if we must do a gang scheduling of the entire job. 9420 // If this is false, a plan may be partially applied. Otherwise, the 9421 // entire plan must be able to make progress. 9422 AllAtOnce bool 9423 9424 // Job is the parent job of all the allocations in the Plan. 9425 // Since a Plan only involves a single Job, we can reduce the size 9426 // of the plan by only including it once. 9427 Job *Job 9428 9429 // NodeUpdate contains all the allocations for each node. For each node, 9430 // this is a list of the allocations to update to either stop or evict. 9431 NodeUpdate map[string][]*Allocation 9432 9433 // NodeAllocation contains all the allocations for each node.
// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// msgpack omit empty fields during serialization
	_struct bool `codec:",omitempty"` // nolint: structcheck

	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// Job is the parent job of all the allocations in the Plan.
	// Since a Plan only involves a single Job, we can reduce the size
	// of the plan by only including it once.
	Job *Job

	// NodeUpdate maps each node to the list of allocations to update
	// there, either to stop or to evict them.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	NodeAllocation map[string][]*Allocation

	// Annotations contains annotations by the scheduler to be used by operators
	// to understand the decisions made by the scheduler.
	Annotations *PlanAnnotations

	// Deployment is the deployment created or updated by the scheduler that
	// should be applied by the planner.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// NodePreemptions is a map from node id to a set of allocations from other
	// lower priority jobs that are preempted. Preempted allocations are marked
	// as evicted.
	NodePreemptions map[string][]*Allocation

	// SnapshotIndex is the Raft index of the snapshot used to create the
	// Plan. The leader will wait to evaluate the plan until its StateStore
	// has reached at least this index.
	SnapshotIndex uint64
}

// AppendStoppedAlloc marks an allocation to be stopped. The clientStatus of
// the allocation may be optionally set by passing in a non-empty value.
// Similarly, a non-empty followupEvalID links the stopped allocation to the
// evaluation that will replace it.
func (p *Plan) AppendStoppedAlloc(alloc *Allocation, desiredDesc, clientStatus, followupEvalID string) {
	newAlloc := new(Allocation)
	*newAlloc = *alloc

	// If the job is not set in the plan we are deregistering a job so we
	// extract the job from the allocation.
	if p.Job == nil && newAlloc.Job != nil {
		p.Job = newAlloc.Job
	}

	// Normalize the job
	newAlloc.Job = nil

	// Strip the resources as it can be rebuilt.
	newAlloc.Resources = nil

	newAlloc.DesiredStatus = AllocDesiredStatusStop
	newAlloc.DesiredDescription = desiredDesc

	if clientStatus != "" {
		newAlloc.ClientStatus = clientStatus
	}

	newAlloc.AppendState(AllocStateFieldClientStatus, clientStatus)

	if followupEvalID != "" {
		newAlloc.FollowupEvalID = followupEvalID
	}

	node := alloc.NodeID
	existing := p.NodeUpdate[node]
	p.NodeUpdate[node] = append(existing, newAlloc)
}
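// Editor's note: illustrative sketch, not part of the original source. It
// shows a typical flow: derive a Plan from an evaluation (which initializes
// the per-node maps) and mark an allocation stopped. The allocation's copy
// in the plan is stripped of its Job and Resources to keep the plan small.
func exampleAppendStoppedAlloc(eval *Evaluation, job *Job, alloc *Allocation) {
	p := eval.MakePlan(job)
	p.AppendStoppedAlloc(alloc, "stopped for illustration", AllocClientStatusComplete, "")

	stopped := p.NodeUpdate[alloc.NodeID]
	fmt.Println(len(stopped) == 1)                                  // true
	fmt.Println(stopped[0].DesiredStatus == AllocDesiredStatusStop) // true
	fmt.Println(stopped[0].Job == nil)                              // true: job normalized away
}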
// AppendPreemptedAlloc is used to append an allocation that's being preempted
// to the plan. To minimize the size of the plan, this only sets a minimal set
// of fields in the allocation.
func (p *Plan) AppendPreemptedAlloc(alloc *Allocation, preemptingAllocID string) {
	newAlloc := &Allocation{}
	newAlloc.ID = alloc.ID
	newAlloc.JobID = alloc.JobID
	newAlloc.Namespace = alloc.Namespace
	newAlloc.DesiredStatus = AllocDesiredStatusEvict
	newAlloc.PreemptedByAllocation = preemptingAllocID

	desiredDesc := fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocID)
	newAlloc.DesiredDescription = desiredDesc

	// Resource information is needed by the plan applier to check if
	// allocations fit after removing preempted allocations
	if alloc.AllocatedResources != nil {
		newAlloc.AllocatedResources = alloc.AllocatedResources
	} else {
		// COMPAT Remove in version 0.11
		newAlloc.TaskResources = alloc.TaskResources
		newAlloc.SharedResources = alloc.SharedResources
	}

	// Append this alloc to slice for this node
	node := alloc.NodeID
	existing := p.NodePreemptions[node]
	p.NodePreemptions[node] = append(existing, newAlloc)
}

// PopUpdate removes the most recently appended stop/evict update for the
// given allocation's node, provided it matches the allocation.
func (p *Plan) PopUpdate(alloc *Allocation) {
	existing := p.NodeUpdate[alloc.NodeID]
	n := len(existing)
	if n > 0 && existing[n-1].ID == alloc.ID {
		existing = existing[:n-1]
		if len(existing) > 0 {
			p.NodeUpdate[alloc.NodeID] = existing
		} else {
			delete(p.NodeUpdate, alloc.NodeID)
		}
	}
}

// AppendAlloc appends the allocation to the plan's node allocations,
// normalizing away the job since the plan already carries it once.
func (p *Plan) AppendAlloc(alloc *Allocation) {
	node := alloc.NodeID
	existing := p.NodeAllocation[node]

	// Normalize the job
	alloc.Job = nil

	p.NodeAllocation[node] = append(existing, alloc)
}

// IsNoOp checks if this plan would do nothing
func (p *Plan) IsNoOp() bool {
	return len(p.NodeUpdate) == 0 &&
		len(p.NodeAllocation) == 0 &&
		p.Deployment == nil &&
		len(p.DeploymentUpdates) == 0
}

// NormalizeAllocations normalizes allocations to remove fields that can
// be fetched from the MemDB instead of sending over the wire
func (p *Plan) NormalizeAllocations() {
	for _, allocs := range p.NodeUpdate {
		for i, alloc := range allocs {
			allocs[i] = &Allocation{
				ID:                 alloc.ID,
				DesiredDescription: alloc.DesiredDescription,
				ClientStatus:       alloc.ClientStatus,
				FollowupEvalID:     alloc.FollowupEvalID,
			}
		}
	}

	for _, allocs := range p.NodePreemptions {
		for i, alloc := range allocs {
			allocs[i] = &Allocation{
				ID:                    alloc.ID,
				PreemptedByAllocation: alloc.PreemptedByAllocation,
			}
		}
	}
}
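// Editor's note: illustrative sketch, not part of the original source. After
// NormalizeAllocations, each updated allocation in the plan is reduced to the
// handful of fields the servers cannot recover from their own state store;
// everything else (NodeID, Job, resources, ...) is re-materialized from MemDB
// when the plan is applied.
func exampleNormalizeAllocations(p *Plan) {
	p.NormalizeAllocations()
	for node, allocs := range p.NodeUpdate {
		for _, a := range allocs {
			// Only ID, DesiredDescription, ClientStatus and FollowupEvalID
			// survive; the node is now identified solely by the map key.
			fmt.Println(node, a.ID, a.NodeID == "") // a.NodeID == "" prints true
		}
	}
}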
// PlanResult is the result of a plan submitted to the leader.
type PlanResult struct {
	// NodeUpdate contains all the updates that were committed.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations that were committed.
	NodeAllocation map[string][]*Allocation

	// Deployment is the deployment that was committed.
	Deployment *Deployment

	// DeploymentUpdates is the set of deployment updates that were committed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// NodePreemptions is a map from node id to a set of allocations from other
	// lower priority jobs that are preempted. Preempted allocations are marked
	// as stopped.
	NodePreemptions map[string][]*Allocation

	// RefreshIndex is the index the worker should refresh state up to.
	// This allows all evictions and allocations to be materialized.
	// If any allocations were rejected due to stale data (node state,
	// overcommitted) this can be used to force a worker refresh.
	RefreshIndex uint64

	// AllocIndex is the Raft index in which the evictions and
	// allocations took place. This is used for the write index.
	AllocIndex uint64
}

// IsNoOp checks if this plan result would do nothing
func (p *PlanResult) IsNoOp() bool {
	return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 &&
		len(p.DeploymentUpdates) == 0 && p.Deployment == nil
}

// FullCommit is used to check if all the allocations in a plan were committed
// as part of the result. It returns whether there was a full commit, along
// with the expected and actual allocation counts.
func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
	expected := 0
	actual := 0
	for name, allocList := range plan.NodeAllocation {
		didAlloc := p.NodeAllocation[name]
		expected += len(allocList)
		actual += len(didAlloc)
	}
	return actual == expected, expected, actual
}
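// Editor's note: illustrative sketch, not part of the original source. A
// worker can use FullCommit to detect a partially applied plan: when the
// leader rejected some placements (for instance, because its node state was
// newer), the worker refreshes its snapshot up to RefreshIndex and plans
// again.
func exampleFullCommit(plan *Plan, result *PlanResult) {
	if full, expected, actual := result.FullCommit(plan); !full {
		fmt.Printf("partial commit: %d of %d allocations placed; refresh to index %d\n",
			actual, expected, result.RefreshIndex)
	}
}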
// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators.
type PlanAnnotations struct {
	// DesiredTGUpdates is the set of desired updates per task group.
	DesiredTGUpdates map[string]*DesiredUpdates

	// PreemptedAllocs is the set of allocations to be preempted to make the placement successful.
	PreemptedAllocs []*AllocListStub
}

// DesiredUpdates is the set of changes the scheduler would like to make given
// sufficient resources and cluster capacity.
type DesiredUpdates struct {
	Ignore            uint64
	Place             uint64
	Migrate           uint64
	Stop              uint64
	InPlaceUpdate     uint64
	DestructiveUpdate uint64
	Canary            uint64
	Preemptions       uint64
}

func (d *DesiredUpdates) GoString() string {
	return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
		d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
}

// MsgpackHandle is a shared handle for encoding/decoding of structs
var MsgpackHandle = func() *codec.MsgpackHandle {
	h := &codec.MsgpackHandle{}
	h.RawToString = true

	// maintain the binary format used before upgrading to the latest ugorji
	h.BasicHandle.TimeNotBuiltin = true

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))

	// only consider `codec` struct tags
	h.TypeInfos = codec.NewTypeInfos([]string{"codec"})

	return h
}()

var (
	// JsonHandle and JsonHandlePretty are the codec handles to JSON encode
	// structs. The pretty handle will add indents for easier human consumption.
	JsonHandle = &codec.JsonHandle{
		HTMLCharsAsIs: true,
	}
	JsonHandlePretty = &codec.JsonHandle{
		HTMLCharsAsIs: true,
		Indent:        4,
	}
)

// Decode is used to decode a MsgPack encoded object
func Decode(buf []byte, out interface{}) error {
	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
}

// Encode is used to encode a MsgPack object with type prefix
func Encode(t MessageType, msg interface{}) ([]byte, error) {
	var buf bytes.Buffer
	buf.WriteByte(uint8(t))
	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
	return buf.Bytes(), err
}
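// Editor's note: illustrative sketch, not part of the original source. Encode
// prefixes the msgpack body with a single MessageType byte, while Decode
// expects just the body, so a consumer (such as an FSM reading a Raft log
// entry) peels the first byte off before decoding. KeyringRequest is used
// here only as a conveniently small payload, and the message type is chosen
// arbitrarily for the sketch.
func exampleEncodeDecode() {
	in := KeyringRequest{Key: "example-key-material"}
	buf, err := Encode(ClusterMetadataRequestType, in)
	if err != nil {
		panic(err)
	}

	fmt.Println(MessageType(buf[0]) == ClusterMetadataRequestType) // true

	var out KeyringRequest
	if err := Decode(buf[1:], &out); err != nil { // skip the type byte
		panic(err)
	}
	fmt.Println(out.Key == in.Key) // true
}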
// KeyringResponse is a unified key response and can be used for install,
// remove, use, as well as listing key queries.
type KeyringResponse struct {
	Messages map[string]string
	Keys     map[string]int
	NumNodes int
}

// KeyringRequest is the request object for serf key operations.
type KeyringRequest struct {
	Key string
}

// RecoverableError wraps an error and marks whether it is recoverable (and
// may be retried) or fatal.
type RecoverableError struct {
	Err         string
	Recoverable bool
}

// NewRecoverableError is used to wrap an error and mark it as recoverable or
// not.
func NewRecoverableError(e error, recoverable bool) error {
	if e == nil {
		return nil
	}

	return &RecoverableError{
		Err:         e.Error(),
		Recoverable: recoverable,
	}
}

// WrapRecoverable wraps an existing error in a new RecoverableError with a new
// message. If the error was recoverable before, the returned error is as well;
// otherwise it is unrecoverable.
func WrapRecoverable(msg string, err error) error {
	return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)}
}

func (r *RecoverableError) Error() string {
	return r.Err
}

func (r *RecoverableError) IsRecoverable() bool {
	return r.Recoverable
}

func (r *RecoverableError) IsUnrecoverable() bool {
	return !r.Recoverable
}

// Recoverable is an interface for errors to implement to indicate whether
// they are fatal or recoverable.
type Recoverable interface {
	error
	IsRecoverable() bool
}

// IsRecoverable returns true if the error implements Recoverable and reports
// itself as recoverable; otherwise false is returned.
func IsRecoverable(e error) bool {
	if re, ok := e.(Recoverable); ok {
		return re.IsRecoverable()
	}
	return false
}

// WrappedServerError wraps an error and satisfies
// both the Recoverable and the ServerSideError interfaces
type WrappedServerError struct {
	Err error
}

// NewWrappedServerError is used to create a wrapped server side error
func NewWrappedServerError(e error) error {
	return &WrappedServerError{
		Err: e,
	}
}

func (r *WrappedServerError) IsRecoverable() bool {
	return IsRecoverable(r.Err)
}

func (r *WrappedServerError) Error() string {
	return r.Err.Error()
}

func (r *WrappedServerError) IsServerSide() bool {
	return true
}

// ServerSideError is an interface for errors to implement to indicate that
// the error occurred after the request made it to a server.
type ServerSideError interface {
	error
	IsServerSide() bool
}

// IsServerSide returns true if the error is a wrapped
// server-side error
func IsServerSide(e error) bool {
	if se, ok := e.(ServerSideError); ok {
		return se.IsServerSide()
	}
	return false
}
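// Editor's note: illustrative sketch, not part of the original source. It
// exercises the two error classifications above: recoverability (may the
// caller retry?) and server-sideness (did the request reach a server?).
func exampleErrorClassification() {
	base := errors.New("connection reset by peer")

	retryable := NewRecoverableError(base, true)
	fmt.Println(IsRecoverable(retryable)) // true: safe to retry

	// Wrapping with a new message preserves recoverability.
	wrapped := WrapRecoverable("dialing upstream: connection reset", retryable)
	fmt.Println(IsRecoverable(wrapped)) // true

	// A wrapped server error is server-side and inherits recoverability
	// from the error it wraps.
	srvErr := NewWrappedServerError(retryable)
	fmt.Println(IsServerSide(srvErr), IsRecoverable(srvErr)) // true true

	// A plain error is neither.
	fmt.Println(IsRecoverable(base), IsServerSide(base)) // false false
}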
// ACLPolicy is used to represent an ACL policy
type ACLPolicy struct {
	Name        string      // Unique name
	Description string      // Human readable
	Rules       string      // HCL or JSON format
	RulesJSON   *acl.Policy // Generated from Rules on read
	Hash        []byte
	CreateIndex uint64
	ModifyIndex uint64
}

// SetHash is used to compute and set the hash of the ACL policy
func (a *ACLPolicy) SetHash() []byte {
	// Initialize a 256bit Blake2 hash (32 bytes)
	hash, err := blake2b.New256(nil)
	if err != nil {
		panic(err)
	}

	// Write all the user set fields
	hash.Write([]byte(a.Name))
	hash.Write([]byte(a.Description))
	hash.Write([]byte(a.Rules))

	// Finalize the hash
	hashVal := hash.Sum(nil)

	// Set and return the hash
	a.Hash = hashVal
	return hashVal
}

func (a *ACLPolicy) Stub() *ACLPolicyListStub {
	return &ACLPolicyListStub{
		Name:        a.Name,
		Description: a.Description,
		Hash:        a.Hash,
		CreateIndex: a.CreateIndex,
		ModifyIndex: a.ModifyIndex,
	}
}

func (a *ACLPolicy) Validate() error {
	var mErr multierror.Error
	if !validPolicyName.MatchString(a.Name) {
		err := fmt.Errorf("invalid name '%s'", a.Name)
		mErr.Errors = append(mErr.Errors, err)
	}
	if _, err := acl.Parse(a.Rules); err != nil {
		err = fmt.Errorf("failed to parse rules: %v", err)
		mErr.Errors = append(mErr.Errors, err)
	}
	if len(a.Description) > maxPolicyDescriptionLength {
		err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength)
		mErr.Errors = append(mErr.Errors, err)
	}
	return mErr.ErrorOrNil()
}
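// Editor's note: illustrative sketch, not part of the original source. A
// policy's hash covers only the user-settable fields (Name, Description,
// Rules), so it changes exactly when one of those changes, and Validate
// enforces the name pattern, the description length, and that Rules parses
// as an ACL document. The rules text here is a small read-only namespace
// policy chosen for illustration.
func exampleACLPolicy() {
	p := &ACLPolicy{
		Name:        "readonly-default",
		Description: "Read-only access to the default namespace",
		Rules:       `namespace "default" { policy = "read" }`,
	}
	if err := p.Validate(); err != nil {
		panic(err)
	}
	before := p.SetHash()

	// Changing the rules changes the hash.
	p.Rules = `namespace "default" { policy = "write" }`
	after := p.SetHash()
	fmt.Println(bytes.Equal(before, after)) // false
}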
fmt.Errorf("management token cannot be associated with policies")) 10021 } 10022 default: 10023 mErr.Errors = append(mErr.Errors, fmt.Errorf("token type must be client or management")) 10024 } 10025 return mErr.ErrorOrNil() 10026 } 10027 10028 // PolicySubset checks if a given set of policies is a subset of the token 10029 func (a *ACLToken) PolicySubset(policies []string) bool { 10030 // Hot-path the management tokens, superset of all policies. 10031 if a.Type == ACLManagementToken { 10032 return true 10033 } 10034 associatedPolicies := make(map[string]struct{}, len(a.Policies)) 10035 for _, policy := range a.Policies { 10036 associatedPolicies[policy] = struct{}{} 10037 } 10038 for _, policy := range policies { 10039 if _, ok := associatedPolicies[policy]; !ok { 10040 return false 10041 } 10042 } 10043 return true 10044 } 10045 10046 // ACLTokenListRequest is used to request a list of tokens 10047 type ACLTokenListRequest struct { 10048 GlobalOnly bool 10049 QueryOptions 10050 } 10051 10052 // ACLTokenSpecificRequest is used to query a specific token 10053 type ACLTokenSpecificRequest struct { 10054 AccessorID string 10055 QueryOptions 10056 } 10057 10058 // ACLTokenSetRequest is used to query a set of tokens 10059 type ACLTokenSetRequest struct { 10060 AccessorIDS []string 10061 QueryOptions 10062 } 10063 10064 // ACLTokenListResponse is used for a list request 10065 type ACLTokenListResponse struct { 10066 Tokens []*ACLTokenListStub 10067 QueryMeta 10068 } 10069 10070 // SingleACLTokenResponse is used to return a single token 10071 type SingleACLTokenResponse struct { 10072 Token *ACLToken 10073 QueryMeta 10074 } 10075 10076 // ACLTokenSetResponse is used to return a set of token 10077 type ACLTokenSetResponse struct { 10078 Tokens map[string]*ACLToken // Keyed by Accessor ID 10079 QueryMeta 10080 } 10081 10082 // ResolveACLTokenRequest is used to resolve a specific token 10083 type ResolveACLTokenRequest struct { 10084 SecretID string 10085 QueryOptions 10086 } 10087 10088 // ResolveACLTokenResponse is used to resolve a single token 10089 type ResolveACLTokenResponse struct { 10090 Token *ACLToken 10091 QueryMeta 10092 } 10093 10094 // ACLTokenDeleteRequest is used to delete a set of tokens 10095 type ACLTokenDeleteRequest struct { 10096 AccessorIDs []string 10097 WriteRequest 10098 } 10099 10100 // ACLTokenBootstrapRequest is used to bootstrap ACLs 10101 type ACLTokenBootstrapRequest struct { 10102 Token *ACLToken // Not client specifiable 10103 ResetIndex uint64 // Reset index is used to clear the bootstrap token 10104 WriteRequest 10105 } 10106 10107 // ACLTokenUpsertRequest is used to upsert a set of tokens 10108 type ACLTokenUpsertRequest struct { 10109 Tokens []*ACLToken 10110 WriteRequest 10111 } 10112 10113 // ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest 10114 type ACLTokenUpsertResponse struct { 10115 Tokens []*ACLToken 10116 WriteMeta 10117 }