github.com/smithx10/nomad@v0.9.1-rc1/nomad/structs/structs.go (about) 1 package structs 2 3 import ( 4 "bytes" 5 "container/heap" 6 "crypto/md5" 7 "crypto/sha1" 8 "crypto/sha256" 9 "crypto/sha512" 10 "encoding/base32" 11 "encoding/base64" 12 "encoding/hex" 13 "errors" 14 "fmt" 15 "io" 16 "math" 17 "net" 18 "net/url" 19 "os" 20 "path/filepath" 21 "reflect" 22 "regexp" 23 "sort" 24 "strconv" 25 "strings" 26 "time" 27 28 "github.com/gorhill/cronexpr" 29 "github.com/hashicorp/consul/api" 30 hcodec "github.com/hashicorp/go-msgpack/codec" 31 multierror "github.com/hashicorp/go-multierror" 32 version "github.com/hashicorp/go-version" 33 "github.com/hashicorp/nomad/acl" 34 "github.com/hashicorp/nomad/helper" 35 "github.com/hashicorp/nomad/helper/args" 36 "github.com/hashicorp/nomad/helper/uuid" 37 "github.com/hashicorp/nomad/lib/kheap" 38 psstructs "github.com/hashicorp/nomad/plugins/shared/structs" 39 "github.com/mitchellh/copystructure" 40 "github.com/ugorji/go/codec" 41 "golang.org/x/crypto/blake2b" 42 ) 43 44 var ( 45 // validPolicyName is used to validate a policy name 46 validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$") 47 48 // b32 is a lowercase base32 encoding for use in URL friendly service hashes 49 b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567")) 50 ) 51 52 type MessageType uint8 53 54 const ( 55 NodeRegisterRequestType MessageType = iota 56 NodeDeregisterRequestType 57 NodeUpdateStatusRequestType 58 NodeUpdateDrainRequestType 59 JobRegisterRequestType 60 JobDeregisterRequestType 61 EvalUpdateRequestType 62 EvalDeleteRequestType 63 AllocUpdateRequestType 64 AllocClientUpdateRequestType 65 ReconcileJobSummariesRequestType 66 VaultAccessorRegisterRequestType 67 VaultAccessorDeregisterRequestType 68 ApplyPlanResultsRequestType 69 DeploymentStatusUpdateRequestType 70 DeploymentPromoteRequestType 71 DeploymentAllocHealthRequestType 72 DeploymentDeleteRequestType 73 JobStabilityRequestType 74 ACLPolicyUpsertRequestType 75 ACLPolicyDeleteRequestType 76 ACLTokenUpsertRequestType 77 ACLTokenDeleteRequestType 78 ACLTokenBootstrapRequestType 79 AutopilotRequestType 80 UpsertNodeEventsType 81 JobBatchDeregisterRequestType 82 AllocUpdateDesiredTransitionRequestType 83 NodeUpdateEligibilityRequestType 84 BatchNodeUpdateDrainRequestType 85 SchedulerConfigRequestType 86 ) 87 88 const ( 89 // IgnoreUnknownTypeFlag is set along with a MessageType 90 // to indicate that the message type can be safely ignored 91 // if it is not recognized. This is for future proofing, so 92 // that new commands can be added in a way that won't cause 93 // old servers to crash when the FSM attempts to process them. 94 IgnoreUnknownTypeFlag MessageType = 128 95 96 // ApiMajorVersion is returned as part of the Status.Version request. 97 // It should be incremented anytime the APIs are changed in a way 98 // that would break clients for sane client versioning. 99 ApiMajorVersion = 1 100 101 // ApiMinorVersion is returned as part of the Status.Version request. 102 // It should be incremented anytime the APIs are changed to allow 103 // for sane client versioning. Minor changes should be compatible 104 // within the major version. 105 ApiMinorVersion = 1 106 107 ProtocolVersion = "protocol" 108 APIMajorVersion = "api.major" 109 APIMinorVersion = "api.minor" 110 111 GetterModeAny = "any" 112 GetterModeFile = "file" 113 GetterModeDir = "dir" 114 115 // maxPolicyDescriptionLength limits a policy description length 116 maxPolicyDescriptionLength = 256 117 118 // maxTokenNameLength limits a ACL token name length 119 maxTokenNameLength = 256 120 121 // ACLClientToken and ACLManagementToken are the only types of tokens 122 ACLClientToken = "client" 123 ACLManagementToken = "management" 124 125 // DefaultNamespace is the default namespace. 126 DefaultNamespace = "default" 127 DefaultNamespaceDescription = "Default shared namespace" 128 129 // JitterFraction is a the limit to the amount of jitter we apply 130 // to a user specified MaxQueryTime. We divide the specified time by 131 // the fraction. So 16 == 6.25% limit of jitter. This jitter is also 132 // applied to RPCHoldTimeout. 133 JitterFraction = 16 134 135 // MaxRetainedNodeEvents is the maximum number of node events that will be 136 // retained for a single node 137 MaxRetainedNodeEvents = 10 138 139 // MaxRetainedNodeScores is the number of top scoring nodes for which we 140 // retain scoring metadata 141 MaxRetainedNodeScores = 5 142 143 // Normalized scorer name 144 NormScorerName = "normalized-score" 145 ) 146 147 // Context defines the scope in which a search for Nomad object operates, and 148 // is also used to query the matching index value for this context 149 type Context string 150 151 const ( 152 Allocs Context = "allocs" 153 Deployments Context = "deployment" 154 Evals Context = "evals" 155 Jobs Context = "jobs" 156 Nodes Context = "nodes" 157 Namespaces Context = "namespaces" 158 Quotas Context = "quotas" 159 All Context = "all" 160 ) 161 162 // NamespacedID is a tuple of an ID and a namespace 163 type NamespacedID struct { 164 ID string 165 Namespace string 166 } 167 168 // NewNamespacedID returns a new namespaced ID given the ID and namespace 169 func NewNamespacedID(id, ns string) NamespacedID { 170 return NamespacedID{ 171 ID: id, 172 Namespace: ns, 173 } 174 } 175 176 func (n NamespacedID) String() string { 177 return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID) 178 } 179 180 // RPCInfo is used to describe common information about query 181 type RPCInfo interface { 182 RequestRegion() string 183 IsRead() bool 184 AllowStaleRead() bool 185 IsForwarded() bool 186 SetForwarded() 187 } 188 189 // InternalRpcInfo allows adding internal RPC metadata to an RPC. This struct 190 // should NOT be replicated in the API package as it is internal only. 191 type InternalRpcInfo struct { 192 // Forwarded marks whether the RPC has been forwarded. 193 Forwarded bool 194 } 195 196 // IsForwarded returns whether the RPC is forwarded from another server. 197 func (i *InternalRpcInfo) IsForwarded() bool { 198 return i.Forwarded 199 } 200 201 // SetForwarded marks that the RPC is being forwarded from another server. 202 func (i *InternalRpcInfo) SetForwarded() { 203 i.Forwarded = true 204 } 205 206 // QueryOptions is used to specify various flags for read queries 207 type QueryOptions struct { 208 // The target region for this query 209 Region string 210 211 // Namespace is the target namespace for the query. 212 Namespace string 213 214 // If set, wait until query exceeds given index. Must be provided 215 // with MaxQueryTime. 216 MinQueryIndex uint64 217 218 // Provided with MinQueryIndex to wait for change. 219 MaxQueryTime time.Duration 220 221 // If set, any follower can service the request. Results 222 // may be arbitrarily stale. 223 AllowStale bool 224 225 // If set, used as prefix for resource list searches 226 Prefix string 227 228 // AuthToken is secret portion of the ACL token used for the request 229 AuthToken string 230 231 InternalRpcInfo 232 } 233 234 func (q QueryOptions) RequestRegion() string { 235 return q.Region 236 } 237 238 func (q QueryOptions) RequestNamespace() string { 239 if q.Namespace == "" { 240 return DefaultNamespace 241 } 242 return q.Namespace 243 } 244 245 // QueryOption only applies to reads, so always true 246 func (q QueryOptions) IsRead() bool { 247 return true 248 } 249 250 func (q QueryOptions) AllowStaleRead() bool { 251 return q.AllowStale 252 } 253 254 type WriteRequest struct { 255 // The target region for this write 256 Region string 257 258 // Namespace is the target namespace for the write. 259 Namespace string 260 261 // AuthToken is secret portion of the ACL token used for the request 262 AuthToken string 263 264 InternalRpcInfo 265 } 266 267 func (w WriteRequest) RequestRegion() string { 268 // The target region for this request 269 return w.Region 270 } 271 272 func (w WriteRequest) RequestNamespace() string { 273 if w.Namespace == "" { 274 return DefaultNamespace 275 } 276 return w.Namespace 277 } 278 279 // WriteRequest only applies to writes, always false 280 func (w WriteRequest) IsRead() bool { 281 return false 282 } 283 284 func (w WriteRequest) AllowStaleRead() bool { 285 return false 286 } 287 288 // QueryMeta allows a query response to include potentially 289 // useful metadata about a query 290 type QueryMeta struct { 291 // This is the index associated with the read 292 Index uint64 293 294 // If AllowStale is used, this is time elapsed since 295 // last contact between the follower and leader. This 296 // can be used to gauge staleness. 297 LastContact time.Duration 298 299 // Used to indicate if there is a known leader node 300 KnownLeader bool 301 } 302 303 // WriteMeta allows a write response to include potentially 304 // useful metadata about the write 305 type WriteMeta struct { 306 // This is the index associated with the write 307 Index uint64 308 } 309 310 // NodeRegisterRequest is used for Node.Register endpoint 311 // to register a node as being a schedulable entity. 312 type NodeRegisterRequest struct { 313 Node *Node 314 NodeEvent *NodeEvent 315 WriteRequest 316 } 317 318 // NodeDeregisterRequest is used for Node.Deregister endpoint 319 // to deregister a node as being a schedulable entity. 320 type NodeDeregisterRequest struct { 321 NodeID string 322 WriteRequest 323 } 324 325 // NodeServerInfo is used to in NodeUpdateResponse to return Nomad server 326 // information used in RPC server lists. 327 type NodeServerInfo struct { 328 // RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to 329 // be contacted at for RPCs. 330 RPCAdvertiseAddr string 331 332 // RpcMajorVersion is the major version number the Nomad Server 333 // supports 334 RPCMajorVersion int32 335 336 // RpcMinorVersion is the minor version number the Nomad Server 337 // supports 338 RPCMinorVersion int32 339 340 // Datacenter is the datacenter that a Nomad server belongs to 341 Datacenter string 342 } 343 344 // NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint 345 // to update the status of a node. 346 type NodeUpdateStatusRequest struct { 347 NodeID string 348 Status string 349 NodeEvent *NodeEvent 350 WriteRequest 351 } 352 353 // NodeUpdateDrainRequest is used for updating the drain strategy 354 type NodeUpdateDrainRequest struct { 355 NodeID string 356 DrainStrategy *DrainStrategy 357 358 // COMPAT Remove in version 0.10 359 // As part of Nomad 0.8 we have deprecated the drain boolean in favor of a 360 // drain strategy but we need to handle the upgrade path where the Raft log 361 // contains drain updates with just the drain boolean being manipulated. 362 Drain bool 363 364 // MarkEligible marks the node as eligible if removing the drain strategy. 365 MarkEligible bool 366 367 // NodeEvent is the event added to the node 368 NodeEvent *NodeEvent 369 370 WriteRequest 371 } 372 373 // BatchNodeUpdateDrainRequest is used for updating the drain strategy for a 374 // batch of nodes 375 type BatchNodeUpdateDrainRequest struct { 376 // Updates is a mapping of nodes to their updated drain strategy 377 Updates map[string]*DrainUpdate 378 379 // NodeEvents is a mapping of the node to the event to add to the node 380 NodeEvents map[string]*NodeEvent 381 382 WriteRequest 383 } 384 385 // DrainUpdate is used to update the drain of a node 386 type DrainUpdate struct { 387 // DrainStrategy is the new strategy for the node 388 DrainStrategy *DrainStrategy 389 390 // MarkEligible marks the node as eligible if removing the drain strategy. 391 MarkEligible bool 392 } 393 394 // NodeUpdateEligibilityRequest is used for updating the scheduling eligibility 395 type NodeUpdateEligibilityRequest struct { 396 NodeID string 397 Eligibility string 398 399 // NodeEvent is the event added to the node 400 NodeEvent *NodeEvent 401 402 WriteRequest 403 } 404 405 // NodeEvaluateRequest is used to re-evaluate the node 406 type NodeEvaluateRequest struct { 407 NodeID string 408 WriteRequest 409 } 410 411 // NodeSpecificRequest is used when we just need to specify a target node 412 type NodeSpecificRequest struct { 413 NodeID string 414 SecretID string 415 QueryOptions 416 } 417 418 // SearchResponse is used to return matches and information about whether 419 // the match list is truncated specific to each type of context. 420 type SearchResponse struct { 421 // Map of context types to ids which match a specified prefix 422 Matches map[Context][]string 423 424 // Truncations indicates whether the matches for a particular context have 425 // been truncated 426 Truncations map[Context]bool 427 428 QueryMeta 429 } 430 431 // SearchRequest is used to parameterize a request, and returns a 432 // list of matches made up of jobs, allocations, evaluations, and/or nodes, 433 // along with whether or not the information returned is truncated. 434 type SearchRequest struct { 435 // Prefix is what ids are matched to. I.e, if the given prefix were 436 // "a", potential matches might be "abcd" or "aabb" 437 Prefix string 438 439 // Context is the type that can be matched against. A context can be a job, 440 // node, evaluation, allocation, or empty (indicated every context should be 441 // matched) 442 Context Context 443 444 QueryOptions 445 } 446 447 // JobRegisterRequest is used for Job.Register endpoint 448 // to register a job as being a schedulable entity. 449 type JobRegisterRequest struct { 450 Job *Job 451 452 // If EnforceIndex is set then the job will only be registered if the passed 453 // JobModifyIndex matches the current Jobs index. If the index is zero, the 454 // register only occurs if the job is new. 455 EnforceIndex bool 456 JobModifyIndex uint64 457 458 // PolicyOverride is set when the user is attempting to override any policies 459 PolicyOverride bool 460 461 WriteRequest 462 } 463 464 // JobDeregisterRequest is used for Job.Deregister endpoint 465 // to deregister a job as being a schedulable entity. 466 type JobDeregisterRequest struct { 467 JobID string 468 469 // Purge controls whether the deregister purges the job from the system or 470 // whether the job is just marked as stopped and will be removed by the 471 // garbage collector 472 Purge bool 473 474 WriteRequest 475 } 476 477 // JobBatchDeregisterRequest is used to batch deregister jobs and upsert 478 // evaluations. 479 type JobBatchDeregisterRequest struct { 480 // Jobs is the set of jobs to deregister 481 Jobs map[NamespacedID]*JobDeregisterOptions 482 483 // Evals is the set of evaluations to create. 484 Evals []*Evaluation 485 486 WriteRequest 487 } 488 489 // JobDeregisterOptions configures how a job is deregistered. 490 type JobDeregisterOptions struct { 491 // Purge controls whether the deregister purges the job from the system or 492 // whether the job is just marked as stopped and will be removed by the 493 // garbage collector 494 Purge bool 495 } 496 497 // JobEvaluateRequest is used when we just need to re-evaluate a target job 498 type JobEvaluateRequest struct { 499 JobID string 500 EvalOptions EvalOptions 501 WriteRequest 502 } 503 504 // EvalOptions is used to encapsulate options when forcing a job evaluation 505 type EvalOptions struct { 506 ForceReschedule bool 507 } 508 509 // JobSpecificRequest is used when we just need to specify a target job 510 type JobSpecificRequest struct { 511 JobID string 512 AllAllocs bool 513 QueryOptions 514 } 515 516 // JobListRequest is used to parameterize a list request 517 type JobListRequest struct { 518 QueryOptions 519 } 520 521 // JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run 522 // evaluation of the Job. 523 type JobPlanRequest struct { 524 Job *Job 525 Diff bool // Toggles an annotated diff 526 // PolicyOverride is set when the user is attempting to override any policies 527 PolicyOverride bool 528 WriteRequest 529 } 530 531 // JobSummaryRequest is used when we just need to get a specific job summary 532 type JobSummaryRequest struct { 533 JobID string 534 QueryOptions 535 } 536 537 // JobDispatchRequest is used to dispatch a job based on a parameterized job 538 type JobDispatchRequest struct { 539 JobID string 540 Payload []byte 541 Meta map[string]string 542 WriteRequest 543 } 544 545 // JobValidateRequest is used to validate a job 546 type JobValidateRequest struct { 547 Job *Job 548 WriteRequest 549 } 550 551 // JobRevertRequest is used to revert a job to a prior version. 552 type JobRevertRequest struct { 553 // JobID is the ID of the job being reverted 554 JobID string 555 556 // JobVersion the version to revert to. 557 JobVersion uint64 558 559 // EnforcePriorVersion if set will enforce that the job is at the given 560 // version before reverting. 561 EnforcePriorVersion *uint64 562 563 WriteRequest 564 } 565 566 // JobStabilityRequest is used to marked a job as stable. 567 type JobStabilityRequest struct { 568 // Job to set the stability on 569 JobID string 570 JobVersion uint64 571 572 // Set the stability 573 Stable bool 574 WriteRequest 575 } 576 577 // JobStabilityResponse is the response when marking a job as stable. 578 type JobStabilityResponse struct { 579 WriteMeta 580 } 581 582 // NodeListRequest is used to parameterize a list request 583 type NodeListRequest struct { 584 QueryOptions 585 } 586 587 // EvalUpdateRequest is used for upserting evaluations. 588 type EvalUpdateRequest struct { 589 Evals []*Evaluation 590 EvalToken string 591 WriteRequest 592 } 593 594 // EvalDeleteRequest is used for deleting an evaluation. 595 type EvalDeleteRequest struct { 596 Evals []string 597 Allocs []string 598 WriteRequest 599 } 600 601 // EvalSpecificRequest is used when we just need to specify a target evaluation 602 type EvalSpecificRequest struct { 603 EvalID string 604 QueryOptions 605 } 606 607 // EvalAckRequest is used to Ack/Nack a specific evaluation 608 type EvalAckRequest struct { 609 EvalID string 610 Token string 611 WriteRequest 612 } 613 614 // EvalDequeueRequest is used when we want to dequeue an evaluation 615 type EvalDequeueRequest struct { 616 Schedulers []string 617 Timeout time.Duration 618 SchedulerVersion uint16 619 WriteRequest 620 } 621 622 // EvalListRequest is used to list the evaluations 623 type EvalListRequest struct { 624 QueryOptions 625 } 626 627 // PlanRequest is used to submit an allocation plan to the leader 628 type PlanRequest struct { 629 Plan *Plan 630 WriteRequest 631 } 632 633 // ApplyPlanResultsRequest is used by the planner to apply a Raft transaction 634 // committing the result of a plan. 635 type ApplyPlanResultsRequest struct { 636 // AllocUpdateRequest holds the allocation updates to be made by the 637 // scheduler. 638 AllocUpdateRequest 639 640 // Deployment is the deployment created or updated as a result of a 641 // scheduling event. 642 Deployment *Deployment 643 644 // DeploymentUpdates is a set of status updates to apply to the given 645 // deployments. This allows the scheduler to cancel any unneeded deployment 646 // because the job is stopped or the update block is removed. 647 DeploymentUpdates []*DeploymentStatusUpdate 648 649 // EvalID is the eval ID of the plan being applied. The modify index of the 650 // evaluation is updated as part of applying the plan to ensure that subsequent 651 // scheduling events for the same job will wait for the index that last produced 652 // state changes. This is necessary for blocked evaluations since they can be 653 // processed many times, potentially making state updates, without the state of 654 // the evaluation itself being updated. 655 EvalID string 656 657 // NodePreemptions is a slice of allocations from other lower priority jobs 658 // that are preempted. Preempted allocations are marked as evicted. 659 NodePreemptions []*Allocation 660 661 // PreemptionEvals is a slice of follow up evals for jobs whose allocations 662 // have been preempted to place allocs in this plan 663 PreemptionEvals []*Evaluation 664 } 665 666 // AllocUpdateRequest is used to submit changes to allocations, either 667 // to cause evictions or to assign new allocations. Both can be done 668 // within a single transaction 669 type AllocUpdateRequest struct { 670 // Alloc is the list of new allocations to assign 671 Alloc []*Allocation 672 673 // Evals is the list of new evaluations to create 674 // Evals are valid only when used in the Raft RPC 675 Evals []*Evaluation 676 677 // Job is the shared parent job of the allocations. 678 // It is pulled out since it is common to reduce payload size. 679 Job *Job 680 681 WriteRequest 682 } 683 684 // AllocUpdateDesiredTransitionRequest is used to submit changes to allocations 685 // desired transition state. 686 type AllocUpdateDesiredTransitionRequest struct { 687 // Allocs is the mapping of allocation ids to their desired state 688 // transition 689 Allocs map[string]*DesiredTransition 690 691 // Evals is the set of evaluations to create 692 Evals []*Evaluation 693 694 WriteRequest 695 } 696 697 // AllocListRequest is used to request a list of allocations 698 type AllocListRequest struct { 699 QueryOptions 700 } 701 702 // AllocSpecificRequest is used to query a specific allocation 703 type AllocSpecificRequest struct { 704 AllocID string 705 QueryOptions 706 } 707 708 // AllocsGetRequest is used to query a set of allocations 709 type AllocsGetRequest struct { 710 AllocIDs []string 711 QueryOptions 712 } 713 714 // PeriodicForceRequest is used to force a specific periodic job. 715 type PeriodicForceRequest struct { 716 JobID string 717 WriteRequest 718 } 719 720 // ServerMembersResponse has the list of servers in a cluster 721 type ServerMembersResponse struct { 722 ServerName string 723 ServerRegion string 724 ServerDC string 725 Members []*ServerMember 726 } 727 728 // ServerMember holds information about a Nomad server agent in a cluster 729 type ServerMember struct { 730 Name string 731 Addr net.IP 732 Port uint16 733 Tags map[string]string 734 Status string 735 ProtocolMin uint8 736 ProtocolMax uint8 737 ProtocolCur uint8 738 DelegateMin uint8 739 DelegateMax uint8 740 DelegateCur uint8 741 } 742 743 // DeriveVaultTokenRequest is used to request wrapped Vault tokens for the 744 // following tasks in the given allocation 745 type DeriveVaultTokenRequest struct { 746 NodeID string 747 SecretID string 748 AllocID string 749 Tasks []string 750 QueryOptions 751 } 752 753 // VaultAccessorsRequest is used to operate on a set of Vault accessors 754 type VaultAccessorsRequest struct { 755 Accessors []*VaultAccessor 756 } 757 758 // VaultAccessor is a reference to a created Vault token on behalf of 759 // an allocation's task. 760 type VaultAccessor struct { 761 AllocID string 762 Task string 763 NodeID string 764 Accessor string 765 CreationTTL int 766 767 // Raft Indexes 768 CreateIndex uint64 769 } 770 771 // DeriveVaultTokenResponse returns the wrapped tokens for each requested task 772 type DeriveVaultTokenResponse struct { 773 // Tasks is a mapping between the task name and the wrapped token 774 Tasks map[string]string 775 776 // Error stores any error that occurred. Errors are stored here so we can 777 // communicate whether it is retriable 778 Error *RecoverableError 779 780 QueryMeta 781 } 782 783 // GenericRequest is used to request where no 784 // specific information is needed. 785 type GenericRequest struct { 786 QueryOptions 787 } 788 789 // DeploymentListRequest is used to list the deployments 790 type DeploymentListRequest struct { 791 QueryOptions 792 } 793 794 // DeploymentDeleteRequest is used for deleting deployments. 795 type DeploymentDeleteRequest struct { 796 Deployments []string 797 WriteRequest 798 } 799 800 // DeploymentStatusUpdateRequest is used to update the status of a deployment as 801 // well as optionally creating an evaluation atomically. 802 type DeploymentStatusUpdateRequest struct { 803 // Eval, if set, is used to create an evaluation at the same time as 804 // updating the status of a deployment. 805 Eval *Evaluation 806 807 // DeploymentUpdate is a status update to apply to the given 808 // deployment. 809 DeploymentUpdate *DeploymentStatusUpdate 810 811 // Job is used to optionally upsert a job. This is used when setting the 812 // allocation health results in a deployment failure and the deployment 813 // auto-reverts to the latest stable job. 814 Job *Job 815 } 816 817 // DeploymentAllocHealthRequest is used to set the health of a set of 818 // allocations as part of a deployment. 819 type DeploymentAllocHealthRequest struct { 820 DeploymentID string 821 822 // Marks these allocations as healthy, allow further allocations 823 // to be rolled. 824 HealthyAllocationIDs []string 825 826 // Any unhealthy allocations fail the deployment 827 UnhealthyAllocationIDs []string 828 829 WriteRequest 830 } 831 832 // ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft 833 type ApplyDeploymentAllocHealthRequest struct { 834 DeploymentAllocHealthRequest 835 836 // Timestamp is the timestamp to use when setting the allocations health. 837 Timestamp time.Time 838 839 // An optional field to update the status of a deployment 840 DeploymentUpdate *DeploymentStatusUpdate 841 842 // Job is used to optionally upsert a job. This is used when setting the 843 // allocation health results in a deployment failure and the deployment 844 // auto-reverts to the latest stable job. 845 Job *Job 846 847 // An optional evaluation to create after promoting the canaries 848 Eval *Evaluation 849 } 850 851 // DeploymentPromoteRequest is used to promote task groups in a deployment 852 type DeploymentPromoteRequest struct { 853 DeploymentID string 854 855 // All is to promote all task groups 856 All bool 857 858 // Groups is used to set the promotion status per task group 859 Groups []string 860 861 WriteRequest 862 } 863 864 // ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft 865 type ApplyDeploymentPromoteRequest struct { 866 DeploymentPromoteRequest 867 868 // An optional evaluation to create after promoting the canaries 869 Eval *Evaluation 870 } 871 872 // DeploymentPauseRequest is used to pause a deployment 873 type DeploymentPauseRequest struct { 874 DeploymentID string 875 876 // Pause sets the pause status 877 Pause bool 878 879 WriteRequest 880 } 881 882 // DeploymentSpecificRequest is used to make a request specific to a particular 883 // deployment 884 type DeploymentSpecificRequest struct { 885 DeploymentID string 886 QueryOptions 887 } 888 889 // DeploymentFailRequest is used to fail a particular deployment 890 type DeploymentFailRequest struct { 891 DeploymentID string 892 WriteRequest 893 } 894 895 // SingleDeploymentResponse is used to respond with a single deployment 896 type SingleDeploymentResponse struct { 897 Deployment *Deployment 898 QueryMeta 899 } 900 901 // GenericResponse is used to respond to a request where no 902 // specific response information is needed. 903 type GenericResponse struct { 904 WriteMeta 905 } 906 907 // VersionResponse is used for the Status.Version response 908 type VersionResponse struct { 909 Build string 910 Versions map[string]int 911 QueryMeta 912 } 913 914 // JobRegisterResponse is used to respond to a job registration 915 type JobRegisterResponse struct { 916 EvalID string 917 EvalCreateIndex uint64 918 JobModifyIndex uint64 919 920 // Warnings contains any warnings about the given job. These may include 921 // deprecation warnings. 922 Warnings string 923 924 QueryMeta 925 } 926 927 // JobDeregisterResponse is used to respond to a job deregistration 928 type JobDeregisterResponse struct { 929 EvalID string 930 EvalCreateIndex uint64 931 JobModifyIndex uint64 932 QueryMeta 933 } 934 935 // JobBatchDeregisterResponse is used to respond to a batch job deregistration 936 type JobBatchDeregisterResponse struct { 937 // JobEvals maps the job to its created evaluation 938 JobEvals map[NamespacedID]string 939 QueryMeta 940 } 941 942 // JobValidateResponse is the response from validate request 943 type JobValidateResponse struct { 944 // DriverConfigValidated indicates whether the agent validated the driver 945 // config 946 DriverConfigValidated bool 947 948 // ValidationErrors is a list of validation errors 949 ValidationErrors []string 950 951 // Error is a string version of any error that may have occurred 952 Error string 953 954 // Warnings contains any warnings about the given job. These may include 955 // deprecation warnings. 956 Warnings string 957 } 958 959 // NodeUpdateResponse is used to respond to a node update 960 type NodeUpdateResponse struct { 961 HeartbeatTTL time.Duration 962 EvalIDs []string 963 EvalCreateIndex uint64 964 NodeModifyIndex uint64 965 966 // LeaderRPCAddr is the RPC address of the current Raft Leader. If 967 // empty, the current Nomad Server is in the minority of a partition. 968 LeaderRPCAddr string 969 970 // NumNodes is the number of Nomad nodes attached to this quorum of 971 // Nomad Servers at the time of the response. This value can 972 // fluctuate based on the health of the cluster between heartbeats. 973 NumNodes int32 974 975 // Servers is the full list of known Nomad servers in the local 976 // region. 977 Servers []*NodeServerInfo 978 979 QueryMeta 980 } 981 982 // NodeDrainUpdateResponse is used to respond to a node drain update 983 type NodeDrainUpdateResponse struct { 984 NodeModifyIndex uint64 985 EvalIDs []string 986 EvalCreateIndex uint64 987 WriteMeta 988 } 989 990 // NodeEligibilityUpdateResponse is used to respond to a node eligibility update 991 type NodeEligibilityUpdateResponse struct { 992 NodeModifyIndex uint64 993 EvalIDs []string 994 EvalCreateIndex uint64 995 WriteMeta 996 } 997 998 // NodeAllocsResponse is used to return allocs for a single node 999 type NodeAllocsResponse struct { 1000 Allocs []*Allocation 1001 QueryMeta 1002 } 1003 1004 // NodeClientAllocsResponse is used to return allocs meta data for a single node 1005 type NodeClientAllocsResponse struct { 1006 Allocs map[string]uint64 1007 1008 // MigrateTokens are used when ACLs are enabled to allow cross node, 1009 // authenticated access to sticky volumes 1010 MigrateTokens map[string]string 1011 1012 QueryMeta 1013 } 1014 1015 // SingleNodeResponse is used to return a single node 1016 type SingleNodeResponse struct { 1017 Node *Node 1018 QueryMeta 1019 } 1020 1021 // NodeListResponse is used for a list request 1022 type NodeListResponse struct { 1023 Nodes []*NodeListStub 1024 QueryMeta 1025 } 1026 1027 // SingleJobResponse is used to return a single job 1028 type SingleJobResponse struct { 1029 Job *Job 1030 QueryMeta 1031 } 1032 1033 // JobSummaryResponse is used to return a single job summary 1034 type JobSummaryResponse struct { 1035 JobSummary *JobSummary 1036 QueryMeta 1037 } 1038 1039 type JobDispatchResponse struct { 1040 DispatchedJobID string 1041 EvalID string 1042 EvalCreateIndex uint64 1043 JobCreateIndex uint64 1044 WriteMeta 1045 } 1046 1047 // JobListResponse is used for a list request 1048 type JobListResponse struct { 1049 Jobs []*JobListStub 1050 QueryMeta 1051 } 1052 1053 // JobVersionsRequest is used to get a jobs versions 1054 type JobVersionsRequest struct { 1055 JobID string 1056 Diffs bool 1057 QueryOptions 1058 } 1059 1060 // JobVersionsResponse is used for a job get versions request 1061 type JobVersionsResponse struct { 1062 Versions []*Job 1063 Diffs []*JobDiff 1064 QueryMeta 1065 } 1066 1067 // JobPlanResponse is used to respond to a job plan request 1068 type JobPlanResponse struct { 1069 // Annotations stores annotations explaining decisions the scheduler made. 1070 Annotations *PlanAnnotations 1071 1072 // FailedTGAllocs is the placement failures per task group. 1073 FailedTGAllocs map[string]*AllocMetric 1074 1075 // JobModifyIndex is the modification index of the job. The value can be 1076 // used when running `nomad run` to ensure that the Job wasn’t modified 1077 // since the last plan. If the job is being created, the value is zero. 1078 JobModifyIndex uint64 1079 1080 // CreatedEvals is the set of evaluations created by the scheduler. The 1081 // reasons for this can be rolling-updates or blocked evals. 1082 CreatedEvals []*Evaluation 1083 1084 // Diff contains the diff of the job and annotations on whether the change 1085 // causes an in-place update or create/destroy 1086 Diff *JobDiff 1087 1088 // NextPeriodicLaunch is the time duration till the job would be launched if 1089 // submitted. 1090 NextPeriodicLaunch time.Time 1091 1092 // Warnings contains any warnings about the given job. These may include 1093 // deprecation warnings. 1094 Warnings string 1095 1096 WriteMeta 1097 } 1098 1099 // SingleAllocResponse is used to return a single allocation 1100 type SingleAllocResponse struct { 1101 Alloc *Allocation 1102 QueryMeta 1103 } 1104 1105 // AllocsGetResponse is used to return a set of allocations 1106 type AllocsGetResponse struct { 1107 Allocs []*Allocation 1108 QueryMeta 1109 } 1110 1111 // JobAllocationsResponse is used to return the allocations for a job 1112 type JobAllocationsResponse struct { 1113 Allocations []*AllocListStub 1114 QueryMeta 1115 } 1116 1117 // JobEvaluationsResponse is used to return the evaluations for a job 1118 type JobEvaluationsResponse struct { 1119 Evaluations []*Evaluation 1120 QueryMeta 1121 } 1122 1123 // SingleEvalResponse is used to return a single evaluation 1124 type SingleEvalResponse struct { 1125 Eval *Evaluation 1126 QueryMeta 1127 } 1128 1129 // EvalDequeueResponse is used to return from a dequeue 1130 type EvalDequeueResponse struct { 1131 Eval *Evaluation 1132 Token string 1133 1134 // WaitIndex is the Raft index the worker should wait until invoking the 1135 // scheduler. 1136 WaitIndex uint64 1137 1138 QueryMeta 1139 } 1140 1141 // GetWaitIndex is used to retrieve the Raft index in which state should be at 1142 // or beyond before invoking the scheduler. 1143 func (e *EvalDequeueResponse) GetWaitIndex() uint64 { 1144 // Prefer the wait index sent. This will be populated on all responses from 1145 // 0.7.0 and above 1146 if e.WaitIndex != 0 { 1147 return e.WaitIndex 1148 } else if e.Eval != nil { 1149 return e.Eval.ModifyIndex 1150 } 1151 1152 // This should never happen 1153 return 1 1154 } 1155 1156 // PlanResponse is used to return from a PlanRequest 1157 type PlanResponse struct { 1158 Result *PlanResult 1159 WriteMeta 1160 } 1161 1162 // AllocListResponse is used for a list request 1163 type AllocListResponse struct { 1164 Allocations []*AllocListStub 1165 QueryMeta 1166 } 1167 1168 // DeploymentListResponse is used for a list request 1169 type DeploymentListResponse struct { 1170 Deployments []*Deployment 1171 QueryMeta 1172 } 1173 1174 // EvalListResponse is used for a list request 1175 type EvalListResponse struct { 1176 Evaluations []*Evaluation 1177 QueryMeta 1178 } 1179 1180 // EvalAllocationsResponse is used to return the allocations for an evaluation 1181 type EvalAllocationsResponse struct { 1182 Allocations []*AllocListStub 1183 QueryMeta 1184 } 1185 1186 // PeriodicForceResponse is used to respond to a periodic job force launch 1187 type PeriodicForceResponse struct { 1188 EvalID string 1189 EvalCreateIndex uint64 1190 WriteMeta 1191 } 1192 1193 // DeploymentUpdateResponse is used to respond to a deployment change. The 1194 // response will include the modify index of the deployment as well as details 1195 // of any triggered evaluation. 1196 type DeploymentUpdateResponse struct { 1197 EvalID string 1198 EvalCreateIndex uint64 1199 DeploymentModifyIndex uint64 1200 1201 // RevertedJobVersion is the version the job was reverted to. If unset, the 1202 // job wasn't reverted 1203 RevertedJobVersion *uint64 1204 1205 WriteMeta 1206 } 1207 1208 // NodeConnQueryResponse is used to respond to a query of whether a server has 1209 // a connection to a specific Node 1210 type NodeConnQueryResponse struct { 1211 // Connected indicates whether a connection to the Client exists 1212 Connected bool 1213 1214 // Established marks the time at which the connection was established 1215 Established time.Time 1216 1217 QueryMeta 1218 } 1219 1220 // EmitNodeEventsRequest is a request to update the node events source 1221 // with a new client-side event 1222 type EmitNodeEventsRequest struct { 1223 // NodeEvents are a map where the key is a node id, and value is a list of 1224 // events for that node 1225 NodeEvents map[string][]*NodeEvent 1226 1227 WriteRequest 1228 } 1229 1230 // EmitNodeEventsResponse is a response to the client about the status of 1231 // the node event source update. 1232 type EmitNodeEventsResponse struct { 1233 Index uint64 1234 WriteMeta 1235 } 1236 1237 const ( 1238 NodeEventSubsystemDrain = "Drain" 1239 NodeEventSubsystemDriver = "Driver" 1240 NodeEventSubsystemHeartbeat = "Heartbeat" 1241 NodeEventSubsystemCluster = "Cluster" 1242 ) 1243 1244 // NodeEvent is a single unit representing a node’s state change 1245 type NodeEvent struct { 1246 Message string 1247 Subsystem string 1248 Details map[string]string 1249 Timestamp time.Time 1250 CreateIndex uint64 1251 } 1252 1253 func (ne *NodeEvent) String() string { 1254 var details []string 1255 for k, v := range ne.Details { 1256 details = append(details, fmt.Sprintf("%s: %s", k, v)) 1257 } 1258 1259 return fmt.Sprintf("Message: %s, Subsystem: %s, Details: %s, Timestamp: %s", ne.Message, ne.Subsystem, strings.Join(details, ","), ne.Timestamp.String()) 1260 } 1261 1262 func (ne *NodeEvent) Copy() *NodeEvent { 1263 c := new(NodeEvent) 1264 *c = *ne 1265 c.Details = helper.CopyMapStringString(ne.Details) 1266 return c 1267 } 1268 1269 // NewNodeEvent generates a new node event storing the current time as the 1270 // timestamp 1271 func NewNodeEvent() *NodeEvent { 1272 return &NodeEvent{Timestamp: time.Now()} 1273 } 1274 1275 // SetMessage is used to set the message on the node event 1276 func (ne *NodeEvent) SetMessage(msg string) *NodeEvent { 1277 ne.Message = msg 1278 return ne 1279 } 1280 1281 // SetSubsystem is used to set the subsystem on the node event 1282 func (ne *NodeEvent) SetSubsystem(sys string) *NodeEvent { 1283 ne.Subsystem = sys 1284 return ne 1285 } 1286 1287 // SetTimestamp is used to set the timestamp on the node event 1288 func (ne *NodeEvent) SetTimestamp(ts time.Time) *NodeEvent { 1289 ne.Timestamp = ts 1290 return ne 1291 } 1292 1293 // AddDetail is used to add a detail to the node event 1294 func (ne *NodeEvent) AddDetail(k, v string) *NodeEvent { 1295 if ne.Details == nil { 1296 ne.Details = make(map[string]string, 1) 1297 } 1298 ne.Details[k] = v 1299 return ne 1300 } 1301 1302 const ( 1303 NodeStatusInit = "initializing" 1304 NodeStatusReady = "ready" 1305 NodeStatusDown = "down" 1306 ) 1307 1308 // ShouldDrainNode checks if a given node status should trigger an 1309 // evaluation. Some states don't require any further action. 1310 func ShouldDrainNode(status string) bool { 1311 switch status { 1312 case NodeStatusInit, NodeStatusReady: 1313 return false 1314 case NodeStatusDown: 1315 return true 1316 default: 1317 panic(fmt.Sprintf("unhandled node status %s", status)) 1318 } 1319 } 1320 1321 // ValidNodeStatus is used to check if a node status is valid 1322 func ValidNodeStatus(status string) bool { 1323 switch status { 1324 case NodeStatusInit, NodeStatusReady, NodeStatusDown: 1325 return true 1326 default: 1327 return false 1328 } 1329 } 1330 1331 const ( 1332 // NodeSchedulingEligible and Ineligible marks the node as eligible or not, 1333 // respectively, for receiving allocations. This is orthoginal to the node 1334 // status being ready. 1335 NodeSchedulingEligible = "eligible" 1336 NodeSchedulingIneligible = "ineligible" 1337 ) 1338 1339 // DrainSpec describes a Node's desired drain behavior. 1340 type DrainSpec struct { 1341 // Deadline is the duration after StartTime when the remaining 1342 // allocations on a draining Node should be told to stop. 1343 Deadline time.Duration 1344 1345 // IgnoreSystemJobs allows systems jobs to remain on the node even though it 1346 // has been marked for draining. 1347 IgnoreSystemJobs bool 1348 } 1349 1350 // DrainStrategy describes a Node's drain behavior. 1351 type DrainStrategy struct { 1352 // DrainSpec is the user declared drain specification 1353 DrainSpec 1354 1355 // ForceDeadline is the deadline time for the drain after which drains will 1356 // be forced 1357 ForceDeadline time.Time 1358 } 1359 1360 func (d *DrainStrategy) Copy() *DrainStrategy { 1361 if d == nil { 1362 return nil 1363 } 1364 1365 nd := new(DrainStrategy) 1366 *nd = *d 1367 return nd 1368 } 1369 1370 // DeadlineTime returns a boolean whether the drain strategy allows an infinite 1371 // duration or otherwise the deadline time. The force drain is captured by the 1372 // deadline time being in the past. 1373 func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) { 1374 // Treat the nil case as a force drain so during an upgrade where a node may 1375 // not have a drain strategy but has Drain set to true, it is treated as a 1376 // force to mimick old behavior. 1377 if d == nil { 1378 return false, time.Time{} 1379 } 1380 1381 ns := d.Deadline.Nanoseconds() 1382 switch { 1383 case ns < 0: // Force 1384 return false, time.Time{} 1385 case ns == 0: // Infinite 1386 return true, time.Time{} 1387 default: 1388 return false, d.ForceDeadline 1389 } 1390 } 1391 1392 func (d *DrainStrategy) Equal(o *DrainStrategy) bool { 1393 if d == nil && o == nil { 1394 return true 1395 } else if o != nil && d == nil { 1396 return false 1397 } else if d != nil && o == nil { 1398 return false 1399 } 1400 1401 // Compare values 1402 if d.ForceDeadline != o.ForceDeadline { 1403 return false 1404 } else if d.Deadline != o.Deadline { 1405 return false 1406 } else if d.IgnoreSystemJobs != o.IgnoreSystemJobs { 1407 return false 1408 } 1409 1410 return true 1411 } 1412 1413 // Node is a representation of a schedulable client node 1414 type Node struct { 1415 // ID is a unique identifier for the node. It can be constructed 1416 // by doing a concatenation of the Name and Datacenter as a simple 1417 // approach. Alternatively a UUID may be used. 1418 ID string 1419 1420 // SecretID is an ID that is only known by the Node and the set of Servers. 1421 // It is not accessible via the API and is used to authenticate nodes 1422 // conducting privileged activities. 1423 SecretID string 1424 1425 // Datacenter for this node 1426 Datacenter string 1427 1428 // Node name 1429 Name string 1430 1431 // HTTPAddr is the address on which the Nomad client is listening for http 1432 // requests 1433 HTTPAddr string 1434 1435 // TLSEnabled indicates if the Agent has TLS enabled for the HTTP API 1436 TLSEnabled bool 1437 1438 // Attributes is an arbitrary set of key/value 1439 // data that can be used for constraints. Examples 1440 // include "kernel.name=linux", "arch=386", "driver.docker=1", 1441 // "docker.runtime=1.8.3" 1442 Attributes map[string]string 1443 1444 // NodeResources captures the available resources on the client. 1445 NodeResources *NodeResources 1446 1447 // ReservedResources captures the set resources on the client that are 1448 // reserved from scheduling. 1449 ReservedResources *NodeReservedResources 1450 1451 // Resources is the available resources on the client. 1452 // For example 'cpu=2' 'memory=2048' 1453 // COMPAT(0.10): Remove in 0.10 1454 Resources *Resources 1455 1456 // Reserved is the set of resources that are reserved, 1457 // and should be subtracted from the total resources for 1458 // the purposes of scheduling. This may be provide certain 1459 // high-watermark tolerances or because of external schedulers 1460 // consuming resources. 1461 Reserved *Resources 1462 1463 // Links are used to 'link' this client to external 1464 // systems. For example 'consul=foo.dc1' 'aws=i-83212' 1465 // 'ami=ami-123' 1466 Links map[string]string 1467 1468 // Meta is used to associate arbitrary metadata with this 1469 // client. This is opaque to Nomad. 1470 Meta map[string]string 1471 1472 // NodeClass is an opaque identifier used to group nodes 1473 // together for the purpose of determining scheduling pressure. 1474 NodeClass string 1475 1476 // ComputedClass is a unique id that identifies nodes with a common set of 1477 // attributes and capabilities. 1478 ComputedClass string 1479 1480 // COMPAT: Remove in Nomad 0.9 1481 // Drain is controlled by the servers, and not the client. 1482 // If true, no jobs will be scheduled to this node, and existing 1483 // allocations will be drained. Superceded by DrainStrategy in Nomad 1484 // 0.8 but kept for backward compat. 1485 Drain bool 1486 1487 // DrainStrategy determines the node's draining behavior. Will be nil 1488 // when Drain=false. 1489 DrainStrategy *DrainStrategy 1490 1491 // SchedulingEligibility determines whether this node will receive new 1492 // placements. 1493 SchedulingEligibility string 1494 1495 // Status of this node 1496 Status string 1497 1498 // StatusDescription is meant to provide more human useful information 1499 StatusDescription string 1500 1501 // StatusUpdatedAt is the time stamp at which the state of the node was 1502 // updated 1503 StatusUpdatedAt int64 1504 1505 // Events is the most recent set of events generated for the node, 1506 // retaining only MaxRetainedNodeEvents number at a time 1507 Events []*NodeEvent 1508 1509 // Drivers is a map of driver names to current driver information 1510 Drivers map[string]*DriverInfo 1511 1512 // Raft Indexes 1513 CreateIndex uint64 1514 ModifyIndex uint64 1515 } 1516 1517 // Ready returns true if the node is ready for running allocations 1518 func (n *Node) Ready() bool { 1519 // Drain is checked directly to support pre-0.8 Node data 1520 return n.Status == NodeStatusReady && !n.Drain && n.SchedulingEligibility == NodeSchedulingEligible 1521 } 1522 1523 func (n *Node) Canonicalize() { 1524 if n == nil { 1525 return 1526 } 1527 1528 // COMPAT Remove in 0.10 1529 // In v0.8.0 we introduced scheduling eligibility, so we need to set it for 1530 // upgrading nodes 1531 if n.SchedulingEligibility == "" { 1532 if n.Drain { 1533 n.SchedulingEligibility = NodeSchedulingIneligible 1534 } else { 1535 n.SchedulingEligibility = NodeSchedulingEligible 1536 } 1537 } 1538 } 1539 1540 func (n *Node) Copy() *Node { 1541 if n == nil { 1542 return nil 1543 } 1544 nn := new(Node) 1545 *nn = *n 1546 nn.Attributes = helper.CopyMapStringString(nn.Attributes) 1547 nn.Resources = nn.Resources.Copy() 1548 nn.Reserved = nn.Reserved.Copy() 1549 nn.NodeResources = nn.NodeResources.Copy() 1550 nn.ReservedResources = nn.ReservedResources.Copy() 1551 nn.Links = helper.CopyMapStringString(nn.Links) 1552 nn.Meta = helper.CopyMapStringString(nn.Meta) 1553 nn.Events = copyNodeEvents(n.Events) 1554 nn.DrainStrategy = nn.DrainStrategy.Copy() 1555 nn.Drivers = copyNodeDrivers(n.Drivers) 1556 return nn 1557 } 1558 1559 // copyNodeEvents is a helper to copy a list of NodeEvent's 1560 func copyNodeEvents(events []*NodeEvent) []*NodeEvent { 1561 l := len(events) 1562 if l == 0 { 1563 return nil 1564 } 1565 1566 c := make([]*NodeEvent, l) 1567 for i, event := range events { 1568 c[i] = event.Copy() 1569 } 1570 return c 1571 } 1572 1573 // copyNodeDrivers is a helper to copy a map of DriverInfo 1574 func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo { 1575 l := len(drivers) 1576 if l == 0 { 1577 return nil 1578 } 1579 1580 c := make(map[string]*DriverInfo, l) 1581 for driver, info := range drivers { 1582 c[driver] = info.Copy() 1583 } 1584 return c 1585 } 1586 1587 // TerminalStatus returns if the current status is terminal and 1588 // will no longer transition. 1589 func (n *Node) TerminalStatus() bool { 1590 switch n.Status { 1591 case NodeStatusDown: 1592 return true 1593 default: 1594 return false 1595 } 1596 } 1597 1598 // COMPAT(0.11): Remove in 0.11 1599 // ComparableReservedResources returns the reserved resouces on the node 1600 // handling upgrade paths. Reserved networks must be handled separately. After 1601 // 0.11 calls to this should be replaced with: 1602 // node.ReservedResources.Comparable() 1603 func (n *Node) ComparableReservedResources() *ComparableResources { 1604 // See if we can no-op 1605 if n.Reserved == nil && n.ReservedResources == nil { 1606 return nil 1607 } 1608 1609 // Node already has 0.9+ behavior 1610 if n.ReservedResources != nil { 1611 return n.ReservedResources.Comparable() 1612 } 1613 1614 // Upgrade path 1615 return &ComparableResources{ 1616 Flattened: AllocatedTaskResources{ 1617 Cpu: AllocatedCpuResources{ 1618 CpuShares: int64(n.Reserved.CPU), 1619 }, 1620 Memory: AllocatedMemoryResources{ 1621 MemoryMB: int64(n.Reserved.MemoryMB), 1622 }, 1623 }, 1624 Shared: AllocatedSharedResources{ 1625 DiskMB: int64(n.Reserved.DiskMB), 1626 }, 1627 } 1628 } 1629 1630 // COMPAT(0.11): Remove in 0.11 1631 // ComparableResources returns the resouces on the node 1632 // handling upgrade paths. Networking must be handled separately. After 0.11 1633 // calls to this should be replaced with: node.NodeResources.Comparable() 1634 func (n *Node) ComparableResources() *ComparableResources { 1635 // Node already has 0.9+ behavior 1636 if n.NodeResources != nil { 1637 return n.NodeResources.Comparable() 1638 } 1639 1640 // Upgrade path 1641 return &ComparableResources{ 1642 Flattened: AllocatedTaskResources{ 1643 Cpu: AllocatedCpuResources{ 1644 CpuShares: int64(n.Resources.CPU), 1645 }, 1646 Memory: AllocatedMemoryResources{ 1647 MemoryMB: int64(n.Resources.MemoryMB), 1648 }, 1649 }, 1650 Shared: AllocatedSharedResources{ 1651 DiskMB: int64(n.Resources.DiskMB), 1652 }, 1653 } 1654 } 1655 1656 // Stub returns a summarized version of the node 1657 func (n *Node) Stub() *NodeListStub { 1658 1659 addr, _, _ := net.SplitHostPort(n.HTTPAddr) 1660 1661 return &NodeListStub{ 1662 Address: addr, 1663 ID: n.ID, 1664 Datacenter: n.Datacenter, 1665 Name: n.Name, 1666 NodeClass: n.NodeClass, 1667 Version: n.Attributes["nomad.version"], 1668 Drain: n.Drain, 1669 SchedulingEligibility: n.SchedulingEligibility, 1670 Status: n.Status, 1671 StatusDescription: n.StatusDescription, 1672 Drivers: n.Drivers, 1673 CreateIndex: n.CreateIndex, 1674 ModifyIndex: n.ModifyIndex, 1675 } 1676 } 1677 1678 // NodeListStub is used to return a subset of job information 1679 // for the job list 1680 type NodeListStub struct { 1681 Address string 1682 ID string 1683 Datacenter string 1684 Name string 1685 NodeClass string 1686 Version string 1687 Drain bool 1688 SchedulingEligibility string 1689 Status string 1690 StatusDescription string 1691 Drivers map[string]*DriverInfo 1692 CreateIndex uint64 1693 ModifyIndex uint64 1694 } 1695 1696 // Resources is used to define the resources available 1697 // on a client 1698 type Resources struct { 1699 CPU int 1700 MemoryMB int 1701 DiskMB int 1702 IOPS int // COMPAT(0.10): Only being used to issue warnings 1703 Networks Networks 1704 Devices []*RequestedDevice 1705 } 1706 1707 const ( 1708 BytesInMegabyte = 1024 * 1024 1709 ) 1710 1711 // DefaultResources is a small resources object that contains the 1712 // default resources requests that we will provide to an object. 1713 // --- THIS FUNCTION IS REPLICATED IN api/resources.go and should 1714 // be kept in sync. 1715 func DefaultResources() *Resources { 1716 return &Resources{ 1717 CPU: 100, 1718 MemoryMB: 300, 1719 } 1720 } 1721 1722 // MinResources is a small resources object that contains the 1723 // absolute minimum resources that we will provide to an object. 1724 // This should not be confused with the defaults which are 1725 // provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN 1726 // api/resources.go and should be kept in sync. 1727 func MinResources() *Resources { 1728 return &Resources{ 1729 CPU: 20, 1730 MemoryMB: 10, 1731 } 1732 } 1733 1734 // DiskInBytes returns the amount of disk resources in bytes. 1735 func (r *Resources) DiskInBytes() int64 { 1736 return int64(r.DiskMB * BytesInMegabyte) 1737 } 1738 1739 func (r *Resources) Validate() error { 1740 var mErr multierror.Error 1741 if err := r.MeetsMinResources(); err != nil { 1742 mErr.Errors = append(mErr.Errors, err) 1743 } 1744 1745 // Ensure the task isn't asking for disk resources 1746 if r.DiskMB > 0 { 1747 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 1748 } 1749 1750 for i, d := range r.Devices { 1751 if err := d.Validate(); err != nil { 1752 mErr.Errors = append(mErr.Errors, fmt.Errorf("device %d failed validation: %v", i+1, err)) 1753 } 1754 } 1755 1756 return mErr.ErrorOrNil() 1757 } 1758 1759 // Merge merges this resource with another resource. 1760 func (r *Resources) Merge(other *Resources) { 1761 if other.CPU != 0 { 1762 r.CPU = other.CPU 1763 } 1764 if other.MemoryMB != 0 { 1765 r.MemoryMB = other.MemoryMB 1766 } 1767 if other.DiskMB != 0 { 1768 r.DiskMB = other.DiskMB 1769 } 1770 if len(other.Networks) != 0 { 1771 r.Networks = other.Networks 1772 } 1773 if len(other.Devices) != 0 { 1774 r.Devices = other.Devices 1775 } 1776 } 1777 1778 func (r *Resources) Canonicalize() { 1779 // Ensure that an empty and nil slices are treated the same to avoid scheduling 1780 // problems since we use reflect DeepEquals. 1781 if len(r.Networks) == 0 { 1782 r.Networks = nil 1783 } 1784 if len(r.Devices) == 0 { 1785 r.Devices = nil 1786 } 1787 1788 for _, n := range r.Networks { 1789 n.Canonicalize() 1790 } 1791 } 1792 1793 // MeetsMinResources returns an error if the resources specified are less than 1794 // the minimum allowed. 1795 // This is based on the minimums defined in the Resources type 1796 func (r *Resources) MeetsMinResources() error { 1797 var mErr multierror.Error 1798 minResources := MinResources() 1799 if r.CPU < minResources.CPU { 1800 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU)) 1801 } 1802 if r.MemoryMB < minResources.MemoryMB { 1803 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB)) 1804 } 1805 for i, n := range r.Networks { 1806 if err := n.MeetsMinResources(); err != nil { 1807 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 1808 } 1809 } 1810 1811 return mErr.ErrorOrNil() 1812 } 1813 1814 // Copy returns a deep copy of the resources 1815 func (r *Resources) Copy() *Resources { 1816 if r == nil { 1817 return nil 1818 } 1819 newR := new(Resources) 1820 *newR = *r 1821 1822 // Copy the network objects 1823 if r.Networks != nil { 1824 n := len(r.Networks) 1825 newR.Networks = make([]*NetworkResource, n) 1826 for i := 0; i < n; i++ { 1827 newR.Networks[i] = r.Networks[i].Copy() 1828 } 1829 } 1830 1831 // Copy the devices 1832 if r.Devices != nil { 1833 n := len(r.Devices) 1834 newR.Devices = make([]*RequestedDevice, n) 1835 for i := 0; i < n; i++ { 1836 newR.Devices[i] = r.Devices[i].Copy() 1837 } 1838 } 1839 1840 return newR 1841 } 1842 1843 // NetIndex finds the matching net index using device name 1844 func (r *Resources) NetIndex(n *NetworkResource) int { 1845 return r.Networks.NetIndex(n) 1846 } 1847 1848 // Superset checks if one set of resources is a superset 1849 // of another. This ignores network resources, and the NetworkIndex 1850 // should be used for that. 1851 func (r *Resources) Superset(other *Resources) (bool, string) { 1852 if r.CPU < other.CPU { 1853 return false, "cpu" 1854 } 1855 if r.MemoryMB < other.MemoryMB { 1856 return false, "memory" 1857 } 1858 if r.DiskMB < other.DiskMB { 1859 return false, "disk" 1860 } 1861 return true, "" 1862 } 1863 1864 // Add adds the resources of the delta to this, potentially 1865 // returning an error if not possible. 1866 func (r *Resources) Add(delta *Resources) error { 1867 if delta == nil { 1868 return nil 1869 } 1870 r.CPU += delta.CPU 1871 r.MemoryMB += delta.MemoryMB 1872 r.DiskMB += delta.DiskMB 1873 1874 for _, n := range delta.Networks { 1875 // Find the matching interface by IP or CIDR 1876 idx := r.NetIndex(n) 1877 if idx == -1 { 1878 r.Networks = append(r.Networks, n.Copy()) 1879 } else { 1880 r.Networks[idx].Add(n) 1881 } 1882 } 1883 return nil 1884 } 1885 1886 func (r *Resources) GoString() string { 1887 return fmt.Sprintf("*%#v", *r) 1888 } 1889 1890 type Port struct { 1891 Label string 1892 Value int 1893 } 1894 1895 // NetworkResource is used to represent available network 1896 // resources 1897 type NetworkResource struct { 1898 Device string // Name of the device 1899 CIDR string // CIDR block of addresses 1900 IP string // Host IP address 1901 MBits int // Throughput 1902 ReservedPorts []Port // Host Reserved ports 1903 DynamicPorts []Port // Host Dynamically assigned ports 1904 } 1905 1906 func (nr *NetworkResource) Equals(other *NetworkResource) bool { 1907 if nr.Device != other.Device { 1908 return false 1909 } 1910 1911 if nr.CIDR != other.CIDR { 1912 return false 1913 } 1914 1915 if nr.IP != other.IP { 1916 return false 1917 } 1918 1919 if nr.MBits != other.MBits { 1920 return false 1921 } 1922 1923 if len(nr.ReservedPorts) != len(other.ReservedPorts) { 1924 return false 1925 } 1926 1927 for i, port := range nr.ReservedPorts { 1928 if len(other.ReservedPorts) <= i { 1929 return false 1930 } 1931 if port != other.ReservedPorts[i] { 1932 return false 1933 } 1934 } 1935 1936 if len(nr.DynamicPorts) != len(other.DynamicPorts) { 1937 return false 1938 } 1939 for i, port := range nr.DynamicPorts { 1940 if len(other.DynamicPorts) <= i { 1941 return false 1942 } 1943 if port != other.DynamicPorts[i] { 1944 return false 1945 } 1946 } 1947 return true 1948 } 1949 1950 func (n *NetworkResource) Canonicalize() { 1951 // Ensure that an empty and nil slices are treated the same to avoid scheduling 1952 // problems since we use reflect DeepEquals. 1953 if len(n.ReservedPorts) == 0 { 1954 n.ReservedPorts = nil 1955 } 1956 if len(n.DynamicPorts) == 0 { 1957 n.DynamicPorts = nil 1958 } 1959 } 1960 1961 // MeetsMinResources returns an error if the resources specified are less than 1962 // the minimum allowed. 1963 func (n *NetworkResource) MeetsMinResources() error { 1964 var mErr multierror.Error 1965 if n.MBits < 1 { 1966 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits)) 1967 } 1968 return mErr.ErrorOrNil() 1969 } 1970 1971 // Copy returns a deep copy of the network resource 1972 func (n *NetworkResource) Copy() *NetworkResource { 1973 if n == nil { 1974 return nil 1975 } 1976 newR := new(NetworkResource) 1977 *newR = *n 1978 if n.ReservedPorts != nil { 1979 newR.ReservedPorts = make([]Port, len(n.ReservedPorts)) 1980 copy(newR.ReservedPorts, n.ReservedPorts) 1981 } 1982 if n.DynamicPorts != nil { 1983 newR.DynamicPorts = make([]Port, len(n.DynamicPorts)) 1984 copy(newR.DynamicPorts, n.DynamicPorts) 1985 } 1986 return newR 1987 } 1988 1989 // Add adds the resources of the delta to this, potentially 1990 // returning an error if not possible. 1991 func (n *NetworkResource) Add(delta *NetworkResource) { 1992 if len(delta.ReservedPorts) > 0 { 1993 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...) 1994 } 1995 n.MBits += delta.MBits 1996 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...) 1997 } 1998 1999 func (n *NetworkResource) GoString() string { 2000 return fmt.Sprintf("*%#v", *n) 2001 } 2002 2003 // PortLabels returns a map of port labels to their assigned host ports. 2004 func (n *NetworkResource) PortLabels() map[string]int { 2005 num := len(n.ReservedPorts) + len(n.DynamicPorts) 2006 labelValues := make(map[string]int, num) 2007 for _, port := range n.ReservedPorts { 2008 labelValues[port.Label] = port.Value 2009 } 2010 for _, port := range n.DynamicPorts { 2011 labelValues[port.Label] = port.Value 2012 } 2013 return labelValues 2014 } 2015 2016 // Networks defined for a task on the Resources struct. 2017 type Networks []*NetworkResource 2018 2019 // Port assignment and IP for the given label or empty values. 2020 func (ns Networks) Port(label string) (string, int) { 2021 for _, n := range ns { 2022 for _, p := range n.ReservedPorts { 2023 if p.Label == label { 2024 return n.IP, p.Value 2025 } 2026 } 2027 for _, p := range n.DynamicPorts { 2028 if p.Label == label { 2029 return n.IP, p.Value 2030 } 2031 } 2032 } 2033 return "", 0 2034 } 2035 2036 func (ns Networks) NetIndex(n *NetworkResource) int { 2037 for idx, net := range ns { 2038 if net.Device == n.Device { 2039 return idx 2040 } 2041 } 2042 return -1 2043 } 2044 2045 // RequestedDevice is used to request a device for a task. 2046 type RequestedDevice struct { 2047 // Name is the request name. The possible values are as follows: 2048 // * <type>: A single value only specifies the type of request. 2049 // * <vendor>/<type>: A single slash delimiter assumes the vendor and type of device is specified. 2050 // * <vendor>/<type>/<name>: Two slash delimiters assume vendor, type and specific model are specified. 2051 // 2052 // Examples are as follows: 2053 // * "gpu" 2054 // * "nvidia/gpu" 2055 // * "nvidia/gpu/GTX2080Ti" 2056 Name string 2057 2058 // Count is the number of requested devices 2059 Count uint64 2060 2061 // Constraints are a set of constraints to apply when selecting the device 2062 // to use. 2063 Constraints []*Constraint 2064 2065 // Affinities are a set of affinites to apply when selecting the device 2066 // to use. 2067 Affinities []*Affinity 2068 } 2069 2070 func (r *RequestedDevice) Copy() *RequestedDevice { 2071 if r == nil { 2072 return nil 2073 } 2074 2075 nr := *r 2076 nr.Constraints = CopySliceConstraints(nr.Constraints) 2077 nr.Affinities = CopySliceAffinities(nr.Affinities) 2078 2079 return &nr 2080 } 2081 2082 func (r *RequestedDevice) ID() *DeviceIdTuple { 2083 if r == nil || r.Name == "" { 2084 return nil 2085 } 2086 2087 parts := strings.SplitN(r.Name, "/", 3) 2088 switch len(parts) { 2089 case 1: 2090 return &DeviceIdTuple{ 2091 Type: parts[0], 2092 } 2093 case 2: 2094 return &DeviceIdTuple{ 2095 Vendor: parts[0], 2096 Type: parts[1], 2097 } 2098 default: 2099 return &DeviceIdTuple{ 2100 Vendor: parts[0], 2101 Type: parts[1], 2102 Name: parts[2], 2103 } 2104 } 2105 } 2106 2107 func (r *RequestedDevice) Validate() error { 2108 if r == nil { 2109 return nil 2110 } 2111 2112 var mErr multierror.Error 2113 if r.Name == "" { 2114 multierror.Append(&mErr, errors.New("device name must be given as one of the following: type, vendor/type, or vendor/type/name")) 2115 } 2116 2117 for idx, constr := range r.Constraints { 2118 // Ensure that the constraint doesn't use an operand we do not allow 2119 switch constr.Operand { 2120 case ConstraintDistinctHosts, ConstraintDistinctProperty: 2121 outer := fmt.Errorf("Constraint %d validation failed: using unsupported operand %q", idx+1, constr.Operand) 2122 multierror.Append(&mErr, outer) 2123 default: 2124 if err := constr.Validate(); err != nil { 2125 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 2126 multierror.Append(&mErr, outer) 2127 } 2128 } 2129 } 2130 for idx, affinity := range r.Affinities { 2131 if err := affinity.Validate(); err != nil { 2132 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 2133 multierror.Append(&mErr, outer) 2134 } 2135 } 2136 2137 return mErr.ErrorOrNil() 2138 } 2139 2140 // NodeResources is used to define the resources available on a client node. 2141 type NodeResources struct { 2142 Cpu NodeCpuResources 2143 Memory NodeMemoryResources 2144 Disk NodeDiskResources 2145 Networks Networks 2146 Devices []*NodeDeviceResource 2147 } 2148 2149 func (n *NodeResources) Copy() *NodeResources { 2150 if n == nil { 2151 return nil 2152 } 2153 2154 newN := new(NodeResources) 2155 *newN = *n 2156 2157 // Copy the networks 2158 if n.Networks != nil { 2159 networks := len(n.Networks) 2160 newN.Networks = make([]*NetworkResource, networks) 2161 for i := 0; i < networks; i++ { 2162 newN.Networks[i] = n.Networks[i].Copy() 2163 } 2164 } 2165 2166 // Copy the devices 2167 if n.Devices != nil { 2168 devices := len(n.Devices) 2169 newN.Devices = make([]*NodeDeviceResource, devices) 2170 for i := 0; i < devices; i++ { 2171 newN.Devices[i] = n.Devices[i].Copy() 2172 } 2173 } 2174 2175 return newN 2176 } 2177 2178 // Comparable returns a comparable version of the nodes resources. This 2179 // conversion can be lossy so care must be taken when using it. 2180 func (n *NodeResources) Comparable() *ComparableResources { 2181 if n == nil { 2182 return nil 2183 } 2184 2185 c := &ComparableResources{ 2186 Flattened: AllocatedTaskResources{ 2187 Cpu: AllocatedCpuResources{ 2188 CpuShares: n.Cpu.CpuShares, 2189 }, 2190 Memory: AllocatedMemoryResources{ 2191 MemoryMB: n.Memory.MemoryMB, 2192 }, 2193 Networks: n.Networks, 2194 }, 2195 Shared: AllocatedSharedResources{ 2196 DiskMB: n.Disk.DiskMB, 2197 }, 2198 } 2199 return c 2200 } 2201 2202 func (n *NodeResources) Merge(o *NodeResources) { 2203 if o == nil { 2204 return 2205 } 2206 2207 n.Cpu.Merge(&o.Cpu) 2208 n.Memory.Merge(&o.Memory) 2209 n.Disk.Merge(&o.Disk) 2210 2211 if len(o.Networks) != 0 { 2212 n.Networks = o.Networks 2213 } 2214 2215 if len(o.Devices) != 0 { 2216 n.Devices = o.Devices 2217 } 2218 } 2219 2220 func (n *NodeResources) Equals(o *NodeResources) bool { 2221 if o == nil && n == nil { 2222 return true 2223 } else if o == nil { 2224 return false 2225 } else if n == nil { 2226 return false 2227 } 2228 2229 if !n.Cpu.Equals(&o.Cpu) { 2230 return false 2231 } 2232 if !n.Memory.Equals(&o.Memory) { 2233 return false 2234 } 2235 if !n.Disk.Equals(&o.Disk) { 2236 return false 2237 } 2238 2239 if len(n.Networks) != len(o.Networks) { 2240 return false 2241 } 2242 for i, n := range n.Networks { 2243 if !n.Equals(o.Networks[i]) { 2244 return false 2245 } 2246 } 2247 2248 // Check the devices 2249 if !DevicesEquals(n.Devices, o.Devices) { 2250 return false 2251 } 2252 2253 return true 2254 } 2255 2256 // DevicesEquals returns true if the two device arrays are equal 2257 func DevicesEquals(d1, d2 []*NodeDeviceResource) bool { 2258 if len(d1) != len(d2) { 2259 return false 2260 } 2261 idMap := make(map[DeviceIdTuple]*NodeDeviceResource, len(d1)) 2262 for _, d := range d1 { 2263 idMap[*d.ID()] = d 2264 } 2265 for _, otherD := range d2 { 2266 if d, ok := idMap[*otherD.ID()]; !ok || !d.Equals(otherD) { 2267 return false 2268 } 2269 } 2270 2271 return true 2272 } 2273 2274 // NodeCpuResources captures the CPU resources of the node. 2275 type NodeCpuResources struct { 2276 // CpuShares is the CPU shares available. This is calculated by number of 2277 // cores multiplied by the core frequency. 2278 CpuShares int64 2279 } 2280 2281 func (n *NodeCpuResources) Merge(o *NodeCpuResources) { 2282 if o == nil { 2283 return 2284 } 2285 2286 if o.CpuShares != 0 { 2287 n.CpuShares = o.CpuShares 2288 } 2289 } 2290 2291 func (n *NodeCpuResources) Equals(o *NodeCpuResources) bool { 2292 if o == nil && n == nil { 2293 return true 2294 } else if o == nil { 2295 return false 2296 } else if n == nil { 2297 return false 2298 } 2299 2300 if n.CpuShares != o.CpuShares { 2301 return false 2302 } 2303 2304 return true 2305 } 2306 2307 // NodeMemoryResources captures the memory resources of the node 2308 type NodeMemoryResources struct { 2309 // MemoryMB is the total available memory on the node 2310 MemoryMB int64 2311 } 2312 2313 func (n *NodeMemoryResources) Merge(o *NodeMemoryResources) { 2314 if o == nil { 2315 return 2316 } 2317 2318 if o.MemoryMB != 0 { 2319 n.MemoryMB = o.MemoryMB 2320 } 2321 } 2322 2323 func (n *NodeMemoryResources) Equals(o *NodeMemoryResources) bool { 2324 if o == nil && n == nil { 2325 return true 2326 } else if o == nil { 2327 return false 2328 } else if n == nil { 2329 return false 2330 } 2331 2332 if n.MemoryMB != o.MemoryMB { 2333 return false 2334 } 2335 2336 return true 2337 } 2338 2339 // NodeDiskResources captures the disk resources of the node 2340 type NodeDiskResources struct { 2341 // DiskMB is the total available disk space on the node 2342 DiskMB int64 2343 } 2344 2345 func (n *NodeDiskResources) Merge(o *NodeDiskResources) { 2346 if o == nil { 2347 return 2348 } 2349 if o.DiskMB != 0 { 2350 n.DiskMB = o.DiskMB 2351 } 2352 } 2353 2354 func (n *NodeDiskResources) Equals(o *NodeDiskResources) bool { 2355 if o == nil && n == nil { 2356 return true 2357 } else if o == nil { 2358 return false 2359 } else if n == nil { 2360 return false 2361 } 2362 2363 if n.DiskMB != o.DiskMB { 2364 return false 2365 } 2366 2367 return true 2368 } 2369 2370 // DeviceIdTuple is the tuple that identifies a device 2371 type DeviceIdTuple struct { 2372 Vendor string 2373 Type string 2374 Name string 2375 } 2376 2377 func (d *DeviceIdTuple) String() string { 2378 if d == nil { 2379 return "" 2380 } 2381 2382 return fmt.Sprintf("%s/%s/%s", d.Vendor, d.Type, d.Name) 2383 } 2384 2385 // Matches returns if this Device ID is a superset of the passed ID. 2386 func (id *DeviceIdTuple) Matches(other *DeviceIdTuple) bool { 2387 if other == nil { 2388 return false 2389 } 2390 2391 if other.Name != "" && other.Name != id.Name { 2392 return false 2393 } 2394 2395 if other.Vendor != "" && other.Vendor != id.Vendor { 2396 return false 2397 } 2398 2399 if other.Type != "" && other.Type != id.Type { 2400 return false 2401 } 2402 2403 return true 2404 } 2405 2406 // Equals returns if this Device ID is the same as the passed ID. 2407 func (id *DeviceIdTuple) Equals(o *DeviceIdTuple) bool { 2408 if id == nil && o == nil { 2409 return true 2410 } else if id == nil || o == nil { 2411 return false 2412 } 2413 2414 return o.Vendor == id.Vendor && o.Type == id.Type && o.Name == id.Name 2415 } 2416 2417 // NodeDeviceResource captures a set of devices sharing a common 2418 // vendor/type/device_name tuple. 2419 type NodeDeviceResource struct { 2420 Vendor string 2421 Type string 2422 Name string 2423 Instances []*NodeDevice 2424 Attributes map[string]*psstructs.Attribute 2425 } 2426 2427 func (n *NodeDeviceResource) ID() *DeviceIdTuple { 2428 if n == nil { 2429 return nil 2430 } 2431 2432 return &DeviceIdTuple{ 2433 Vendor: n.Vendor, 2434 Type: n.Type, 2435 Name: n.Name, 2436 } 2437 } 2438 2439 func (n *NodeDeviceResource) Copy() *NodeDeviceResource { 2440 if n == nil { 2441 return nil 2442 } 2443 2444 // Copy the primitives 2445 nn := *n 2446 2447 // Copy the device instances 2448 if l := len(nn.Instances); l != 0 { 2449 nn.Instances = make([]*NodeDevice, 0, l) 2450 for _, d := range n.Instances { 2451 nn.Instances = append(nn.Instances, d.Copy()) 2452 } 2453 } 2454 2455 // Copy the Attributes 2456 nn.Attributes = psstructs.CopyMapStringAttribute(nn.Attributes) 2457 2458 return &nn 2459 } 2460 2461 func (n *NodeDeviceResource) Equals(o *NodeDeviceResource) bool { 2462 if o == nil && n == nil { 2463 return true 2464 } else if o == nil { 2465 return false 2466 } else if n == nil { 2467 return false 2468 } 2469 2470 if n.Vendor != o.Vendor { 2471 return false 2472 } else if n.Type != o.Type { 2473 return false 2474 } else if n.Name != o.Name { 2475 return false 2476 } 2477 2478 // Check the attributes 2479 if len(n.Attributes) != len(o.Attributes) { 2480 return false 2481 } 2482 for k, v := range n.Attributes { 2483 if otherV, ok := o.Attributes[k]; !ok || v != otherV { 2484 return false 2485 } 2486 } 2487 2488 // Check the instances 2489 if len(n.Instances) != len(o.Instances) { 2490 return false 2491 } 2492 idMap := make(map[string]*NodeDevice, len(n.Instances)) 2493 for _, d := range n.Instances { 2494 idMap[d.ID] = d 2495 } 2496 for _, otherD := range o.Instances { 2497 if d, ok := idMap[otherD.ID]; !ok || !d.Equals(otherD) { 2498 return false 2499 } 2500 } 2501 2502 return true 2503 } 2504 2505 // NodeDevice is an instance of a particular device. 2506 type NodeDevice struct { 2507 // ID is the ID of the device. 2508 ID string 2509 2510 // Healthy captures whether the device is healthy. 2511 Healthy bool 2512 2513 // HealthDescription is used to provide a human readable description of why 2514 // the device may be unhealthy. 2515 HealthDescription string 2516 2517 // Locality stores HW locality information for the node to optionally be 2518 // used when making placement decisions. 2519 Locality *NodeDeviceLocality 2520 } 2521 2522 func (n *NodeDevice) Equals(o *NodeDevice) bool { 2523 if o == nil && n == nil { 2524 return true 2525 } else if o == nil { 2526 return false 2527 } else if n == nil { 2528 return false 2529 } 2530 2531 if n.ID != o.ID { 2532 return false 2533 } else if n.Healthy != o.Healthy { 2534 return false 2535 } else if n.HealthDescription != o.HealthDescription { 2536 return false 2537 } else if !n.Locality.Equals(o.Locality) { 2538 return false 2539 } 2540 2541 return false 2542 } 2543 2544 func (n *NodeDevice) Copy() *NodeDevice { 2545 if n == nil { 2546 return nil 2547 } 2548 2549 // Copy the primitives 2550 nn := *n 2551 2552 // Copy the locality 2553 nn.Locality = nn.Locality.Copy() 2554 2555 return &nn 2556 } 2557 2558 // NodeDeviceLocality stores information about the devices hardware locality on 2559 // the node. 2560 type NodeDeviceLocality struct { 2561 // PciBusID is the PCI Bus ID for the device. 2562 PciBusID string 2563 } 2564 2565 func (n *NodeDeviceLocality) Equals(o *NodeDeviceLocality) bool { 2566 if o == nil && n == nil { 2567 return true 2568 } else if o == nil { 2569 return false 2570 } else if n == nil { 2571 return false 2572 } 2573 2574 if n.PciBusID != o.PciBusID { 2575 return false 2576 } 2577 2578 return true 2579 } 2580 2581 func (n *NodeDeviceLocality) Copy() *NodeDeviceLocality { 2582 if n == nil { 2583 return nil 2584 } 2585 2586 // Copy the primitives 2587 nn := *n 2588 return &nn 2589 } 2590 2591 // NodeReservedResources is used to capture the resources on a client node that 2592 // should be reserved and not made available to jobs. 2593 type NodeReservedResources struct { 2594 Cpu NodeReservedCpuResources 2595 Memory NodeReservedMemoryResources 2596 Disk NodeReservedDiskResources 2597 Networks NodeReservedNetworkResources 2598 } 2599 2600 func (n *NodeReservedResources) Copy() *NodeReservedResources { 2601 if n == nil { 2602 return nil 2603 } 2604 newN := new(NodeReservedResources) 2605 *newN = *n 2606 return newN 2607 } 2608 2609 // Comparable returns a comparable version of the node's reserved resources. The 2610 // returned resources doesn't contain any network information. This conversion 2611 // can be lossy so care must be taken when using it. 2612 func (n *NodeReservedResources) Comparable() *ComparableResources { 2613 if n == nil { 2614 return nil 2615 } 2616 2617 c := &ComparableResources{ 2618 Flattened: AllocatedTaskResources{ 2619 Cpu: AllocatedCpuResources{ 2620 CpuShares: n.Cpu.CpuShares, 2621 }, 2622 Memory: AllocatedMemoryResources{ 2623 MemoryMB: n.Memory.MemoryMB, 2624 }, 2625 }, 2626 Shared: AllocatedSharedResources{ 2627 DiskMB: n.Disk.DiskMB, 2628 }, 2629 } 2630 return c 2631 } 2632 2633 // NodeReservedCpuResources captures the reserved CPU resources of the node. 2634 type NodeReservedCpuResources struct { 2635 CpuShares int64 2636 } 2637 2638 // NodeReservedMemoryResources captures the reserved memory resources of the node. 2639 type NodeReservedMemoryResources struct { 2640 MemoryMB int64 2641 } 2642 2643 // NodeReservedDiskResources captures the reserved disk resources of the node. 2644 type NodeReservedDiskResources struct { 2645 DiskMB int64 2646 } 2647 2648 // NodeReservedNetworkResources captures the reserved network resources of the node. 2649 type NodeReservedNetworkResources struct { 2650 // ReservedHostPorts is the set of ports reserved on all host network 2651 // interfaces. Its format is a comma separate list of integers or integer 2652 // ranges. (80,443,1000-2000,2005) 2653 ReservedHostPorts string 2654 } 2655 2656 // ParsePortHostPorts returns the reserved host ports. 2657 func (n *NodeReservedNetworkResources) ParseReservedHostPorts() ([]uint64, error) { 2658 return ParsePortRanges(n.ReservedHostPorts) 2659 } 2660 2661 // AllocatedResources is the set of resources to be used by an allocation. 2662 type AllocatedResources struct { 2663 // Tasks is a mapping of task name to the resources for the task. 2664 Tasks map[string]*AllocatedTaskResources 2665 2666 // Shared is the set of resource that are shared by all tasks in the group. 2667 Shared AllocatedSharedResources 2668 } 2669 2670 func (a *AllocatedResources) Copy() *AllocatedResources { 2671 if a == nil { 2672 return nil 2673 } 2674 newA := new(AllocatedResources) 2675 *newA = *a 2676 2677 if a.Tasks != nil { 2678 tr := make(map[string]*AllocatedTaskResources, len(newA.Tasks)) 2679 for task, resource := range newA.Tasks { 2680 tr[task] = resource.Copy() 2681 } 2682 newA.Tasks = tr 2683 } 2684 2685 return newA 2686 } 2687 2688 // Comparable returns a comparable version of the allocations allocated 2689 // resources. This conversion can be lossy so care must be taken when using it. 2690 func (a *AllocatedResources) Comparable() *ComparableResources { 2691 if a == nil { 2692 return nil 2693 } 2694 2695 c := &ComparableResources{ 2696 Shared: a.Shared, 2697 } 2698 for _, r := range a.Tasks { 2699 c.Flattened.Add(r) 2700 } 2701 return c 2702 } 2703 2704 // OldTaskResources returns the pre-0.9.0 map of task resources 2705 func (a *AllocatedResources) OldTaskResources() map[string]*Resources { 2706 m := make(map[string]*Resources, len(a.Tasks)) 2707 for name, res := range a.Tasks { 2708 m[name] = &Resources{ 2709 CPU: int(res.Cpu.CpuShares), 2710 MemoryMB: int(res.Memory.MemoryMB), 2711 Networks: res.Networks, 2712 } 2713 } 2714 2715 return m 2716 } 2717 2718 // AllocatedTaskResources are the set of resources allocated to a task. 2719 type AllocatedTaskResources struct { 2720 Cpu AllocatedCpuResources 2721 Memory AllocatedMemoryResources 2722 Networks Networks 2723 Devices []*AllocatedDeviceResource 2724 } 2725 2726 func (a *AllocatedTaskResources) Copy() *AllocatedTaskResources { 2727 if a == nil { 2728 return nil 2729 } 2730 newA := new(AllocatedTaskResources) 2731 *newA = *a 2732 2733 // Copy the networks 2734 if a.Networks != nil { 2735 n := len(a.Networks) 2736 newA.Networks = make([]*NetworkResource, n) 2737 for i := 0; i < n; i++ { 2738 newA.Networks[i] = a.Networks[i].Copy() 2739 } 2740 } 2741 2742 // Copy the devices 2743 if newA.Devices != nil { 2744 n := len(a.Devices) 2745 newA.Devices = make([]*AllocatedDeviceResource, n) 2746 for i := 0; i < n; i++ { 2747 newA.Devices[i] = a.Devices[i].Copy() 2748 } 2749 } 2750 2751 return newA 2752 } 2753 2754 // NetIndex finds the matching net index using device name 2755 func (a *AllocatedTaskResources) NetIndex(n *NetworkResource) int { 2756 return a.Networks.NetIndex(n) 2757 } 2758 2759 func (a *AllocatedTaskResources) Add(delta *AllocatedTaskResources) { 2760 if delta == nil { 2761 return 2762 } 2763 2764 a.Cpu.Add(&delta.Cpu) 2765 a.Memory.Add(&delta.Memory) 2766 2767 for _, n := range delta.Networks { 2768 // Find the matching interface by IP or CIDR 2769 idx := a.NetIndex(n) 2770 if idx == -1 { 2771 a.Networks = append(a.Networks, n.Copy()) 2772 } else { 2773 a.Networks[idx].Add(n) 2774 } 2775 } 2776 2777 for _, d := range delta.Devices { 2778 // Find the matching device 2779 idx := AllocatedDevices(a.Devices).Index(d) 2780 if idx == -1 { 2781 a.Devices = append(a.Devices, d.Copy()) 2782 } else { 2783 a.Devices[idx].Add(d) 2784 } 2785 } 2786 } 2787 2788 // Comparable turns AllocatedTaskResources into ComparableResources 2789 // as a helper step in preemption 2790 func (a *AllocatedTaskResources) Comparable() *ComparableResources { 2791 ret := &ComparableResources{ 2792 Flattened: AllocatedTaskResources{ 2793 Cpu: AllocatedCpuResources{ 2794 CpuShares: a.Cpu.CpuShares, 2795 }, 2796 Memory: AllocatedMemoryResources{ 2797 MemoryMB: a.Memory.MemoryMB, 2798 }, 2799 }, 2800 } 2801 if len(a.Networks) > 0 { 2802 for _, net := range a.Networks { 2803 ret.Flattened.Networks = append(ret.Flattened.Networks, net) 2804 } 2805 } 2806 return ret 2807 } 2808 2809 // Subtract only subtracts CPU and Memory resources. Network utilization 2810 // is managed separately in NetworkIndex 2811 func (a *AllocatedTaskResources) Subtract(delta *AllocatedTaskResources) { 2812 if delta == nil { 2813 return 2814 } 2815 2816 a.Cpu.Subtract(&delta.Cpu) 2817 a.Memory.Subtract(&delta.Memory) 2818 } 2819 2820 // AllocatedSharedResources are the set of resources allocated to a task group. 2821 type AllocatedSharedResources struct { 2822 DiskMB int64 2823 } 2824 2825 func (a *AllocatedSharedResources) Add(delta *AllocatedSharedResources) { 2826 if delta == nil { 2827 return 2828 } 2829 2830 a.DiskMB += delta.DiskMB 2831 } 2832 2833 func (a *AllocatedSharedResources) Subtract(delta *AllocatedSharedResources) { 2834 if delta == nil { 2835 return 2836 } 2837 2838 a.DiskMB -= delta.DiskMB 2839 } 2840 2841 // AllocatedCpuResources captures the allocated CPU resources. 2842 type AllocatedCpuResources struct { 2843 CpuShares int64 2844 } 2845 2846 func (a *AllocatedCpuResources) Add(delta *AllocatedCpuResources) { 2847 if delta == nil { 2848 return 2849 } 2850 2851 a.CpuShares += delta.CpuShares 2852 } 2853 2854 func (a *AllocatedCpuResources) Subtract(delta *AllocatedCpuResources) { 2855 if delta == nil { 2856 return 2857 } 2858 2859 a.CpuShares -= delta.CpuShares 2860 } 2861 2862 // AllocatedMemoryResources captures the allocated memory resources. 2863 type AllocatedMemoryResources struct { 2864 MemoryMB int64 2865 } 2866 2867 func (a *AllocatedMemoryResources) Add(delta *AllocatedMemoryResources) { 2868 if delta == nil { 2869 return 2870 } 2871 2872 a.MemoryMB += delta.MemoryMB 2873 } 2874 2875 func (a *AllocatedMemoryResources) Subtract(delta *AllocatedMemoryResources) { 2876 if delta == nil { 2877 return 2878 } 2879 2880 a.MemoryMB -= delta.MemoryMB 2881 } 2882 2883 type AllocatedDevices []*AllocatedDeviceResource 2884 2885 // Index finds the matching index using the passed device. If not found, -1 is 2886 // returned. 2887 func (a AllocatedDevices) Index(d *AllocatedDeviceResource) int { 2888 if d == nil { 2889 return -1 2890 } 2891 2892 for i, o := range a { 2893 if o.ID().Equals(d.ID()) { 2894 return i 2895 } 2896 } 2897 2898 return -1 2899 } 2900 2901 // AllocatedDeviceResource captures a set of allocated devices. 2902 type AllocatedDeviceResource struct { 2903 // Vendor, Type, and Name are used to select the plugin to request the 2904 // device IDs from. 2905 Vendor string 2906 Type string 2907 Name string 2908 2909 // DeviceIDs is the set of allocated devices 2910 DeviceIDs []string 2911 } 2912 2913 func (a *AllocatedDeviceResource) ID() *DeviceIdTuple { 2914 if a == nil { 2915 return nil 2916 } 2917 2918 return &DeviceIdTuple{ 2919 Vendor: a.Vendor, 2920 Type: a.Type, 2921 Name: a.Name, 2922 } 2923 } 2924 2925 func (a *AllocatedDeviceResource) Add(delta *AllocatedDeviceResource) { 2926 if delta == nil { 2927 return 2928 } 2929 2930 a.DeviceIDs = append(a.DeviceIDs, delta.DeviceIDs...) 2931 } 2932 2933 func (a *AllocatedDeviceResource) Copy() *AllocatedDeviceResource { 2934 if a == nil { 2935 return a 2936 } 2937 2938 na := *a 2939 2940 // Copy the devices 2941 na.DeviceIDs = make([]string, len(a.DeviceIDs)) 2942 for i, id := range a.DeviceIDs { 2943 na.DeviceIDs[i] = id 2944 } 2945 2946 return &na 2947 } 2948 2949 // ComparableResources is the set of resources allocated to a task group but 2950 // not keyed by Task, making it easier to compare. 2951 type ComparableResources struct { 2952 Flattened AllocatedTaskResources 2953 Shared AllocatedSharedResources 2954 } 2955 2956 func (c *ComparableResources) Add(delta *ComparableResources) { 2957 if delta == nil { 2958 return 2959 } 2960 2961 c.Flattened.Add(&delta.Flattened) 2962 c.Shared.Add(&delta.Shared) 2963 } 2964 2965 func (c *ComparableResources) Subtract(delta *ComparableResources) { 2966 if delta == nil { 2967 return 2968 } 2969 2970 c.Flattened.Subtract(&delta.Flattened) 2971 c.Shared.Subtract(&delta.Shared) 2972 } 2973 2974 func (c *ComparableResources) Copy() *ComparableResources { 2975 if c == nil { 2976 return nil 2977 } 2978 newR := new(ComparableResources) 2979 *newR = *c 2980 return newR 2981 } 2982 2983 // Superset checks if one set of resources is a superset of another. This 2984 // ignores network resources, and the NetworkIndex should be used for that. 2985 func (c *ComparableResources) Superset(other *ComparableResources) (bool, string) { 2986 if c.Flattened.Cpu.CpuShares < other.Flattened.Cpu.CpuShares { 2987 return false, "cpu" 2988 } 2989 if c.Flattened.Memory.MemoryMB < other.Flattened.Memory.MemoryMB { 2990 return false, "memory" 2991 } 2992 if c.Shared.DiskMB < other.Shared.DiskMB { 2993 return false, "disk" 2994 } 2995 return true, "" 2996 } 2997 2998 // allocated finds the matching net index using device name 2999 func (c *ComparableResources) NetIndex(n *NetworkResource) int { 3000 return c.Flattened.Networks.NetIndex(n) 3001 } 3002 3003 const ( 3004 // JobTypeNomad is reserved for internal system tasks and is 3005 // always handled by the CoreScheduler. 3006 JobTypeCore = "_core" 3007 JobTypeService = "service" 3008 JobTypeBatch = "batch" 3009 JobTypeSystem = "system" 3010 ) 3011 3012 const ( 3013 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 3014 JobStatusRunning = "running" // Running means the job has non-terminal allocations 3015 JobStatusDead = "dead" // Dead means all evaluation's and allocations are terminal 3016 ) 3017 3018 const ( 3019 // JobMinPriority is the minimum allowed priority 3020 JobMinPriority = 1 3021 3022 // JobDefaultPriority is the default priority if not 3023 // not specified. 3024 JobDefaultPriority = 50 3025 3026 // JobMaxPriority is the maximum allowed priority 3027 JobMaxPriority = 100 3028 3029 // Ensure CoreJobPriority is higher than any user 3030 // specified job so that it gets priority. This is important 3031 // for the system to remain healthy. 3032 CoreJobPriority = JobMaxPriority * 2 3033 3034 // JobTrackedVersions is the number of historic job versions that are 3035 // kept. 3036 JobTrackedVersions = 6 3037 ) 3038 3039 // Job is the scope of a scheduling request to Nomad. It is the largest 3040 // scoped object, and is a named collection of task groups. Each task group 3041 // is further composed of tasks. A task group (TG) is the unit of scheduling 3042 // however. 3043 type Job struct { 3044 // Stop marks whether the user has stopped the job. A stopped job will 3045 // have all created allocations stopped and acts as a way to stop a job 3046 // without purging it from the system. This allows existing allocs to be 3047 // queried and the job to be inspected as it is being killed. 3048 Stop bool 3049 3050 // Region is the Nomad region that handles scheduling this job 3051 Region string 3052 3053 // Namespace is the namespace the job is submitted into. 3054 Namespace string 3055 3056 // ID is a unique identifier for the job per region. It can be 3057 // specified hierarchically like LineOfBiz/OrgName/Team/Project 3058 ID string 3059 3060 // ParentID is the unique identifier of the job that spawned this job. 3061 ParentID string 3062 3063 // Name is the logical name of the job used to refer to it. This is unique 3064 // per region, but not unique globally. 3065 Name string 3066 3067 // Type is used to control various behaviors about the job. Most jobs 3068 // are service jobs, meaning they are expected to be long lived. 3069 // Some jobs are batch oriented meaning they run and then terminate. 3070 // This can be extended in the future to support custom schedulers. 3071 Type string 3072 3073 // Priority is used to control scheduling importance and if this job 3074 // can preempt other jobs. 3075 Priority int 3076 3077 // AllAtOnce is used to control if incremental scheduling of task groups 3078 // is allowed or if we must do a gang scheduling of the entire job. This 3079 // can slow down larger jobs if resources are not available. 3080 AllAtOnce bool 3081 3082 // Datacenters contains all the datacenters this job is allowed to span 3083 Datacenters []string 3084 3085 // Constraints can be specified at a job level and apply to 3086 // all the task groups and tasks. 3087 Constraints []*Constraint 3088 3089 // Affinities can be specified at the job level to express 3090 // scheduling preferences that apply to all groups and tasks 3091 Affinities []*Affinity 3092 3093 // Spread can be specified at the job level to express spreading 3094 // allocations across a desired attribute, such as datacenter 3095 Spreads []*Spread 3096 3097 // TaskGroups are the collections of task groups that this job needs 3098 // to run. Each task group is an atomic unit of scheduling and placement. 3099 TaskGroups []*TaskGroup 3100 3101 // COMPAT: Remove in 0.7.0. Stagger is deprecated in 0.6.0. 3102 Update UpdateStrategy 3103 3104 // Periodic is used to define the interval the job is run at. 3105 Periodic *PeriodicConfig 3106 3107 // ParameterizedJob is used to specify the job as a parameterized job 3108 // for dispatching. 3109 ParameterizedJob *ParameterizedJobConfig 3110 3111 // Dispatched is used to identify if the Job has been dispatched from a 3112 // parameterized job. 3113 Dispatched bool 3114 3115 // Payload is the payload supplied when the job was dispatched. 3116 Payload []byte 3117 3118 // Meta is used to associate arbitrary metadata with this 3119 // job. This is opaque to Nomad. 3120 Meta map[string]string 3121 3122 // VaultToken is the Vault token that proves the submitter of the job has 3123 // access to the specified Vault policies. This field is only used to 3124 // transfer the token and is not stored after Job submission. 3125 VaultToken string 3126 3127 // Job status 3128 Status string 3129 3130 // StatusDescription is meant to provide more human useful information 3131 StatusDescription string 3132 3133 // Stable marks a job as stable. Stability is only defined on "service" and 3134 // "system" jobs. The stability of a job will be set automatically as part 3135 // of a deployment and can be manually set via APIs. 3136 Stable bool 3137 3138 // Version is a monotonically increasing version number that is incremented 3139 // on each job register. 3140 Version uint64 3141 3142 // SubmitTime is the time at which the job was submitted as a UnixNano in 3143 // UTC 3144 SubmitTime int64 3145 3146 // Raft Indexes 3147 CreateIndex uint64 3148 ModifyIndex uint64 3149 JobModifyIndex uint64 3150 } 3151 3152 // NamespacedID returns the namespaced id useful for logging 3153 func (j *Job) NamespacedID() *NamespacedID { 3154 return &NamespacedID{ 3155 ID: j.ID, 3156 Namespace: j.Namespace, 3157 } 3158 } 3159 3160 // Canonicalize is used to canonicalize fields in the Job. This should be called 3161 // when registering a Job. A set of warnings are returned if the job was changed 3162 // in anyway that the user should be made aware of. 3163 func (j *Job) Canonicalize() (warnings error) { 3164 if j == nil { 3165 return nil 3166 } 3167 3168 var mErr multierror.Error 3169 // Ensure that an empty and nil map are treated the same to avoid scheduling 3170 // problems since we use reflect DeepEquals. 3171 if len(j.Meta) == 0 { 3172 j.Meta = nil 3173 } 3174 3175 // Ensure the job is in a namespace. 3176 if j.Namespace == "" { 3177 j.Namespace = DefaultNamespace 3178 } 3179 3180 for _, tg := range j.TaskGroups { 3181 tg.Canonicalize(j) 3182 } 3183 3184 if j.ParameterizedJob != nil { 3185 j.ParameterizedJob.Canonicalize() 3186 } 3187 3188 if j.Periodic != nil { 3189 j.Periodic.Canonicalize() 3190 } 3191 3192 return mErr.ErrorOrNil() 3193 } 3194 3195 // Copy returns a deep copy of the Job. It is expected that callers use recover. 3196 // This job can panic if the deep copy failed as it uses reflection. 3197 func (j *Job) Copy() *Job { 3198 if j == nil { 3199 return nil 3200 } 3201 nj := new(Job) 3202 *nj = *j 3203 nj.Datacenters = helper.CopySliceString(nj.Datacenters) 3204 nj.Constraints = CopySliceConstraints(nj.Constraints) 3205 nj.Affinities = CopySliceAffinities(nj.Affinities) 3206 3207 if j.TaskGroups != nil { 3208 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 3209 for i, tg := range nj.TaskGroups { 3210 tgs[i] = tg.Copy() 3211 } 3212 nj.TaskGroups = tgs 3213 } 3214 3215 nj.Periodic = nj.Periodic.Copy() 3216 nj.Meta = helper.CopyMapStringString(nj.Meta) 3217 nj.ParameterizedJob = nj.ParameterizedJob.Copy() 3218 return nj 3219 } 3220 3221 // Validate is used to sanity check a job input 3222 func (j *Job) Validate() error { 3223 var mErr multierror.Error 3224 3225 if j.Region == "" { 3226 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 3227 } 3228 if j.ID == "" { 3229 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 3230 } else if strings.Contains(j.ID, " ") { 3231 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 3232 } 3233 if j.Name == "" { 3234 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 3235 } 3236 if j.Namespace == "" { 3237 mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace")) 3238 } 3239 switch j.Type { 3240 case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem: 3241 case "": 3242 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 3243 default: 3244 mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type)) 3245 } 3246 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 3247 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 3248 } 3249 if len(j.Datacenters) == 0 { 3250 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 3251 } 3252 if len(j.TaskGroups) == 0 { 3253 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 3254 } 3255 for idx, constr := range j.Constraints { 3256 if err := constr.Validate(); err != nil { 3257 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 3258 mErr.Errors = append(mErr.Errors, outer) 3259 } 3260 } 3261 if j.Type == JobTypeSystem { 3262 if j.Affinities != nil { 3263 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 3264 } 3265 } else { 3266 for idx, affinity := range j.Affinities { 3267 if err := affinity.Validate(); err != nil { 3268 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 3269 mErr.Errors = append(mErr.Errors, outer) 3270 } 3271 } 3272 } 3273 3274 if j.Type == JobTypeSystem { 3275 if j.Spreads != nil { 3276 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza")) 3277 } 3278 } else { 3279 for idx, spread := range j.Spreads { 3280 if err := spread.Validate(); err != nil { 3281 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err) 3282 mErr.Errors = append(mErr.Errors, outer) 3283 } 3284 } 3285 } 3286 3287 // Check for duplicate task groups 3288 taskGroups := make(map[string]int) 3289 for idx, tg := range j.TaskGroups { 3290 if tg.Name == "" { 3291 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 3292 } else if existing, ok := taskGroups[tg.Name]; ok { 3293 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 3294 } else { 3295 taskGroups[tg.Name] = idx 3296 } 3297 3298 if j.Type == "system" && tg.Count > 1 { 3299 mErr.Errors = append(mErr.Errors, 3300 fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler", 3301 tg.Name, tg.Count)) 3302 } 3303 } 3304 3305 // Validate the task group 3306 for _, tg := range j.TaskGroups { 3307 if err := tg.Validate(j); err != nil { 3308 outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err) 3309 mErr.Errors = append(mErr.Errors, outer) 3310 } 3311 } 3312 3313 // Validate periodic is only used with batch jobs. 3314 if j.IsPeriodic() && j.Periodic.Enabled { 3315 if j.Type != JobTypeBatch { 3316 mErr.Errors = append(mErr.Errors, 3317 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 3318 } 3319 3320 if err := j.Periodic.Validate(); err != nil { 3321 mErr.Errors = append(mErr.Errors, err) 3322 } 3323 } 3324 3325 if j.IsParameterized() { 3326 if j.Type != JobTypeBatch { 3327 mErr.Errors = append(mErr.Errors, 3328 fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch)) 3329 } 3330 3331 if err := j.ParameterizedJob.Validate(); err != nil { 3332 mErr.Errors = append(mErr.Errors, err) 3333 } 3334 } 3335 3336 return mErr.ErrorOrNil() 3337 } 3338 3339 // Warnings returns a list of warnings that may be from dubious settings or 3340 // deprecation warnings. 3341 func (j *Job) Warnings() error { 3342 var mErr multierror.Error 3343 3344 // Check the groups 3345 for _, tg := range j.TaskGroups { 3346 if err := tg.Warnings(j); err != nil { 3347 outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err) 3348 mErr.Errors = append(mErr.Errors, outer) 3349 } 3350 } 3351 3352 return mErr.ErrorOrNil() 3353 } 3354 3355 // LookupTaskGroup finds a task group by name 3356 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 3357 for _, tg := range j.TaskGroups { 3358 if tg.Name == name { 3359 return tg 3360 } 3361 } 3362 return nil 3363 } 3364 3365 // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined 3366 // meta data for the task. When joining Job, Group and Task Meta, the precedence 3367 // is by deepest scope (Task > Group > Job). 3368 func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string { 3369 group := j.LookupTaskGroup(groupName) 3370 if group == nil { 3371 return nil 3372 } 3373 3374 task := group.LookupTask(taskName) 3375 if task == nil { 3376 return nil 3377 } 3378 3379 meta := helper.CopyMapStringString(task.Meta) 3380 if meta == nil { 3381 meta = make(map[string]string, len(group.Meta)+len(j.Meta)) 3382 } 3383 3384 // Add the group specific meta 3385 for k, v := range group.Meta { 3386 if _, ok := meta[k]; !ok { 3387 meta[k] = v 3388 } 3389 } 3390 3391 // Add the job specific meta 3392 for k, v := range j.Meta { 3393 if _, ok := meta[k]; !ok { 3394 meta[k] = v 3395 } 3396 } 3397 3398 return meta 3399 } 3400 3401 // Stopped returns if a job is stopped. 3402 func (j *Job) Stopped() bool { 3403 return j == nil || j.Stop 3404 } 3405 3406 // HasUpdateStrategy returns if any task group in the job has an update strategy 3407 func (j *Job) HasUpdateStrategy() bool { 3408 for _, tg := range j.TaskGroups { 3409 if tg.Update != nil { 3410 return true 3411 } 3412 } 3413 3414 return false 3415 } 3416 3417 // Stub is used to return a summary of the job 3418 func (j *Job) Stub(summary *JobSummary) *JobListStub { 3419 return &JobListStub{ 3420 ID: j.ID, 3421 ParentID: j.ParentID, 3422 Name: j.Name, 3423 Datacenters: j.Datacenters, 3424 Type: j.Type, 3425 Priority: j.Priority, 3426 Periodic: j.IsPeriodic(), 3427 ParameterizedJob: j.IsParameterized(), 3428 Stop: j.Stop, 3429 Status: j.Status, 3430 StatusDescription: j.StatusDescription, 3431 CreateIndex: j.CreateIndex, 3432 ModifyIndex: j.ModifyIndex, 3433 JobModifyIndex: j.JobModifyIndex, 3434 SubmitTime: j.SubmitTime, 3435 JobSummary: summary, 3436 } 3437 } 3438 3439 // IsPeriodic returns whether a job is periodic. 3440 func (j *Job) IsPeriodic() bool { 3441 return j.Periodic != nil 3442 } 3443 3444 // IsPeriodicActive returns whether the job is an active periodic job that will 3445 // create child jobs 3446 func (j *Job) IsPeriodicActive() bool { 3447 return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized() 3448 } 3449 3450 // IsParameterized returns whether a job is parameterized job. 3451 func (j *Job) IsParameterized() bool { 3452 return j.ParameterizedJob != nil && !j.Dispatched 3453 } 3454 3455 // VaultPolicies returns the set of Vault policies per task group, per task 3456 func (j *Job) VaultPolicies() map[string]map[string]*Vault { 3457 policies := make(map[string]map[string]*Vault, len(j.TaskGroups)) 3458 3459 for _, tg := range j.TaskGroups { 3460 tgPolicies := make(map[string]*Vault, len(tg.Tasks)) 3461 3462 for _, task := range tg.Tasks { 3463 if task.Vault == nil { 3464 continue 3465 } 3466 3467 tgPolicies[task.Name] = task.Vault 3468 } 3469 3470 if len(tgPolicies) != 0 { 3471 policies[tg.Name] = tgPolicies 3472 } 3473 } 3474 3475 return policies 3476 } 3477 3478 // RequiredSignals returns a mapping of task groups to tasks to their required 3479 // set of signals 3480 func (j *Job) RequiredSignals() map[string]map[string][]string { 3481 signals := make(map[string]map[string][]string) 3482 3483 for _, tg := range j.TaskGroups { 3484 for _, task := range tg.Tasks { 3485 // Use this local one as a set 3486 taskSignals := make(map[string]struct{}) 3487 3488 // Check if the Vault change mode uses signals 3489 if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal { 3490 taskSignals[task.Vault.ChangeSignal] = struct{}{} 3491 } 3492 3493 // If a user has specified a KillSignal, add it to required signals 3494 if task.KillSignal != "" { 3495 taskSignals[task.KillSignal] = struct{}{} 3496 } 3497 3498 // Check if any template change mode uses signals 3499 for _, t := range task.Templates { 3500 if t.ChangeMode != TemplateChangeModeSignal { 3501 continue 3502 } 3503 3504 taskSignals[t.ChangeSignal] = struct{}{} 3505 } 3506 3507 // Flatten and sort the signals 3508 l := len(taskSignals) 3509 if l == 0 { 3510 continue 3511 } 3512 3513 flat := make([]string, 0, l) 3514 for sig := range taskSignals { 3515 flat = append(flat, sig) 3516 } 3517 3518 sort.Strings(flat) 3519 tgSignals, ok := signals[tg.Name] 3520 if !ok { 3521 tgSignals = make(map[string][]string) 3522 signals[tg.Name] = tgSignals 3523 } 3524 tgSignals[task.Name] = flat 3525 } 3526 3527 } 3528 3529 return signals 3530 } 3531 3532 // SpecChanged determines if the functional specification has changed between 3533 // two job versions. 3534 func (j *Job) SpecChanged(new *Job) bool { 3535 if j == nil { 3536 return new != nil 3537 } 3538 3539 // Create a copy of the new job 3540 c := new.Copy() 3541 3542 // Update the new job so we can do a reflect 3543 c.Status = j.Status 3544 c.StatusDescription = j.StatusDescription 3545 c.Stable = j.Stable 3546 c.Version = j.Version 3547 c.CreateIndex = j.CreateIndex 3548 c.ModifyIndex = j.ModifyIndex 3549 c.JobModifyIndex = j.JobModifyIndex 3550 c.SubmitTime = j.SubmitTime 3551 3552 // Deep equals the jobs 3553 return !reflect.DeepEqual(j, c) 3554 } 3555 3556 func (j *Job) SetSubmitTime() { 3557 j.SubmitTime = time.Now().UTC().UnixNano() 3558 } 3559 3560 // JobListStub is used to return a subset of job information 3561 // for the job list 3562 type JobListStub struct { 3563 ID string 3564 ParentID string 3565 Name string 3566 Datacenters []string 3567 Type string 3568 Priority int 3569 Periodic bool 3570 ParameterizedJob bool 3571 Stop bool 3572 Status string 3573 StatusDescription string 3574 JobSummary *JobSummary 3575 CreateIndex uint64 3576 ModifyIndex uint64 3577 JobModifyIndex uint64 3578 SubmitTime int64 3579 } 3580 3581 // JobSummary summarizes the state of the allocations of a job 3582 type JobSummary struct { 3583 // JobID is the ID of the job the summary is for 3584 JobID string 3585 3586 // Namespace is the namespace of the job and its summary 3587 Namespace string 3588 3589 // Summary contains the summary per task group for the Job 3590 Summary map[string]TaskGroupSummary 3591 3592 // Children contains a summary for the children of this job. 3593 Children *JobChildrenSummary 3594 3595 // Raft Indexes 3596 CreateIndex uint64 3597 ModifyIndex uint64 3598 } 3599 3600 // Copy returns a new copy of JobSummary 3601 func (js *JobSummary) Copy() *JobSummary { 3602 newJobSummary := new(JobSummary) 3603 *newJobSummary = *js 3604 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary)) 3605 for k, v := range js.Summary { 3606 newTGSummary[k] = v 3607 } 3608 newJobSummary.Summary = newTGSummary 3609 newJobSummary.Children = newJobSummary.Children.Copy() 3610 return newJobSummary 3611 } 3612 3613 // JobChildrenSummary contains the summary of children job statuses 3614 type JobChildrenSummary struct { 3615 Pending int64 3616 Running int64 3617 Dead int64 3618 } 3619 3620 // Copy returns a new copy of a JobChildrenSummary 3621 func (jc *JobChildrenSummary) Copy() *JobChildrenSummary { 3622 if jc == nil { 3623 return nil 3624 } 3625 3626 njc := new(JobChildrenSummary) 3627 *njc = *jc 3628 return njc 3629 } 3630 3631 // TaskGroup summarizes the state of all the allocations of a particular 3632 // TaskGroup 3633 type TaskGroupSummary struct { 3634 Queued int 3635 Complete int 3636 Failed int 3637 Running int 3638 Starting int 3639 Lost int 3640 } 3641 3642 const ( 3643 // Checks uses any registered health check state in combination with task 3644 // states to determine if a allocation is healthy. 3645 UpdateStrategyHealthCheck_Checks = "checks" 3646 3647 // TaskStates uses the task states of an allocation to determine if the 3648 // allocation is healthy. 3649 UpdateStrategyHealthCheck_TaskStates = "task_states" 3650 3651 // Manual allows the operator to manually signal to Nomad when an 3652 // allocations is healthy. This allows more advanced health checking that is 3653 // outside of the scope of Nomad. 3654 UpdateStrategyHealthCheck_Manual = "manual" 3655 ) 3656 3657 var ( 3658 // DefaultUpdateStrategy provides a baseline that can be used to upgrade 3659 // jobs with the old policy or for populating field defaults. 3660 DefaultUpdateStrategy = &UpdateStrategy{ 3661 Stagger: 30 * time.Second, 3662 MaxParallel: 1, 3663 HealthCheck: UpdateStrategyHealthCheck_Checks, 3664 MinHealthyTime: 10 * time.Second, 3665 HealthyDeadline: 5 * time.Minute, 3666 ProgressDeadline: 10 * time.Minute, 3667 AutoRevert: false, 3668 Canary: 0, 3669 } 3670 ) 3671 3672 // UpdateStrategy is used to modify how updates are done 3673 type UpdateStrategy struct { 3674 // Stagger is used to determine the rate at which allocations are migrated 3675 // due to down or draining nodes. 3676 Stagger time.Duration 3677 3678 // MaxParallel is how many updates can be done in parallel 3679 MaxParallel int 3680 3681 // HealthCheck specifies the mechanism in which allocations are marked 3682 // healthy or unhealthy as part of a deployment. 3683 HealthCheck string 3684 3685 // MinHealthyTime is the minimum time an allocation must be in the healthy 3686 // state before it is marked as healthy, unblocking more allocations to be 3687 // rolled. 3688 MinHealthyTime time.Duration 3689 3690 // HealthyDeadline is the time in which an allocation must be marked as 3691 // healthy before it is automatically transitioned to unhealthy. This time 3692 // period doesn't count against the MinHealthyTime. 3693 HealthyDeadline time.Duration 3694 3695 // ProgressDeadline is the time in which an allocation as part of the 3696 // deployment must transition to healthy. If no allocation becomes healthy 3697 // after the deadline, the deployment is marked as failed. If the deadline 3698 // is zero, the first failure causes the deployment to fail. 3699 ProgressDeadline time.Duration 3700 3701 // AutoRevert declares that if a deployment fails because of unhealthy 3702 // allocations, there should be an attempt to auto-revert the job to a 3703 // stable version. 3704 AutoRevert bool 3705 3706 // Canary is the number of canaries to deploy when a change to the task 3707 // group is detected. 3708 Canary int 3709 } 3710 3711 func (u *UpdateStrategy) Copy() *UpdateStrategy { 3712 if u == nil { 3713 return nil 3714 } 3715 3716 copy := new(UpdateStrategy) 3717 *copy = *u 3718 return copy 3719 } 3720 3721 func (u *UpdateStrategy) Validate() error { 3722 if u == nil { 3723 return nil 3724 } 3725 3726 var mErr multierror.Error 3727 switch u.HealthCheck { 3728 case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual: 3729 default: 3730 multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck)) 3731 } 3732 3733 if u.MaxParallel < 1 { 3734 multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than one: %d < 1", u.MaxParallel)) 3735 } 3736 if u.Canary < 0 { 3737 multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary)) 3738 } 3739 if u.MinHealthyTime < 0 { 3740 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime)) 3741 } 3742 if u.HealthyDeadline <= 0 { 3743 multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline)) 3744 } 3745 if u.ProgressDeadline < 0 { 3746 multierror.Append(&mErr, fmt.Errorf("Progress deadline must be zero or greater: %v", u.ProgressDeadline)) 3747 } 3748 if u.MinHealthyTime >= u.HealthyDeadline { 3749 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v > %v", u.MinHealthyTime, u.HealthyDeadline)) 3750 } 3751 if u.ProgressDeadline != 0 && u.HealthyDeadline >= u.ProgressDeadline { 3752 multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be less than progress deadline: %v > %v", u.HealthyDeadline, u.ProgressDeadline)) 3753 } 3754 if u.Stagger <= 0 { 3755 multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger)) 3756 } 3757 3758 return mErr.ErrorOrNil() 3759 } 3760 3761 // TODO(alexdadgar): Remove once no longer used by the scheduler. 3762 // Rolling returns if a rolling strategy should be used 3763 func (u *UpdateStrategy) Rolling() bool { 3764 return u.Stagger > 0 && u.MaxParallel > 0 3765 } 3766 3767 const ( 3768 // PeriodicSpecCron is used for a cron spec. 3769 PeriodicSpecCron = "cron" 3770 3771 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 3772 // separated list of unix timestamps at which to launch. 3773 PeriodicSpecTest = "_internal_test" 3774 ) 3775 3776 // Periodic defines the interval a job should be run at. 3777 type PeriodicConfig struct { 3778 // Enabled determines if the job should be run periodically. 3779 Enabled bool 3780 3781 // Spec specifies the interval the job should be run as. It is parsed based 3782 // on the SpecType. 3783 Spec string 3784 3785 // SpecType defines the format of the spec. 3786 SpecType string 3787 3788 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 3789 ProhibitOverlap bool 3790 3791 // TimeZone is the user specified string that determines the time zone to 3792 // launch against. The time zones must be specified from IANA Time Zone 3793 // database, such as "America/New_York". 3794 // Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 3795 // Reference: https://www.iana.org/time-zones 3796 TimeZone string 3797 3798 // location is the time zone to evaluate the launch time against 3799 location *time.Location 3800 } 3801 3802 func (p *PeriodicConfig) Copy() *PeriodicConfig { 3803 if p == nil { 3804 return nil 3805 } 3806 np := new(PeriodicConfig) 3807 *np = *p 3808 return np 3809 } 3810 3811 func (p *PeriodicConfig) Validate() error { 3812 if !p.Enabled { 3813 return nil 3814 } 3815 3816 var mErr multierror.Error 3817 if p.Spec == "" { 3818 multierror.Append(&mErr, fmt.Errorf("Must specify a spec")) 3819 } 3820 3821 // Check if we got a valid time zone 3822 if p.TimeZone != "" { 3823 if _, err := time.LoadLocation(p.TimeZone); err != nil { 3824 multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err)) 3825 } 3826 } 3827 3828 switch p.SpecType { 3829 case PeriodicSpecCron: 3830 // Validate the cron spec 3831 if _, err := cronexpr.Parse(p.Spec); err != nil { 3832 multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err)) 3833 } 3834 case PeriodicSpecTest: 3835 // No-op 3836 default: 3837 multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType)) 3838 } 3839 3840 return mErr.ErrorOrNil() 3841 } 3842 3843 func (p *PeriodicConfig) Canonicalize() { 3844 // Load the location 3845 l, err := time.LoadLocation(p.TimeZone) 3846 if err != nil { 3847 p.location = time.UTC 3848 } 3849 3850 p.location = l 3851 } 3852 3853 // CronParseNext is a helper that parses the next time for the given expression 3854 // but captures any panic that may occur in the underlying library. 3855 func CronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) { 3856 defer func() { 3857 if recover() != nil { 3858 t = time.Time{} 3859 err = fmt.Errorf("failed parsing cron expression: %q", spec) 3860 } 3861 }() 3862 3863 return e.Next(fromTime), nil 3864 } 3865 3866 // Next returns the closest time instant matching the spec that is after the 3867 // passed time. If no matching instance exists, the zero value of time.Time is 3868 // returned. The `time.Location` of the returned value matches that of the 3869 // passed time. 3870 func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) { 3871 switch p.SpecType { 3872 case PeriodicSpecCron: 3873 if e, err := cronexpr.Parse(p.Spec); err == nil { 3874 return CronParseNext(e, fromTime, p.Spec) 3875 } 3876 case PeriodicSpecTest: 3877 split := strings.Split(p.Spec, ",") 3878 if len(split) == 1 && split[0] == "" { 3879 return time.Time{}, nil 3880 } 3881 3882 // Parse the times 3883 times := make([]time.Time, len(split)) 3884 for i, s := range split { 3885 unix, err := strconv.Atoi(s) 3886 if err != nil { 3887 return time.Time{}, nil 3888 } 3889 3890 times[i] = time.Unix(int64(unix), 0) 3891 } 3892 3893 // Find the next match 3894 for _, next := range times { 3895 if fromTime.Before(next) { 3896 return next, nil 3897 } 3898 } 3899 } 3900 3901 return time.Time{}, nil 3902 } 3903 3904 // GetLocation returns the location to use for determining the time zone to run 3905 // the periodic job against. 3906 func (p *PeriodicConfig) GetLocation() *time.Location { 3907 // Jobs pre 0.5.5 will not have this 3908 if p.location != nil { 3909 return p.location 3910 } 3911 3912 return time.UTC 3913 } 3914 3915 const ( 3916 // PeriodicLaunchSuffix is the string appended to the periodic jobs ID 3917 // when launching derived instances of it. 3918 PeriodicLaunchSuffix = "/periodic-" 3919 ) 3920 3921 // PeriodicLaunch tracks the last launch time of a periodic job. 3922 type PeriodicLaunch struct { 3923 ID string // ID of the periodic job. 3924 Namespace string // Namespace of the periodic job 3925 Launch time.Time // The last launch time. 3926 3927 // Raft Indexes 3928 CreateIndex uint64 3929 ModifyIndex uint64 3930 } 3931 3932 const ( 3933 DispatchPayloadForbidden = "forbidden" 3934 DispatchPayloadOptional = "optional" 3935 DispatchPayloadRequired = "required" 3936 3937 // DispatchLaunchSuffix is the string appended to the parameterized job's ID 3938 // when dispatching instances of it. 3939 DispatchLaunchSuffix = "/dispatch-" 3940 ) 3941 3942 // ParameterizedJobConfig is used to configure the parameterized job 3943 type ParameterizedJobConfig struct { 3944 // Payload configure the payload requirements 3945 Payload string 3946 3947 // MetaRequired is metadata keys that must be specified by the dispatcher 3948 MetaRequired []string 3949 3950 // MetaOptional is metadata keys that may be specified by the dispatcher 3951 MetaOptional []string 3952 } 3953 3954 func (d *ParameterizedJobConfig) Validate() error { 3955 var mErr multierror.Error 3956 switch d.Payload { 3957 case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden: 3958 default: 3959 multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload)) 3960 } 3961 3962 // Check that the meta configurations are disjoint sets 3963 disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional) 3964 if !disjoint { 3965 multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending)) 3966 } 3967 3968 return mErr.ErrorOrNil() 3969 } 3970 3971 func (d *ParameterizedJobConfig) Canonicalize() { 3972 if d.Payload == "" { 3973 d.Payload = DispatchPayloadOptional 3974 } 3975 } 3976 3977 func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig { 3978 if d == nil { 3979 return nil 3980 } 3981 nd := new(ParameterizedJobConfig) 3982 *nd = *d 3983 nd.MetaOptional = helper.CopySliceString(nd.MetaOptional) 3984 nd.MetaRequired = helper.CopySliceString(nd.MetaRequired) 3985 return nd 3986 } 3987 3988 // DispatchedID returns an ID appropriate for a job dispatched against a 3989 // particular parameterized job 3990 func DispatchedID(templateID string, t time.Time) string { 3991 u := uuid.Generate()[:8] 3992 return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u) 3993 } 3994 3995 // DispatchPayloadConfig configures how a task gets its input from a job dispatch 3996 type DispatchPayloadConfig struct { 3997 // File specifies a relative path to where the input data should be written 3998 File string 3999 } 4000 4001 func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig { 4002 if d == nil { 4003 return nil 4004 } 4005 nd := new(DispatchPayloadConfig) 4006 *nd = *d 4007 return nd 4008 } 4009 4010 func (d *DispatchPayloadConfig) Validate() error { 4011 // Verify the destination doesn't escape 4012 escaped, err := PathEscapesAllocDir("task/local/", d.File) 4013 if err != nil { 4014 return fmt.Errorf("invalid destination path: %v", err) 4015 } else if escaped { 4016 return fmt.Errorf("destination escapes allocation directory") 4017 } 4018 4019 return nil 4020 } 4021 4022 var ( 4023 // These default restart policies needs to be in sync with 4024 // Canonicalize in api/tasks.go 4025 4026 DefaultServiceJobRestartPolicy = RestartPolicy{ 4027 Delay: 15 * time.Second, 4028 Attempts: 2, 4029 Interval: 30 * time.Minute, 4030 Mode: RestartPolicyModeFail, 4031 } 4032 DefaultBatchJobRestartPolicy = RestartPolicy{ 4033 Delay: 15 * time.Second, 4034 Attempts: 3, 4035 Interval: 24 * time.Hour, 4036 Mode: RestartPolicyModeFail, 4037 } 4038 ) 4039 4040 var ( 4041 // These default reschedule policies needs to be in sync with 4042 // NewDefaultReschedulePolicy in api/tasks.go 4043 4044 DefaultServiceJobReschedulePolicy = ReschedulePolicy{ 4045 Delay: 30 * time.Second, 4046 DelayFunction: "exponential", 4047 MaxDelay: 1 * time.Hour, 4048 Unlimited: true, 4049 } 4050 DefaultBatchJobReschedulePolicy = ReschedulePolicy{ 4051 Attempts: 1, 4052 Interval: 24 * time.Hour, 4053 Delay: 5 * time.Second, 4054 DelayFunction: "constant", 4055 } 4056 ) 4057 4058 const ( 4059 // RestartPolicyModeDelay causes an artificial delay till the next interval is 4060 // reached when the specified attempts have been reached in the interval. 4061 RestartPolicyModeDelay = "delay" 4062 4063 // RestartPolicyModeFail causes a job to fail if the specified number of 4064 // attempts are reached within an interval. 4065 RestartPolicyModeFail = "fail" 4066 4067 // RestartPolicyMinInterval is the minimum interval that is accepted for a 4068 // restart policy. 4069 RestartPolicyMinInterval = 5 * time.Second 4070 4071 // ReasonWithinPolicy describes restart events that are within policy 4072 ReasonWithinPolicy = "Restart within policy" 4073 ) 4074 4075 // RestartPolicy configures how Tasks are restarted when they crash or fail. 4076 type RestartPolicy struct { 4077 // Attempts is the number of restart that will occur in an interval. 4078 Attempts int 4079 4080 // Interval is a duration in which we can limit the number of restarts 4081 // within. 4082 Interval time.Duration 4083 4084 // Delay is the time between a failure and a restart. 4085 Delay time.Duration 4086 4087 // Mode controls what happens when the task restarts more than attempt times 4088 // in an interval. 4089 Mode string 4090 } 4091 4092 func (r *RestartPolicy) Copy() *RestartPolicy { 4093 if r == nil { 4094 return nil 4095 } 4096 nrp := new(RestartPolicy) 4097 *nrp = *r 4098 return nrp 4099 } 4100 4101 func (r *RestartPolicy) Validate() error { 4102 var mErr multierror.Error 4103 switch r.Mode { 4104 case RestartPolicyModeDelay, RestartPolicyModeFail: 4105 default: 4106 multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode)) 4107 } 4108 4109 // Check for ambiguous/confusing settings 4110 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 4111 multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts)) 4112 } 4113 4114 if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() { 4115 multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval)) 4116 } 4117 if time.Duration(r.Attempts)*r.Delay > r.Interval { 4118 multierror.Append(&mErr, 4119 fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)) 4120 } 4121 return mErr.ErrorOrNil() 4122 } 4123 4124 func NewRestartPolicy(jobType string) *RestartPolicy { 4125 switch jobType { 4126 case JobTypeService, JobTypeSystem: 4127 rp := DefaultServiceJobRestartPolicy 4128 return &rp 4129 case JobTypeBatch: 4130 rp := DefaultBatchJobRestartPolicy 4131 return &rp 4132 } 4133 return nil 4134 } 4135 4136 const ReschedulePolicyMinInterval = 15 * time.Second 4137 const ReschedulePolicyMinDelay = 5 * time.Second 4138 4139 var RescheduleDelayFunctions = [...]string{"constant", "exponential", "fibonacci"} 4140 4141 // ReschedulePolicy configures how Tasks are rescheduled when they crash or fail. 4142 type ReschedulePolicy struct { 4143 // Attempts limits the number of rescheduling attempts that can occur in an interval. 4144 Attempts int 4145 4146 // Interval is a duration in which we can limit the number of reschedule attempts. 4147 Interval time.Duration 4148 4149 // Delay is a minimum duration to wait between reschedule attempts. 4150 // The delay function determines how much subsequent reschedule attempts are delayed by. 4151 Delay time.Duration 4152 4153 // DelayFunction determines how the delay progressively changes on subsequent reschedule 4154 // attempts. Valid values are "exponential", "constant", and "fibonacci". 4155 DelayFunction string 4156 4157 // MaxDelay is an upper bound on the delay. 4158 MaxDelay time.Duration 4159 4160 // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set 4161 // between reschedule attempts. 4162 Unlimited bool 4163 } 4164 4165 func (r *ReschedulePolicy) Copy() *ReschedulePolicy { 4166 if r == nil { 4167 return nil 4168 } 4169 nrp := new(ReschedulePolicy) 4170 *nrp = *r 4171 return nrp 4172 } 4173 4174 func (r *ReschedulePolicy) Enabled() bool { 4175 enabled := r != nil && (r.Attempts > 0 || r.Unlimited) 4176 return enabled 4177 } 4178 4179 // Validate uses different criteria to validate the reschedule policy 4180 // Delay must be a minimum of 5 seconds 4181 // Delay Ceiling is ignored if Delay Function is "constant" 4182 // Number of possible attempts is validated, given the interval, delay and delay function 4183 func (r *ReschedulePolicy) Validate() error { 4184 if !r.Enabled() { 4185 return nil 4186 } 4187 var mErr multierror.Error 4188 // Check for ambiguous/confusing settings 4189 if r.Attempts > 0 { 4190 if r.Interval <= 0 { 4191 multierror.Append(&mErr, fmt.Errorf("Interval must be a non zero value if Attempts > 0")) 4192 } 4193 if r.Unlimited { 4194 multierror.Append(&mErr, fmt.Errorf("Reschedule Policy with Attempts = %v, Interval = %v, "+ 4195 "and Unlimited = %v is ambiguous", r.Attempts, r.Interval, r.Unlimited)) 4196 multierror.Append(&mErr, errors.New("If Attempts >0, Unlimited cannot also be set to true")) 4197 } 4198 } 4199 4200 delayPreCheck := true 4201 // Delay should be bigger than the default 4202 if r.Delay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 4203 multierror.Append(&mErr, fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 4204 delayPreCheck = false 4205 } 4206 4207 // Must use a valid delay function 4208 if !isValidDelayFunction(r.DelayFunction) { 4209 multierror.Append(&mErr, fmt.Errorf("Invalid delay function %q, must be one of %q", r.DelayFunction, RescheduleDelayFunctions)) 4210 delayPreCheck = false 4211 } 4212 4213 // Validate MaxDelay if not using linear delay progression 4214 if r.DelayFunction != "constant" { 4215 if r.MaxDelay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 4216 multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 4217 delayPreCheck = false 4218 } 4219 if r.MaxDelay < r.Delay { 4220 multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than Delay %v (got %v)", r.Delay, r.MaxDelay)) 4221 delayPreCheck = false 4222 } 4223 4224 } 4225 4226 // Validate Interval and other delay parameters if attempts are limited 4227 if !r.Unlimited { 4228 if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { 4229 multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) 4230 } 4231 if !delayPreCheck { 4232 // We can't cross validate the rest of the delay params if delayPreCheck fails, so return early 4233 return mErr.ErrorOrNil() 4234 } 4235 crossValidationErr := r.validateDelayParams() 4236 if crossValidationErr != nil { 4237 multierror.Append(&mErr, crossValidationErr) 4238 } 4239 } 4240 return mErr.ErrorOrNil() 4241 } 4242 4243 func isValidDelayFunction(delayFunc string) bool { 4244 for _, value := range RescheduleDelayFunctions { 4245 if value == delayFunc { 4246 return true 4247 } 4248 } 4249 return false 4250 } 4251 4252 func (r *ReschedulePolicy) validateDelayParams() error { 4253 ok, possibleAttempts, recommendedInterval := r.viableAttempts() 4254 if ok { 4255 return nil 4256 } 4257 var mErr multierror.Error 4258 if r.DelayFunction == "constant" { 4259 multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and "+ 4260 "delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction)) 4261 } else { 4262 multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ 4263 "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay)) 4264 } 4265 multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts)) 4266 return mErr.ErrorOrNil() 4267 } 4268 4269 func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) { 4270 var possibleAttempts int 4271 var recommendedInterval time.Duration 4272 valid := true 4273 switch r.DelayFunction { 4274 case "constant": 4275 recommendedInterval = time.Duration(r.Attempts) * r.Delay 4276 if r.Interval < recommendedInterval { 4277 possibleAttempts = int(r.Interval / r.Delay) 4278 valid = false 4279 } 4280 case "exponential": 4281 for i := 0; i < r.Attempts; i++ { 4282 nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay 4283 if nextDelay > r.MaxDelay { 4284 nextDelay = r.MaxDelay 4285 recommendedInterval += nextDelay 4286 } else { 4287 recommendedInterval = nextDelay 4288 } 4289 if recommendedInterval < r.Interval { 4290 possibleAttempts++ 4291 } 4292 } 4293 if possibleAttempts < r.Attempts { 4294 valid = false 4295 } 4296 case "fibonacci": 4297 var slots []time.Duration 4298 slots = append(slots, r.Delay) 4299 slots = append(slots, r.Delay) 4300 reachedCeiling := false 4301 for i := 2; i < r.Attempts; i++ { 4302 var nextDelay time.Duration 4303 if reachedCeiling { 4304 //switch to linear 4305 nextDelay = slots[i-1] + r.MaxDelay 4306 } else { 4307 nextDelay = slots[i-1] + slots[i-2] 4308 if nextDelay > r.MaxDelay { 4309 nextDelay = r.MaxDelay 4310 reachedCeiling = true 4311 } 4312 } 4313 slots = append(slots, nextDelay) 4314 } 4315 recommendedInterval = slots[len(slots)-1] 4316 if r.Interval < recommendedInterval { 4317 valid = false 4318 // calculate possible attempts 4319 for i := 0; i < len(slots); i++ { 4320 if slots[i] > r.Interval { 4321 possibleAttempts = i 4322 break 4323 } 4324 } 4325 } 4326 default: 4327 return false, 0, 0 4328 } 4329 if possibleAttempts < 0 { // can happen if delay is bigger than interval 4330 possibleAttempts = 0 4331 } 4332 return valid, possibleAttempts, recommendedInterval 4333 } 4334 4335 func NewReschedulePolicy(jobType string) *ReschedulePolicy { 4336 switch jobType { 4337 case JobTypeService: 4338 rp := DefaultServiceJobReschedulePolicy 4339 return &rp 4340 case JobTypeBatch: 4341 rp := DefaultBatchJobReschedulePolicy 4342 return &rp 4343 } 4344 return nil 4345 } 4346 4347 const ( 4348 MigrateStrategyHealthChecks = "checks" 4349 MigrateStrategyHealthStates = "task_states" 4350 ) 4351 4352 type MigrateStrategy struct { 4353 MaxParallel int 4354 HealthCheck string 4355 MinHealthyTime time.Duration 4356 HealthyDeadline time.Duration 4357 } 4358 4359 // DefaultMigrateStrategy is used for backwards compat with pre-0.8 Allocations 4360 // that lack an update strategy. 4361 // 4362 // This function should match its counterpart in api/tasks.go 4363 func DefaultMigrateStrategy() *MigrateStrategy { 4364 return &MigrateStrategy{ 4365 MaxParallel: 1, 4366 HealthCheck: MigrateStrategyHealthChecks, 4367 MinHealthyTime: 10 * time.Second, 4368 HealthyDeadline: 5 * time.Minute, 4369 } 4370 } 4371 4372 func (m *MigrateStrategy) Validate() error { 4373 var mErr multierror.Error 4374 4375 if m.MaxParallel < 0 { 4376 multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel)) 4377 } 4378 4379 switch m.HealthCheck { 4380 case MigrateStrategyHealthChecks, MigrateStrategyHealthStates: 4381 // ok 4382 case "": 4383 if m.MaxParallel > 0 { 4384 multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck")) 4385 } 4386 default: 4387 multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck)) 4388 } 4389 4390 if m.MinHealthyTime < 0 { 4391 multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime)) 4392 } 4393 4394 if m.HealthyDeadline < 0 { 4395 multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline)) 4396 } 4397 4398 if m.MinHealthyTime > m.HealthyDeadline { 4399 multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline")) 4400 } 4401 4402 return mErr.ErrorOrNil() 4403 } 4404 4405 // TaskGroup is an atomic unit of placement. Each task group belongs to 4406 // a job and may contain any number of tasks. A task group support running 4407 // in many replicas using the same configuration.. 4408 type TaskGroup struct { 4409 // Name of the task group 4410 Name string 4411 4412 // Count is the number of replicas of this task group that should 4413 // be scheduled. 4414 Count int 4415 4416 // Update is used to control the update strategy for this task group 4417 Update *UpdateStrategy 4418 4419 // Migrate is used to control the migration strategy for this task group 4420 Migrate *MigrateStrategy 4421 4422 // Constraints can be specified at a task group level and apply to 4423 // all the tasks contained. 4424 Constraints []*Constraint 4425 4426 //RestartPolicy of a TaskGroup 4427 RestartPolicy *RestartPolicy 4428 4429 // Tasks are the collection of tasks that this task group needs to run 4430 Tasks []*Task 4431 4432 // EphemeralDisk is the disk resources that the task group requests 4433 EphemeralDisk *EphemeralDisk 4434 4435 // Meta is used to associate arbitrary metadata with this 4436 // task group. This is opaque to Nomad. 4437 Meta map[string]string 4438 4439 // ReschedulePolicy is used to configure how the scheduler should 4440 // retry failed allocations. 4441 ReschedulePolicy *ReschedulePolicy 4442 4443 // Affinities can be specified at the task group level to express 4444 // scheduling preferences. 4445 Affinities []*Affinity 4446 4447 // Spread can be specified at the task group level to express spreading 4448 // allocations across a desired attribute, such as datacenter 4449 Spreads []*Spread 4450 } 4451 4452 func (tg *TaskGroup) Copy() *TaskGroup { 4453 if tg == nil { 4454 return nil 4455 } 4456 ntg := new(TaskGroup) 4457 *ntg = *tg 4458 ntg.Update = ntg.Update.Copy() 4459 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 4460 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 4461 ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy() 4462 ntg.Affinities = CopySliceAffinities(ntg.Affinities) 4463 ntg.Spreads = CopySliceSpreads(ntg.Spreads) 4464 4465 if tg.Tasks != nil { 4466 tasks := make([]*Task, len(ntg.Tasks)) 4467 for i, t := range ntg.Tasks { 4468 tasks[i] = t.Copy() 4469 } 4470 ntg.Tasks = tasks 4471 } 4472 4473 ntg.Meta = helper.CopyMapStringString(ntg.Meta) 4474 4475 if tg.EphemeralDisk != nil { 4476 ntg.EphemeralDisk = tg.EphemeralDisk.Copy() 4477 } 4478 return ntg 4479 } 4480 4481 // Canonicalize is used to canonicalize fields in the TaskGroup. 4482 func (tg *TaskGroup) Canonicalize(job *Job) { 4483 // Ensure that an empty and nil map are treated the same to avoid scheduling 4484 // problems since we use reflect DeepEquals. 4485 if len(tg.Meta) == 0 { 4486 tg.Meta = nil 4487 } 4488 4489 // Set the default restart policy. 4490 if tg.RestartPolicy == nil { 4491 tg.RestartPolicy = NewRestartPolicy(job.Type) 4492 } 4493 4494 if tg.ReschedulePolicy == nil { 4495 tg.ReschedulePolicy = NewReschedulePolicy(job.Type) 4496 } 4497 4498 // Canonicalize Migrate for service jobs 4499 if job.Type == JobTypeService && tg.Migrate == nil { 4500 tg.Migrate = DefaultMigrateStrategy() 4501 } 4502 4503 // Set a default ephemeral disk object if the user has not requested for one 4504 if tg.EphemeralDisk == nil { 4505 tg.EphemeralDisk = DefaultEphemeralDisk() 4506 } 4507 4508 for _, task := range tg.Tasks { 4509 task.Canonicalize(job, tg) 4510 } 4511 } 4512 4513 // Validate is used to sanity check a task group 4514 func (tg *TaskGroup) Validate(j *Job) error { 4515 var mErr multierror.Error 4516 if tg.Name == "" { 4517 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 4518 } 4519 if tg.Count < 0 { 4520 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative")) 4521 } 4522 if len(tg.Tasks) == 0 { 4523 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 4524 } 4525 for idx, constr := range tg.Constraints { 4526 if err := constr.Validate(); err != nil { 4527 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 4528 mErr.Errors = append(mErr.Errors, outer) 4529 } 4530 } 4531 if j.Type == JobTypeSystem { 4532 if tg.Affinities != nil { 4533 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 4534 } 4535 } else { 4536 for idx, affinity := range tg.Affinities { 4537 if err := affinity.Validate(); err != nil { 4538 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 4539 mErr.Errors = append(mErr.Errors, outer) 4540 } 4541 } 4542 } 4543 4544 if tg.RestartPolicy != nil { 4545 if err := tg.RestartPolicy.Validate(); err != nil { 4546 mErr.Errors = append(mErr.Errors, err) 4547 } 4548 } else { 4549 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 4550 } 4551 4552 if j.Type == JobTypeSystem { 4553 if tg.Spreads != nil { 4554 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza")) 4555 } 4556 } else { 4557 for idx, spread := range tg.Spreads { 4558 if err := spread.Validate(); err != nil { 4559 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err) 4560 mErr.Errors = append(mErr.Errors, outer) 4561 } 4562 } 4563 } 4564 4565 if j.Type == JobTypeSystem { 4566 if tg.ReschedulePolicy != nil { 4567 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy")) 4568 } 4569 } else { 4570 if tg.ReschedulePolicy != nil { 4571 if err := tg.ReschedulePolicy.Validate(); err != nil { 4572 mErr.Errors = append(mErr.Errors, err) 4573 } 4574 } else { 4575 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name)) 4576 } 4577 } 4578 4579 if tg.EphemeralDisk != nil { 4580 if err := tg.EphemeralDisk.Validate(); err != nil { 4581 mErr.Errors = append(mErr.Errors, err) 4582 } 4583 } else { 4584 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name)) 4585 } 4586 4587 // Validate the update strategy 4588 if u := tg.Update; u != nil { 4589 switch j.Type { 4590 case JobTypeService, JobTypeSystem: 4591 default: 4592 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type)) 4593 } 4594 if err := u.Validate(); err != nil { 4595 mErr.Errors = append(mErr.Errors, err) 4596 } 4597 } 4598 4599 // Validate the migration strategy 4600 switch j.Type { 4601 case JobTypeService: 4602 if tg.Migrate != nil { 4603 if err := tg.Migrate.Validate(); err != nil { 4604 mErr.Errors = append(mErr.Errors, err) 4605 } 4606 } 4607 default: 4608 if tg.Migrate != nil { 4609 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow migrate block", j.Type)) 4610 } 4611 } 4612 4613 // Check for duplicate tasks, that there is only leader task if any, 4614 // and no duplicated static ports 4615 tasks := make(map[string]int) 4616 staticPorts := make(map[int]string) 4617 leaderTasks := 0 4618 for idx, task := range tg.Tasks { 4619 if task.Name == "" { 4620 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 4621 } else if existing, ok := tasks[task.Name]; ok { 4622 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 4623 } else { 4624 tasks[task.Name] = idx 4625 } 4626 4627 if task.Leader { 4628 leaderTasks++ 4629 } 4630 4631 if task.Resources == nil { 4632 continue 4633 } 4634 4635 for _, net := range task.Resources.Networks { 4636 for _, port := range net.ReservedPorts { 4637 if other, ok := staticPorts[port.Value]; ok { 4638 err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other) 4639 mErr.Errors = append(mErr.Errors, err) 4640 } else { 4641 staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label) 4642 } 4643 } 4644 } 4645 } 4646 4647 if leaderTasks > 1 { 4648 mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader")) 4649 } 4650 4651 // Validate the tasks 4652 for _, task := range tg.Tasks { 4653 if err := task.Validate(tg.EphemeralDisk, j.Type); err != nil { 4654 outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err) 4655 mErr.Errors = append(mErr.Errors, outer) 4656 } 4657 } 4658 return mErr.ErrorOrNil() 4659 } 4660 4661 // Warnings returns a list of warnings that may be from dubious settings or 4662 // deprecation warnings. 4663 func (tg *TaskGroup) Warnings(j *Job) error { 4664 var mErr multierror.Error 4665 4666 // Validate the update strategy 4667 if u := tg.Update; u != nil { 4668 // Check the counts are appropriate 4669 if u.MaxParallel > tg.Count { 4670 mErr.Errors = append(mErr.Errors, 4671 fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+ 4672 "A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count)) 4673 } 4674 } 4675 4676 for _, t := range tg.Tasks { 4677 if err := t.Warnings(); err != nil { 4678 err = multierror.Prefix(err, fmt.Sprintf("Task %q:", t.Name)) 4679 mErr.Errors = append(mErr.Errors, err) 4680 } 4681 } 4682 4683 return mErr.ErrorOrNil() 4684 } 4685 4686 // LookupTask finds a task by name 4687 func (tg *TaskGroup) LookupTask(name string) *Task { 4688 for _, t := range tg.Tasks { 4689 if t.Name == name { 4690 return t 4691 } 4692 } 4693 return nil 4694 } 4695 4696 func (tg *TaskGroup) GoString() string { 4697 return fmt.Sprintf("*%#v", *tg) 4698 } 4699 4700 // CheckRestart describes if and when a task should be restarted based on 4701 // failing health checks. 4702 type CheckRestart struct { 4703 Limit int // Restart task after this many unhealthy intervals 4704 Grace time.Duration // Grace time to give tasks after starting to get healthy 4705 IgnoreWarnings bool // If true treat checks in `warning` as passing 4706 } 4707 4708 func (c *CheckRestart) Copy() *CheckRestart { 4709 if c == nil { 4710 return nil 4711 } 4712 4713 nc := new(CheckRestart) 4714 *nc = *c 4715 return nc 4716 } 4717 4718 func (c *CheckRestart) Validate() error { 4719 if c == nil { 4720 return nil 4721 } 4722 4723 var mErr multierror.Error 4724 if c.Limit < 0 { 4725 mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit)) 4726 } 4727 4728 if c.Grace < 0 { 4729 mErr.Errors = append(mErr.Errors, fmt.Errorf("grace period must be greater than or equal to 0 but found %d", c.Grace)) 4730 } 4731 4732 return mErr.ErrorOrNil() 4733 } 4734 4735 const ( 4736 ServiceCheckHTTP = "http" 4737 ServiceCheckTCP = "tcp" 4738 ServiceCheckScript = "script" 4739 ServiceCheckGRPC = "grpc" 4740 4741 // minCheckInterval is the minimum check interval permitted. Consul 4742 // currently has its MinInterval set to 1s. Mirror that here for 4743 // consistency. 4744 minCheckInterval = 1 * time.Second 4745 4746 // minCheckTimeout is the minimum check timeout permitted for Consul 4747 // script TTL checks. 4748 minCheckTimeout = 1 * time.Second 4749 ) 4750 4751 // The ServiceCheck data model represents the consul health check that 4752 // Nomad registers for a Task 4753 type ServiceCheck struct { 4754 Name string // Name of the check, defaults to id 4755 Type string // Type of the check - tcp, http, docker and script 4756 Command string // Command is the command to run for script checks 4757 Args []string // Args is a list of arguments for script checks 4758 Path string // path of the health check url for http type check 4759 Protocol string // Protocol to use if check is http, defaults to http 4760 PortLabel string // The port to use for tcp/http checks 4761 AddressMode string // 'host' to use host ip:port or 'driver' to use driver's 4762 Interval time.Duration // Interval of the check 4763 Timeout time.Duration // Timeout of the response from the check before consul fails the check 4764 InitialStatus string // Initial status of the check 4765 TLSSkipVerify bool // Skip TLS verification when Protocol=https 4766 Method string // HTTP Method to use (GET by default) 4767 Header map[string][]string // HTTP Headers for Consul to set when making HTTP checks 4768 CheckRestart *CheckRestart // If and when a task should be restarted based on checks 4769 GRPCService string // Service for GRPC checks 4770 GRPCUseTLS bool // Whether or not to use TLS for GRPC checks 4771 } 4772 4773 func (sc *ServiceCheck) Copy() *ServiceCheck { 4774 if sc == nil { 4775 return nil 4776 } 4777 nsc := new(ServiceCheck) 4778 *nsc = *sc 4779 nsc.Args = helper.CopySliceString(sc.Args) 4780 nsc.Header = helper.CopyMapStringSliceString(sc.Header) 4781 nsc.CheckRestart = sc.CheckRestart.Copy() 4782 return nsc 4783 } 4784 4785 func (sc *ServiceCheck) Canonicalize(serviceName string) { 4786 // Ensure empty maps/slices are treated as null to avoid scheduling 4787 // issues when using DeepEquals. 4788 if len(sc.Args) == 0 { 4789 sc.Args = nil 4790 } 4791 4792 if len(sc.Header) == 0 { 4793 sc.Header = nil 4794 } else { 4795 for k, v := range sc.Header { 4796 if len(v) == 0 { 4797 sc.Header[k] = nil 4798 } 4799 } 4800 } 4801 4802 if sc.Name == "" { 4803 sc.Name = fmt.Sprintf("service: %q check", serviceName) 4804 } 4805 } 4806 4807 // validate a Service's ServiceCheck 4808 func (sc *ServiceCheck) validate() error { 4809 // Validate Type 4810 switch strings.ToLower(sc.Type) { 4811 case ServiceCheckGRPC: 4812 case ServiceCheckTCP: 4813 case ServiceCheckHTTP: 4814 if sc.Path == "" { 4815 return fmt.Errorf("http type must have a valid http path") 4816 } 4817 url, err := url.Parse(sc.Path) 4818 if err != nil { 4819 return fmt.Errorf("http type must have a valid http path") 4820 } 4821 if url.IsAbs() { 4822 return fmt.Errorf("http type must have a relative http path") 4823 } 4824 4825 case ServiceCheckScript: 4826 if sc.Command == "" { 4827 return fmt.Errorf("script type must have a valid script path") 4828 } 4829 4830 default: 4831 return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type) 4832 } 4833 4834 // Validate interval and timeout 4835 if sc.Interval == 0 { 4836 return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval) 4837 } else if sc.Interval < minCheckInterval { 4838 return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval) 4839 } 4840 4841 if sc.Timeout == 0 { 4842 return fmt.Errorf("missing required value timeout. Timeout cannot be less than %v", minCheckInterval) 4843 } else if sc.Timeout < minCheckTimeout { 4844 return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval) 4845 } 4846 4847 // Validate InitialStatus 4848 switch sc.InitialStatus { 4849 case "": 4850 case api.HealthPassing: 4851 case api.HealthWarning: 4852 case api.HealthCritical: 4853 default: 4854 return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical) 4855 4856 } 4857 4858 // Validate AddressMode 4859 switch sc.AddressMode { 4860 case "", AddressModeHost, AddressModeDriver: 4861 // Ok 4862 case AddressModeAuto: 4863 return fmt.Errorf("invalid address_mode %q - %s only valid for services", sc.AddressMode, AddressModeAuto) 4864 default: 4865 return fmt.Errorf("invalid address_mode %q", sc.AddressMode) 4866 } 4867 4868 return sc.CheckRestart.Validate() 4869 } 4870 4871 // RequiresPort returns whether the service check requires the task has a port. 4872 func (sc *ServiceCheck) RequiresPort() bool { 4873 switch sc.Type { 4874 case ServiceCheckGRPC, ServiceCheckHTTP, ServiceCheckTCP: 4875 return true 4876 default: 4877 return false 4878 } 4879 } 4880 4881 // TriggersRestarts returns true if this check should be watched and trigger a restart 4882 // on failure. 4883 func (sc *ServiceCheck) TriggersRestarts() bool { 4884 return sc.CheckRestart != nil && sc.CheckRestart.Limit > 0 4885 } 4886 4887 // Hash all ServiceCheck fields and the check's corresponding service ID to 4888 // create an identifier. The identifier is not guaranteed to be unique as if 4889 // the PortLabel is blank, the Service's PortLabel will be used after Hash is 4890 // called. 4891 func (sc *ServiceCheck) Hash(serviceID string) string { 4892 h := sha1.New() 4893 io.WriteString(h, serviceID) 4894 io.WriteString(h, sc.Name) 4895 io.WriteString(h, sc.Type) 4896 io.WriteString(h, sc.Command) 4897 io.WriteString(h, strings.Join(sc.Args, "")) 4898 io.WriteString(h, sc.Path) 4899 io.WriteString(h, sc.Protocol) 4900 io.WriteString(h, sc.PortLabel) 4901 io.WriteString(h, sc.Interval.String()) 4902 io.WriteString(h, sc.Timeout.String()) 4903 io.WriteString(h, sc.Method) 4904 // Only include TLSSkipVerify if set to maintain ID stability with Nomad <0.6 4905 if sc.TLSSkipVerify { 4906 io.WriteString(h, "true") 4907 } 4908 4909 // Since map iteration order isn't stable we need to write k/v pairs to 4910 // a slice and sort it before hashing. 4911 if len(sc.Header) > 0 { 4912 headers := make([]string, 0, len(sc.Header)) 4913 for k, v := range sc.Header { 4914 headers = append(headers, k+strings.Join(v, "")) 4915 } 4916 sort.Strings(headers) 4917 io.WriteString(h, strings.Join(headers, "")) 4918 } 4919 4920 // Only include AddressMode if set to maintain ID stability with Nomad <0.7.1 4921 if len(sc.AddressMode) > 0 { 4922 io.WriteString(h, sc.AddressMode) 4923 } 4924 4925 // Only include GRPC if set to maintain ID stability with Nomad <0.8.4 4926 if sc.GRPCService != "" { 4927 io.WriteString(h, sc.GRPCService) 4928 } 4929 if sc.GRPCUseTLS { 4930 io.WriteString(h, "true") 4931 } 4932 4933 return fmt.Sprintf("%x", h.Sum(nil)) 4934 } 4935 4936 const ( 4937 AddressModeAuto = "auto" 4938 AddressModeHost = "host" 4939 AddressModeDriver = "driver" 4940 ) 4941 4942 // Service represents a Consul service definition in Nomad 4943 type Service struct { 4944 // Name of the service registered with Consul. Consul defaults the 4945 // Name to ServiceID if not specified. The Name if specified is used 4946 // as one of the seed values when generating a Consul ServiceID. 4947 Name string 4948 4949 // PortLabel is either the numeric port number or the `host:port`. 4950 // To specify the port number using the host's Consul Advertise 4951 // address, specify an empty host in the PortLabel (e.g. `:port`). 4952 PortLabel string 4953 4954 // AddressMode specifies whether or not to use the host ip:port for 4955 // this service. 4956 AddressMode string 4957 4958 Tags []string // List of tags for the service 4959 CanaryTags []string // List of tags for the service when it is a canary 4960 Checks []*ServiceCheck // List of checks associated with the service 4961 } 4962 4963 func (s *Service) Copy() *Service { 4964 if s == nil { 4965 return nil 4966 } 4967 ns := new(Service) 4968 *ns = *s 4969 ns.Tags = helper.CopySliceString(ns.Tags) 4970 ns.CanaryTags = helper.CopySliceString(ns.CanaryTags) 4971 4972 if s.Checks != nil { 4973 checks := make([]*ServiceCheck, len(ns.Checks)) 4974 for i, c := range ns.Checks { 4975 checks[i] = c.Copy() 4976 } 4977 ns.Checks = checks 4978 } 4979 4980 return ns 4981 } 4982 4983 // Canonicalize interpolates values of Job, Task Group and Task in the Service 4984 // Name. This also generates check names, service id and check ids. 4985 func (s *Service) Canonicalize(job string, taskGroup string, task string) { 4986 // Ensure empty lists are treated as null to avoid scheduler issues when 4987 // using DeepEquals 4988 if len(s.Tags) == 0 { 4989 s.Tags = nil 4990 } 4991 if len(s.CanaryTags) == 0 { 4992 s.CanaryTags = nil 4993 } 4994 if len(s.Checks) == 0 { 4995 s.Checks = nil 4996 } 4997 4998 s.Name = args.ReplaceEnv(s.Name, map[string]string{ 4999 "JOB": job, 5000 "TASKGROUP": taskGroup, 5001 "TASK": task, 5002 "BASE": fmt.Sprintf("%s-%s-%s", job, taskGroup, task), 5003 }, 5004 ) 5005 5006 for _, check := range s.Checks { 5007 check.Canonicalize(s.Name) 5008 } 5009 } 5010 5011 // Validate checks if the Check definition is valid 5012 func (s *Service) Validate() error { 5013 var mErr multierror.Error 5014 5015 // Ensure the service name is valid per the below RFCs but make an exception 5016 // for our interpolation syntax by first stripping any environment variables from the name 5017 5018 serviceNameStripped := args.ReplaceEnvWithPlaceHolder(s.Name, "ENV-VAR") 5019 5020 if err := s.ValidateName(serviceNameStripped); err != nil { 5021 mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q", s.Name)) 5022 } 5023 5024 switch s.AddressMode { 5025 case "", AddressModeAuto, AddressModeHost, AddressModeDriver: 5026 // OK 5027 default: 5028 mErr.Errors = append(mErr.Errors, fmt.Errorf("service address_mode must be %q, %q, or %q; not %q", AddressModeAuto, AddressModeHost, AddressModeDriver, s.AddressMode)) 5029 } 5030 5031 for _, c := range s.Checks { 5032 if s.PortLabel == "" && c.PortLabel == "" && c.RequiresPort() { 5033 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but neither check nor service %+q have a port", c.Name, s.Name)) 5034 continue 5035 } 5036 5037 if err := c.validate(); err != nil { 5038 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err)) 5039 } 5040 } 5041 5042 return mErr.ErrorOrNil() 5043 } 5044 5045 // ValidateName checks if the services Name is valid and should be called after 5046 // the name has been interpolated 5047 func (s *Service) ValidateName(name string) error { 5048 // Ensure the service name is valid per RFC-952 §1 5049 // (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1 5050 // (https://tools.ietf.org/html/rfc1123), and RFC-2782 5051 // (https://tools.ietf.org/html/rfc2782). 5052 re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`) 5053 if !re.MatchString(name) { 5054 return fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be no longer than 63 characters: %q", name) 5055 } 5056 return nil 5057 } 5058 5059 // Hash returns a base32 encoded hash of a Service's contents excluding checks 5060 // as they're hashed independently. 5061 func (s *Service) Hash(allocID, taskName string, canary bool) string { 5062 h := sha1.New() 5063 io.WriteString(h, allocID) 5064 io.WriteString(h, taskName) 5065 io.WriteString(h, s.Name) 5066 io.WriteString(h, s.PortLabel) 5067 io.WriteString(h, s.AddressMode) 5068 for _, tag := range s.Tags { 5069 io.WriteString(h, tag) 5070 } 5071 for _, tag := range s.CanaryTags { 5072 io.WriteString(h, tag) 5073 } 5074 5075 // Vary ID on whether or not CanaryTags will be used 5076 if canary { 5077 h.Write([]byte("Canary")) 5078 } 5079 5080 // Base32 is used for encoding the hash as sha1 hashes can always be 5081 // encoded without padding, only 4 bytes larger than base64, and saves 5082 // 8 bytes vs hex. Since these hashes are used in Consul URLs it's nice 5083 // to have a reasonably compact URL-safe representation. 5084 return b32.EncodeToString(h.Sum(nil)) 5085 } 5086 5087 const ( 5088 // DefaultKillTimeout is the default timeout between signaling a task it 5089 // will be killed and killing it. 5090 DefaultKillTimeout = 5 * time.Second 5091 ) 5092 5093 // LogConfig provides configuration for log rotation 5094 type LogConfig struct { 5095 MaxFiles int 5096 MaxFileSizeMB int 5097 } 5098 5099 // DefaultLogConfig returns the default LogConfig values. 5100 func DefaultLogConfig() *LogConfig { 5101 return &LogConfig{ 5102 MaxFiles: 10, 5103 MaxFileSizeMB: 10, 5104 } 5105 } 5106 5107 // Validate returns an error if the log config specified are less than 5108 // the minimum allowed. 5109 func (l *LogConfig) Validate() error { 5110 var mErr multierror.Error 5111 if l.MaxFiles < 1 { 5112 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles)) 5113 } 5114 if l.MaxFileSizeMB < 1 { 5115 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB)) 5116 } 5117 return mErr.ErrorOrNil() 5118 } 5119 5120 // Task is a single process typically that is executed as part of a task group. 5121 type Task struct { 5122 // Name of the task 5123 Name string 5124 5125 // Driver is used to control which driver is used 5126 Driver string 5127 5128 // User is used to determine which user will run the task. It defaults to 5129 // the same user the Nomad client is being run as. 5130 User string 5131 5132 // Config is provided to the driver to initialize 5133 Config map[string]interface{} 5134 5135 // Map of environment variables to be used by the driver 5136 Env map[string]string 5137 5138 // List of service definitions exposed by the Task 5139 Services []*Service 5140 5141 // Vault is used to define the set of Vault policies that this task should 5142 // have access to. 5143 Vault *Vault 5144 5145 // Templates are the set of templates to be rendered for the task. 5146 Templates []*Template 5147 5148 // Constraints can be specified at a task level and apply only to 5149 // the particular task. 5150 Constraints []*Constraint 5151 5152 // Affinities can be specified at the task level to express 5153 // scheduling preferences 5154 Affinities []*Affinity 5155 5156 // Resources is the resources needed by this task 5157 Resources *Resources 5158 5159 // DispatchPayload configures how the task retrieves its input from a dispatch 5160 DispatchPayload *DispatchPayloadConfig 5161 5162 // Meta is used to associate arbitrary metadata with this 5163 // task. This is opaque to Nomad. 5164 Meta map[string]string 5165 5166 // KillTimeout is the time between signaling a task that it will be 5167 // killed and killing it. 5168 KillTimeout time.Duration 5169 5170 // LogConfig provides configuration for log rotation 5171 LogConfig *LogConfig 5172 5173 // Artifacts is a list of artifacts to download and extract before running 5174 // the task. 5175 Artifacts []*TaskArtifact 5176 5177 // Leader marks the task as the leader within the group. When the leader 5178 // task exits, other tasks will be gracefully terminated. 5179 Leader bool 5180 5181 // ShutdownDelay is the duration of the delay between deregistering a 5182 // task from Consul and sending it a signal to shutdown. See #2441 5183 ShutdownDelay time.Duration 5184 5185 // The kill signal to use for the task. This is an optional specification, 5186 5187 // KillSignal is the kill signal to use for the task. This is an optional 5188 // specification and defaults to SIGINT 5189 KillSignal string 5190 } 5191 5192 func (t *Task) Copy() *Task { 5193 if t == nil { 5194 return nil 5195 } 5196 nt := new(Task) 5197 *nt = *t 5198 nt.Env = helper.CopyMapStringString(nt.Env) 5199 5200 if t.Services != nil { 5201 services := make([]*Service, len(nt.Services)) 5202 for i, s := range nt.Services { 5203 services[i] = s.Copy() 5204 } 5205 nt.Services = services 5206 } 5207 5208 nt.Constraints = CopySliceConstraints(nt.Constraints) 5209 nt.Affinities = CopySliceAffinities(nt.Affinities) 5210 5211 nt.Vault = nt.Vault.Copy() 5212 nt.Resources = nt.Resources.Copy() 5213 nt.Meta = helper.CopyMapStringString(nt.Meta) 5214 nt.DispatchPayload = nt.DispatchPayload.Copy() 5215 5216 if t.Artifacts != nil { 5217 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 5218 for _, a := range nt.Artifacts { 5219 artifacts = append(artifacts, a.Copy()) 5220 } 5221 nt.Artifacts = artifacts 5222 } 5223 5224 if i, err := copystructure.Copy(nt.Config); err != nil { 5225 panic(err.Error()) 5226 } else { 5227 nt.Config = i.(map[string]interface{}) 5228 } 5229 5230 if t.Templates != nil { 5231 templates := make([]*Template, len(t.Templates)) 5232 for i, tmpl := range nt.Templates { 5233 templates[i] = tmpl.Copy() 5234 } 5235 nt.Templates = templates 5236 } 5237 5238 return nt 5239 } 5240 5241 // Canonicalize canonicalizes fields in the task. 5242 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 5243 // Ensure that an empty and nil map are treated the same to avoid scheduling 5244 // problems since we use reflect DeepEquals. 5245 if len(t.Meta) == 0 { 5246 t.Meta = nil 5247 } 5248 if len(t.Config) == 0 { 5249 t.Config = nil 5250 } 5251 if len(t.Env) == 0 { 5252 t.Env = nil 5253 } 5254 5255 for _, service := range t.Services { 5256 service.Canonicalize(job.Name, tg.Name, t.Name) 5257 } 5258 5259 // If Resources are nil initialize them to defaults, otherwise canonicalize 5260 if t.Resources == nil { 5261 t.Resources = DefaultResources() 5262 } else { 5263 t.Resources.Canonicalize() 5264 } 5265 5266 // Set the default timeout if it is not specified. 5267 if t.KillTimeout == 0 { 5268 t.KillTimeout = DefaultKillTimeout 5269 } 5270 5271 if t.Vault != nil { 5272 t.Vault.Canonicalize() 5273 } 5274 5275 for _, template := range t.Templates { 5276 template.Canonicalize() 5277 } 5278 } 5279 5280 func (t *Task) GoString() string { 5281 return fmt.Sprintf("*%#v", *t) 5282 } 5283 5284 // Validate is used to sanity check a task 5285 func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string) error { 5286 var mErr multierror.Error 5287 if t.Name == "" { 5288 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 5289 } 5290 if strings.ContainsAny(t.Name, `/\`) { 5291 // We enforce this so that when creating the directory on disk it will 5292 // not have any slashes. 5293 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 5294 } 5295 if t.Driver == "" { 5296 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 5297 } 5298 if t.KillTimeout < 0 { 5299 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 5300 } 5301 if t.ShutdownDelay < 0 { 5302 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 5303 } 5304 5305 // Validate the resources. 5306 if t.Resources == nil { 5307 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 5308 } else if err := t.Resources.Validate(); err != nil { 5309 mErr.Errors = append(mErr.Errors, err) 5310 } 5311 5312 // Validate the log config 5313 if t.LogConfig == nil { 5314 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 5315 } else if err := t.LogConfig.Validate(); err != nil { 5316 mErr.Errors = append(mErr.Errors, err) 5317 } 5318 5319 for idx, constr := range t.Constraints { 5320 if err := constr.Validate(); err != nil { 5321 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 5322 mErr.Errors = append(mErr.Errors, outer) 5323 } 5324 5325 switch constr.Operand { 5326 case ConstraintDistinctHosts, ConstraintDistinctProperty: 5327 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand) 5328 mErr.Errors = append(mErr.Errors, outer) 5329 } 5330 } 5331 5332 if jobType == JobTypeSystem { 5333 if t.Affinities != nil { 5334 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 5335 } 5336 } else { 5337 for idx, affinity := range t.Affinities { 5338 if err := affinity.Validate(); err != nil { 5339 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 5340 mErr.Errors = append(mErr.Errors, outer) 5341 } 5342 } 5343 } 5344 5345 // Validate Services 5346 if err := validateServices(t); err != nil { 5347 mErr.Errors = append(mErr.Errors, err) 5348 } 5349 5350 if t.LogConfig != nil && ephemeralDisk != nil { 5351 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 5352 if ephemeralDisk.SizeMB <= logUsage { 5353 mErr.Errors = append(mErr.Errors, 5354 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 5355 logUsage, ephemeralDisk.SizeMB)) 5356 } 5357 } 5358 5359 for idx, artifact := range t.Artifacts { 5360 if err := artifact.Validate(); err != nil { 5361 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 5362 mErr.Errors = append(mErr.Errors, outer) 5363 } 5364 } 5365 5366 if t.Vault != nil { 5367 if err := t.Vault.Validate(); err != nil { 5368 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 5369 } 5370 } 5371 5372 destinations := make(map[string]int, len(t.Templates)) 5373 for idx, tmpl := range t.Templates { 5374 if err := tmpl.Validate(); err != nil { 5375 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 5376 mErr.Errors = append(mErr.Errors, outer) 5377 } 5378 5379 if other, ok := destinations[tmpl.DestPath]; ok { 5380 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 5381 mErr.Errors = append(mErr.Errors, outer) 5382 } else { 5383 destinations[tmpl.DestPath] = idx + 1 5384 } 5385 } 5386 5387 // Validate the dispatch payload block if there 5388 if t.DispatchPayload != nil { 5389 if err := t.DispatchPayload.Validate(); err != nil { 5390 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err)) 5391 } 5392 } 5393 5394 return mErr.ErrorOrNil() 5395 } 5396 5397 // validateServices takes a task and validates the services within it are valid 5398 // and reference ports that exist. 5399 func validateServices(t *Task) error { 5400 var mErr multierror.Error 5401 5402 // Ensure that services don't ask for nonexistent ports and their names are 5403 // unique. 5404 servicePorts := make(map[string]map[string]struct{}) 5405 addServicePort := func(label, service string) { 5406 if _, ok := servicePorts[label]; !ok { 5407 servicePorts[label] = map[string]struct{}{} 5408 } 5409 servicePorts[label][service] = struct{}{} 5410 } 5411 knownServices := make(map[string]struct{}) 5412 for i, service := range t.Services { 5413 if err := service.Validate(); err != nil { 5414 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 5415 mErr.Errors = append(mErr.Errors, outer) 5416 } 5417 5418 // Ensure that services with the same name are not being registered for 5419 // the same port 5420 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 5421 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 5422 } 5423 knownServices[service.Name+service.PortLabel] = struct{}{} 5424 5425 if service.PortLabel != "" { 5426 if service.AddressMode == "driver" { 5427 // Numeric port labels are valid for address_mode=driver 5428 _, err := strconv.Atoi(service.PortLabel) 5429 if err != nil { 5430 // Not a numeric port label, add it to list to check 5431 addServicePort(service.PortLabel, service.Name) 5432 } 5433 } else { 5434 addServicePort(service.PortLabel, service.Name) 5435 } 5436 } 5437 5438 // Ensure that check names are unique and have valid ports 5439 knownChecks := make(map[string]struct{}) 5440 for _, check := range service.Checks { 5441 if _, ok := knownChecks[check.Name]; ok { 5442 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 5443 } 5444 knownChecks[check.Name] = struct{}{} 5445 5446 if !check.RequiresPort() { 5447 // No need to continue validating check if it doesn't need a port 5448 continue 5449 } 5450 5451 effectivePort := check.PortLabel 5452 if effectivePort == "" { 5453 // Inherits from service 5454 effectivePort = service.PortLabel 5455 } 5456 5457 if effectivePort == "" { 5458 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is missing a port", check.Name)) 5459 continue 5460 } 5461 5462 isNumeric := false 5463 portNumber, err := strconv.Atoi(effectivePort) 5464 if err == nil { 5465 isNumeric = true 5466 } 5467 5468 // Numeric ports are fine for address_mode = "driver" 5469 if check.AddressMode == "driver" && isNumeric { 5470 if portNumber <= 0 { 5471 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber)) 5472 } 5473 continue 5474 } 5475 5476 if isNumeric { 5477 mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, check.Name, portNumber)) 5478 continue 5479 } 5480 5481 // PortLabel must exist, report errors by its parent service 5482 addServicePort(effectivePort, service.Name) 5483 } 5484 } 5485 5486 // Get the set of port labels. 5487 portLabels := make(map[string]struct{}) 5488 if t.Resources != nil { 5489 for _, network := range t.Resources.Networks { 5490 ports := network.PortLabels() 5491 for portLabel := range ports { 5492 portLabels[portLabel] = struct{}{} 5493 } 5494 } 5495 } 5496 5497 // Iterate over a sorted list of keys to make error listings stable 5498 keys := make([]string, 0, len(servicePorts)) 5499 for p := range servicePorts { 5500 keys = append(keys, p) 5501 } 5502 sort.Strings(keys) 5503 5504 // Ensure all ports referenced in services exist. 5505 for _, servicePort := range keys { 5506 services := servicePorts[servicePort] 5507 _, ok := portLabels[servicePort] 5508 if !ok { 5509 names := make([]string, 0, len(services)) 5510 for name := range services { 5511 names = append(names, name) 5512 } 5513 5514 // Keep order deterministic 5515 sort.Strings(names) 5516 joined := strings.Join(names, ", ") 5517 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 5518 mErr.Errors = append(mErr.Errors, err) 5519 } 5520 } 5521 5522 // Ensure address mode is valid 5523 return mErr.ErrorOrNil() 5524 } 5525 5526 func (t *Task) Warnings() error { 5527 var mErr multierror.Error 5528 5529 // Validate the resources 5530 if t.Resources != nil && t.Resources.IOPS != 0 { 5531 mErr.Errors = append(mErr.Errors, fmt.Errorf("IOPS has been deprecated as of Nomad 0.9.0. Please remove IOPS from resource stanza.")) 5532 } 5533 5534 return mErr.ErrorOrNil() 5535 } 5536 5537 const ( 5538 // TemplateChangeModeNoop marks that no action should be taken if the 5539 // template is re-rendered 5540 TemplateChangeModeNoop = "noop" 5541 5542 // TemplateChangeModeSignal marks that the task should be signaled if the 5543 // template is re-rendered 5544 TemplateChangeModeSignal = "signal" 5545 5546 // TemplateChangeModeRestart marks that the task should be restarted if the 5547 // template is re-rendered 5548 TemplateChangeModeRestart = "restart" 5549 ) 5550 5551 var ( 5552 // TemplateChangeModeInvalidError is the error for when an invalid change 5553 // mode is given 5554 TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart") 5555 ) 5556 5557 // Template represents a template configuration to be rendered for a given task 5558 type Template struct { 5559 // SourcePath is the path to the template to be rendered 5560 SourcePath string 5561 5562 // DestPath is the path to where the template should be rendered 5563 DestPath string 5564 5565 // EmbeddedTmpl store the raw template. This is useful for smaller templates 5566 // where they are embedded in the job file rather than sent as an artifact 5567 EmbeddedTmpl string 5568 5569 // ChangeMode indicates what should be done if the template is re-rendered 5570 ChangeMode string 5571 5572 // ChangeSignal is the signal that should be sent if the change mode 5573 // requires it. 5574 ChangeSignal string 5575 5576 // Splay is used to avoid coordinated restarts of processes by applying a 5577 // random wait between 0 and the given splay value before signalling the 5578 // application of a change 5579 Splay time.Duration 5580 5581 // Perms is the permission the file should be written out with. 5582 Perms string 5583 5584 // LeftDelim and RightDelim are optional configurations to control what 5585 // delimiter is utilized when parsing the template. 5586 LeftDelim string 5587 RightDelim string 5588 5589 // Envvars enables exposing the template as environment variables 5590 // instead of as a file. The template must be of the form: 5591 // 5592 // VAR_NAME_1={{ key service/my-key }} 5593 // VAR_NAME_2=raw string and {{ env "attr.kernel.name" }} 5594 // 5595 // Lines will be split on the initial "=" with the first part being the 5596 // key name and the second part the value. 5597 // Empty lines and lines starting with # will be ignored, but to avoid 5598 // escaping issues #s within lines will not be treated as comments. 5599 Envvars bool 5600 5601 // VaultGrace is the grace duration between lease renewal and reacquiring a 5602 // secret. If the lease of a secret is less than the grace, a new secret is 5603 // acquired. 5604 VaultGrace time.Duration 5605 } 5606 5607 // DefaultTemplate returns a default template. 5608 func DefaultTemplate() *Template { 5609 return &Template{ 5610 ChangeMode: TemplateChangeModeRestart, 5611 Splay: 5 * time.Second, 5612 Perms: "0644", 5613 } 5614 } 5615 5616 func (t *Template) Copy() *Template { 5617 if t == nil { 5618 return nil 5619 } 5620 copy := new(Template) 5621 *copy = *t 5622 return copy 5623 } 5624 5625 func (t *Template) Canonicalize() { 5626 if t.ChangeSignal != "" { 5627 t.ChangeSignal = strings.ToUpper(t.ChangeSignal) 5628 } 5629 } 5630 5631 func (t *Template) Validate() error { 5632 var mErr multierror.Error 5633 5634 // Verify we have something to render 5635 if t.SourcePath == "" && t.EmbeddedTmpl == "" { 5636 multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template")) 5637 } 5638 5639 // Verify we can render somewhere 5640 if t.DestPath == "" { 5641 multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template")) 5642 } 5643 5644 // Verify the destination doesn't escape 5645 escaped, err := PathEscapesAllocDir("task", t.DestPath) 5646 if err != nil { 5647 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 5648 } else if escaped { 5649 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 5650 } 5651 5652 // Verify a proper change mode 5653 switch t.ChangeMode { 5654 case TemplateChangeModeNoop, TemplateChangeModeRestart: 5655 case TemplateChangeModeSignal: 5656 if t.ChangeSignal == "" { 5657 multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal")) 5658 } 5659 if t.Envvars { 5660 multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates")) 5661 } 5662 default: 5663 multierror.Append(&mErr, TemplateChangeModeInvalidError) 5664 } 5665 5666 // Verify the splay is positive 5667 if t.Splay < 0 { 5668 multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value")) 5669 } 5670 5671 // Verify the permissions 5672 if t.Perms != "" { 5673 if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil { 5674 multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err)) 5675 } 5676 } 5677 5678 if t.VaultGrace.Nanoseconds() < 0 { 5679 multierror.Append(&mErr, fmt.Errorf("Vault grace must be greater than zero: %v < 0", t.VaultGrace)) 5680 } 5681 5682 return mErr.ErrorOrNil() 5683 } 5684 5685 // Set of possible states for a task. 5686 const ( 5687 TaskStatePending = "pending" // The task is waiting to be run. 5688 TaskStateRunning = "running" // The task is currently running. 5689 TaskStateDead = "dead" // Terminal state of task. 5690 ) 5691 5692 // TaskState tracks the current state of a task and events that caused state 5693 // transitions. 5694 type TaskState struct { 5695 // The current state of the task. 5696 State string 5697 5698 // Failed marks a task as having failed 5699 Failed bool 5700 5701 // Restarts is the number of times the task has restarted 5702 Restarts uint64 5703 5704 // LastRestart is the time the task last restarted. It is updated each time the 5705 // task restarts 5706 LastRestart time.Time 5707 5708 // StartedAt is the time the task is started. It is updated each time the 5709 // task starts 5710 StartedAt time.Time 5711 5712 // FinishedAt is the time at which the task transitioned to dead and will 5713 // not be started again. 5714 FinishedAt time.Time 5715 5716 // Series of task events that transition the state of the task. 5717 Events []*TaskEvent 5718 } 5719 5720 // NewTaskState returns a TaskState initialized in the Pending state. 5721 func NewTaskState() *TaskState { 5722 return &TaskState{ 5723 State: TaskStatePending, 5724 } 5725 } 5726 5727 // Canonicalize ensures the TaskState has a State set. It should default to 5728 // Pending. 5729 func (ts *TaskState) Canonicalize() { 5730 if ts.State == "" { 5731 ts.State = TaskStatePending 5732 } 5733 } 5734 5735 func (ts *TaskState) Copy() *TaskState { 5736 if ts == nil { 5737 return nil 5738 } 5739 copy := new(TaskState) 5740 *copy = *ts 5741 5742 if ts.Events != nil { 5743 copy.Events = make([]*TaskEvent, len(ts.Events)) 5744 for i, e := range ts.Events { 5745 copy.Events[i] = e.Copy() 5746 } 5747 } 5748 return copy 5749 } 5750 5751 // Successful returns whether a task finished successfully. This doesn't really 5752 // have meaning on a non-batch allocation because a service and system 5753 // allocation should not finish. 5754 func (ts *TaskState) Successful() bool { 5755 return ts.State == TaskStateDead && !ts.Failed 5756 } 5757 5758 const ( 5759 // TaskSetupFailure indicates that the task could not be started due to a 5760 // a setup failure. 5761 TaskSetupFailure = "Setup Failure" 5762 5763 // TaskDriveFailure indicates that the task could not be started due to a 5764 // failure in the driver. 5765 TaskDriverFailure = "Driver Failure" 5766 5767 // TaskReceived signals that the task has been pulled by the client at the 5768 // given timestamp. 5769 TaskReceived = "Received" 5770 5771 // TaskFailedValidation indicates the task was invalid and as such was not 5772 // run. 5773 TaskFailedValidation = "Failed Validation" 5774 5775 // TaskStarted signals that the task was started and its timestamp can be 5776 // used to determine the running length of the task. 5777 TaskStarted = "Started" 5778 5779 // TaskTerminated indicates that the task was started and exited. 5780 TaskTerminated = "Terminated" 5781 5782 // TaskKilling indicates a kill signal has been sent to the task. 5783 TaskKilling = "Killing" 5784 5785 // TaskKilled indicates a user has killed the task. 5786 TaskKilled = "Killed" 5787 5788 // TaskRestarting indicates that task terminated and is being restarted. 5789 TaskRestarting = "Restarting" 5790 5791 // TaskNotRestarting indicates that the task has failed and is not being 5792 // restarted because it has exceeded its restart policy. 5793 TaskNotRestarting = "Not Restarting" 5794 5795 // TaskRestartSignal indicates that the task has been signalled to be 5796 // restarted 5797 TaskRestartSignal = "Restart Signaled" 5798 5799 // TaskSignaling indicates that the task is being signalled. 5800 TaskSignaling = "Signaling" 5801 5802 // TaskDownloadingArtifacts means the task is downloading the artifacts 5803 // specified in the task. 5804 TaskDownloadingArtifacts = "Downloading Artifacts" 5805 5806 // TaskArtifactDownloadFailed indicates that downloading the artifacts 5807 // failed. 5808 TaskArtifactDownloadFailed = "Failed Artifact Download" 5809 5810 // TaskBuildingTaskDir indicates that the task directory/chroot is being 5811 // built. 5812 TaskBuildingTaskDir = "Building Task Directory" 5813 5814 // TaskSetup indicates the task runner is setting up the task environment 5815 TaskSetup = "Task Setup" 5816 5817 // TaskDiskExceeded indicates that one of the tasks in a taskgroup has 5818 // exceeded the requested disk resources. 5819 TaskDiskExceeded = "Disk Resources Exceeded" 5820 5821 // TaskSiblingFailed indicates that a sibling task in the task group has 5822 // failed. 5823 TaskSiblingFailed = "Sibling Task Failed" 5824 5825 // TaskDriverMessage is an informational event message emitted by 5826 // drivers such as when they're performing a long running action like 5827 // downloading an image. 5828 TaskDriverMessage = "Driver" 5829 5830 // TaskLeaderDead indicates that the leader task within the has finished. 5831 TaskLeaderDead = "Leader Task Dead" 5832 5833 // TaskHookFailed indicates that one of the hooks for a task failed. 5834 TaskHookFailed = "Task hook failed" 5835 ) 5836 5837 // TaskEvent is an event that effects the state of a task and contains meta-data 5838 // appropriate to the events type. 5839 type TaskEvent struct { 5840 Type string 5841 Time int64 // Unix Nanosecond timestamp 5842 5843 Message string // A possible message explaining the termination of the task. 5844 5845 // DisplayMessage is a human friendly message about the event 5846 DisplayMessage string 5847 5848 // Details is a map with annotated info about the event 5849 Details map[string]string 5850 5851 // DEPRECATION NOTICE: The following fields are deprecated and will be removed 5852 // in a future release. Field values are available in the Details map. 5853 5854 // FailsTask marks whether this event fails the task. 5855 // Deprecated, use Details["fails_task"] to access this. 5856 FailsTask bool 5857 5858 // Restart fields. 5859 // Deprecated, use Details["restart_reason"] to access this. 5860 RestartReason string 5861 5862 // Setup Failure fields. 5863 // Deprecated, use Details["setup_error"] to access this. 5864 SetupError string 5865 5866 // Driver Failure fields. 5867 // Deprecated, use Details["driver_error"] to access this. 5868 DriverError string // A driver error occurred while starting the task. 5869 5870 // Task Terminated Fields. 5871 5872 // Deprecated, use Details["exit_code"] to access this. 5873 ExitCode int // The exit code of the task. 5874 5875 // Deprecated, use Details["signal"] to access this. 5876 Signal int // The signal that terminated the task. 5877 5878 // Killing fields 5879 // Deprecated, use Details["kill_timeout"] to access this. 5880 KillTimeout time.Duration 5881 5882 // Task Killed Fields. 5883 // Deprecated, use Details["kill_error"] to access this. 5884 KillError string // Error killing the task. 5885 5886 // KillReason is the reason the task was killed 5887 // Deprecated, use Details["kill_reason"] to access this. 5888 KillReason string 5889 5890 // TaskRestarting fields. 5891 // Deprecated, use Details["start_delay"] to access this. 5892 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 5893 5894 // Artifact Download fields 5895 // Deprecated, use Details["download_error"] to access this. 5896 DownloadError string // Error downloading artifacts 5897 5898 // Validation fields 5899 // Deprecated, use Details["validation_error"] to access this. 5900 ValidationError string // Validation error 5901 5902 // The maximum allowed task disk size. 5903 // Deprecated, use Details["disk_limit"] to access this. 5904 DiskLimit int64 5905 5906 // Name of the sibling task that caused termination of the task that 5907 // the TaskEvent refers to. 5908 // Deprecated, use Details["failed_sibling"] to access this. 5909 FailedSibling string 5910 5911 // VaultError is the error from token renewal 5912 // Deprecated, use Details["vault_renewal_error"] to access this. 5913 VaultError string 5914 5915 // TaskSignalReason indicates the reason the task is being signalled. 5916 // Deprecated, use Details["task_signal_reason"] to access this. 5917 TaskSignalReason string 5918 5919 // TaskSignal is the signal that was sent to the task 5920 // Deprecated, use Details["task_signal"] to access this. 5921 TaskSignal string 5922 5923 // DriverMessage indicates a driver action being taken. 5924 // Deprecated, use Details["driver_message"] to access this. 5925 DriverMessage string 5926 5927 // GenericSource is the source of a message. 5928 // Deprecated, is redundant with event type. 5929 GenericSource string 5930 } 5931 5932 func (event *TaskEvent) PopulateEventDisplayMessage() { 5933 // Build up the description based on the event type. 5934 if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why? 5935 return 5936 } 5937 5938 if event.DisplayMessage != "" { 5939 return 5940 } 5941 5942 var desc string 5943 switch event.Type { 5944 case TaskSetup: 5945 desc = event.Message 5946 case TaskStarted: 5947 desc = "Task started by client" 5948 case TaskReceived: 5949 desc = "Task received by client" 5950 case TaskFailedValidation: 5951 if event.ValidationError != "" { 5952 desc = event.ValidationError 5953 } else { 5954 desc = "Validation of task failed" 5955 } 5956 case TaskSetupFailure: 5957 if event.SetupError != "" { 5958 desc = event.SetupError 5959 } else { 5960 desc = "Task setup failed" 5961 } 5962 case TaskDriverFailure: 5963 if event.DriverError != "" { 5964 desc = event.DriverError 5965 } else { 5966 desc = "Failed to start task" 5967 } 5968 case TaskDownloadingArtifacts: 5969 desc = "Client is downloading artifacts" 5970 case TaskArtifactDownloadFailed: 5971 if event.DownloadError != "" { 5972 desc = event.DownloadError 5973 } else { 5974 desc = "Failed to download artifacts" 5975 } 5976 case TaskKilling: 5977 if event.KillReason != "" { 5978 desc = event.KillReason 5979 } else if event.KillTimeout != 0 { 5980 desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout) 5981 } else { 5982 desc = "Sent interrupt" 5983 } 5984 case TaskKilled: 5985 if event.KillError != "" { 5986 desc = event.KillError 5987 } else { 5988 desc = "Task successfully killed" 5989 } 5990 case TaskTerminated: 5991 var parts []string 5992 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 5993 5994 if event.Signal != 0 { 5995 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 5996 } 5997 5998 if event.Message != "" { 5999 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 6000 } 6001 desc = strings.Join(parts, ", ") 6002 case TaskRestarting: 6003 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 6004 if event.RestartReason != "" && event.RestartReason != ReasonWithinPolicy { 6005 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 6006 } else { 6007 desc = in 6008 } 6009 case TaskNotRestarting: 6010 if event.RestartReason != "" { 6011 desc = event.RestartReason 6012 } else { 6013 desc = "Task exceeded restart policy" 6014 } 6015 case TaskSiblingFailed: 6016 if event.FailedSibling != "" { 6017 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 6018 } else { 6019 desc = "Task's sibling failed" 6020 } 6021 case TaskSignaling: 6022 sig := event.TaskSignal 6023 reason := event.TaskSignalReason 6024 6025 if sig == "" && reason == "" { 6026 desc = "Task being sent a signal" 6027 } else if sig == "" { 6028 desc = reason 6029 } else if reason == "" { 6030 desc = fmt.Sprintf("Task being sent signal %v", sig) 6031 } else { 6032 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 6033 } 6034 case TaskRestartSignal: 6035 if event.RestartReason != "" { 6036 desc = event.RestartReason 6037 } else { 6038 desc = "Task signaled to restart" 6039 } 6040 case TaskDriverMessage: 6041 desc = event.DriverMessage 6042 case TaskLeaderDead: 6043 desc = "Leader Task in Group dead" 6044 default: 6045 desc = event.Message 6046 } 6047 6048 event.DisplayMessage = desc 6049 } 6050 6051 func (te *TaskEvent) GoString() string { 6052 return fmt.Sprintf("%v - %v", te.Time, te.Type) 6053 } 6054 6055 // SetDisplayMessage sets the display message of TaskEvent 6056 func (te *TaskEvent) SetDisplayMessage(msg string) *TaskEvent { 6057 te.DisplayMessage = msg 6058 return te 6059 } 6060 6061 // SetMessage sets the message of TaskEvent 6062 func (te *TaskEvent) SetMessage(msg string) *TaskEvent { 6063 te.Message = msg 6064 te.Details["message"] = msg 6065 return te 6066 } 6067 6068 func (te *TaskEvent) Copy() *TaskEvent { 6069 if te == nil { 6070 return nil 6071 } 6072 copy := new(TaskEvent) 6073 *copy = *te 6074 return copy 6075 } 6076 6077 func NewTaskEvent(event string) *TaskEvent { 6078 return &TaskEvent{ 6079 Type: event, 6080 Time: time.Now().UnixNano(), 6081 Details: make(map[string]string), 6082 } 6083 } 6084 6085 // SetSetupError is used to store an error that occurred while setting up the 6086 // task 6087 func (e *TaskEvent) SetSetupError(err error) *TaskEvent { 6088 if err != nil { 6089 e.SetupError = err.Error() 6090 e.Details["setup_error"] = err.Error() 6091 } 6092 return e 6093 } 6094 6095 func (e *TaskEvent) SetFailsTask() *TaskEvent { 6096 e.FailsTask = true 6097 e.Details["fails_task"] = "true" 6098 return e 6099 } 6100 6101 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 6102 if err != nil { 6103 e.DriverError = err.Error() 6104 e.Details["driver_error"] = err.Error() 6105 } 6106 return e 6107 } 6108 6109 func (e *TaskEvent) SetExitCode(c int) *TaskEvent { 6110 e.ExitCode = c 6111 e.Details["exit_code"] = fmt.Sprintf("%d", c) 6112 return e 6113 } 6114 6115 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 6116 e.Signal = s 6117 e.Details["signal"] = fmt.Sprintf("%d", s) 6118 return e 6119 } 6120 6121 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 6122 if err != nil { 6123 e.Message = err.Error() 6124 e.Details["exit_message"] = err.Error() 6125 } 6126 return e 6127 } 6128 6129 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 6130 if err != nil { 6131 e.KillError = err.Error() 6132 e.Details["kill_error"] = err.Error() 6133 } 6134 return e 6135 } 6136 6137 func (e *TaskEvent) SetKillReason(r string) *TaskEvent { 6138 e.KillReason = r 6139 e.Details["kill_reason"] = r 6140 return e 6141 } 6142 6143 func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent { 6144 e.StartDelay = int64(delay) 6145 e.Details["start_delay"] = fmt.Sprintf("%d", delay) 6146 return e 6147 } 6148 6149 func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent { 6150 e.RestartReason = reason 6151 e.Details["restart_reason"] = reason 6152 return e 6153 } 6154 6155 func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent { 6156 e.TaskSignalReason = r 6157 e.Details["task_signal_reason"] = r 6158 return e 6159 } 6160 6161 func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent { 6162 e.TaskSignal = s.String() 6163 e.Details["task_signal"] = s.String() 6164 return e 6165 } 6166 6167 func (e *TaskEvent) SetDownloadError(err error) *TaskEvent { 6168 if err != nil { 6169 e.DownloadError = err.Error() 6170 e.Details["download_error"] = err.Error() 6171 } 6172 return e 6173 } 6174 6175 func (e *TaskEvent) SetValidationError(err error) *TaskEvent { 6176 if err != nil { 6177 e.ValidationError = err.Error() 6178 e.Details["validation_error"] = err.Error() 6179 } 6180 return e 6181 } 6182 6183 func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { 6184 e.KillTimeout = timeout 6185 e.Details["kill_timeout"] = timeout.String() 6186 return e 6187 } 6188 6189 func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent { 6190 e.DiskLimit = limit 6191 e.Details["disk_limit"] = fmt.Sprintf("%d", limit) 6192 return e 6193 } 6194 6195 func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent { 6196 e.FailedSibling = sibling 6197 e.Details["failed_sibling"] = sibling 6198 return e 6199 } 6200 6201 func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent { 6202 if err != nil { 6203 e.VaultError = err.Error() 6204 e.Details["vault_renewal_error"] = err.Error() 6205 } 6206 return e 6207 } 6208 6209 func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent { 6210 e.DriverMessage = m 6211 e.Details["driver_message"] = m 6212 return e 6213 } 6214 6215 func (e *TaskEvent) SetOOMKilled(oom bool) *TaskEvent { 6216 e.Details["oom_killed"] = strconv.FormatBool(oom) 6217 return e 6218 } 6219 6220 // TaskArtifact is an artifact to download before running the task. 6221 type TaskArtifact struct { 6222 // GetterSource is the source to download an artifact using go-getter 6223 GetterSource string 6224 6225 // GetterOptions are options to use when downloading the artifact using 6226 // go-getter. 6227 GetterOptions map[string]string 6228 6229 // GetterMode is the go-getter.ClientMode for fetching resources. 6230 // Defaults to "any" but can be set to "file" or "dir". 6231 GetterMode string 6232 6233 // RelativeDest is the download destination given relative to the task's 6234 // directory. 6235 RelativeDest string 6236 } 6237 6238 func (ta *TaskArtifact) Copy() *TaskArtifact { 6239 if ta == nil { 6240 return nil 6241 } 6242 nta := new(TaskArtifact) 6243 *nta = *ta 6244 nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions) 6245 return nta 6246 } 6247 6248 func (ta *TaskArtifact) GoString() string { 6249 return fmt.Sprintf("%+v", ta) 6250 } 6251 6252 // Hash creates a unique identifier for a TaskArtifact as the same GetterSource 6253 // may be specified multiple times with different destinations. 6254 func (ta *TaskArtifact) Hash() string { 6255 hash, err := blake2b.New256(nil) 6256 if err != nil { 6257 panic(err) 6258 } 6259 6260 hash.Write([]byte(ta.GetterSource)) 6261 6262 // Must iterate over keys in a consistent order 6263 keys := make([]string, 0, len(ta.GetterOptions)) 6264 for k := range ta.GetterOptions { 6265 keys = append(keys, k) 6266 } 6267 sort.Strings(keys) 6268 for _, k := range keys { 6269 hash.Write([]byte(k)) 6270 hash.Write([]byte(ta.GetterOptions[k])) 6271 } 6272 6273 hash.Write([]byte(ta.GetterMode)) 6274 hash.Write([]byte(ta.RelativeDest)) 6275 return base64.RawStdEncoding.EncodeToString(hash.Sum(nil)) 6276 } 6277 6278 // PathEscapesAllocDir returns if the given path escapes the allocation 6279 // directory. The prefix allows adding a prefix if the path will be joined, for 6280 // example a "task/local" prefix may be provided if the path will be joined 6281 // against that prefix. 6282 func PathEscapesAllocDir(prefix, path string) (bool, error) { 6283 // Verify the destination doesn't escape the tasks directory 6284 alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/")) 6285 if err != nil { 6286 return false, err 6287 } 6288 abs, err := filepath.Abs(filepath.Join(alloc, prefix, path)) 6289 if err != nil { 6290 return false, err 6291 } 6292 rel, err := filepath.Rel(alloc, abs) 6293 if err != nil { 6294 return false, err 6295 } 6296 6297 return strings.HasPrefix(rel, ".."), nil 6298 } 6299 6300 func (ta *TaskArtifact) Validate() error { 6301 // Verify the source 6302 var mErr multierror.Error 6303 if ta.GetterSource == "" { 6304 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 6305 } 6306 6307 switch ta.GetterMode { 6308 case "": 6309 // Default to any 6310 ta.GetterMode = GetterModeAny 6311 case GetterModeAny, GetterModeFile, GetterModeDir: 6312 // Ok 6313 default: 6314 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s", 6315 ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir)) 6316 } 6317 6318 escaped, err := PathEscapesAllocDir("task", ta.RelativeDest) 6319 if err != nil { 6320 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 6321 } else if escaped { 6322 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 6323 } 6324 6325 if err := ta.validateChecksum(); err != nil { 6326 mErr.Errors = append(mErr.Errors, err) 6327 } 6328 6329 return mErr.ErrorOrNil() 6330 } 6331 6332 func (ta *TaskArtifact) validateChecksum() error { 6333 check, ok := ta.GetterOptions["checksum"] 6334 if !ok { 6335 return nil 6336 } 6337 6338 // Job struct validation occurs before interpolation resolution can be effective. 6339 // Skip checking if checksum contain variable reference, and artifacts fetching will 6340 // eventually fail, if checksum is indeed invalid. 6341 if args.ContainsEnv(check) { 6342 return nil 6343 } 6344 6345 check = strings.TrimSpace(check) 6346 if check == "" { 6347 return fmt.Errorf("checksum value cannot be empty") 6348 } 6349 6350 parts := strings.Split(check, ":") 6351 if l := len(parts); l != 2 { 6352 return fmt.Errorf(`checksum must be given as "type:value"; got %q`, check) 6353 } 6354 6355 checksumVal := parts[1] 6356 checksumBytes, err := hex.DecodeString(checksumVal) 6357 if err != nil { 6358 return fmt.Errorf("invalid checksum: %v", err) 6359 } 6360 6361 checksumType := parts[0] 6362 expectedLength := 0 6363 switch checksumType { 6364 case "md5": 6365 expectedLength = md5.Size 6366 case "sha1": 6367 expectedLength = sha1.Size 6368 case "sha256": 6369 expectedLength = sha256.Size 6370 case "sha512": 6371 expectedLength = sha512.Size 6372 default: 6373 return fmt.Errorf("unsupported checksum type: %s", checksumType) 6374 } 6375 6376 if len(checksumBytes) != expectedLength { 6377 return fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal) 6378 } 6379 6380 return nil 6381 } 6382 6383 const ( 6384 ConstraintDistinctProperty = "distinct_property" 6385 ConstraintDistinctHosts = "distinct_hosts" 6386 ConstraintRegex = "regexp" 6387 ConstraintVersion = "version" 6388 ConstraintSetContains = "set_contains" 6389 ConstraintSetContainsAll = "set_contains_all" 6390 ConstraintSetContainsAny = "set_contains_any" 6391 ConstraintAttributeIsSet = "is_set" 6392 ConstraintAttributeIsNotSet = "is_not_set" 6393 ) 6394 6395 // Constraints are used to restrict placement options. 6396 type Constraint struct { 6397 LTarget string // Left-hand target 6398 RTarget string // Right-hand target 6399 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 6400 str string // Memoized string 6401 } 6402 6403 // Equal checks if two constraints are equal 6404 func (c *Constraint) Equal(o *Constraint) bool { 6405 return c.LTarget == o.LTarget && 6406 c.RTarget == o.RTarget && 6407 c.Operand == o.Operand 6408 } 6409 6410 func (c *Constraint) Copy() *Constraint { 6411 if c == nil { 6412 return nil 6413 } 6414 nc := new(Constraint) 6415 *nc = *c 6416 return nc 6417 } 6418 6419 func (c *Constraint) String() string { 6420 if c.str != "" { 6421 return c.str 6422 } 6423 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 6424 return c.str 6425 } 6426 6427 func (c *Constraint) Validate() error { 6428 var mErr multierror.Error 6429 if c.Operand == "" { 6430 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 6431 } 6432 6433 // requireLtarget specifies whether the constraint requires an LTarget to be 6434 // provided. 6435 requireLtarget := true 6436 6437 // Perform additional validation based on operand 6438 switch c.Operand { 6439 case ConstraintDistinctHosts: 6440 requireLtarget = false 6441 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 6442 if c.RTarget == "" { 6443 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget")) 6444 } 6445 case ConstraintRegex: 6446 if _, err := regexp.Compile(c.RTarget); err != nil { 6447 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 6448 } 6449 case ConstraintVersion: 6450 if _, err := version.NewConstraint(c.RTarget); err != nil { 6451 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 6452 } 6453 case ConstraintDistinctProperty: 6454 // If a count is set, make sure it is convertible to a uint64 6455 if c.RTarget != "" { 6456 count, err := strconv.ParseUint(c.RTarget, 10, 64) 6457 if err != nil { 6458 mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err)) 6459 } else if count < 1 { 6460 mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count)) 6461 } 6462 } 6463 case ConstraintAttributeIsSet, ConstraintAttributeIsNotSet: 6464 if c.RTarget != "" { 6465 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q does not support an RTarget", c.Operand)) 6466 } 6467 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 6468 if c.RTarget == "" { 6469 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand)) 6470 } 6471 default: 6472 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand)) 6473 } 6474 6475 // Ensure we have an LTarget for the constraints that need one 6476 if requireLtarget && c.LTarget == "" { 6477 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint")) 6478 } 6479 6480 return mErr.ErrorOrNil() 6481 } 6482 6483 // Affinity is used to score placement options based on a weight 6484 type Affinity struct { 6485 LTarget string // Left-hand target 6486 RTarget string // Right-hand target 6487 Operand string // Affinity operand (<=, <, =, !=, >, >=), set_contains_all, set_contains_any 6488 Weight int8 // Weight applied to nodes that match the affinity. Can be negative 6489 str string // Memoized string 6490 } 6491 6492 // Equal checks if two affinities are equal 6493 func (a *Affinity) Equal(o *Affinity) bool { 6494 return a.LTarget == o.LTarget && 6495 a.RTarget == o.RTarget && 6496 a.Operand == o.Operand && 6497 a.Weight == o.Weight 6498 } 6499 6500 func (a *Affinity) Copy() *Affinity { 6501 if a == nil { 6502 return nil 6503 } 6504 na := new(Affinity) 6505 *na = *a 6506 return na 6507 } 6508 6509 func (a *Affinity) String() string { 6510 if a.str != "" { 6511 return a.str 6512 } 6513 a.str = fmt.Sprintf("%s %s %s %v", a.LTarget, a.Operand, a.RTarget, a.Weight) 6514 return a.str 6515 } 6516 6517 func (a *Affinity) Validate() error { 6518 var mErr multierror.Error 6519 if a.Operand == "" { 6520 mErr.Errors = append(mErr.Errors, errors.New("Missing affinity operand")) 6521 } 6522 6523 // Perform additional validation based on operand 6524 switch a.Operand { 6525 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 6526 if a.RTarget == "" { 6527 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains operators require an RTarget")) 6528 } 6529 case ConstraintRegex: 6530 if _, err := regexp.Compile(a.RTarget); err != nil { 6531 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 6532 } 6533 case ConstraintVersion: 6534 if _, err := version.NewConstraint(a.RTarget); err != nil { 6535 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version affinity is invalid: %v", err)) 6536 } 6537 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 6538 if a.RTarget == "" { 6539 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", a.Operand)) 6540 } 6541 default: 6542 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown affinity operator %q", a.Operand)) 6543 } 6544 6545 // Ensure we have an LTarget 6546 if a.LTarget == "" { 6547 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required")) 6548 } 6549 6550 // Ensure that weight is between -100 and 100, and not zero 6551 if a.Weight == 0 { 6552 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight cannot be zero")) 6553 } 6554 6555 if a.Weight > 100 || a.Weight < -100 { 6556 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight must be within the range [-100,100]")) 6557 } 6558 6559 return mErr.ErrorOrNil() 6560 } 6561 6562 // Spread is used to specify desired distribution of allocations according to weight 6563 type Spread struct { 6564 // Attribute is the node attribute used as the spread criteria 6565 Attribute string 6566 6567 // Weight is the relative weight of this spread, useful when there are multiple 6568 // spread and affinities 6569 Weight int8 6570 6571 // SpreadTarget is used to describe desired percentages for each attribute value 6572 SpreadTarget []*SpreadTarget 6573 6574 // Memoized string representation 6575 str string 6576 } 6577 6578 func (s *Spread) Copy() *Spread { 6579 if s == nil { 6580 return nil 6581 } 6582 ns := new(Spread) 6583 *ns = *s 6584 6585 ns.SpreadTarget = CopySliceSpreadTarget(s.SpreadTarget) 6586 return ns 6587 } 6588 6589 func (s *Spread) String() string { 6590 if s.str != "" { 6591 return s.str 6592 } 6593 s.str = fmt.Sprintf("%s %s %v", s.Attribute, s.SpreadTarget, s.Weight) 6594 return s.str 6595 } 6596 6597 func (s *Spread) Validate() error { 6598 var mErr multierror.Error 6599 if s.Attribute == "" { 6600 mErr.Errors = append(mErr.Errors, errors.New("Missing spread attribute")) 6601 } 6602 if s.Weight <= 0 || s.Weight > 100 { 6603 mErr.Errors = append(mErr.Errors, errors.New("Spread stanza must have a positive weight from 0 to 100")) 6604 } 6605 seen := make(map[string]struct{}) 6606 sumPercent := uint32(0) 6607 6608 for _, target := range s.SpreadTarget { 6609 // Make sure there are no duplicates 6610 _, ok := seen[target.Value] 6611 if !ok { 6612 seen[target.Value] = struct{}{} 6613 } else { 6614 mErr.Errors = append(mErr.Errors, errors.New(fmt.Sprintf("Spread target value %q already defined", target.Value))) 6615 } 6616 if target.Percent < 0 || target.Percent > 100 { 6617 mErr.Errors = append(mErr.Errors, errors.New(fmt.Sprintf("Spread target percentage for value %q must be between 0 and 100", target.Value))) 6618 } 6619 sumPercent += uint32(target.Percent) 6620 } 6621 if sumPercent > 100 { 6622 mErr.Errors = append(mErr.Errors, errors.New(fmt.Sprintf("Sum of spread target percentages must not be greater than 100%%; got %d%%", sumPercent))) 6623 } 6624 return mErr.ErrorOrNil() 6625 } 6626 6627 // SpreadTarget is used to specify desired percentages for each attribute value 6628 type SpreadTarget struct { 6629 // Value is a single attribute value, like "dc1" 6630 Value string 6631 6632 // Percent is the desired percentage of allocs 6633 Percent uint8 6634 6635 // Memoized string representation 6636 str string 6637 } 6638 6639 func (s *SpreadTarget) Copy() *SpreadTarget { 6640 if s == nil { 6641 return nil 6642 } 6643 6644 ns := new(SpreadTarget) 6645 *ns = *s 6646 return ns 6647 } 6648 6649 func (s *SpreadTarget) String() string { 6650 if s.str != "" { 6651 return s.str 6652 } 6653 s.str = fmt.Sprintf("%q %v%%", s.Value, s.Percent) 6654 return s.str 6655 } 6656 6657 // EphemeralDisk is an ephemeral disk object 6658 type EphemeralDisk struct { 6659 // Sticky indicates whether the allocation is sticky to a node 6660 Sticky bool 6661 6662 // SizeMB is the size of the local disk 6663 SizeMB int 6664 6665 // Migrate determines if Nomad client should migrate the allocation dir for 6666 // sticky allocations 6667 Migrate bool 6668 } 6669 6670 // DefaultEphemeralDisk returns a EphemeralDisk with default configurations 6671 func DefaultEphemeralDisk() *EphemeralDisk { 6672 return &EphemeralDisk{ 6673 SizeMB: 300, 6674 } 6675 } 6676 6677 // Validate validates EphemeralDisk 6678 func (d *EphemeralDisk) Validate() error { 6679 if d.SizeMB < 10 { 6680 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 6681 } 6682 return nil 6683 } 6684 6685 // Copy copies the EphemeralDisk struct and returns a new one 6686 func (d *EphemeralDisk) Copy() *EphemeralDisk { 6687 ld := new(EphemeralDisk) 6688 *ld = *d 6689 return ld 6690 } 6691 6692 var ( 6693 // VaultUnrecoverableError matches unrecoverable errors returned by a Vault 6694 // server 6695 VaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`) 6696 ) 6697 6698 const ( 6699 // VaultChangeModeNoop takes no action when a new token is retrieved. 6700 VaultChangeModeNoop = "noop" 6701 6702 // VaultChangeModeSignal signals the task when a new token is retrieved. 6703 VaultChangeModeSignal = "signal" 6704 6705 // VaultChangeModeRestart restarts the task when a new token is retrieved. 6706 VaultChangeModeRestart = "restart" 6707 ) 6708 6709 // Vault stores the set of permissions a task needs access to from Vault. 6710 type Vault struct { 6711 // Policies is the set of policies that the task needs access to 6712 Policies []string 6713 6714 // Env marks whether the Vault Token should be exposed as an environment 6715 // variable 6716 Env bool 6717 6718 // ChangeMode is used to configure the task's behavior when the Vault 6719 // token changes because the original token could not be renewed in time. 6720 ChangeMode string 6721 6722 // ChangeSignal is the signal sent to the task when a new token is 6723 // retrieved. This is only valid when using the signal change mode. 6724 ChangeSignal string 6725 } 6726 6727 func DefaultVaultBlock() *Vault { 6728 return &Vault{ 6729 Env: true, 6730 ChangeMode: VaultChangeModeRestart, 6731 } 6732 } 6733 6734 // Copy returns a copy of this Vault block. 6735 func (v *Vault) Copy() *Vault { 6736 if v == nil { 6737 return nil 6738 } 6739 6740 nv := new(Vault) 6741 *nv = *v 6742 return nv 6743 } 6744 6745 func (v *Vault) Canonicalize() { 6746 if v.ChangeSignal != "" { 6747 v.ChangeSignal = strings.ToUpper(v.ChangeSignal) 6748 } 6749 } 6750 6751 // Validate returns if the Vault block is valid. 6752 func (v *Vault) Validate() error { 6753 if v == nil { 6754 return nil 6755 } 6756 6757 var mErr multierror.Error 6758 if len(v.Policies) == 0 { 6759 multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty")) 6760 } 6761 6762 for _, p := range v.Policies { 6763 if p == "root" { 6764 multierror.Append(&mErr, fmt.Errorf("Can not specify \"root\" policy")) 6765 } 6766 } 6767 6768 switch v.ChangeMode { 6769 case VaultChangeModeSignal: 6770 if v.ChangeSignal == "" { 6771 multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal)) 6772 } 6773 case VaultChangeModeNoop, VaultChangeModeRestart: 6774 default: 6775 multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode)) 6776 } 6777 6778 return mErr.ErrorOrNil() 6779 } 6780 6781 const ( 6782 // DeploymentStatuses are the various states a deployment can be be in 6783 DeploymentStatusRunning = "running" 6784 DeploymentStatusPaused = "paused" 6785 DeploymentStatusFailed = "failed" 6786 DeploymentStatusSuccessful = "successful" 6787 DeploymentStatusCancelled = "cancelled" 6788 6789 // DeploymentStatusDescriptions are the various descriptions of the states a 6790 // deployment can be in. 6791 DeploymentStatusDescriptionRunning = "Deployment is running" 6792 DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires promotion" 6793 DeploymentStatusDescriptionPaused = "Deployment is paused" 6794 DeploymentStatusDescriptionSuccessful = "Deployment completed successfully" 6795 DeploymentStatusDescriptionStoppedJob = "Cancelled because job is stopped" 6796 DeploymentStatusDescriptionNewerJob = "Cancelled due to newer version of job" 6797 DeploymentStatusDescriptionFailedAllocations = "Failed due to unhealthy allocations" 6798 DeploymentStatusDescriptionProgressDeadline = "Failed due to progress deadline" 6799 DeploymentStatusDescriptionFailedByUser = "Deployment marked as failed" 6800 ) 6801 6802 // DeploymentStatusDescriptionRollback is used to get the status description of 6803 // a deployment when rolling back to an older job. 6804 func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string { 6805 return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion) 6806 } 6807 6808 // DeploymentStatusDescriptionRollbackNoop is used to get the status description of 6809 // a deployment when rolling back is not possible because it has the same specification 6810 func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string { 6811 return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion) 6812 } 6813 6814 // DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of 6815 // a deployment when there is no target to rollback to but autorevert is desired. 6816 func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string { 6817 return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription) 6818 } 6819 6820 // Deployment is the object that represents a job deployment which is used to 6821 // transition a job between versions. 6822 type Deployment struct { 6823 // ID is a generated UUID for the deployment 6824 ID string 6825 6826 // Namespace is the namespace the deployment is created in 6827 Namespace string 6828 6829 // JobID is the job the deployment is created for 6830 JobID string 6831 6832 // JobVersion is the version of the job at which the deployment is tracking 6833 JobVersion uint64 6834 6835 // JobModifyIndex is the ModifyIndex of the job which the deployment is 6836 // tracking. 6837 JobModifyIndex uint64 6838 6839 // JobSpecModifyIndex is the JobModifyIndex of the job which the 6840 // deployment is tracking. 6841 JobSpecModifyIndex uint64 6842 6843 // JobCreateIndex is the create index of the job which the deployment is 6844 // tracking. It is needed so that if the job gets stopped and reran we can 6845 // present the correct list of deployments for the job and not old ones. 6846 JobCreateIndex uint64 6847 6848 // TaskGroups is the set of task groups effected by the deployment and their 6849 // current deployment status. 6850 TaskGroups map[string]*DeploymentState 6851 6852 // The status of the deployment 6853 Status string 6854 6855 // StatusDescription allows a human readable description of the deployment 6856 // status. 6857 StatusDescription string 6858 6859 CreateIndex uint64 6860 ModifyIndex uint64 6861 } 6862 6863 // NewDeployment creates a new deployment given the job. 6864 func NewDeployment(job *Job) *Deployment { 6865 return &Deployment{ 6866 ID: uuid.Generate(), 6867 Namespace: job.Namespace, 6868 JobID: job.ID, 6869 JobVersion: job.Version, 6870 JobModifyIndex: job.ModifyIndex, 6871 JobSpecModifyIndex: job.JobModifyIndex, 6872 JobCreateIndex: job.CreateIndex, 6873 Status: DeploymentStatusRunning, 6874 StatusDescription: DeploymentStatusDescriptionRunning, 6875 TaskGroups: make(map[string]*DeploymentState, len(job.TaskGroups)), 6876 } 6877 } 6878 6879 func (d *Deployment) Copy() *Deployment { 6880 if d == nil { 6881 return nil 6882 } 6883 6884 c := &Deployment{} 6885 *c = *d 6886 6887 c.TaskGroups = nil 6888 if l := len(d.TaskGroups); d.TaskGroups != nil { 6889 c.TaskGroups = make(map[string]*DeploymentState, l) 6890 for tg, s := range d.TaskGroups { 6891 c.TaskGroups[tg] = s.Copy() 6892 } 6893 } 6894 6895 return c 6896 } 6897 6898 // Active returns whether the deployment is active or terminal. 6899 func (d *Deployment) Active() bool { 6900 switch d.Status { 6901 case DeploymentStatusRunning, DeploymentStatusPaused: 6902 return true 6903 default: 6904 return false 6905 } 6906 } 6907 6908 // GetID is a helper for getting the ID when the object may be nil 6909 func (d *Deployment) GetID() string { 6910 if d == nil { 6911 return "" 6912 } 6913 return d.ID 6914 } 6915 6916 // HasPlacedCanaries returns whether the deployment has placed canaries 6917 func (d *Deployment) HasPlacedCanaries() bool { 6918 if d == nil || len(d.TaskGroups) == 0 { 6919 return false 6920 } 6921 for _, group := range d.TaskGroups { 6922 if len(group.PlacedCanaries) != 0 { 6923 return true 6924 } 6925 } 6926 return false 6927 } 6928 6929 // RequiresPromotion returns whether the deployment requires promotion to 6930 // continue 6931 func (d *Deployment) RequiresPromotion() bool { 6932 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 6933 return false 6934 } 6935 for _, group := range d.TaskGroups { 6936 if group.DesiredCanaries > 0 && !group.Promoted { 6937 return true 6938 } 6939 } 6940 return false 6941 } 6942 6943 func (d *Deployment) GoString() string { 6944 base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription) 6945 for group, state := range d.TaskGroups { 6946 base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state) 6947 } 6948 return base 6949 } 6950 6951 // DeploymentState tracks the state of a deployment for a given task group. 6952 type DeploymentState struct { 6953 // AutoRevert marks whether the task group has indicated the job should be 6954 // reverted on failure 6955 AutoRevert bool 6956 6957 // ProgressDeadline is the deadline by which an allocation must transition 6958 // to healthy before the deployment is considered failed. 6959 ProgressDeadline time.Duration 6960 6961 // RequireProgressBy is the time by which an allocation must transition 6962 // to healthy before the deployment is considered failed. 6963 RequireProgressBy time.Time 6964 6965 // Promoted marks whether the canaries have been promoted 6966 Promoted bool 6967 6968 // PlacedCanaries is the set of placed canary allocations 6969 PlacedCanaries []string 6970 6971 // DesiredCanaries is the number of canaries that should be created. 6972 DesiredCanaries int 6973 6974 // DesiredTotal is the total number of allocations that should be created as 6975 // part of the deployment. 6976 DesiredTotal int 6977 6978 // PlacedAllocs is the number of allocations that have been placed 6979 PlacedAllocs int 6980 6981 // HealthyAllocs is the number of allocations that have been marked healthy. 6982 HealthyAllocs int 6983 6984 // UnhealthyAllocs are allocations that have been marked as unhealthy. 6985 UnhealthyAllocs int 6986 } 6987 6988 func (d *DeploymentState) GoString() string { 6989 base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal) 6990 base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries) 6991 base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries) 6992 base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted) 6993 base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs) 6994 base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs) 6995 base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs) 6996 base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert) 6997 return base 6998 } 6999 7000 func (d *DeploymentState) Copy() *DeploymentState { 7001 c := &DeploymentState{} 7002 *c = *d 7003 c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries) 7004 return c 7005 } 7006 7007 // DeploymentStatusUpdate is used to update the status of a given deployment 7008 type DeploymentStatusUpdate struct { 7009 // DeploymentID is the ID of the deployment to update 7010 DeploymentID string 7011 7012 // Status is the new status of the deployment. 7013 Status string 7014 7015 // StatusDescription is the new status description of the deployment. 7016 StatusDescription string 7017 } 7018 7019 // RescheduleTracker encapsulates previous reschedule events 7020 type RescheduleTracker struct { 7021 Events []*RescheduleEvent 7022 } 7023 7024 func (rt *RescheduleTracker) Copy() *RescheduleTracker { 7025 if rt == nil { 7026 return nil 7027 } 7028 nt := &RescheduleTracker{} 7029 *nt = *rt 7030 rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events)) 7031 for _, tracker := range rt.Events { 7032 rescheduleEvents = append(rescheduleEvents, tracker.Copy()) 7033 } 7034 nt.Events = rescheduleEvents 7035 return nt 7036 } 7037 7038 // RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation 7039 type RescheduleEvent struct { 7040 // RescheduleTime is the timestamp of a reschedule attempt 7041 RescheduleTime int64 7042 7043 // PrevAllocID is the ID of the previous allocation being restarted 7044 PrevAllocID string 7045 7046 // PrevNodeID is the node ID of the previous allocation 7047 PrevNodeID string 7048 7049 // Delay is the reschedule delay associated with the attempt 7050 Delay time.Duration 7051 } 7052 7053 func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string, delay time.Duration) *RescheduleEvent { 7054 return &RescheduleEvent{RescheduleTime: rescheduleTime, 7055 PrevAllocID: prevAllocID, 7056 PrevNodeID: prevNodeID, 7057 Delay: delay} 7058 } 7059 7060 func (re *RescheduleEvent) Copy() *RescheduleEvent { 7061 if re == nil { 7062 return nil 7063 } 7064 copy := new(RescheduleEvent) 7065 *copy = *re 7066 return copy 7067 } 7068 7069 // DesiredTransition is used to mark an allocation as having a desired state 7070 // transition. This information can be used by the scheduler to make the 7071 // correct decision. 7072 type DesiredTransition struct { 7073 // Migrate is used to indicate that this allocation should be stopped and 7074 // migrated to another node. 7075 Migrate *bool 7076 7077 // Reschedule is used to indicate that this allocation is eligible to be 7078 // rescheduled. Most allocations are automatically eligible for 7079 // rescheduling, so this field is only required when an allocation is not 7080 // automatically eligible. An example is an allocation that is part of a 7081 // deployment. 7082 Reschedule *bool 7083 7084 // ForceReschedule is used to indicate that this allocation must be rescheduled. 7085 // This field is only used when operators want to force a placement even if 7086 // a failed allocation is not eligible to be rescheduled 7087 ForceReschedule *bool 7088 } 7089 7090 // Merge merges the two desired transitions, preferring the values from the 7091 // passed in object. 7092 func (d *DesiredTransition) Merge(o *DesiredTransition) { 7093 if o.Migrate != nil { 7094 d.Migrate = o.Migrate 7095 } 7096 7097 if o.Reschedule != nil { 7098 d.Reschedule = o.Reschedule 7099 } 7100 7101 if o.ForceReschedule != nil { 7102 d.ForceReschedule = o.ForceReschedule 7103 } 7104 } 7105 7106 // ShouldMigrate returns whether the transition object dictates a migration. 7107 func (d *DesiredTransition) ShouldMigrate() bool { 7108 return d.Migrate != nil && *d.Migrate 7109 } 7110 7111 // ShouldReschedule returns whether the transition object dictates a 7112 // rescheduling. 7113 func (d *DesiredTransition) ShouldReschedule() bool { 7114 return d.Reschedule != nil && *d.Reschedule 7115 } 7116 7117 // ShouldForceReschedule returns whether the transition object dictates a 7118 // forced rescheduling. 7119 func (d *DesiredTransition) ShouldForceReschedule() bool { 7120 if d == nil { 7121 return false 7122 } 7123 return d.ForceReschedule != nil && *d.ForceReschedule 7124 } 7125 7126 const ( 7127 AllocDesiredStatusRun = "run" // Allocation should run 7128 AllocDesiredStatusStop = "stop" // Allocation should stop 7129 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 7130 ) 7131 7132 const ( 7133 AllocClientStatusPending = "pending" 7134 AllocClientStatusRunning = "running" 7135 AllocClientStatusComplete = "complete" 7136 AllocClientStatusFailed = "failed" 7137 AllocClientStatusLost = "lost" 7138 ) 7139 7140 // Allocation is used to allocate the placement of a task group to a node. 7141 type Allocation struct { 7142 // ID of the allocation (UUID) 7143 ID string 7144 7145 // Namespace is the namespace the allocation is created in 7146 Namespace string 7147 7148 // ID of the evaluation that generated this allocation 7149 EvalID string 7150 7151 // Name is a logical name of the allocation. 7152 Name string 7153 7154 // NodeID is the node this is being placed on 7155 NodeID string 7156 7157 // Job is the parent job of the task group being allocated. 7158 // This is copied at allocation time to avoid issues if the job 7159 // definition is updated. 7160 JobID string 7161 Job *Job 7162 7163 // TaskGroup is the name of the task group that should be run 7164 TaskGroup string 7165 7166 // COMPAT(0.11): Remove in 0.11 7167 // Resources is the total set of resources allocated as part 7168 // of this allocation of the task group. Dynamic ports will be set by 7169 // the scheduler. 7170 Resources *Resources 7171 7172 // COMPAT(0.11): Remove in 0.11 7173 // SharedResources are the resources that are shared by all the tasks in an 7174 // allocation 7175 SharedResources *Resources 7176 7177 // COMPAT(0.11): Remove in 0.11 7178 // TaskResources is the set of resources allocated to each 7179 // task. These should sum to the total Resources. Dynamic ports will be 7180 // set by the scheduler. 7181 TaskResources map[string]*Resources 7182 7183 // AllocatedResources is the total resources allocated for the task group. 7184 AllocatedResources *AllocatedResources 7185 7186 // Metrics associated with this allocation 7187 Metrics *AllocMetric 7188 7189 // Desired Status of the allocation on the client 7190 DesiredStatus string 7191 7192 // DesiredStatusDescription is meant to provide more human useful information 7193 DesiredDescription string 7194 7195 // DesiredTransition is used to indicate that a state transition 7196 // is desired for a given reason. 7197 DesiredTransition DesiredTransition 7198 7199 // Status of the allocation on the client 7200 ClientStatus string 7201 7202 // ClientStatusDescription is meant to provide more human useful information 7203 ClientDescription string 7204 7205 // TaskStates stores the state of each task, 7206 TaskStates map[string]*TaskState 7207 7208 // PreviousAllocation is the allocation that this allocation is replacing 7209 PreviousAllocation string 7210 7211 // NextAllocation is the allocation that this allocation is being replaced by 7212 NextAllocation string 7213 7214 // DeploymentID identifies an allocation as being created from a 7215 // particular deployment 7216 DeploymentID string 7217 7218 // DeploymentStatus captures the status of the allocation as part of the 7219 // given deployment 7220 DeploymentStatus *AllocDeploymentStatus 7221 7222 // RescheduleTrackers captures details of previous reschedule attempts of the allocation 7223 RescheduleTracker *RescheduleTracker 7224 7225 // FollowupEvalID captures a follow up evaluation created to handle a failed allocation 7226 // that can be rescheduled in the future 7227 FollowupEvalID string 7228 7229 // PreemptedAllocations captures IDs of any allocations that were preempted 7230 // in order to place this allocation 7231 PreemptedAllocations []string 7232 7233 // PreemptedByAllocation tracks the alloc ID of the allocation that caused this allocation 7234 // to stop running because it got preempted 7235 PreemptedByAllocation string 7236 7237 // Raft Indexes 7238 CreateIndex uint64 7239 ModifyIndex uint64 7240 7241 // AllocModifyIndex is not updated when the client updates allocations. This 7242 // lets the client pull only the allocs updated by the server. 7243 AllocModifyIndex uint64 7244 7245 // CreateTime is the time the allocation has finished scheduling and been 7246 // verified by the plan applier. 7247 CreateTime int64 7248 7249 // ModifyTime is the time the allocation was last updated. 7250 ModifyTime int64 7251 } 7252 7253 // Index returns the index of the allocation. If the allocation is from a task 7254 // group with count greater than 1, there will be multiple allocations for it. 7255 func (a *Allocation) Index() uint { 7256 l := len(a.Name) 7257 prefix := len(a.JobID) + len(a.TaskGroup) + 2 7258 if l <= 3 || l <= prefix { 7259 return uint(0) 7260 } 7261 7262 strNum := a.Name[prefix : len(a.Name)-1] 7263 num, _ := strconv.Atoi(strNum) 7264 return uint(num) 7265 } 7266 7267 func (a *Allocation) Copy() *Allocation { 7268 return a.copyImpl(true) 7269 } 7270 7271 // Copy provides a copy of the allocation but doesn't deep copy the job 7272 func (a *Allocation) CopySkipJob() *Allocation { 7273 return a.copyImpl(false) 7274 } 7275 7276 func (a *Allocation) copyImpl(job bool) *Allocation { 7277 if a == nil { 7278 return nil 7279 } 7280 na := new(Allocation) 7281 *na = *a 7282 7283 if job { 7284 na.Job = na.Job.Copy() 7285 } 7286 7287 na.AllocatedResources = na.AllocatedResources.Copy() 7288 na.Resources = na.Resources.Copy() 7289 na.SharedResources = na.SharedResources.Copy() 7290 7291 if a.TaskResources != nil { 7292 tr := make(map[string]*Resources, len(na.TaskResources)) 7293 for task, resource := range na.TaskResources { 7294 tr[task] = resource.Copy() 7295 } 7296 na.TaskResources = tr 7297 } 7298 7299 na.Metrics = na.Metrics.Copy() 7300 na.DeploymentStatus = na.DeploymentStatus.Copy() 7301 7302 if a.TaskStates != nil { 7303 ts := make(map[string]*TaskState, len(na.TaskStates)) 7304 for task, state := range na.TaskStates { 7305 ts[task] = state.Copy() 7306 } 7307 na.TaskStates = ts 7308 } 7309 7310 na.RescheduleTracker = a.RescheduleTracker.Copy() 7311 na.PreemptedAllocations = helper.CopySliceString(a.PreemptedAllocations) 7312 return na 7313 } 7314 7315 // TerminalStatus returns if the desired or actual status is terminal and 7316 // will no longer transition. 7317 func (a *Allocation) TerminalStatus() bool { 7318 // First check the desired state and if that isn't terminal, check client 7319 // state. 7320 return a.ServerTerminalStatus() || a.ClientTerminalStatus() 7321 } 7322 7323 // ServerTerminalStatus returns true if the desired state of the allocation is terminal 7324 func (a *Allocation) ServerTerminalStatus() bool { 7325 switch a.DesiredStatus { 7326 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 7327 return true 7328 default: 7329 return false 7330 } 7331 } 7332 7333 // ClientTerminalStatus returns if the client status is terminal and will no longer transition 7334 func (a *Allocation) ClientTerminalStatus() bool { 7335 switch a.ClientStatus { 7336 case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost: 7337 return true 7338 default: 7339 return false 7340 } 7341 } 7342 7343 // ShouldReschedule returns if the allocation is eligible to be rescheduled according 7344 // to its status and ReschedulePolicy given its failure time 7345 func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 7346 // First check the desired state 7347 switch a.DesiredStatus { 7348 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 7349 return false 7350 default: 7351 } 7352 switch a.ClientStatus { 7353 case AllocClientStatusFailed: 7354 return a.RescheduleEligible(reschedulePolicy, failTime) 7355 default: 7356 return false 7357 } 7358 } 7359 7360 // RescheduleEligible returns if the allocation is eligible to be rescheduled according 7361 // to its ReschedulePolicy and the current state of its reschedule trackers 7362 func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 7363 if reschedulePolicy == nil { 7364 return false 7365 } 7366 attempts := reschedulePolicy.Attempts 7367 interval := reschedulePolicy.Interval 7368 enabled := attempts > 0 || reschedulePolicy.Unlimited 7369 if !enabled { 7370 return false 7371 } 7372 if reschedulePolicy.Unlimited { 7373 return true 7374 } 7375 // Early return true if there are no attempts yet and the number of allowed attempts is > 0 7376 if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 { 7377 return true 7378 } 7379 attempted := 0 7380 for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- { 7381 lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime 7382 timeDiff := failTime.UTC().UnixNano() - lastAttempt 7383 if timeDiff < interval.Nanoseconds() { 7384 attempted += 1 7385 } 7386 } 7387 return attempted < attempts 7388 } 7389 7390 // LastEventTime is the time of the last task event in the allocation. 7391 // It is used to determine allocation failure time. If the FinishedAt field 7392 // is not set, the alloc's modify time is used 7393 func (a *Allocation) LastEventTime() time.Time { 7394 var lastEventTime time.Time 7395 if a.TaskStates != nil { 7396 for _, s := range a.TaskStates { 7397 if lastEventTime.IsZero() || s.FinishedAt.After(lastEventTime) { 7398 lastEventTime = s.FinishedAt 7399 } 7400 } 7401 } 7402 7403 if lastEventTime.IsZero() { 7404 return time.Unix(0, a.ModifyTime).UTC() 7405 } 7406 return lastEventTime 7407 } 7408 7409 // ReschedulePolicy returns the reschedule policy based on the task group 7410 func (a *Allocation) ReschedulePolicy() *ReschedulePolicy { 7411 tg := a.Job.LookupTaskGroup(a.TaskGroup) 7412 if tg == nil { 7413 return nil 7414 } 7415 return tg.ReschedulePolicy 7416 } 7417 7418 // NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled, 7419 // and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules 7420 func (a *Allocation) NextRescheduleTime() (time.Time, bool) { 7421 failTime := a.LastEventTime() 7422 reschedulePolicy := a.ReschedulePolicy() 7423 if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil { 7424 return time.Time{}, false 7425 } 7426 7427 nextDelay := a.NextDelay() 7428 nextRescheduleTime := failTime.Add(nextDelay) 7429 rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil) 7430 if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil { 7431 // Check for eligibility based on the interval if max attempts is set 7432 attempted := 0 7433 for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- { 7434 lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime 7435 timeDiff := failTime.UTC().UnixNano() - lastAttempt 7436 if timeDiff < reschedulePolicy.Interval.Nanoseconds() { 7437 attempted += 1 7438 } 7439 } 7440 rescheduleEligible = attempted < reschedulePolicy.Attempts && nextDelay < reschedulePolicy.Interval 7441 } 7442 return nextRescheduleTime, rescheduleEligible 7443 } 7444 7445 // NextDelay returns a duration after which the allocation can be rescheduled. 7446 // It is calculated according to the delay function and previous reschedule attempts. 7447 func (a *Allocation) NextDelay() time.Duration { 7448 policy := a.ReschedulePolicy() 7449 // Can be nil if the task group was updated to remove its reschedule policy 7450 if policy == nil { 7451 return 0 7452 } 7453 delayDur := policy.Delay 7454 if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 { 7455 return delayDur 7456 } 7457 events := a.RescheduleTracker.Events 7458 switch policy.DelayFunction { 7459 case "exponential": 7460 delayDur = a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1].Delay * 2 7461 case "fibonacci": 7462 if len(events) >= 2 { 7463 fibN1Delay := events[len(events)-1].Delay 7464 fibN2Delay := events[len(events)-2].Delay 7465 // Handle reset of delay ceiling which should cause 7466 // a new series to start 7467 if fibN2Delay == policy.MaxDelay && fibN1Delay == policy.Delay { 7468 delayDur = fibN1Delay 7469 } else { 7470 delayDur = fibN1Delay + fibN2Delay 7471 } 7472 } 7473 default: 7474 return delayDur 7475 } 7476 if policy.MaxDelay > 0 && delayDur > policy.MaxDelay { 7477 delayDur = policy.MaxDelay 7478 // check if delay needs to be reset 7479 7480 lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] 7481 timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime 7482 if timeDiff > delayDur.Nanoseconds() { 7483 delayDur = policy.Delay 7484 } 7485 7486 } 7487 7488 return delayDur 7489 } 7490 7491 // Terminated returns if the allocation is in a terminal state on a client. 7492 func (a *Allocation) Terminated() bool { 7493 if a.ClientStatus == AllocClientStatusFailed || 7494 a.ClientStatus == AllocClientStatusComplete || 7495 a.ClientStatus == AllocClientStatusLost { 7496 return true 7497 } 7498 return false 7499 } 7500 7501 // RanSuccessfully returns whether the client has ran the allocation and all 7502 // tasks finished successfully. Critically this function returns whether the 7503 // allocation has ran to completion and not just that the alloc has converged to 7504 // its desired state. That is to say that a batch allocation must have finished 7505 // with exit code 0 on all task groups. This doesn't really have meaning on a 7506 // non-batch allocation because a service and system allocation should not 7507 // finish. 7508 func (a *Allocation) RanSuccessfully() bool { 7509 // Handle the case the client hasn't started the allocation. 7510 if len(a.TaskStates) == 0 { 7511 return false 7512 } 7513 7514 // Check to see if all the tasks finished successfully in the allocation 7515 allSuccess := true 7516 for _, state := range a.TaskStates { 7517 allSuccess = allSuccess && state.Successful() 7518 } 7519 7520 return allSuccess 7521 } 7522 7523 // ShouldMigrate returns if the allocation needs data migration 7524 func (a *Allocation) ShouldMigrate() bool { 7525 if a.PreviousAllocation == "" { 7526 return false 7527 } 7528 7529 if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict { 7530 return false 7531 } 7532 7533 tg := a.Job.LookupTaskGroup(a.TaskGroup) 7534 7535 // if the task group is nil or the ephemeral disk block isn't present then 7536 // we won't migrate 7537 if tg == nil || tg.EphemeralDisk == nil { 7538 return false 7539 } 7540 7541 // We won't migrate any data is the user hasn't enabled migration or the 7542 // disk is not marked as sticky 7543 if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky { 7544 return false 7545 } 7546 7547 return true 7548 } 7549 7550 // SetEventDisplayMessage populates the display message if its not already set, 7551 // a temporary fix to handle old allocations that don't have it. 7552 // This method will be removed in a future release. 7553 func (a *Allocation) SetEventDisplayMessages() { 7554 setDisplayMsg(a.TaskStates) 7555 } 7556 7557 // COMPAT(0.11): Remove in 0.11 7558 // ComparableResources returns the resouces on the allocation 7559 // handling upgrade paths. After 0.11 calls to this should be replaced with: 7560 // alloc.AllocatedResources.Comparable() 7561 func (a *Allocation) ComparableResources() *ComparableResources { 7562 // ALloc already has 0.9+ behavior 7563 if a.AllocatedResources != nil { 7564 return a.AllocatedResources.Comparable() 7565 } 7566 7567 var resources *Resources 7568 if a.Resources != nil { 7569 resources = a.Resources 7570 } else if a.TaskResources != nil { 7571 resources = new(Resources) 7572 resources.Add(a.SharedResources) 7573 for _, taskResource := range a.TaskResources { 7574 resources.Add(taskResource) 7575 } 7576 } 7577 7578 // Upgrade path 7579 return &ComparableResources{ 7580 Flattened: AllocatedTaskResources{ 7581 Cpu: AllocatedCpuResources{ 7582 CpuShares: int64(resources.CPU), 7583 }, 7584 Memory: AllocatedMemoryResources{ 7585 MemoryMB: int64(resources.MemoryMB), 7586 }, 7587 Networks: resources.Networks, 7588 }, 7589 Shared: AllocatedSharedResources{ 7590 DiskMB: int64(resources.DiskMB), 7591 }, 7592 } 7593 } 7594 7595 // LookupTask by name from the Allocation. Returns nil if the Job is not set, the 7596 // TaskGroup does not exist, or the task name cannot be found. 7597 func (a *Allocation) LookupTask(name string) *Task { 7598 if a.Job == nil { 7599 return nil 7600 } 7601 7602 tg := a.Job.LookupTaskGroup(a.TaskGroup) 7603 if tg == nil { 7604 return nil 7605 } 7606 7607 return tg.LookupTask(name) 7608 } 7609 7610 // Stub returns a list stub for the allocation 7611 func (a *Allocation) Stub() *AllocListStub { 7612 return &AllocListStub{ 7613 ID: a.ID, 7614 EvalID: a.EvalID, 7615 Name: a.Name, 7616 Namespace: a.Namespace, 7617 NodeID: a.NodeID, 7618 JobID: a.JobID, 7619 JobType: a.Job.Type, 7620 JobVersion: a.Job.Version, 7621 TaskGroup: a.TaskGroup, 7622 DesiredStatus: a.DesiredStatus, 7623 DesiredDescription: a.DesiredDescription, 7624 ClientStatus: a.ClientStatus, 7625 ClientDescription: a.ClientDescription, 7626 DesiredTransition: a.DesiredTransition, 7627 TaskStates: a.TaskStates, 7628 DeploymentStatus: a.DeploymentStatus, 7629 FollowupEvalID: a.FollowupEvalID, 7630 RescheduleTracker: a.RescheduleTracker, 7631 CreateIndex: a.CreateIndex, 7632 ModifyIndex: a.ModifyIndex, 7633 CreateTime: a.CreateTime, 7634 ModifyTime: a.ModifyTime, 7635 } 7636 } 7637 7638 // AllocListStub is used to return a subset of alloc information 7639 type AllocListStub struct { 7640 ID string 7641 EvalID string 7642 Name string 7643 Namespace string 7644 NodeID string 7645 JobID string 7646 JobType string 7647 JobVersion uint64 7648 TaskGroup string 7649 DesiredStatus string 7650 DesiredDescription string 7651 ClientStatus string 7652 ClientDescription string 7653 DesiredTransition DesiredTransition 7654 TaskStates map[string]*TaskState 7655 DeploymentStatus *AllocDeploymentStatus 7656 FollowupEvalID string 7657 RescheduleTracker *RescheduleTracker 7658 CreateIndex uint64 7659 ModifyIndex uint64 7660 CreateTime int64 7661 ModifyTime int64 7662 } 7663 7664 // SetEventDisplayMessage populates the display message if its not already set, 7665 // a temporary fix to handle old allocations that don't have it. 7666 // This method will be removed in a future release. 7667 func (a *AllocListStub) SetEventDisplayMessages() { 7668 setDisplayMsg(a.TaskStates) 7669 } 7670 7671 func setDisplayMsg(taskStates map[string]*TaskState) { 7672 if taskStates != nil { 7673 for _, taskState := range taskStates { 7674 for _, event := range taskState.Events { 7675 event.PopulateEventDisplayMessage() 7676 } 7677 } 7678 } 7679 } 7680 7681 // AllocMetric is used to track various metrics while attempting 7682 // to make an allocation. These are used to debug a job, or to better 7683 // understand the pressure within the system. 7684 type AllocMetric struct { 7685 // NodesEvaluated is the number of nodes that were evaluated 7686 NodesEvaluated int 7687 7688 // NodesFiltered is the number of nodes filtered due to a constraint 7689 NodesFiltered int 7690 7691 // NodesAvailable is the number of nodes available for evaluation per DC. 7692 NodesAvailable map[string]int 7693 7694 // ClassFiltered is the number of nodes filtered by class 7695 ClassFiltered map[string]int 7696 7697 // ConstraintFiltered is the number of failures caused by constraint 7698 ConstraintFiltered map[string]int 7699 7700 // NodesExhausted is the number of nodes skipped due to being 7701 // exhausted of at least one resource 7702 NodesExhausted int 7703 7704 // ClassExhausted is the number of nodes exhausted by class 7705 ClassExhausted map[string]int 7706 7707 // DimensionExhausted provides the count by dimension or reason 7708 DimensionExhausted map[string]int 7709 7710 // QuotaExhausted provides the exhausted dimensions 7711 QuotaExhausted []string 7712 7713 // Scores is the scores of the final few nodes remaining 7714 // for placement. The top score is typically selected. 7715 // Deprecated: Replaced by ScoreMetaData in Nomad 0.9 7716 Scores map[string]float64 7717 7718 // ScoreMetaData is a slice of top scoring nodes displayed in the CLI 7719 ScoreMetaData []*NodeScoreMeta 7720 7721 // nodeScoreMeta is used to keep scores for a single node id. It is cleared out after 7722 // we receive normalized score during the last step of the scoring stack. 7723 nodeScoreMeta *NodeScoreMeta 7724 7725 // topScores is used to maintain a heap of the top K nodes with 7726 // the highest normalized score 7727 topScores *kheap.ScoreHeap 7728 7729 // AllocationTime is a measure of how long the allocation 7730 // attempt took. This can affect performance and SLAs. 7731 AllocationTime time.Duration 7732 7733 // CoalescedFailures indicates the number of other 7734 // allocations that were coalesced into this failed allocation. 7735 // This is to prevent creating many failed allocations for a 7736 // single task group. 7737 CoalescedFailures int 7738 } 7739 7740 func (a *AllocMetric) Copy() *AllocMetric { 7741 if a == nil { 7742 return nil 7743 } 7744 na := new(AllocMetric) 7745 *na = *a 7746 na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable) 7747 na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered) 7748 na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered) 7749 na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted) 7750 na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted) 7751 na.QuotaExhausted = helper.CopySliceString(na.QuotaExhausted) 7752 na.Scores = helper.CopyMapStringFloat64(na.Scores) 7753 na.ScoreMetaData = CopySliceNodeScoreMeta(na.ScoreMetaData) 7754 return na 7755 } 7756 7757 func (a *AllocMetric) EvaluateNode() { 7758 a.NodesEvaluated += 1 7759 } 7760 7761 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 7762 a.NodesFiltered += 1 7763 if node != nil && node.NodeClass != "" { 7764 if a.ClassFiltered == nil { 7765 a.ClassFiltered = make(map[string]int) 7766 } 7767 a.ClassFiltered[node.NodeClass] += 1 7768 } 7769 if constraint != "" { 7770 if a.ConstraintFiltered == nil { 7771 a.ConstraintFiltered = make(map[string]int) 7772 } 7773 a.ConstraintFiltered[constraint] += 1 7774 } 7775 } 7776 7777 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 7778 a.NodesExhausted += 1 7779 if node != nil && node.NodeClass != "" { 7780 if a.ClassExhausted == nil { 7781 a.ClassExhausted = make(map[string]int) 7782 } 7783 a.ClassExhausted[node.NodeClass] += 1 7784 } 7785 if dimension != "" { 7786 if a.DimensionExhausted == nil { 7787 a.DimensionExhausted = make(map[string]int) 7788 } 7789 a.DimensionExhausted[dimension] += 1 7790 } 7791 } 7792 7793 func (a *AllocMetric) ExhaustQuota(dimensions []string) { 7794 if a.QuotaExhausted == nil { 7795 a.QuotaExhausted = make([]string, 0, len(dimensions)) 7796 } 7797 7798 a.QuotaExhausted = append(a.QuotaExhausted, dimensions...) 7799 } 7800 7801 // ScoreNode is used to gather top K scoring nodes in a heap 7802 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 7803 // Create nodeScoreMeta lazily if its the first time or if its a new node 7804 if a.nodeScoreMeta == nil || a.nodeScoreMeta.NodeID != node.ID { 7805 a.nodeScoreMeta = &NodeScoreMeta{ 7806 NodeID: node.ID, 7807 Scores: make(map[string]float64), 7808 } 7809 } 7810 if name == NormScorerName { 7811 a.nodeScoreMeta.NormScore = score 7812 // Once we have the normalized score we can push to the heap 7813 // that tracks top K by normalized score 7814 7815 // Create the heap if its not there already 7816 if a.topScores == nil { 7817 a.topScores = kheap.NewScoreHeap(MaxRetainedNodeScores) 7818 } 7819 heap.Push(a.topScores, a.nodeScoreMeta) 7820 7821 // Clear out this entry because its now in the heap 7822 a.nodeScoreMeta = nil 7823 } else { 7824 a.nodeScoreMeta.Scores[name] = score 7825 } 7826 } 7827 7828 // PopulateScoreMetaData populates a map of scorer to scoring metadata 7829 // The map is populated by popping elements from a heap of top K scores 7830 // maintained per scorer 7831 func (a *AllocMetric) PopulateScoreMetaData() { 7832 if a.topScores == nil { 7833 return 7834 } 7835 7836 if a.ScoreMetaData == nil { 7837 a.ScoreMetaData = make([]*NodeScoreMeta, a.topScores.Len()) 7838 } 7839 heapItems := a.topScores.GetItemsReverse() 7840 for i, item := range heapItems { 7841 a.ScoreMetaData[i] = item.(*NodeScoreMeta) 7842 } 7843 } 7844 7845 // NodeScoreMeta captures scoring meta data derived from 7846 // different scoring factors. 7847 type NodeScoreMeta struct { 7848 NodeID string 7849 Scores map[string]float64 7850 NormScore float64 7851 } 7852 7853 func (s *NodeScoreMeta) Copy() *NodeScoreMeta { 7854 if s == nil { 7855 return nil 7856 } 7857 ns := new(NodeScoreMeta) 7858 *ns = *s 7859 return ns 7860 } 7861 7862 func (s *NodeScoreMeta) String() string { 7863 return fmt.Sprintf("%s %f %v", s.NodeID, s.NormScore, s.Scores) 7864 } 7865 7866 func (s *NodeScoreMeta) Score() float64 { 7867 return s.NormScore 7868 } 7869 7870 func (s *NodeScoreMeta) Data() interface{} { 7871 return s 7872 } 7873 7874 // AllocDeploymentStatus captures the status of the allocation as part of the 7875 // deployment. This can include things like if the allocation has been marked as 7876 // healthy. 7877 type AllocDeploymentStatus struct { 7878 // Healthy marks whether the allocation has been marked healthy or unhealthy 7879 // as part of a deployment. It can be unset if it has neither been marked 7880 // healthy or unhealthy. 7881 Healthy *bool 7882 7883 // Timestamp is the time at which the health status was set. 7884 Timestamp time.Time 7885 7886 // Canary marks whether the allocation is a canary or not. A canary that has 7887 // been promoted will have this field set to false. 7888 Canary bool 7889 7890 // ModifyIndex is the raft index in which the deployment status was last 7891 // changed. 7892 ModifyIndex uint64 7893 } 7894 7895 // HasHealth returns true if the allocation has its health set. 7896 func (a *AllocDeploymentStatus) HasHealth() bool { 7897 return a != nil && a.Healthy != nil 7898 } 7899 7900 // IsHealthy returns if the allocation is marked as healthy as part of a 7901 // deployment 7902 func (a *AllocDeploymentStatus) IsHealthy() bool { 7903 if a == nil { 7904 return false 7905 } 7906 7907 return a.Healthy != nil && *a.Healthy 7908 } 7909 7910 // IsUnhealthy returns if the allocation is marked as unhealthy as part of a 7911 // deployment 7912 func (a *AllocDeploymentStatus) IsUnhealthy() bool { 7913 if a == nil { 7914 return false 7915 } 7916 7917 return a.Healthy != nil && !*a.Healthy 7918 } 7919 7920 // IsCanary returns if the allocation is marked as a canary 7921 func (a *AllocDeploymentStatus) IsCanary() bool { 7922 if a == nil { 7923 return false 7924 } 7925 7926 return a.Canary 7927 } 7928 7929 func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus { 7930 if a == nil { 7931 return nil 7932 } 7933 7934 c := new(AllocDeploymentStatus) 7935 *c = *a 7936 7937 if a.Healthy != nil { 7938 c.Healthy = helper.BoolToPtr(*a.Healthy) 7939 } 7940 7941 return c 7942 } 7943 7944 const ( 7945 EvalStatusBlocked = "blocked" 7946 EvalStatusPending = "pending" 7947 EvalStatusComplete = "complete" 7948 EvalStatusFailed = "failed" 7949 EvalStatusCancelled = "canceled" 7950 ) 7951 7952 const ( 7953 EvalTriggerJobRegister = "job-register" 7954 EvalTriggerJobDeregister = "job-deregister" 7955 EvalTriggerPeriodicJob = "periodic-job" 7956 EvalTriggerNodeDrain = "node-drain" 7957 EvalTriggerNodeUpdate = "node-update" 7958 EvalTriggerScheduled = "scheduled" 7959 EvalTriggerRollingUpdate = "rolling-update" 7960 EvalTriggerDeploymentWatcher = "deployment-watcher" 7961 EvalTriggerFailedFollowUp = "failed-follow-up" 7962 EvalTriggerMaxPlans = "max-plan-attempts" 7963 EvalTriggerRetryFailedAlloc = "alloc-failure" 7964 EvalTriggerQueuedAllocs = "queued-allocs" 7965 EvalTriggerPreemption = "preemption" 7966 ) 7967 7968 const ( 7969 // CoreJobEvalGC is used for the garbage collection of evaluations 7970 // and allocations. We periodically scan evaluations in a terminal state, 7971 // in which all the corresponding allocations are also terminal. We 7972 // delete these out of the system to bound the state. 7973 CoreJobEvalGC = "eval-gc" 7974 7975 // CoreJobNodeGC is used for the garbage collection of failed nodes. 7976 // We periodically scan nodes in a terminal state, and if they have no 7977 // corresponding allocations we delete these out of the system. 7978 CoreJobNodeGC = "node-gc" 7979 7980 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 7981 // periodically scan garbage collectible jobs and check if both their 7982 // evaluations and allocations are terminal. If so, we delete these out of 7983 // the system. 7984 CoreJobJobGC = "job-gc" 7985 7986 // CoreJobDeploymentGC is used for the garbage collection of eligible 7987 // deployments. We periodically scan garbage collectible deployments and 7988 // check if they are terminal. If so, we delete these out of the system. 7989 CoreJobDeploymentGC = "deployment-gc" 7990 7991 // CoreJobForceGC is used to force garbage collection of all GCable objects. 7992 CoreJobForceGC = "force-gc" 7993 ) 7994 7995 // Evaluation is used anytime we need to apply business logic as a result 7996 // of a change to our desired state (job specification) or the emergent state 7997 // (registered nodes). When the inputs change, we need to "evaluate" them, 7998 // potentially taking action (allocation of work) or doing nothing if the state 7999 // of the world does not require it. 8000 type Evaluation struct { 8001 // ID is a randomly generated UUID used for this evaluation. This 8002 // is assigned upon the creation of the evaluation. 8003 ID string 8004 8005 // Namespace is the namespace the evaluation is created in 8006 Namespace string 8007 8008 // Priority is used to control scheduling importance and if this job 8009 // can preempt other jobs. 8010 Priority int 8011 8012 // Type is used to control which schedulers are available to handle 8013 // this evaluation. 8014 Type string 8015 8016 // TriggeredBy is used to give some insight into why this Eval 8017 // was created. (Job change, node failure, alloc failure, etc). 8018 TriggeredBy string 8019 8020 // JobID is the job this evaluation is scoped to. Evaluations cannot 8021 // be run in parallel for a given JobID, so we serialize on this. 8022 JobID string 8023 8024 // JobModifyIndex is the modify index of the job at the time 8025 // the evaluation was created 8026 JobModifyIndex uint64 8027 8028 // NodeID is the node that was affected triggering the evaluation. 8029 NodeID string 8030 8031 // NodeModifyIndex is the modify index of the node at the time 8032 // the evaluation was created 8033 NodeModifyIndex uint64 8034 8035 // DeploymentID is the ID of the deployment that triggered the evaluation. 8036 DeploymentID string 8037 8038 // Status of the evaluation 8039 Status string 8040 8041 // StatusDescription is meant to provide more human useful information 8042 StatusDescription string 8043 8044 // Wait is a minimum wait time for running the eval. This is used to 8045 // support a rolling upgrade in versions prior to 0.7.0 8046 // Deprecated 8047 Wait time.Duration 8048 8049 // WaitUntil is the time when this eval should be run. This is used to 8050 // supported delayed rescheduling of failed allocations 8051 WaitUntil time.Time 8052 8053 // NextEval is the evaluation ID for the eval created to do a followup. 8054 // This is used to support rolling upgrades and failed-follow-up evals, where 8055 // we need a chain of evaluations. 8056 NextEval string 8057 8058 // PreviousEval is the evaluation ID for the eval creating this one to do a followup. 8059 // This is used to support rolling upgrades and failed-follow-up evals, where 8060 // we need a chain of evaluations. 8061 PreviousEval string 8062 8063 // BlockedEval is the evaluation ID for a created blocked eval. A 8064 // blocked eval will be created if all allocations could not be placed due 8065 // to constraints or lacking resources. 8066 BlockedEval string 8067 8068 // FailedTGAllocs are task groups which have allocations that could not be 8069 // made, but the metrics are persisted so that the user can use the feedback 8070 // to determine the cause. 8071 FailedTGAllocs map[string]*AllocMetric 8072 8073 // ClassEligibility tracks computed node classes that have been explicitly 8074 // marked as eligible or ineligible. 8075 ClassEligibility map[string]bool 8076 8077 // QuotaLimitReached marks whether a quota limit was reached for the 8078 // evaluation. 8079 QuotaLimitReached string 8080 8081 // EscapedComputedClass marks whether the job has constraints that are not 8082 // captured by computed node classes. 8083 EscapedComputedClass bool 8084 8085 // AnnotatePlan triggers the scheduler to provide additional annotations 8086 // during the evaluation. This should not be set during normal operations. 8087 AnnotatePlan bool 8088 8089 // QueuedAllocations is the number of unplaced allocations at the time the 8090 // evaluation was processed. The map is keyed by Task Group names. 8091 QueuedAllocations map[string]int 8092 8093 // LeaderACL provides the ACL token to when issuing RPCs back to the 8094 // leader. This will be a valid management token as long as the leader is 8095 // active. This should not ever be exposed via the API. 8096 LeaderACL string 8097 8098 // SnapshotIndex is the Raft index of the snapshot used to process the 8099 // evaluation. The index will either be set when it has gone through the 8100 // scheduler or if a blocked evaluation is being created. The index is set 8101 // in this case so we can determine if an early unblocking is required since 8102 // capacity has changed since the evaluation was created. This can result in 8103 // the SnapshotIndex being less than the CreateIndex. 8104 SnapshotIndex uint64 8105 8106 // Raft Indexes 8107 CreateIndex uint64 8108 ModifyIndex uint64 8109 } 8110 8111 // TerminalStatus returns if the current status is terminal and 8112 // will no longer transition. 8113 func (e *Evaluation) TerminalStatus() bool { 8114 switch e.Status { 8115 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 8116 return true 8117 default: 8118 return false 8119 } 8120 } 8121 8122 func (e *Evaluation) GoString() string { 8123 return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace) 8124 } 8125 8126 func (e *Evaluation) Copy() *Evaluation { 8127 if e == nil { 8128 return nil 8129 } 8130 ne := new(Evaluation) 8131 *ne = *e 8132 8133 // Copy ClassEligibility 8134 if e.ClassEligibility != nil { 8135 classes := make(map[string]bool, len(e.ClassEligibility)) 8136 for class, elig := range e.ClassEligibility { 8137 classes[class] = elig 8138 } 8139 ne.ClassEligibility = classes 8140 } 8141 8142 // Copy FailedTGAllocs 8143 if e.FailedTGAllocs != nil { 8144 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 8145 for tg, metric := range e.FailedTGAllocs { 8146 failedTGs[tg] = metric.Copy() 8147 } 8148 ne.FailedTGAllocs = failedTGs 8149 } 8150 8151 // Copy queued allocations 8152 if e.QueuedAllocations != nil { 8153 queuedAllocations := make(map[string]int, len(e.QueuedAllocations)) 8154 for tg, num := range e.QueuedAllocations { 8155 queuedAllocations[tg] = num 8156 } 8157 ne.QueuedAllocations = queuedAllocations 8158 } 8159 8160 return ne 8161 } 8162 8163 // ShouldEnqueue checks if a given evaluation should be enqueued into the 8164 // eval_broker 8165 func (e *Evaluation) ShouldEnqueue() bool { 8166 switch e.Status { 8167 case EvalStatusPending: 8168 return true 8169 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 8170 return false 8171 default: 8172 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 8173 } 8174 } 8175 8176 // ShouldBlock checks if a given evaluation should be entered into the blocked 8177 // eval tracker. 8178 func (e *Evaluation) ShouldBlock() bool { 8179 switch e.Status { 8180 case EvalStatusBlocked: 8181 return true 8182 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 8183 return false 8184 default: 8185 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 8186 } 8187 } 8188 8189 // MakePlan is used to make a plan from the given evaluation 8190 // for a given Job 8191 func (e *Evaluation) MakePlan(j *Job) *Plan { 8192 p := &Plan{ 8193 EvalID: e.ID, 8194 Priority: e.Priority, 8195 Job: j, 8196 NodeUpdate: make(map[string][]*Allocation), 8197 NodeAllocation: make(map[string][]*Allocation), 8198 NodePreemptions: make(map[string][]*Allocation), 8199 } 8200 if j != nil { 8201 p.AllAtOnce = j.AllAtOnce 8202 } 8203 return p 8204 } 8205 8206 // NextRollingEval creates an evaluation to followup this eval for rolling updates 8207 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 8208 return &Evaluation{ 8209 ID: uuid.Generate(), 8210 Namespace: e.Namespace, 8211 Priority: e.Priority, 8212 Type: e.Type, 8213 TriggeredBy: EvalTriggerRollingUpdate, 8214 JobID: e.JobID, 8215 JobModifyIndex: e.JobModifyIndex, 8216 Status: EvalStatusPending, 8217 Wait: wait, 8218 PreviousEval: e.ID, 8219 } 8220 } 8221 8222 // CreateBlockedEval creates a blocked evaluation to followup this eval to place any 8223 // failed allocations. It takes the classes marked explicitly eligible or 8224 // ineligible, whether the job has escaped computed node classes and whether the 8225 // quota limit was reached. 8226 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, 8227 escaped bool, quotaReached string) *Evaluation { 8228 8229 return &Evaluation{ 8230 ID: uuid.Generate(), 8231 Namespace: e.Namespace, 8232 Priority: e.Priority, 8233 Type: e.Type, 8234 TriggeredBy: EvalTriggerQueuedAllocs, 8235 JobID: e.JobID, 8236 JobModifyIndex: e.JobModifyIndex, 8237 Status: EvalStatusBlocked, 8238 PreviousEval: e.ID, 8239 ClassEligibility: classEligibility, 8240 EscapedComputedClass: escaped, 8241 QuotaLimitReached: quotaReached, 8242 } 8243 } 8244 8245 // CreateFailedFollowUpEval creates a follow up evaluation when the current one 8246 // has been marked as failed because it has hit the delivery limit and will not 8247 // be retried by the eval_broker. Callers should copy the created eval's ID to 8248 // into the old eval's NextEval field. 8249 func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation { 8250 return &Evaluation{ 8251 ID: uuid.Generate(), 8252 Namespace: e.Namespace, 8253 Priority: e.Priority, 8254 Type: e.Type, 8255 TriggeredBy: EvalTriggerFailedFollowUp, 8256 JobID: e.JobID, 8257 JobModifyIndex: e.JobModifyIndex, 8258 Status: EvalStatusPending, 8259 Wait: wait, 8260 PreviousEval: e.ID, 8261 } 8262 } 8263 8264 // Plan is used to submit a commit plan for task allocations. These 8265 // are submitted to the leader which verifies that resources have 8266 // not been overcommitted before admitting the plan. 8267 type Plan struct { 8268 // EvalID is the evaluation ID this plan is associated with 8269 EvalID string 8270 8271 // EvalToken is used to prevent a split-brain processing of 8272 // an evaluation. There should only be a single scheduler running 8273 // an Eval at a time, but this could be violated after a leadership 8274 // transition. This unique token is used to reject plans that are 8275 // being submitted from a different leader. 8276 EvalToken string 8277 8278 // Priority is the priority of the upstream job 8279 Priority int 8280 8281 // AllAtOnce is used to control if incremental scheduling of task groups 8282 // is allowed or if we must do a gang scheduling of the entire job. 8283 // If this is false, a plan may be partially applied. Otherwise, the 8284 // entire plan must be able to make progress. 8285 AllAtOnce bool 8286 8287 // Job is the parent job of all the allocations in the Plan. 8288 // Since a Plan only involves a single Job, we can reduce the size 8289 // of the plan by only including it once. 8290 Job *Job 8291 8292 // NodeUpdate contains all the allocations for each node. For each node, 8293 // this is a list of the allocations to update to either stop or evict. 8294 NodeUpdate map[string][]*Allocation 8295 8296 // NodeAllocation contains all the allocations for each node. 8297 // The evicts must be considered prior to the allocations. 8298 NodeAllocation map[string][]*Allocation 8299 8300 // Annotations contains annotations by the scheduler to be used by operators 8301 // to understand the decisions made by the scheduler. 8302 Annotations *PlanAnnotations 8303 8304 // Deployment is the deployment created or updated by the scheduler that 8305 // should be applied by the planner. 8306 Deployment *Deployment 8307 8308 // DeploymentUpdates is a set of status updates to apply to the given 8309 // deployments. This allows the scheduler to cancel any unneeded deployment 8310 // because the job is stopped or the update block is removed. 8311 DeploymentUpdates []*DeploymentStatusUpdate 8312 8313 // NodePreemptions is a map from node id to a set of allocations from other 8314 // lower priority jobs that are preempted. Preempted allocations are marked 8315 // as evicted. 8316 NodePreemptions map[string][]*Allocation 8317 } 8318 8319 // AppendUpdate marks the allocation for eviction. The clientStatus of the 8320 // allocation may be optionally set by passing in a non-empty value. 8321 func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) { 8322 newAlloc := new(Allocation) 8323 *newAlloc = *alloc 8324 8325 // If the job is not set in the plan we are deregistering a job so we 8326 // extract the job from the allocation. 8327 if p.Job == nil && newAlloc.Job != nil { 8328 p.Job = newAlloc.Job 8329 } 8330 8331 // Normalize the job 8332 newAlloc.Job = nil 8333 8334 // Strip the resources as it can be rebuilt. 8335 newAlloc.Resources = nil 8336 8337 newAlloc.DesiredStatus = desiredStatus 8338 newAlloc.DesiredDescription = desiredDesc 8339 8340 if clientStatus != "" { 8341 newAlloc.ClientStatus = clientStatus 8342 } 8343 8344 node := alloc.NodeID 8345 existing := p.NodeUpdate[node] 8346 p.NodeUpdate[node] = append(existing, newAlloc) 8347 } 8348 8349 // AppendPreemptedAlloc is used to append an allocation that's being preempted to the plan. 8350 // To minimize the size of the plan, this only sets a minimal set of fields in the allocation 8351 func (p *Plan) AppendPreemptedAlloc(alloc *Allocation, desiredStatus, preemptingAllocID string) { 8352 newAlloc := &Allocation{} 8353 newAlloc.ID = alloc.ID 8354 newAlloc.JobID = alloc.JobID 8355 newAlloc.Namespace = alloc.Namespace 8356 newAlloc.DesiredStatus = desiredStatus 8357 newAlloc.PreemptedByAllocation = preemptingAllocID 8358 8359 desiredDesc := fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocID) 8360 newAlloc.DesiredDescription = desiredDesc 8361 8362 // TaskResources are needed by the plan applier to check if allocations fit 8363 // after removing preempted allocations 8364 if alloc.AllocatedResources != nil { 8365 newAlloc.AllocatedResources = alloc.AllocatedResources 8366 } else { 8367 // COMPAT Remove in version 0.11 8368 newAlloc.TaskResources = alloc.TaskResources 8369 newAlloc.SharedResources = alloc.SharedResources 8370 } 8371 8372 // Append this alloc to slice for this node 8373 node := alloc.NodeID 8374 existing := p.NodePreemptions[node] 8375 p.NodePreemptions[node] = append(existing, newAlloc) 8376 } 8377 8378 func (p *Plan) PopUpdate(alloc *Allocation) { 8379 existing := p.NodeUpdate[alloc.NodeID] 8380 n := len(existing) 8381 if n > 0 && existing[n-1].ID == alloc.ID { 8382 existing = existing[:n-1] 8383 if len(existing) > 0 { 8384 p.NodeUpdate[alloc.NodeID] = existing 8385 } else { 8386 delete(p.NodeUpdate, alloc.NodeID) 8387 } 8388 } 8389 } 8390 8391 func (p *Plan) AppendAlloc(alloc *Allocation) { 8392 node := alloc.NodeID 8393 existing := p.NodeAllocation[node] 8394 8395 // Normalize the job 8396 alloc.Job = nil 8397 8398 p.NodeAllocation[node] = append(existing, alloc) 8399 } 8400 8401 // IsNoOp checks if this plan would do nothing 8402 func (p *Plan) IsNoOp() bool { 8403 return len(p.NodeUpdate) == 0 && 8404 len(p.NodeAllocation) == 0 && 8405 p.Deployment == nil && 8406 len(p.DeploymentUpdates) == 0 8407 } 8408 8409 // PlanResult is the result of a plan submitted to the leader. 8410 type PlanResult struct { 8411 // NodeUpdate contains all the updates that were committed. 8412 NodeUpdate map[string][]*Allocation 8413 8414 // NodeAllocation contains all the allocations that were committed. 8415 NodeAllocation map[string][]*Allocation 8416 8417 // Deployment is the deployment that was committed. 8418 Deployment *Deployment 8419 8420 // DeploymentUpdates is the set of deployment updates that were committed. 8421 DeploymentUpdates []*DeploymentStatusUpdate 8422 8423 // NodePreemptions is a map from node id to a set of allocations from other 8424 // lower priority jobs that are preempted. Preempted allocations are marked 8425 // as stopped. 8426 NodePreemptions map[string][]*Allocation 8427 8428 // RefreshIndex is the index the worker should refresh state up to. 8429 // This allows all evictions and allocations to be materialized. 8430 // If any allocations were rejected due to stale data (node state, 8431 // over committed) this can be used to force a worker refresh. 8432 RefreshIndex uint64 8433 8434 // AllocIndex is the Raft index in which the evictions and 8435 // allocations took place. This is used for the write index. 8436 AllocIndex uint64 8437 } 8438 8439 // IsNoOp checks if this plan result would do nothing 8440 func (p *PlanResult) IsNoOp() bool { 8441 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && 8442 len(p.DeploymentUpdates) == 0 && p.Deployment == nil 8443 } 8444 8445 // FullCommit is used to check if all the allocations in a plan 8446 // were committed as part of the result. Returns if there was 8447 // a match, and the number of expected and actual allocations. 8448 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) { 8449 expected := 0 8450 actual := 0 8451 for name, allocList := range plan.NodeAllocation { 8452 didAlloc, _ := p.NodeAllocation[name] 8453 expected += len(allocList) 8454 actual += len(didAlloc) 8455 } 8456 return actual == expected, expected, actual 8457 } 8458 8459 // PlanAnnotations holds annotations made by the scheduler to give further debug 8460 // information to operators. 8461 type PlanAnnotations struct { 8462 // DesiredTGUpdates is the set of desired updates per task group. 8463 DesiredTGUpdates map[string]*DesiredUpdates 8464 8465 // PreemptedAllocs is the set of allocations to be preempted to make the placement successful. 8466 PreemptedAllocs []*AllocListStub 8467 } 8468 8469 // DesiredUpdates is the set of changes the scheduler would like to make given 8470 // sufficient resources and cluster capacity. 8471 type DesiredUpdates struct { 8472 Ignore uint64 8473 Place uint64 8474 Migrate uint64 8475 Stop uint64 8476 InPlaceUpdate uint64 8477 DestructiveUpdate uint64 8478 Canary uint64 8479 Preemptions uint64 8480 } 8481 8482 func (d *DesiredUpdates) GoString() string { 8483 return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)", 8484 d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary) 8485 } 8486 8487 // msgpackHandle is a shared handle for encoding/decoding of structs 8488 var MsgpackHandle = func() *codec.MsgpackHandle { 8489 h := &codec.MsgpackHandle{RawToString: true} 8490 8491 // Sets the default type for decoding a map into a nil interface{}. 8492 // This is necessary in particular because we store the driver configs as a 8493 // nil interface{}. 8494 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 8495 return h 8496 }() 8497 8498 var ( 8499 // JsonHandle and JsonHandlePretty are the codec handles to JSON encode 8500 // structs. The pretty handle will add indents for easier human consumption. 8501 JsonHandle = &codec.JsonHandle{ 8502 HTMLCharsAsIs: true, 8503 } 8504 JsonHandlePretty = &codec.JsonHandle{ 8505 HTMLCharsAsIs: true, 8506 Indent: 4, 8507 } 8508 ) 8509 8510 // TODO Figure out if we can remove this. This is our fork that is just way 8511 // behind. I feel like its original purpose was to pin at a stable version but 8512 // now we can accomplish this with vendoring. 8513 var HashiMsgpackHandle = func() *hcodec.MsgpackHandle { 8514 h := &hcodec.MsgpackHandle{RawToString: true} 8515 8516 // Sets the default type for decoding a map into a nil interface{}. 8517 // This is necessary in particular because we store the driver configs as a 8518 // nil interface{}. 8519 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 8520 return h 8521 }() 8522 8523 // Decode is used to decode a MsgPack encoded object 8524 func Decode(buf []byte, out interface{}) error { 8525 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out) 8526 } 8527 8528 // Encode is used to encode a MsgPack object with type prefix 8529 func Encode(t MessageType, msg interface{}) ([]byte, error) { 8530 var buf bytes.Buffer 8531 buf.WriteByte(uint8(t)) 8532 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg) 8533 return buf.Bytes(), err 8534 } 8535 8536 // KeyringResponse is a unified key response and can be used for install, 8537 // remove, use, as well as listing key queries. 8538 type KeyringResponse struct { 8539 Messages map[string]string 8540 Keys map[string]int 8541 NumNodes int 8542 } 8543 8544 // KeyringRequest is request objects for serf key operations. 8545 type KeyringRequest struct { 8546 Key string 8547 } 8548 8549 // RecoverableError wraps an error and marks whether it is recoverable and could 8550 // be retried or it is fatal. 8551 type RecoverableError struct { 8552 Err string 8553 Recoverable bool 8554 } 8555 8556 // NewRecoverableError is used to wrap an error and mark it as recoverable or 8557 // not. 8558 func NewRecoverableError(e error, recoverable bool) error { 8559 if e == nil { 8560 return nil 8561 } 8562 8563 return &RecoverableError{ 8564 Err: e.Error(), 8565 Recoverable: recoverable, 8566 } 8567 } 8568 8569 // WrapRecoverable wraps an existing error in a new RecoverableError with a new 8570 // message. If the error was recoverable before the returned error is as well; 8571 // otherwise it is unrecoverable. 8572 func WrapRecoverable(msg string, err error) error { 8573 return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)} 8574 } 8575 8576 func (r *RecoverableError) Error() string { 8577 return r.Err 8578 } 8579 8580 func (r *RecoverableError) IsRecoverable() bool { 8581 return r.Recoverable 8582 } 8583 8584 func (r *RecoverableError) IsUnrecoverable() bool { 8585 return !r.Recoverable 8586 } 8587 8588 // Recoverable is an interface for errors to implement to indicate whether or 8589 // not they are fatal or recoverable. 8590 type Recoverable interface { 8591 error 8592 IsRecoverable() bool 8593 } 8594 8595 // IsRecoverable returns true if error is a RecoverableError with 8596 // Recoverable=true. Otherwise false is returned. 8597 func IsRecoverable(e error) bool { 8598 if re, ok := e.(Recoverable); ok { 8599 return re.IsRecoverable() 8600 } 8601 return false 8602 } 8603 8604 // WrappedServerError wraps an error and satisfies 8605 // both the Recoverable and the ServerSideError interfaces 8606 type WrappedServerError struct { 8607 Err error 8608 } 8609 8610 // NewWrappedServerError is used to create a wrapped server side error 8611 func NewWrappedServerError(e error) error { 8612 return &WrappedServerError{ 8613 Err: e, 8614 } 8615 } 8616 8617 func (r *WrappedServerError) IsRecoverable() bool { 8618 return IsRecoverable(r.Err) 8619 } 8620 8621 func (r *WrappedServerError) Error() string { 8622 return r.Err.Error() 8623 } 8624 8625 func (r *WrappedServerError) IsServerSide() bool { 8626 return true 8627 } 8628 8629 // ServerSideError is an interface for errors to implement to indicate 8630 // errors occurring after the request makes it to a server 8631 type ServerSideError interface { 8632 error 8633 IsServerSide() bool 8634 } 8635 8636 // IsServerSide returns true if error is a wrapped 8637 // server side error 8638 func IsServerSide(e error) bool { 8639 if se, ok := e.(ServerSideError); ok { 8640 return se.IsServerSide() 8641 } 8642 return false 8643 } 8644 8645 // ACLPolicy is used to represent an ACL policy 8646 type ACLPolicy struct { 8647 Name string // Unique name 8648 Description string // Human readable 8649 Rules string // HCL or JSON format 8650 Hash []byte 8651 CreateIndex uint64 8652 ModifyIndex uint64 8653 } 8654 8655 // SetHash is used to compute and set the hash of the ACL policy 8656 func (c *ACLPolicy) SetHash() []byte { 8657 // Initialize a 256bit Blake2 hash (32 bytes) 8658 hash, err := blake2b.New256(nil) 8659 if err != nil { 8660 panic(err) 8661 } 8662 8663 // Write all the user set fields 8664 hash.Write([]byte(c.Name)) 8665 hash.Write([]byte(c.Description)) 8666 hash.Write([]byte(c.Rules)) 8667 8668 // Finalize the hash 8669 hashVal := hash.Sum(nil) 8670 8671 // Set and return the hash 8672 c.Hash = hashVal 8673 return hashVal 8674 } 8675 8676 func (a *ACLPolicy) Stub() *ACLPolicyListStub { 8677 return &ACLPolicyListStub{ 8678 Name: a.Name, 8679 Description: a.Description, 8680 Hash: a.Hash, 8681 CreateIndex: a.CreateIndex, 8682 ModifyIndex: a.ModifyIndex, 8683 } 8684 } 8685 8686 func (a *ACLPolicy) Validate() error { 8687 var mErr multierror.Error 8688 if !validPolicyName.MatchString(a.Name) { 8689 err := fmt.Errorf("invalid name '%s'", a.Name) 8690 mErr.Errors = append(mErr.Errors, err) 8691 } 8692 if _, err := acl.Parse(a.Rules); err != nil { 8693 err = fmt.Errorf("failed to parse rules: %v", err) 8694 mErr.Errors = append(mErr.Errors, err) 8695 } 8696 if len(a.Description) > maxPolicyDescriptionLength { 8697 err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength) 8698 mErr.Errors = append(mErr.Errors, err) 8699 } 8700 return mErr.ErrorOrNil() 8701 } 8702 8703 // ACLPolicyListStub is used to for listing ACL policies 8704 type ACLPolicyListStub struct { 8705 Name string 8706 Description string 8707 Hash []byte 8708 CreateIndex uint64 8709 ModifyIndex uint64 8710 } 8711 8712 // ACLPolicyListRequest is used to request a list of policies 8713 type ACLPolicyListRequest struct { 8714 QueryOptions 8715 } 8716 8717 // ACLPolicySpecificRequest is used to query a specific policy 8718 type ACLPolicySpecificRequest struct { 8719 Name string 8720 QueryOptions 8721 } 8722 8723 // ACLPolicySetRequest is used to query a set of policies 8724 type ACLPolicySetRequest struct { 8725 Names []string 8726 QueryOptions 8727 } 8728 8729 // ACLPolicyListResponse is used for a list request 8730 type ACLPolicyListResponse struct { 8731 Policies []*ACLPolicyListStub 8732 QueryMeta 8733 } 8734 8735 // SingleACLPolicyResponse is used to return a single policy 8736 type SingleACLPolicyResponse struct { 8737 Policy *ACLPolicy 8738 QueryMeta 8739 } 8740 8741 // ACLPolicySetResponse is used to return a set of policies 8742 type ACLPolicySetResponse struct { 8743 Policies map[string]*ACLPolicy 8744 QueryMeta 8745 } 8746 8747 // ACLPolicyDeleteRequest is used to delete a set of policies 8748 type ACLPolicyDeleteRequest struct { 8749 Names []string 8750 WriteRequest 8751 } 8752 8753 // ACLPolicyUpsertRequest is used to upsert a set of policies 8754 type ACLPolicyUpsertRequest struct { 8755 Policies []*ACLPolicy 8756 WriteRequest 8757 } 8758 8759 // ACLToken represents a client token which is used to Authenticate 8760 type ACLToken struct { 8761 AccessorID string // Public Accessor ID (UUID) 8762 SecretID string // Secret ID, private (UUID) 8763 Name string // Human friendly name 8764 Type string // Client or Management 8765 Policies []string // Policies this token ties to 8766 Global bool // Global or Region local 8767 Hash []byte 8768 CreateTime time.Time // Time of creation 8769 CreateIndex uint64 8770 ModifyIndex uint64 8771 } 8772 8773 var ( 8774 // AnonymousACLToken is used no SecretID is provided, and the 8775 // request is made anonymously. 8776 AnonymousACLToken = &ACLToken{ 8777 AccessorID: "anonymous", 8778 Name: "Anonymous Token", 8779 Type: ACLClientToken, 8780 Policies: []string{"anonymous"}, 8781 Global: false, 8782 } 8783 ) 8784 8785 type ACLTokenListStub struct { 8786 AccessorID string 8787 Name string 8788 Type string 8789 Policies []string 8790 Global bool 8791 Hash []byte 8792 CreateTime time.Time 8793 CreateIndex uint64 8794 ModifyIndex uint64 8795 } 8796 8797 // SetHash is used to compute and set the hash of the ACL token 8798 func (a *ACLToken) SetHash() []byte { 8799 // Initialize a 256bit Blake2 hash (32 bytes) 8800 hash, err := blake2b.New256(nil) 8801 if err != nil { 8802 panic(err) 8803 } 8804 8805 // Write all the user set fields 8806 hash.Write([]byte(a.Name)) 8807 hash.Write([]byte(a.Type)) 8808 for _, policyName := range a.Policies { 8809 hash.Write([]byte(policyName)) 8810 } 8811 if a.Global { 8812 hash.Write([]byte("global")) 8813 } else { 8814 hash.Write([]byte("local")) 8815 } 8816 8817 // Finalize the hash 8818 hashVal := hash.Sum(nil) 8819 8820 // Set and return the hash 8821 a.Hash = hashVal 8822 return hashVal 8823 } 8824 8825 func (a *ACLToken) Stub() *ACLTokenListStub { 8826 return &ACLTokenListStub{ 8827 AccessorID: a.AccessorID, 8828 Name: a.Name, 8829 Type: a.Type, 8830 Policies: a.Policies, 8831 Global: a.Global, 8832 Hash: a.Hash, 8833 CreateTime: a.CreateTime, 8834 CreateIndex: a.CreateIndex, 8835 ModifyIndex: a.ModifyIndex, 8836 } 8837 } 8838 8839 // Validate is used to sanity check a token 8840 func (a *ACLToken) Validate() error { 8841 var mErr multierror.Error 8842 if len(a.Name) > maxTokenNameLength { 8843 mErr.Errors = append(mErr.Errors, fmt.Errorf("token name too long")) 8844 } 8845 switch a.Type { 8846 case ACLClientToken: 8847 if len(a.Policies) == 0 { 8848 mErr.Errors = append(mErr.Errors, fmt.Errorf("client token missing policies")) 8849 } 8850 case ACLManagementToken: 8851 if len(a.Policies) != 0 { 8852 mErr.Errors = append(mErr.Errors, fmt.Errorf("management token cannot be associated with policies")) 8853 } 8854 default: 8855 mErr.Errors = append(mErr.Errors, fmt.Errorf("token type must be client or management")) 8856 } 8857 return mErr.ErrorOrNil() 8858 } 8859 8860 // PolicySubset checks if a given set of policies is a subset of the token 8861 func (a *ACLToken) PolicySubset(policies []string) bool { 8862 // Hot-path the management tokens, superset of all policies. 8863 if a.Type == ACLManagementToken { 8864 return true 8865 } 8866 associatedPolicies := make(map[string]struct{}, len(a.Policies)) 8867 for _, policy := range a.Policies { 8868 associatedPolicies[policy] = struct{}{} 8869 } 8870 for _, policy := range policies { 8871 if _, ok := associatedPolicies[policy]; !ok { 8872 return false 8873 } 8874 } 8875 return true 8876 } 8877 8878 // ACLTokenListRequest is used to request a list of tokens 8879 type ACLTokenListRequest struct { 8880 GlobalOnly bool 8881 QueryOptions 8882 } 8883 8884 // ACLTokenSpecificRequest is used to query a specific token 8885 type ACLTokenSpecificRequest struct { 8886 AccessorID string 8887 QueryOptions 8888 } 8889 8890 // ACLTokenSetRequest is used to query a set of tokens 8891 type ACLTokenSetRequest struct { 8892 AccessorIDS []string 8893 QueryOptions 8894 } 8895 8896 // ACLTokenListResponse is used for a list request 8897 type ACLTokenListResponse struct { 8898 Tokens []*ACLTokenListStub 8899 QueryMeta 8900 } 8901 8902 // SingleACLTokenResponse is used to return a single token 8903 type SingleACLTokenResponse struct { 8904 Token *ACLToken 8905 QueryMeta 8906 } 8907 8908 // ACLTokenSetResponse is used to return a set of token 8909 type ACLTokenSetResponse struct { 8910 Tokens map[string]*ACLToken // Keyed by Accessor ID 8911 QueryMeta 8912 } 8913 8914 // ResolveACLTokenRequest is used to resolve a specific token 8915 type ResolveACLTokenRequest struct { 8916 SecretID string 8917 QueryOptions 8918 } 8919 8920 // ResolveACLTokenResponse is used to resolve a single token 8921 type ResolveACLTokenResponse struct { 8922 Token *ACLToken 8923 QueryMeta 8924 } 8925 8926 // ACLTokenDeleteRequest is used to delete a set of tokens 8927 type ACLTokenDeleteRequest struct { 8928 AccessorIDs []string 8929 WriteRequest 8930 } 8931 8932 // ACLTokenBootstrapRequest is used to bootstrap ACLs 8933 type ACLTokenBootstrapRequest struct { 8934 Token *ACLToken // Not client specifiable 8935 ResetIndex uint64 // Reset index is used to clear the bootstrap token 8936 WriteRequest 8937 } 8938 8939 // ACLTokenUpsertRequest is used to upsert a set of tokens 8940 type ACLTokenUpsertRequest struct { 8941 Tokens []*ACLToken 8942 WriteRequest 8943 } 8944 8945 // ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest 8946 type ACLTokenUpsertResponse struct { 8947 Tokens []*ACLToken 8948 WriteMeta 8949 }