github.com/zoomfoo/nomad@v0.8.5-0.20180907175415-f28fd3a1a056/nomad/structs/structs.go (about) 1 package structs 2 3 import ( 4 "bytes" 5 "crypto/md5" 6 "crypto/sha1" 7 "crypto/sha256" 8 "crypto/sha512" 9 "encoding/base32" 10 "encoding/hex" 11 "errors" 12 "fmt" 13 "io" 14 "net" 15 "net/url" 16 "os" 17 "path/filepath" 18 "reflect" 19 "regexp" 20 "sort" 21 "strconv" 22 "strings" 23 "time" 24 25 "golang.org/x/crypto/blake2b" 26 27 "container/heap" 28 "math" 29 30 "github.com/gorhill/cronexpr" 31 "github.com/hashicorp/consul/api" 32 multierror "github.com/hashicorp/go-multierror" 33 "github.com/hashicorp/go-version" 34 "github.com/hashicorp/nomad/acl" 35 "github.com/hashicorp/nomad/helper" 36 "github.com/hashicorp/nomad/helper/args" 37 "github.com/hashicorp/nomad/helper/uuid" 38 "github.com/hashicorp/nomad/lib/kheap" 39 "github.com/mitchellh/copystructure" 40 "github.com/ugorji/go/codec" 41 42 hcodec "github.com/hashicorp/go-msgpack/codec" 43 ) 44 45 var ( 46 // validPolicyName is used to validate a policy name 47 validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$") 48 49 // b32 is a lowercase base32 encoding for use in URL friendly service hashes 50 b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567")) 51 ) 52 53 type MessageType uint8 54 55 const ( 56 NodeRegisterRequestType MessageType = iota 57 NodeDeregisterRequestType 58 NodeUpdateStatusRequestType 59 NodeUpdateDrainRequestType 60 JobRegisterRequestType 61 JobDeregisterRequestType 62 EvalUpdateRequestType 63 EvalDeleteRequestType 64 AllocUpdateRequestType 65 AllocClientUpdateRequestType 66 ReconcileJobSummariesRequestType 67 VaultAccessorRegisterRequestType 68 VaultAccessorDeregisterRequestType 69 ApplyPlanResultsRequestType 70 DeploymentStatusUpdateRequestType 71 DeploymentPromoteRequestType 72 DeploymentAllocHealthRequestType 73 DeploymentDeleteRequestType 74 JobStabilityRequestType 75 ACLPolicyUpsertRequestType 76 ACLPolicyDeleteRequestType 77 ACLTokenUpsertRequestType 78 
ACLTokenDeleteRequestType 79 ACLTokenBootstrapRequestType 80 AutopilotRequestType 81 UpsertNodeEventsType 82 JobBatchDeregisterRequestType 83 AllocUpdateDesiredTransitionRequestType 84 NodeUpdateEligibilityRequestType 85 BatchNodeUpdateDrainRequestType 86 ) 87 88 const ( 89 // IgnoreUnknownTypeFlag is set along with a MessageType 90 // to indicate that the message type can be safely ignored 91 // if it is not recognized. This is for future proofing, so 92 // that new commands can be added in a way that won't cause 93 // old servers to crash when the FSM attempts to process them. 94 IgnoreUnknownTypeFlag MessageType = 128 95 96 // ApiMajorVersion is returned as part of the Status.Version request. 97 // It should be incremented anytime the APIs are changed in a way 98 // that would break clients for sane client versioning. 99 ApiMajorVersion = 1 100 101 // ApiMinorVersion is returned as part of the Status.Version request. 102 // It should be incremented anytime the APIs are changed to allow 103 // for sane client versioning. Minor changes should be compatible 104 // within the major version. 105 ApiMinorVersion = 1 106 107 ProtocolVersion = "protocol" 108 APIMajorVersion = "api.major" 109 APIMinorVersion = "api.minor" 110 111 GetterModeAny = "any" 112 GetterModeFile = "file" 113 GetterModeDir = "dir" 114 115 // maxPolicyDescriptionLength limits a policy description length 116 maxPolicyDescriptionLength = 256 117 118 // maxTokenNameLength limits a ACL token name length 119 maxTokenNameLength = 256 120 121 // ACLClientToken and ACLManagementToken are the only types of tokens 122 ACLClientToken = "client" 123 ACLManagementToken = "management" 124 125 // DefaultNamespace is the default namespace. 126 DefaultNamespace = "default" 127 DefaultNamespaceDescription = "Default shared namespace" 128 129 // JitterFraction is a the limit to the amount of jitter we apply 130 // to a user specified MaxQueryTime. We divide the specified time by 131 // the fraction. 
So 16 == 6.25% limit of jitter. This jitter is also 132 // applied to RPCHoldTimeout. 133 JitterFraction = 16 134 135 // MaxRetainedNodeEvents is the maximum number of node events that will be 136 // retained for a single node 137 MaxRetainedNodeEvents = 10 138 139 // MaxRetainedNodeScores is the number of top scoring nodes for which we 140 // retain scoring metadata 141 MaxRetainedNodeScores = 5 142 143 // Normalized scorer name 144 NormScorerName = "normalized-score" 145 ) 146 147 // Context defines the scope in which a search for Nomad object operates, and 148 // is also used to query the matching index value for this context 149 type Context string 150 151 const ( 152 Allocs Context = "allocs" 153 Deployments Context = "deployment" 154 Evals Context = "evals" 155 Jobs Context = "jobs" 156 Nodes Context = "nodes" 157 Namespaces Context = "namespaces" 158 Quotas Context = "quotas" 159 All Context = "all" 160 ) 161 162 // NamespacedID is a tuple of an ID and a namespace 163 type NamespacedID struct { 164 ID string 165 Namespace string 166 } 167 168 func (n NamespacedID) String() string { 169 return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID) 170 } 171 172 // RPCInfo is used to describe common information about query 173 type RPCInfo interface { 174 RequestRegion() string 175 IsRead() bool 176 AllowStaleRead() bool 177 IsForwarded() bool 178 SetForwarded() 179 } 180 181 // InternalRpcInfo allows adding internal RPC metadata to an RPC. This struct 182 // should NOT be replicated in the API package as it is internal only. 183 type InternalRpcInfo struct { 184 // Forwarded marks whether the RPC has been forwarded. 185 Forwarded bool 186 } 187 188 // IsForwarded returns whether the RPC is forwarded from another server. 189 func (i *InternalRpcInfo) IsForwarded() bool { 190 return i.Forwarded 191 } 192 193 // SetForwarded marks that the RPC is being forwarded from another server. 
194 func (i *InternalRpcInfo) SetForwarded() { 195 i.Forwarded = true 196 } 197 198 // QueryOptions is used to specify various flags for read queries 199 type QueryOptions struct { 200 // The target region for this query 201 Region string 202 203 // Namespace is the target namespace for the query. 204 Namespace string 205 206 // If set, wait until query exceeds given index. Must be provided 207 // with MaxQueryTime. 208 MinQueryIndex uint64 209 210 // Provided with MinQueryIndex to wait for change. 211 MaxQueryTime time.Duration 212 213 // If set, any follower can service the request. Results 214 // may be arbitrarily stale. 215 AllowStale bool 216 217 // If set, used as prefix for resource list searches 218 Prefix string 219 220 // AuthToken is secret portion of the ACL token used for the request 221 AuthToken string 222 223 InternalRpcInfo 224 } 225 226 func (q QueryOptions) RequestRegion() string { 227 return q.Region 228 } 229 230 func (q QueryOptions) RequestNamespace() string { 231 if q.Namespace == "" { 232 return DefaultNamespace 233 } 234 return q.Namespace 235 } 236 237 // QueryOption only applies to reads, so always true 238 func (q QueryOptions) IsRead() bool { 239 return true 240 } 241 242 func (q QueryOptions) AllowStaleRead() bool { 243 return q.AllowStale 244 } 245 246 type WriteRequest struct { 247 // The target region for this write 248 Region string 249 250 // Namespace is the target namespace for the write. 
251 Namespace string 252 253 // AuthToken is secret portion of the ACL token used for the request 254 AuthToken string 255 256 InternalRpcInfo 257 } 258 259 func (w WriteRequest) RequestRegion() string { 260 // The target region for this request 261 return w.Region 262 } 263 264 func (w WriteRequest) RequestNamespace() string { 265 if w.Namespace == "" { 266 return DefaultNamespace 267 } 268 return w.Namespace 269 } 270 271 // WriteRequest only applies to writes, always false 272 func (w WriteRequest) IsRead() bool { 273 return false 274 } 275 276 func (w WriteRequest) AllowStaleRead() bool { 277 return false 278 } 279 280 // QueryMeta allows a query response to include potentially 281 // useful metadata about a query 282 type QueryMeta struct { 283 // This is the index associated with the read 284 Index uint64 285 286 // If AllowStale is used, this is time elapsed since 287 // last contact between the follower and leader. This 288 // can be used to gauge staleness. 289 LastContact time.Duration 290 291 // Used to indicate if there is a known leader node 292 KnownLeader bool 293 } 294 295 // WriteMeta allows a write response to include potentially 296 // useful metadata about the write 297 type WriteMeta struct { 298 // This is the index associated with the write 299 Index uint64 300 } 301 302 // NodeRegisterRequest is used for Node.Register endpoint 303 // to register a node as being a schedulable entity. 304 type NodeRegisterRequest struct { 305 Node *Node 306 NodeEvent *NodeEvent 307 WriteRequest 308 } 309 310 // NodeDeregisterRequest is used for Node.Deregister endpoint 311 // to deregister a node as being a schedulable entity. 312 type NodeDeregisterRequest struct { 313 NodeID string 314 WriteRequest 315 } 316 317 // NodeServerInfo is used to in NodeUpdateResponse to return Nomad server 318 // information used in RPC server lists. 
319 type NodeServerInfo struct { 320 // RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to 321 // be contacted at for RPCs. 322 RPCAdvertiseAddr string 323 324 // RpcMajorVersion is the major version number the Nomad Server 325 // supports 326 RPCMajorVersion int32 327 328 // RpcMinorVersion is the minor version number the Nomad Server 329 // supports 330 RPCMinorVersion int32 331 332 // Datacenter is the datacenter that a Nomad server belongs to 333 Datacenter string 334 } 335 336 // NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint 337 // to update the status of a node. 338 type NodeUpdateStatusRequest struct { 339 NodeID string 340 Status string 341 NodeEvent *NodeEvent 342 WriteRequest 343 } 344 345 // NodeUpdateDrainRequest is used for updating the drain strategy 346 type NodeUpdateDrainRequest struct { 347 NodeID string 348 DrainStrategy *DrainStrategy 349 350 // COMPAT Remove in version 0.10 351 // As part of Nomad 0.8 we have deprecated the drain boolean in favor of a 352 // drain strategy but we need to handle the upgrade path where the Raft log 353 // contains drain updates with just the drain boolean being manipulated. 354 Drain bool 355 356 // MarkEligible marks the node as eligible if removing the drain strategy. 
357 MarkEligible bool 358 359 // NodeEvent is the event added to the node 360 NodeEvent *NodeEvent 361 362 WriteRequest 363 } 364 365 // BatchNodeUpdateDrainRequest is used for updating the drain strategy for a 366 // batch of nodes 367 type BatchNodeUpdateDrainRequest struct { 368 // Updates is a mapping of nodes to their updated drain strategy 369 Updates map[string]*DrainUpdate 370 371 // NodeEvents is a mapping of the node to the event to add to the node 372 NodeEvents map[string]*NodeEvent 373 374 WriteRequest 375 } 376 377 // DrainUpdate is used to update the drain of a node 378 type DrainUpdate struct { 379 // DrainStrategy is the new strategy for the node 380 DrainStrategy *DrainStrategy 381 382 // MarkEligible marks the node as eligible if removing the drain strategy. 383 MarkEligible bool 384 } 385 386 // NodeUpdateEligibilityRequest is used for updating the scheduling eligibility 387 type NodeUpdateEligibilityRequest struct { 388 NodeID string 389 Eligibility string 390 391 // NodeEvent is the event added to the node 392 NodeEvent *NodeEvent 393 394 WriteRequest 395 } 396 397 // NodeEvaluateRequest is used to re-evaluate the node 398 type NodeEvaluateRequest struct { 399 NodeID string 400 WriteRequest 401 } 402 403 // NodeSpecificRequest is used when we just need to specify a target node 404 type NodeSpecificRequest struct { 405 NodeID string 406 SecretID string 407 QueryOptions 408 } 409 410 // SearchResponse is used to return matches and information about whether 411 // the match list is truncated specific to each type of context. 
412 type SearchResponse struct { 413 // Map of context types to ids which match a specified prefix 414 Matches map[Context][]string 415 416 // Truncations indicates whether the matches for a particular context have 417 // been truncated 418 Truncations map[Context]bool 419 420 QueryMeta 421 } 422 423 // SearchRequest is used to parameterize a request, and returns a 424 // list of matches made up of jobs, allocations, evaluations, and/or nodes, 425 // along with whether or not the information returned is truncated. 426 type SearchRequest struct { 427 // Prefix is what ids are matched to. I.e, if the given prefix were 428 // "a", potential matches might be "abcd" or "aabb" 429 Prefix string 430 431 // Context is the type that can be matched against. A context can be a job, 432 // node, evaluation, allocation, or empty (indicated every context should be 433 // matched) 434 Context Context 435 436 QueryOptions 437 } 438 439 // JobRegisterRequest is used for Job.Register endpoint 440 // to register a job as being a schedulable entity. 441 type JobRegisterRequest struct { 442 Job *Job 443 444 // If EnforceIndex is set then the job will only be registered if the passed 445 // JobModifyIndex matches the current Jobs index. If the index is zero, the 446 // register only occurs if the job is new. 447 EnforceIndex bool 448 JobModifyIndex uint64 449 450 // PolicyOverride is set when the user is attempting to override any policies 451 PolicyOverride bool 452 453 WriteRequest 454 } 455 456 // JobDeregisterRequest is used for Job.Deregister endpoint 457 // to deregister a job as being a schedulable entity. 
458 type JobDeregisterRequest struct { 459 JobID string 460 461 // Purge controls whether the deregister purges the job from the system or 462 // whether the job is just marked as stopped and will be removed by the 463 // garbage collector 464 Purge bool 465 466 WriteRequest 467 } 468 469 // JobBatchDeregisterRequest is used to batch deregister jobs and upsert 470 // evaluations. 471 type JobBatchDeregisterRequest struct { 472 // Jobs is the set of jobs to deregister 473 Jobs map[NamespacedID]*JobDeregisterOptions 474 475 // Evals is the set of evaluations to create. 476 Evals []*Evaluation 477 478 WriteRequest 479 } 480 481 // JobDeregisterOptions configures how a job is deregistered. 482 type JobDeregisterOptions struct { 483 // Purge controls whether the deregister purges the job from the system or 484 // whether the job is just marked as stopped and will be removed by the 485 // garbage collector 486 Purge bool 487 } 488 489 // JobEvaluateRequest is used when we just need to re-evaluate a target job 490 type JobEvaluateRequest struct { 491 JobID string 492 EvalOptions EvalOptions 493 WriteRequest 494 } 495 496 // EvalOptions is used to encapsulate options when forcing a job evaluation 497 type EvalOptions struct { 498 ForceReschedule bool 499 } 500 501 // JobSpecificRequest is used when we just need to specify a target job 502 type JobSpecificRequest struct { 503 JobID string 504 AllAllocs bool 505 QueryOptions 506 } 507 508 // JobListRequest is used to parameterize a list request 509 type JobListRequest struct { 510 QueryOptions 511 } 512 513 // JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run 514 // evaluation of the Job. 
515 type JobPlanRequest struct { 516 Job *Job 517 Diff bool // Toggles an annotated diff 518 // PolicyOverride is set when the user is attempting to override any policies 519 PolicyOverride bool 520 WriteRequest 521 } 522 523 // JobSummaryRequest is used when we just need to get a specific job summary 524 type JobSummaryRequest struct { 525 JobID string 526 QueryOptions 527 } 528 529 // JobDispatchRequest is used to dispatch a job based on a parameterized job 530 type JobDispatchRequest struct { 531 JobID string 532 Payload []byte 533 Meta map[string]string 534 WriteRequest 535 } 536 537 // JobValidateRequest is used to validate a job 538 type JobValidateRequest struct { 539 Job *Job 540 WriteRequest 541 } 542 543 // JobRevertRequest is used to revert a job to a prior version. 544 type JobRevertRequest struct { 545 // JobID is the ID of the job being reverted 546 JobID string 547 548 // JobVersion the version to revert to. 549 JobVersion uint64 550 551 // EnforcePriorVersion if set will enforce that the job is at the given 552 // version before reverting. 553 EnforcePriorVersion *uint64 554 555 WriteRequest 556 } 557 558 // JobStabilityRequest is used to marked a job as stable. 559 type JobStabilityRequest struct { 560 // Job to set the stability on 561 JobID string 562 JobVersion uint64 563 564 // Set the stability 565 Stable bool 566 WriteRequest 567 } 568 569 // JobStabilityResponse is the response when marking a job as stable. 570 type JobStabilityResponse struct { 571 WriteMeta 572 } 573 574 // NodeListRequest is used to parameterize a list request 575 type NodeListRequest struct { 576 QueryOptions 577 } 578 579 // EvalUpdateRequest is used for upserting evaluations. 580 type EvalUpdateRequest struct { 581 Evals []*Evaluation 582 EvalToken string 583 WriteRequest 584 } 585 586 // EvalDeleteRequest is used for deleting an evaluation. 
587 type EvalDeleteRequest struct { 588 Evals []string 589 Allocs []string 590 WriteRequest 591 } 592 593 // EvalSpecificRequest is used when we just need to specify a target evaluation 594 type EvalSpecificRequest struct { 595 EvalID string 596 QueryOptions 597 } 598 599 // EvalAckRequest is used to Ack/Nack a specific evaluation 600 type EvalAckRequest struct { 601 EvalID string 602 Token string 603 WriteRequest 604 } 605 606 // EvalDequeueRequest is used when we want to dequeue an evaluation 607 type EvalDequeueRequest struct { 608 Schedulers []string 609 Timeout time.Duration 610 SchedulerVersion uint16 611 WriteRequest 612 } 613 614 // EvalListRequest is used to list the evaluations 615 type EvalListRequest struct { 616 QueryOptions 617 } 618 619 // PlanRequest is used to submit an allocation plan to the leader 620 type PlanRequest struct { 621 Plan *Plan 622 WriteRequest 623 } 624 625 // ApplyPlanResultsRequest is used by the planner to apply a Raft transaction 626 // committing the result of a plan. 627 type ApplyPlanResultsRequest struct { 628 // AllocUpdateRequest holds the allocation updates to be made by the 629 // scheduler. 630 AllocUpdateRequest 631 632 // Deployment is the deployment created or updated as a result of a 633 // scheduling event. 634 Deployment *Deployment 635 636 // DeploymentUpdates is a set of status updates to apply to the given 637 // deployments. This allows the scheduler to cancel any unneeded deployment 638 // because the job is stopped or the update block is removed. 639 DeploymentUpdates []*DeploymentStatusUpdate 640 641 // EvalID is the eval ID of the plan being applied. The modify index of the 642 // evaluation is updated as part of applying the plan to ensure that subsequent 643 // scheduling events for the same job will wait for the index that last produced 644 // state changes. 
This is necessary for blocked evaluations since they can be 645 // processed many times, potentially making state updates, without the state of 646 // the evaluation itself being updated. 647 EvalID string 648 } 649 650 // AllocUpdateRequest is used to submit changes to allocations, either 651 // to cause evictions or to assign new allocations. Both can be done 652 // within a single transaction 653 type AllocUpdateRequest struct { 654 // Alloc is the list of new allocations to assign 655 Alloc []*Allocation 656 657 // Evals is the list of new evaluations to create 658 // Evals are valid only when used in the Raft RPC 659 Evals []*Evaluation 660 661 // Job is the shared parent job of the allocations. 662 // It is pulled out since it is common to reduce payload size. 663 Job *Job 664 665 WriteRequest 666 } 667 668 // AllocUpdateDesiredTransitionRequest is used to submit changes to allocations 669 // desired transition state. 670 type AllocUpdateDesiredTransitionRequest struct { 671 // Allocs is the mapping of allocation ids to their desired state 672 // transition 673 Allocs map[string]*DesiredTransition 674 675 // Evals is the set of evaluations to create 676 Evals []*Evaluation 677 678 WriteRequest 679 } 680 681 // AllocListRequest is used to request a list of allocations 682 type AllocListRequest struct { 683 QueryOptions 684 } 685 686 // AllocSpecificRequest is used to query a specific allocation 687 type AllocSpecificRequest struct { 688 AllocID string 689 QueryOptions 690 } 691 692 // AllocsGetRequest is used to query a set of allocations 693 type AllocsGetRequest struct { 694 AllocIDs []string 695 QueryOptions 696 } 697 698 // PeriodicForceRequest is used to force a specific periodic job. 
699 type PeriodicForceRequest struct { 700 JobID string 701 WriteRequest 702 } 703 704 // ServerMembersResponse has the list of servers in a cluster 705 type ServerMembersResponse struct { 706 ServerName string 707 ServerRegion string 708 ServerDC string 709 Members []*ServerMember 710 } 711 712 // ServerMember holds information about a Nomad server agent in a cluster 713 type ServerMember struct { 714 Name string 715 Addr net.IP 716 Port uint16 717 Tags map[string]string 718 Status string 719 ProtocolMin uint8 720 ProtocolMax uint8 721 ProtocolCur uint8 722 DelegateMin uint8 723 DelegateMax uint8 724 DelegateCur uint8 725 } 726 727 // DeriveVaultTokenRequest is used to request wrapped Vault tokens for the 728 // following tasks in the given allocation 729 type DeriveVaultTokenRequest struct { 730 NodeID string 731 SecretID string 732 AllocID string 733 Tasks []string 734 QueryOptions 735 } 736 737 // VaultAccessorsRequest is used to operate on a set of Vault accessors 738 type VaultAccessorsRequest struct { 739 Accessors []*VaultAccessor 740 } 741 742 // VaultAccessor is a reference to a created Vault token on behalf of 743 // an allocation's task. 744 type VaultAccessor struct { 745 AllocID string 746 Task string 747 NodeID string 748 Accessor string 749 CreationTTL int 750 751 // Raft Indexes 752 CreateIndex uint64 753 } 754 755 // DeriveVaultTokenResponse returns the wrapped tokens for each requested task 756 type DeriveVaultTokenResponse struct { 757 // Tasks is a mapping between the task name and the wrapped token 758 Tasks map[string]string 759 760 // Error stores any error that occurred. Errors are stored here so we can 761 // communicate whether it is retriable 762 Error *RecoverableError 763 764 QueryMeta 765 } 766 767 // GenericRequest is used to request where no 768 // specific information is needed. 
769 type GenericRequest struct { 770 QueryOptions 771 } 772 773 // DeploymentListRequest is used to list the deployments 774 type DeploymentListRequest struct { 775 QueryOptions 776 } 777 778 // DeploymentDeleteRequest is used for deleting deployments. 779 type DeploymentDeleteRequest struct { 780 Deployments []string 781 WriteRequest 782 } 783 784 // DeploymentStatusUpdateRequest is used to update the status of a deployment as 785 // well as optionally creating an evaluation atomically. 786 type DeploymentStatusUpdateRequest struct { 787 // Eval, if set, is used to create an evaluation at the same time as 788 // updating the status of a deployment. 789 Eval *Evaluation 790 791 // DeploymentUpdate is a status update to apply to the given 792 // deployment. 793 DeploymentUpdate *DeploymentStatusUpdate 794 795 // Job is used to optionally upsert a job. This is used when setting the 796 // allocation health results in a deployment failure and the deployment 797 // auto-reverts to the latest stable job. 798 Job *Job 799 } 800 801 // DeploymentAllocHealthRequest is used to set the health of a set of 802 // allocations as part of a deployment. 803 type DeploymentAllocHealthRequest struct { 804 DeploymentID string 805 806 // Marks these allocations as healthy, allow further allocations 807 // to be rolled. 808 HealthyAllocationIDs []string 809 810 // Any unhealthy allocations fail the deployment 811 UnhealthyAllocationIDs []string 812 813 WriteRequest 814 } 815 816 // ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft 817 type ApplyDeploymentAllocHealthRequest struct { 818 DeploymentAllocHealthRequest 819 820 // Timestamp is the timestamp to use when setting the allocations health. 821 Timestamp time.Time 822 823 // An optional field to update the status of a deployment 824 DeploymentUpdate *DeploymentStatusUpdate 825 826 // Job is used to optionally upsert a job. 
This is used when setting the 827 // allocation health results in a deployment failure and the deployment 828 // auto-reverts to the latest stable job. 829 Job *Job 830 831 // An optional evaluation to create after promoting the canaries 832 Eval *Evaluation 833 } 834 835 // DeploymentPromoteRequest is used to promote task groups in a deployment 836 type DeploymentPromoteRequest struct { 837 DeploymentID string 838 839 // All is to promote all task groups 840 All bool 841 842 // Groups is used to set the promotion status per task group 843 Groups []string 844 845 WriteRequest 846 } 847 848 // ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft 849 type ApplyDeploymentPromoteRequest struct { 850 DeploymentPromoteRequest 851 852 // An optional evaluation to create after promoting the canaries 853 Eval *Evaluation 854 } 855 856 // DeploymentPauseRequest is used to pause a deployment 857 type DeploymentPauseRequest struct { 858 DeploymentID string 859 860 // Pause sets the pause status 861 Pause bool 862 863 WriteRequest 864 } 865 866 // DeploymentSpecificRequest is used to make a request specific to a particular 867 // deployment 868 type DeploymentSpecificRequest struct { 869 DeploymentID string 870 QueryOptions 871 } 872 873 // DeploymentFailRequest is used to fail a particular deployment 874 type DeploymentFailRequest struct { 875 DeploymentID string 876 WriteRequest 877 } 878 879 // SingleDeploymentResponse is used to respond with a single deployment 880 type SingleDeploymentResponse struct { 881 Deployment *Deployment 882 QueryMeta 883 } 884 885 // GenericResponse is used to respond to a request where no 886 // specific response information is needed. 
887 type GenericResponse struct { 888 WriteMeta 889 } 890 891 // VersionResponse is used for the Status.Version response 892 type VersionResponse struct { 893 Build string 894 Versions map[string]int 895 QueryMeta 896 } 897 898 // JobRegisterResponse is used to respond to a job registration 899 type JobRegisterResponse struct { 900 EvalID string 901 EvalCreateIndex uint64 902 JobModifyIndex uint64 903 904 // Warnings contains any warnings about the given job. These may include 905 // deprecation warnings. 906 Warnings string 907 908 QueryMeta 909 } 910 911 // JobDeregisterResponse is used to respond to a job deregistration 912 type JobDeregisterResponse struct { 913 EvalID string 914 EvalCreateIndex uint64 915 JobModifyIndex uint64 916 QueryMeta 917 } 918 919 // JobBatchDeregisterResponse is used to respond to a batch job deregistration 920 type JobBatchDeregisterResponse struct { 921 // JobEvals maps the job to its created evaluation 922 JobEvals map[NamespacedID]string 923 QueryMeta 924 } 925 926 // JobValidateResponse is the response from validate request 927 type JobValidateResponse struct { 928 // DriverConfigValidated indicates whether the agent validated the driver 929 // config 930 DriverConfigValidated bool 931 932 // ValidationErrors is a list of validation errors 933 ValidationErrors []string 934 935 // Error is a string version of any error that may have occurred 936 Error string 937 938 // Warnings contains any warnings about the given job. These may include 939 // deprecation warnings. 940 Warnings string 941 } 942 943 // NodeUpdateResponse is used to respond to a node update 944 type NodeUpdateResponse struct { 945 HeartbeatTTL time.Duration 946 EvalIDs []string 947 EvalCreateIndex uint64 948 NodeModifyIndex uint64 949 950 // LeaderRPCAddr is the RPC address of the current Raft Leader. If 951 // empty, the current Nomad Server is in the minority of a partition. 
952 LeaderRPCAddr string 953 954 // NumNodes is the number of Nomad nodes attached to this quorum of 955 // Nomad Servers at the time of the response. This value can 956 // fluctuate based on the health of the cluster between heartbeats. 957 NumNodes int32 958 959 // Servers is the full list of known Nomad servers in the local 960 // region. 961 Servers []*NodeServerInfo 962 963 QueryMeta 964 } 965 966 // NodeDrainUpdateResponse is used to respond to a node drain update 967 type NodeDrainUpdateResponse struct { 968 NodeModifyIndex uint64 969 EvalIDs []string 970 EvalCreateIndex uint64 971 WriteMeta 972 } 973 974 // NodeEligibilityUpdateResponse is used to respond to a node eligibility update 975 type NodeEligibilityUpdateResponse struct { 976 NodeModifyIndex uint64 977 EvalIDs []string 978 EvalCreateIndex uint64 979 WriteMeta 980 } 981 982 // NodeAllocsResponse is used to return allocs for a single node 983 type NodeAllocsResponse struct { 984 Allocs []*Allocation 985 QueryMeta 986 } 987 988 // NodeClientAllocsResponse is used to return allocs meta data for a single node 989 type NodeClientAllocsResponse struct { 990 Allocs map[string]uint64 991 992 // MigrateTokens are used when ACLs are enabled to allow cross node, 993 // authenticated access to sticky volumes 994 MigrateTokens map[string]string 995 996 QueryMeta 997 } 998 999 // SingleNodeResponse is used to return a single node 1000 type SingleNodeResponse struct { 1001 Node *Node 1002 QueryMeta 1003 } 1004 1005 // NodeListResponse is used for a list request 1006 type NodeListResponse struct { 1007 Nodes []*NodeListStub 1008 QueryMeta 1009 } 1010 1011 // SingleJobResponse is used to return a single job 1012 type SingleJobResponse struct { 1013 Job *Job 1014 QueryMeta 1015 } 1016 1017 // JobSummaryResponse is used to return a single job summary 1018 type JobSummaryResponse struct { 1019 JobSummary *JobSummary 1020 QueryMeta 1021 } 1022 1023 type JobDispatchResponse struct { 1024 DispatchedJobID string 1025 
EvalID string 1026 EvalCreateIndex uint64 1027 JobCreateIndex uint64 1028 WriteMeta 1029 } 1030 1031 // JobListResponse is used for a list request 1032 type JobListResponse struct { 1033 Jobs []*JobListStub 1034 QueryMeta 1035 } 1036 1037 // JobVersionsRequest is used to get a jobs versions 1038 type JobVersionsRequest struct { 1039 JobID string 1040 Diffs bool 1041 QueryOptions 1042 } 1043 1044 // JobVersionsResponse is used for a job get versions request 1045 type JobVersionsResponse struct { 1046 Versions []*Job 1047 Diffs []*JobDiff 1048 QueryMeta 1049 } 1050 1051 // JobPlanResponse is used to respond to a job plan request 1052 type JobPlanResponse struct { 1053 // Annotations stores annotations explaining decisions the scheduler made. 1054 Annotations *PlanAnnotations 1055 1056 // FailedTGAllocs is the placement failures per task group. 1057 FailedTGAllocs map[string]*AllocMetric 1058 1059 // JobModifyIndex is the modification index of the job. The value can be 1060 // used when running `nomad run` to ensure that the Job wasn’t modified 1061 // since the last plan. If the job is being created, the value is zero. 1062 JobModifyIndex uint64 1063 1064 // CreatedEvals is the set of evaluations created by the scheduler. The 1065 // reasons for this can be rolling-updates or blocked evals. 1066 CreatedEvals []*Evaluation 1067 1068 // Diff contains the diff of the job and annotations on whether the change 1069 // causes an in-place update or create/destroy 1070 Diff *JobDiff 1071 1072 // NextPeriodicLaunch is the time duration till the job would be launched if 1073 // submitted. 1074 NextPeriodicLaunch time.Time 1075 1076 // Warnings contains any warnings about the given job. These may include 1077 // deprecation warnings. 
1078 Warnings string 1079 1080 WriteMeta 1081 } 1082 1083 // SingleAllocResponse is used to return a single allocation 1084 type SingleAllocResponse struct { 1085 Alloc *Allocation 1086 QueryMeta 1087 } 1088 1089 // AllocsGetResponse is used to return a set of allocations 1090 type AllocsGetResponse struct { 1091 Allocs []*Allocation 1092 QueryMeta 1093 } 1094 1095 // JobAllocationsResponse is used to return the allocations for a job 1096 type JobAllocationsResponse struct { 1097 Allocations []*AllocListStub 1098 QueryMeta 1099 } 1100 1101 // JobEvaluationsResponse is used to return the evaluations for a job 1102 type JobEvaluationsResponse struct { 1103 Evaluations []*Evaluation 1104 QueryMeta 1105 } 1106 1107 // SingleEvalResponse is used to return a single evaluation 1108 type SingleEvalResponse struct { 1109 Eval *Evaluation 1110 QueryMeta 1111 } 1112 1113 // EvalDequeueResponse is used to return from a dequeue 1114 type EvalDequeueResponse struct { 1115 Eval *Evaluation 1116 Token string 1117 1118 // WaitIndex is the Raft index the worker should wait until invoking the 1119 // scheduler. 1120 WaitIndex uint64 1121 1122 QueryMeta 1123 } 1124 1125 // GetWaitIndex is used to retrieve the Raft index in which state should be at 1126 // or beyond before invoking the scheduler. 1127 func (e *EvalDequeueResponse) GetWaitIndex() uint64 { 1128 // Prefer the wait index sent. 
This will be populated on all responses from 1129 // 0.7.0 and above 1130 if e.WaitIndex != 0 { 1131 return e.WaitIndex 1132 } else if e.Eval != nil { 1133 return e.Eval.ModifyIndex 1134 } 1135 1136 // This should never happen 1137 return 1 1138 } 1139 1140 // PlanResponse is used to return from a PlanRequest 1141 type PlanResponse struct { 1142 Result *PlanResult 1143 WriteMeta 1144 } 1145 1146 // AllocListResponse is used for a list request 1147 type AllocListResponse struct { 1148 Allocations []*AllocListStub 1149 QueryMeta 1150 } 1151 1152 // DeploymentListResponse is used for a list request 1153 type DeploymentListResponse struct { 1154 Deployments []*Deployment 1155 QueryMeta 1156 } 1157 1158 // EvalListResponse is used for a list request 1159 type EvalListResponse struct { 1160 Evaluations []*Evaluation 1161 QueryMeta 1162 } 1163 1164 // EvalAllocationsResponse is used to return the allocations for an evaluation 1165 type EvalAllocationsResponse struct { 1166 Allocations []*AllocListStub 1167 QueryMeta 1168 } 1169 1170 // PeriodicForceResponse is used to respond to a periodic job force launch 1171 type PeriodicForceResponse struct { 1172 EvalID string 1173 EvalCreateIndex uint64 1174 WriteMeta 1175 } 1176 1177 // DeploymentUpdateResponse is used to respond to a deployment change. The 1178 // response will include the modify index of the deployment as well as details 1179 // of any triggered evaluation. 1180 type DeploymentUpdateResponse struct { 1181 EvalID string 1182 EvalCreateIndex uint64 1183 DeploymentModifyIndex uint64 1184 1185 // RevertedJobVersion is the version the job was reverted to. 
If unset, the 1186 // job wasn't reverted 1187 RevertedJobVersion *uint64 1188 1189 WriteMeta 1190 } 1191 1192 // NodeConnQueryResponse is used to respond to a query of whether a server has 1193 // a connection to a specific Node 1194 type NodeConnQueryResponse struct { 1195 // Connected indicates whether a connection to the Client exists 1196 Connected bool 1197 1198 // Established marks the time at which the connection was established 1199 Established time.Time 1200 1201 QueryMeta 1202 } 1203 1204 // EmitNodeEventsRequest is a request to update the node events source 1205 // with a new client-side event 1206 type EmitNodeEventsRequest struct { 1207 // NodeEvents are a map where the key is a node id, and value is a list of 1208 // events for that node 1209 NodeEvents map[string][]*NodeEvent 1210 1211 WriteRequest 1212 } 1213 1214 // EmitNodeEventsResponse is a response to the client about the status of 1215 // the node event source update. 1216 type EmitNodeEventsResponse struct { 1217 Index uint64 1218 WriteMeta 1219 } 1220 1221 const ( 1222 NodeEventSubsystemDrain = "Drain" 1223 NodeEventSubsystemDriver = "Driver" 1224 NodeEventSubsystemHeartbeat = "Heartbeat" 1225 NodeEventSubsystemCluster = "Cluster" 1226 ) 1227 1228 // NodeEvent is a single unit representing a node’s state change 1229 type NodeEvent struct { 1230 Message string 1231 Subsystem string 1232 Details map[string]string 1233 Timestamp time.Time 1234 CreateIndex uint64 1235 } 1236 1237 func (ne *NodeEvent) String() string { 1238 var details []string 1239 for k, v := range ne.Details { 1240 details = append(details, fmt.Sprintf("%s: %s", k, v)) 1241 } 1242 1243 return fmt.Sprintf("Message: %s, Subsystem: %s, Details: %s, Timestamp: %s", ne.Message, ne.Subsystem, strings.Join(details, ","), ne.Timestamp.String()) 1244 } 1245 1246 func (ne *NodeEvent) Copy() *NodeEvent { 1247 c := new(NodeEvent) 1248 *c = *ne 1249 c.Details = helper.CopyMapStringString(ne.Details) 1250 return c 1251 } 1252 1253 // 
NewNodeEvent generates a new node event storing the current time as the 1254 // timestamp 1255 func NewNodeEvent() *NodeEvent { 1256 return &NodeEvent{Timestamp: time.Now()} 1257 } 1258 1259 // SetMessage is used to set the message on the node event 1260 func (ne *NodeEvent) SetMessage(msg string) *NodeEvent { 1261 ne.Message = msg 1262 return ne 1263 } 1264 1265 // SetSubsystem is used to set the subsystem on the node event 1266 func (ne *NodeEvent) SetSubsystem(sys string) *NodeEvent { 1267 ne.Subsystem = sys 1268 return ne 1269 } 1270 1271 // SetTimestamp is used to set the timestamp on the node event 1272 func (ne *NodeEvent) SetTimestamp(ts time.Time) *NodeEvent { 1273 ne.Timestamp = ts 1274 return ne 1275 } 1276 1277 // AddDetail is used to add a detail to the node event 1278 func (ne *NodeEvent) AddDetail(k, v string) *NodeEvent { 1279 if ne.Details == nil { 1280 ne.Details = make(map[string]string, 1) 1281 } 1282 ne.Details[k] = v 1283 return ne 1284 } 1285 1286 const ( 1287 NodeStatusInit = "initializing" 1288 NodeStatusReady = "ready" 1289 NodeStatusDown = "down" 1290 ) 1291 1292 // ShouldDrainNode checks if a given node status should trigger an 1293 // evaluation. Some states don't require any further action. 1294 func ShouldDrainNode(status string) bool { 1295 switch status { 1296 case NodeStatusInit, NodeStatusReady: 1297 return false 1298 case NodeStatusDown: 1299 return true 1300 default: 1301 panic(fmt.Sprintf("unhandled node status %s", status)) 1302 } 1303 } 1304 1305 // ValidNodeStatus is used to check if a node status is valid 1306 func ValidNodeStatus(status string) bool { 1307 switch status { 1308 case NodeStatusInit, NodeStatusReady, NodeStatusDown: 1309 return true 1310 default: 1311 return false 1312 } 1313 } 1314 1315 const ( 1316 // NodeSchedulingEligible and Ineligible marks the node as eligible or not, 1317 // respectively, for receiving allocations. This is orthoginal to the node 1318 // status being ready. 
1319 NodeSchedulingEligible = "eligible" 1320 NodeSchedulingIneligible = "ineligible" 1321 ) 1322 1323 // DrainSpec describes a Node's desired drain behavior. 1324 type DrainSpec struct { 1325 // Deadline is the duration after StartTime when the remaining 1326 // allocations on a draining Node should be told to stop. 1327 Deadline time.Duration 1328 1329 // IgnoreSystemJobs allows systems jobs to remain on the node even though it 1330 // has been marked for draining. 1331 IgnoreSystemJobs bool 1332 } 1333 1334 // DrainStrategy describes a Node's drain behavior. 1335 type DrainStrategy struct { 1336 // DrainSpec is the user declared drain specification 1337 DrainSpec 1338 1339 // ForceDeadline is the deadline time for the drain after which drains will 1340 // be forced 1341 ForceDeadline time.Time 1342 } 1343 1344 func (d *DrainStrategy) Copy() *DrainStrategy { 1345 if d == nil { 1346 return nil 1347 } 1348 1349 nd := new(DrainStrategy) 1350 *nd = *d 1351 return nd 1352 } 1353 1354 // DeadlineTime returns a boolean whether the drain strategy allows an infinite 1355 // duration or otherwise the deadline time. The force drain is captured by the 1356 // deadline time being in the past. 1357 func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) { 1358 // Treat the nil case as a force drain so during an upgrade where a node may 1359 // not have a drain strategy but has Drain set to true, it is treated as a 1360 // force to mimick old behavior. 
1361 if d == nil { 1362 return false, time.Time{} 1363 } 1364 1365 ns := d.Deadline.Nanoseconds() 1366 switch { 1367 case ns < 0: // Force 1368 return false, time.Time{} 1369 case ns == 0: // Infinite 1370 return true, time.Time{} 1371 default: 1372 return false, d.ForceDeadline 1373 } 1374 } 1375 1376 func (d *DrainStrategy) Equal(o *DrainStrategy) bool { 1377 if d == nil && o == nil { 1378 return true 1379 } else if o != nil && d == nil { 1380 return false 1381 } else if d != nil && o == nil { 1382 return false 1383 } 1384 1385 // Compare values 1386 if d.ForceDeadline != o.ForceDeadline { 1387 return false 1388 } else if d.Deadline != o.Deadline { 1389 return false 1390 } else if d.IgnoreSystemJobs != o.IgnoreSystemJobs { 1391 return false 1392 } 1393 1394 return true 1395 } 1396 1397 // Node is a representation of a schedulable client node 1398 type Node struct { 1399 // ID is a unique identifier for the node. It can be constructed 1400 // by doing a concatenation of the Name and Datacenter as a simple 1401 // approach. Alternatively a UUID may be used. 1402 ID string 1403 1404 // SecretID is an ID that is only known by the Node and the set of Servers. 1405 // It is not accessible via the API and is used to authenticate nodes 1406 // conducting privileged activities. 1407 SecretID string 1408 1409 // Datacenter for this node 1410 Datacenter string 1411 1412 // Node name 1413 Name string 1414 1415 // HTTPAddr is the address on which the Nomad client is listening for http 1416 // requests 1417 HTTPAddr string 1418 1419 // TLSEnabled indicates if the Agent has TLS enabled for the HTTP API 1420 TLSEnabled bool 1421 1422 // Attributes is an arbitrary set of key/value 1423 // data that can be used for constraints. Examples 1424 // include "kernel.name=linux", "arch=386", "driver.docker=1", 1425 // "docker.runtime=1.8.3" 1426 Attributes map[string]string 1427 1428 // Resources is the available resources on the client. 
1429 // For example 'cpu=2' 'memory=2048' 1430 Resources *Resources 1431 1432 // Reserved is the set of resources that are reserved, 1433 // and should be subtracted from the total resources for 1434 // the purposes of scheduling. This may be provide certain 1435 // high-watermark tolerances or because of external schedulers 1436 // consuming resources. 1437 Reserved *Resources 1438 1439 // Links are used to 'link' this client to external 1440 // systems. For example 'consul=foo.dc1' 'aws=i-83212' 1441 // 'ami=ami-123' 1442 Links map[string]string 1443 1444 // Meta is used to associate arbitrary metadata with this 1445 // client. This is opaque to Nomad. 1446 Meta map[string]string 1447 1448 // NodeClass is an opaque identifier used to group nodes 1449 // together for the purpose of determining scheduling pressure. 1450 NodeClass string 1451 1452 // ComputedClass is a unique id that identifies nodes with a common set of 1453 // attributes and capabilities. 1454 ComputedClass string 1455 1456 // COMPAT: Remove in Nomad 0.9 1457 // Drain is controlled by the servers, and not the client. 1458 // If true, no jobs will be scheduled to this node, and existing 1459 // allocations will be drained. Superceded by DrainStrategy in Nomad 1460 // 0.8 but kept for backward compat. 1461 Drain bool 1462 1463 // DrainStrategy determines the node's draining behavior. Will be nil 1464 // when Drain=false. 1465 DrainStrategy *DrainStrategy 1466 1467 // SchedulingEligibility determines whether this node will receive new 1468 // placements. 
1469 SchedulingEligibility string 1470 1471 // Status of this node 1472 Status string 1473 1474 // StatusDescription is meant to provide more human useful information 1475 StatusDescription string 1476 1477 // StatusUpdatedAt is the time stamp at which the state of the node was 1478 // updated 1479 StatusUpdatedAt int64 1480 1481 // Events is the most recent set of events generated for the node, 1482 // retaining only MaxRetainedNodeEvents number at a time 1483 Events []*NodeEvent 1484 1485 // Drivers is a map of driver names to current driver information 1486 Drivers map[string]*DriverInfo 1487 1488 // Raft Indexes 1489 CreateIndex uint64 1490 ModifyIndex uint64 1491 } 1492 1493 // Ready returns true if the node is ready for running allocations 1494 func (n *Node) Ready() bool { 1495 // Drain is checked directly to support pre-0.8 Node data 1496 return n.Status == NodeStatusReady && !n.Drain && n.SchedulingEligibility == NodeSchedulingEligible 1497 } 1498 1499 func (n *Node) Canonicalize() { 1500 if n == nil { 1501 return 1502 } 1503 1504 // COMPAT Remove in 0.10 1505 // In v0.8.0 we introduced scheduling eligibility, so we need to set it for 1506 // upgrading nodes 1507 if n.SchedulingEligibility == "" { 1508 if n.Drain { 1509 n.SchedulingEligibility = NodeSchedulingIneligible 1510 } else { 1511 n.SchedulingEligibility = NodeSchedulingEligible 1512 } 1513 } 1514 } 1515 1516 func (n *Node) Copy() *Node { 1517 if n == nil { 1518 return nil 1519 } 1520 nn := new(Node) 1521 *nn = *n 1522 nn.Attributes = helper.CopyMapStringString(nn.Attributes) 1523 nn.Resources = nn.Resources.Copy() 1524 nn.Reserved = nn.Reserved.Copy() 1525 nn.Links = helper.CopyMapStringString(nn.Links) 1526 nn.Meta = helper.CopyMapStringString(nn.Meta) 1527 nn.Events = copyNodeEvents(n.Events) 1528 nn.DrainStrategy = nn.DrainStrategy.Copy() 1529 nn.Drivers = copyNodeDrivers(n.Drivers) 1530 return nn 1531 } 1532 1533 // copyNodeEvents is a helper to copy a list of NodeEvent's 1534 func 
copyNodeEvents(events []*NodeEvent) []*NodeEvent { 1535 l := len(events) 1536 if l == 0 { 1537 return nil 1538 } 1539 1540 c := make([]*NodeEvent, l) 1541 for i, event := range events { 1542 c[i] = event.Copy() 1543 } 1544 return c 1545 } 1546 1547 // copyNodeDrivers is a helper to copy a map of DriverInfo 1548 func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo { 1549 l := len(drivers) 1550 if l == 0 { 1551 return nil 1552 } 1553 1554 c := make(map[string]*DriverInfo, l) 1555 for driver, info := range drivers { 1556 c[driver] = info.Copy() 1557 } 1558 return c 1559 } 1560 1561 // TerminalStatus returns if the current status is terminal and 1562 // will no longer transition. 1563 func (n *Node) TerminalStatus() bool { 1564 switch n.Status { 1565 case NodeStatusDown: 1566 return true 1567 default: 1568 return false 1569 } 1570 } 1571 1572 // Stub returns a summarized version of the node 1573 func (n *Node) Stub() *NodeListStub { 1574 1575 addr, _, _ := net.SplitHostPort(n.HTTPAddr) 1576 1577 return &NodeListStub{ 1578 Address: addr, 1579 ID: n.ID, 1580 Datacenter: n.Datacenter, 1581 Name: n.Name, 1582 NodeClass: n.NodeClass, 1583 Version: n.Attributes["nomad.version"], 1584 Drain: n.Drain, 1585 SchedulingEligibility: n.SchedulingEligibility, 1586 Status: n.Status, 1587 StatusDescription: n.StatusDescription, 1588 Drivers: n.Drivers, 1589 CreateIndex: n.CreateIndex, 1590 ModifyIndex: n.ModifyIndex, 1591 } 1592 } 1593 1594 // NodeListStub is used to return a subset of job information 1595 // for the job list 1596 type NodeListStub struct { 1597 Address string 1598 ID string 1599 Datacenter string 1600 Name string 1601 NodeClass string 1602 Version string 1603 Drain bool 1604 SchedulingEligibility string 1605 Status string 1606 StatusDescription string 1607 Drivers map[string]*DriverInfo 1608 CreateIndex uint64 1609 ModifyIndex uint64 1610 } 1611 1612 // Networks defined for a task on the Resources struct. 
1613 type Networks []*NetworkResource 1614 1615 // Port assignment and IP for the given label or empty values. 1616 func (ns Networks) Port(label string) (string, int) { 1617 for _, n := range ns { 1618 for _, p := range n.ReservedPorts { 1619 if p.Label == label { 1620 return n.IP, p.Value 1621 } 1622 } 1623 for _, p := range n.DynamicPorts { 1624 if p.Label == label { 1625 return n.IP, p.Value 1626 } 1627 } 1628 } 1629 return "", 0 1630 } 1631 1632 // Resources is used to define the resources available 1633 // on a client 1634 type Resources struct { 1635 CPU int 1636 MemoryMB int 1637 DiskMB int 1638 IOPS int 1639 Networks Networks 1640 } 1641 1642 const ( 1643 BytesInMegabyte = 1024 * 1024 1644 ) 1645 1646 // DefaultResources is a small resources object that contains the 1647 // default resources requests that we will provide to an object. 1648 // --- THIS FUNCTION IS REPLICATED IN api/resources.go and should 1649 // be kept in sync. 1650 func DefaultResources() *Resources { 1651 return &Resources{ 1652 CPU: 100, 1653 MemoryMB: 300, 1654 IOPS: 0, 1655 } 1656 } 1657 1658 // MinResources is a small resources object that contains the 1659 // absolute minimum resources that we will provide to an object. 1660 // This should not be confused with the defaults which are 1661 // provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN 1662 // api/resources.go and should be kept in sync. 1663 func MinResources() *Resources { 1664 return &Resources{ 1665 CPU: 20, 1666 MemoryMB: 10, 1667 IOPS: 0, 1668 } 1669 } 1670 1671 // DiskInBytes returns the amount of disk resources in bytes. 1672 func (r *Resources) DiskInBytes() int64 { 1673 return int64(r.DiskMB * BytesInMegabyte) 1674 } 1675 1676 // Merge merges this resource with another resource. 
1677 func (r *Resources) Merge(other *Resources) { 1678 if other.CPU != 0 { 1679 r.CPU = other.CPU 1680 } 1681 if other.MemoryMB != 0 { 1682 r.MemoryMB = other.MemoryMB 1683 } 1684 if other.DiskMB != 0 { 1685 r.DiskMB = other.DiskMB 1686 } 1687 if other.IOPS != 0 { 1688 r.IOPS = other.IOPS 1689 } 1690 if len(other.Networks) != 0 { 1691 r.Networks = other.Networks 1692 } 1693 } 1694 1695 func (r *Resources) Canonicalize() { 1696 // Ensure that an empty and nil slices are treated the same to avoid scheduling 1697 // problems since we use reflect DeepEquals. 1698 if len(r.Networks) == 0 { 1699 r.Networks = nil 1700 } 1701 1702 for _, n := range r.Networks { 1703 n.Canonicalize() 1704 } 1705 } 1706 1707 // MeetsMinResources returns an error if the resources specified are less than 1708 // the minimum allowed. 1709 // This is based on the minimums defined in the Resources type 1710 func (r *Resources) MeetsMinResources() error { 1711 var mErr multierror.Error 1712 minResources := MinResources() 1713 if r.CPU < minResources.CPU { 1714 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU)) 1715 } 1716 if r.MemoryMB < minResources.MemoryMB { 1717 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB)) 1718 } 1719 if r.IOPS < minResources.IOPS { 1720 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is %d; got %d", minResources.IOPS, r.IOPS)) 1721 } 1722 for i, n := range r.Networks { 1723 if err := n.MeetsMinResources(); err != nil { 1724 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 1725 } 1726 } 1727 1728 return mErr.ErrorOrNil() 1729 } 1730 1731 // Copy returns a deep copy of the resources 1732 func (r *Resources) Copy() *Resources { 1733 if r == nil { 1734 return nil 1735 } 1736 newR := new(Resources) 1737 *newR = *r 1738 if r.Networks != nil { 1739 n := len(r.Networks) 1740 newR.Networks = 
make([]*NetworkResource, n) 1741 for i := 0; i < n; i++ { 1742 newR.Networks[i] = r.Networks[i].Copy() 1743 } 1744 } 1745 return newR 1746 } 1747 1748 // NetIndex finds the matching net index using device name 1749 func (r *Resources) NetIndex(n *NetworkResource) int { 1750 for idx, net := range r.Networks { 1751 if net.Device == n.Device { 1752 return idx 1753 } 1754 } 1755 return -1 1756 } 1757 1758 // Superset checks if one set of resources is a superset 1759 // of another. This ignores network resources, and the NetworkIndex 1760 // should be used for that. 1761 func (r *Resources) Superset(other *Resources) (bool, string) { 1762 if r.CPU < other.CPU { 1763 return false, "cpu" 1764 } 1765 if r.MemoryMB < other.MemoryMB { 1766 return false, "memory" 1767 } 1768 if r.DiskMB < other.DiskMB { 1769 return false, "disk" 1770 } 1771 if r.IOPS < other.IOPS { 1772 return false, "iops" 1773 } 1774 return true, "" 1775 } 1776 1777 // Add adds the resources of the delta to this, potentially 1778 // returning an error if not possible. 
1779 func (r *Resources) Add(delta *Resources) error { 1780 if delta == nil { 1781 return nil 1782 } 1783 r.CPU += delta.CPU 1784 r.MemoryMB += delta.MemoryMB 1785 r.DiskMB += delta.DiskMB 1786 r.IOPS += delta.IOPS 1787 1788 for _, n := range delta.Networks { 1789 // Find the matching interface by IP or CIDR 1790 idx := r.NetIndex(n) 1791 if idx == -1 { 1792 r.Networks = append(r.Networks, n.Copy()) 1793 } else { 1794 r.Networks[idx].Add(n) 1795 } 1796 } 1797 return nil 1798 } 1799 1800 func (r *Resources) GoString() string { 1801 return fmt.Sprintf("*%#v", *r) 1802 } 1803 1804 type Port struct { 1805 Label string 1806 Value int 1807 } 1808 1809 // NetworkResource is used to represent available network 1810 // resources 1811 type NetworkResource struct { 1812 Device string // Name of the device 1813 CIDR string // CIDR block of addresses 1814 IP string // Host IP address 1815 MBits int // Throughput 1816 ReservedPorts []Port // Host Reserved ports 1817 DynamicPorts []Port // Host Dynamically assigned ports 1818 } 1819 1820 func (nr *NetworkResource) Equals(other *NetworkResource) bool { 1821 if nr.Device != other.Device { 1822 return false 1823 } 1824 1825 if nr.CIDR != other.CIDR { 1826 return false 1827 } 1828 1829 if nr.IP != other.IP { 1830 return false 1831 } 1832 1833 if nr.MBits != other.MBits { 1834 return false 1835 } 1836 1837 if len(nr.ReservedPorts) != len(other.ReservedPorts) { 1838 return false 1839 } 1840 1841 for i, port := range nr.ReservedPorts { 1842 if len(other.ReservedPorts) <= i { 1843 return false 1844 } 1845 if port != other.ReservedPorts[i] { 1846 return false 1847 } 1848 } 1849 1850 if len(nr.DynamicPorts) != len(other.DynamicPorts) { 1851 return false 1852 } 1853 for i, port := range nr.DynamicPorts { 1854 if len(other.DynamicPorts) <= i { 1855 return false 1856 } 1857 if port != other.DynamicPorts[i] { 1858 return false 1859 } 1860 } 1861 return true 1862 } 1863 1864 func (n *NetworkResource) Canonicalize() { 1865 // Ensure that an 
empty and nil slices are treated the same to avoid scheduling 1866 // problems since we use reflect DeepEquals. 1867 if len(n.ReservedPorts) == 0 { 1868 n.ReservedPorts = nil 1869 } 1870 if len(n.DynamicPorts) == 0 { 1871 n.DynamicPorts = nil 1872 } 1873 } 1874 1875 // MeetsMinResources returns an error if the resources specified are less than 1876 // the minimum allowed. 1877 func (n *NetworkResource) MeetsMinResources() error { 1878 var mErr multierror.Error 1879 if n.MBits < 1 { 1880 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits)) 1881 } 1882 return mErr.ErrorOrNil() 1883 } 1884 1885 // Copy returns a deep copy of the network resource 1886 func (n *NetworkResource) Copy() *NetworkResource { 1887 if n == nil { 1888 return nil 1889 } 1890 newR := new(NetworkResource) 1891 *newR = *n 1892 if n.ReservedPorts != nil { 1893 newR.ReservedPorts = make([]Port, len(n.ReservedPorts)) 1894 copy(newR.ReservedPorts, n.ReservedPorts) 1895 } 1896 if n.DynamicPorts != nil { 1897 newR.DynamicPorts = make([]Port, len(n.DynamicPorts)) 1898 copy(newR.DynamicPorts, n.DynamicPorts) 1899 } 1900 return newR 1901 } 1902 1903 // Add adds the resources of the delta to this, potentially 1904 // returning an error if not possible. 1905 func (n *NetworkResource) Add(delta *NetworkResource) { 1906 if len(delta.ReservedPorts) > 0 { 1907 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...) 1908 } 1909 n.MBits += delta.MBits 1910 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...) 1911 } 1912 1913 func (n *NetworkResource) GoString() string { 1914 return fmt.Sprintf("*%#v", *n) 1915 } 1916 1917 // PortLabels returns a map of port labels to their assigned host ports. 
1918 func (n *NetworkResource) PortLabels() map[string]int { 1919 num := len(n.ReservedPorts) + len(n.DynamicPorts) 1920 labelValues := make(map[string]int, num) 1921 for _, port := range n.ReservedPorts { 1922 labelValues[port.Label] = port.Value 1923 } 1924 for _, port := range n.DynamicPorts { 1925 labelValues[port.Label] = port.Value 1926 } 1927 return labelValues 1928 } 1929 1930 const ( 1931 // JobTypeNomad is reserved for internal system tasks and is 1932 // always handled by the CoreScheduler. 1933 JobTypeCore = "_core" 1934 JobTypeService = "service" 1935 JobTypeBatch = "batch" 1936 JobTypeSystem = "system" 1937 ) 1938 1939 const ( 1940 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 1941 JobStatusRunning = "running" // Running means the job has non-terminal allocations 1942 JobStatusDead = "dead" // Dead means all evaluation's and allocations are terminal 1943 ) 1944 1945 const ( 1946 // JobMinPriority is the minimum allowed priority 1947 JobMinPriority = 1 1948 1949 // JobDefaultPriority is the default priority if not 1950 // not specified. 1951 JobDefaultPriority = 50 1952 1953 // JobMaxPriority is the maximum allowed priority 1954 JobMaxPriority = 100 1955 1956 // Ensure CoreJobPriority is higher than any user 1957 // specified job so that it gets priority. This is important 1958 // for the system to remain healthy. 1959 CoreJobPriority = JobMaxPriority * 2 1960 1961 // JobTrackedVersions is the number of historic job versions that are 1962 // kept. 1963 JobTrackedVersions = 6 1964 ) 1965 1966 // Job is the scope of a scheduling request to Nomad. It is the largest 1967 // scoped object, and is a named collection of task groups. Each task group 1968 // is further composed of tasks. A task group (TG) is the unit of scheduling 1969 // however. 1970 type Job struct { 1971 // Stop marks whether the user has stopped the job. 
A stopped job will 1972 // have all created allocations stopped and acts as a way to stop a job 1973 // without purging it from the system. This allows existing allocs to be 1974 // queried and the job to be inspected as it is being killed. 1975 Stop bool 1976 1977 // Region is the Nomad region that handles scheduling this job 1978 Region string 1979 1980 // Namespace is the namespace the job is submitted into. 1981 Namespace string 1982 1983 // ID is a unique identifier for the job per region. It can be 1984 // specified hierarchically like LineOfBiz/OrgName/Team/Project 1985 ID string 1986 1987 // ParentID is the unique identifier of the job that spawned this job. 1988 ParentID string 1989 1990 // Name is the logical name of the job used to refer to it. This is unique 1991 // per region, but not unique globally. 1992 Name string 1993 1994 // Type is used to control various behaviors about the job. Most jobs 1995 // are service jobs, meaning they are expected to be long lived. 1996 // Some jobs are batch oriented meaning they run and then terminate. 1997 // This can be extended in the future to support custom schedulers. 1998 Type string 1999 2000 // Priority is used to control scheduling importance and if this job 2001 // can preempt other jobs. 2002 Priority int 2003 2004 // AllAtOnce is used to control if incremental scheduling of task groups 2005 // is allowed or if we must do a gang scheduling of the entire job. This 2006 // can slow down larger jobs if resources are not available. 2007 AllAtOnce bool 2008 2009 // Datacenters contains all the datacenters this job is allowed to span 2010 Datacenters []string 2011 2012 // Constraints can be specified at a job level and apply to 2013 // all the task groups and tasks. 
2014 Constraints []*Constraint 2015 2016 // Affinities can be specified at the job level to express 2017 // scheduling preferences that apply to all groups and tasks 2018 Affinities []*Affinity 2019 2020 // Spread can be specified at the job level to express spreading 2021 // allocations across a desired attribute, such as datacenter 2022 Spreads []*Spread 2023 2024 // TaskGroups are the collections of task groups that this job needs 2025 // to run. Each task group is an atomic unit of scheduling and placement. 2026 TaskGroups []*TaskGroup 2027 2028 // COMPAT: Remove in 0.7.0. Stagger is deprecated in 0.6.0. 2029 Update UpdateStrategy 2030 2031 // Periodic is used to define the interval the job is run at. 2032 Periodic *PeriodicConfig 2033 2034 // ParameterizedJob is used to specify the job as a parameterized job 2035 // for dispatching. 2036 ParameterizedJob *ParameterizedJobConfig 2037 2038 // Dispatched is used to identify if the Job has been dispatched from a 2039 // parameterized job. 2040 Dispatched bool 2041 2042 // Payload is the payload supplied when the job was dispatched. 2043 Payload []byte 2044 2045 // Meta is used to associate arbitrary metadata with this 2046 // job. This is opaque to Nomad. 2047 Meta map[string]string 2048 2049 // VaultToken is the Vault token that proves the submitter of the job has 2050 // access to the specified Vault policies. This field is only used to 2051 // transfer the token and is not stored after Job submission. 2052 VaultToken string 2053 2054 // Job status 2055 Status string 2056 2057 // StatusDescription is meant to provide more human useful information 2058 StatusDescription string 2059 2060 // Stable marks a job as stable. Stability is only defined on "service" and 2061 // "system" jobs. The stability of a job will be set automatically as part 2062 // of a deployment and can be manually set via APIs. 
2063 Stable bool 2064 2065 // Version is a monotonically increasing version number that is incremented 2066 // on each job register. 2067 Version uint64 2068 2069 // SubmitTime is the time at which the job was submitted as a UnixNano in 2070 // UTC 2071 SubmitTime int64 2072 2073 // Raft Indexes 2074 CreateIndex uint64 2075 ModifyIndex uint64 2076 JobModifyIndex uint64 2077 } 2078 2079 // NamespacedID returns the namespaced id useful for logging 2080 func (j *Job) NamespacedID() *NamespacedID { 2081 return &NamespacedID{ 2082 ID: j.ID, 2083 Namespace: j.Namespace, 2084 } 2085 } 2086 2087 // Canonicalize is used to canonicalize fields in the Job. This should be called 2088 // when registering a Job. A set of warnings are returned if the job was changed 2089 // in anyway that the user should be made aware of. 2090 func (j *Job) Canonicalize() (warnings error) { 2091 if j == nil { 2092 return nil 2093 } 2094 2095 var mErr multierror.Error 2096 // Ensure that an empty and nil map are treated the same to avoid scheduling 2097 // problems since we use reflect DeepEquals. 2098 if len(j.Meta) == 0 { 2099 j.Meta = nil 2100 } 2101 2102 // Ensure the job is in a namespace. 2103 if j.Namespace == "" { 2104 j.Namespace = DefaultNamespace 2105 } 2106 2107 for _, tg := range j.TaskGroups { 2108 tg.Canonicalize(j) 2109 } 2110 2111 if j.ParameterizedJob != nil { 2112 j.ParameterizedJob.Canonicalize() 2113 } 2114 2115 if j.Periodic != nil { 2116 j.Periodic.Canonicalize() 2117 } 2118 2119 return mErr.ErrorOrNil() 2120 } 2121 2122 // Copy returns a deep copy of the Job. It is expected that callers use recover. 2123 // This job can panic if the deep copy failed as it uses reflection. 
2124 func (j *Job) Copy() *Job { 2125 if j == nil { 2126 return nil 2127 } 2128 nj := new(Job) 2129 *nj = *j 2130 nj.Datacenters = helper.CopySliceString(nj.Datacenters) 2131 nj.Constraints = CopySliceConstraints(nj.Constraints) 2132 nj.Affinities = CopySliceAffinities(nj.Affinities) 2133 2134 if j.TaskGroups != nil { 2135 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 2136 for i, tg := range nj.TaskGroups { 2137 tgs[i] = tg.Copy() 2138 } 2139 nj.TaskGroups = tgs 2140 } 2141 2142 nj.Periodic = nj.Periodic.Copy() 2143 nj.Meta = helper.CopyMapStringString(nj.Meta) 2144 nj.ParameterizedJob = nj.ParameterizedJob.Copy() 2145 return nj 2146 } 2147 2148 // Validate is used to sanity check a job input 2149 func (j *Job) Validate() error { 2150 var mErr multierror.Error 2151 2152 if j.Region == "" { 2153 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 2154 } 2155 if j.ID == "" { 2156 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 2157 } else if strings.Contains(j.ID, " ") { 2158 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 2159 } 2160 if j.Name == "" { 2161 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 2162 } 2163 if j.Namespace == "" { 2164 mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace")) 2165 } 2166 switch j.Type { 2167 case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem: 2168 case "": 2169 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 2170 default: 2171 mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type)) 2172 } 2173 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 2174 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 2175 } 2176 if len(j.Datacenters) == 0 { 2177 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 2178 } 2179 if len(j.TaskGroups) == 0 { 2180 mErr.Errors = append(mErr.Errors, 
errors.New("Missing job task groups")) 2181 } 2182 for idx, constr := range j.Constraints { 2183 if err := constr.Validate(); err != nil { 2184 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 2185 mErr.Errors = append(mErr.Errors, outer) 2186 } 2187 } 2188 if j.Type == JobTypeSystem { 2189 if j.Affinities != nil { 2190 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 2191 } 2192 } else { 2193 for idx, affinity := range j.Affinities { 2194 if err := affinity.Validate(); err != nil { 2195 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 2196 mErr.Errors = append(mErr.Errors, outer) 2197 } 2198 } 2199 } 2200 2201 if j.Type == JobTypeSystem { 2202 if j.Spreads != nil { 2203 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza")) 2204 } 2205 } else { 2206 for idx, spread := range j.Spreads { 2207 if err := spread.Validate(); err != nil { 2208 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err) 2209 mErr.Errors = append(mErr.Errors, outer) 2210 } 2211 } 2212 } 2213 2214 // Check for duplicate task groups 2215 taskGroups := make(map[string]int) 2216 for idx, tg := range j.TaskGroups { 2217 if tg.Name == "" { 2218 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 2219 } else if existing, ok := taskGroups[tg.Name]; ok { 2220 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 2221 } else { 2222 taskGroups[tg.Name] = idx 2223 } 2224 2225 if j.Type == "system" && tg.Count > 1 { 2226 mErr.Errors = append(mErr.Errors, 2227 fmt.Errorf("Job task group %s has count %d. 
Count cannot exceed 1 with system scheduler", 2228 tg.Name, tg.Count)) 2229 } 2230 } 2231 2232 // Validate the task group 2233 for _, tg := range j.TaskGroups { 2234 if err := tg.Validate(j); err != nil { 2235 outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err) 2236 mErr.Errors = append(mErr.Errors, outer) 2237 } 2238 } 2239 2240 // Validate periodic is only used with batch jobs. 2241 if j.IsPeriodic() && j.Periodic.Enabled { 2242 if j.Type != JobTypeBatch { 2243 mErr.Errors = append(mErr.Errors, 2244 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 2245 } 2246 2247 if err := j.Periodic.Validate(); err != nil { 2248 mErr.Errors = append(mErr.Errors, err) 2249 } 2250 } 2251 2252 if j.IsParameterized() { 2253 if j.Type != JobTypeBatch { 2254 mErr.Errors = append(mErr.Errors, 2255 fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch)) 2256 } 2257 2258 if err := j.ParameterizedJob.Validate(); err != nil { 2259 mErr.Errors = append(mErr.Errors, err) 2260 } 2261 } 2262 2263 return mErr.ErrorOrNil() 2264 } 2265 2266 // Warnings returns a list of warnings that may be from dubious settings or 2267 // deprecation warnings. 2268 func (j *Job) Warnings() error { 2269 var mErr multierror.Error 2270 2271 // Check the groups 2272 for _, tg := range j.TaskGroups { 2273 if err := tg.Warnings(j); err != nil { 2274 outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err) 2275 mErr.Errors = append(mErr.Errors, outer) 2276 } 2277 } 2278 2279 return mErr.ErrorOrNil() 2280 } 2281 2282 // LookupTaskGroup finds a task group by name 2283 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 2284 for _, tg := range j.TaskGroups { 2285 if tg.Name == name { 2286 return tg 2287 } 2288 } 2289 return nil 2290 } 2291 2292 // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined 2293 // meta data for the task. 
When joining Job, Group and Task Meta, the precedence 2294 // is by deepest scope (Task > Group > Job). 2295 func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string { 2296 group := j.LookupTaskGroup(groupName) 2297 if group == nil { 2298 return nil 2299 } 2300 2301 task := group.LookupTask(taskName) 2302 if task == nil { 2303 return nil 2304 } 2305 2306 meta := helper.CopyMapStringString(task.Meta) 2307 if meta == nil { 2308 meta = make(map[string]string, len(group.Meta)+len(j.Meta)) 2309 } 2310 2311 // Add the group specific meta 2312 for k, v := range group.Meta { 2313 if _, ok := meta[k]; !ok { 2314 meta[k] = v 2315 } 2316 } 2317 2318 // Add the job specific meta 2319 for k, v := range j.Meta { 2320 if _, ok := meta[k]; !ok { 2321 meta[k] = v 2322 } 2323 } 2324 2325 return meta 2326 } 2327 2328 // Stopped returns if a job is stopped. 2329 func (j *Job) Stopped() bool { 2330 return j == nil || j.Stop 2331 } 2332 2333 // HasUpdateStrategy returns if any task group in the job has an update strategy 2334 func (j *Job) HasUpdateStrategy() bool { 2335 for _, tg := range j.TaskGroups { 2336 if tg.Update != nil { 2337 return true 2338 } 2339 } 2340 2341 return false 2342 } 2343 2344 // Stub is used to return a summary of the job 2345 func (j *Job) Stub(summary *JobSummary) *JobListStub { 2346 return &JobListStub{ 2347 ID: j.ID, 2348 ParentID: j.ParentID, 2349 Name: j.Name, 2350 Type: j.Type, 2351 Priority: j.Priority, 2352 Periodic: j.IsPeriodic(), 2353 ParameterizedJob: j.IsParameterized(), 2354 Stop: j.Stop, 2355 Status: j.Status, 2356 StatusDescription: j.StatusDescription, 2357 CreateIndex: j.CreateIndex, 2358 ModifyIndex: j.ModifyIndex, 2359 JobModifyIndex: j.JobModifyIndex, 2360 SubmitTime: j.SubmitTime, 2361 JobSummary: summary, 2362 } 2363 } 2364 2365 // IsPeriodic returns whether a job is periodic. 
2366 func (j *Job) IsPeriodic() bool { 2367 return j.Periodic != nil 2368 } 2369 2370 // IsPeriodicActive returns whether the job is an active periodic job that will 2371 // create child jobs 2372 func (j *Job) IsPeriodicActive() bool { 2373 return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized() 2374 } 2375 2376 // IsParameterized returns whether a job is parameterized job. 2377 func (j *Job) IsParameterized() bool { 2378 return j.ParameterizedJob != nil && !j.Dispatched 2379 } 2380 2381 // VaultPolicies returns the set of Vault policies per task group, per task 2382 func (j *Job) VaultPolicies() map[string]map[string]*Vault { 2383 policies := make(map[string]map[string]*Vault, len(j.TaskGroups)) 2384 2385 for _, tg := range j.TaskGroups { 2386 tgPolicies := make(map[string]*Vault, len(tg.Tasks)) 2387 2388 for _, task := range tg.Tasks { 2389 if task.Vault == nil { 2390 continue 2391 } 2392 2393 tgPolicies[task.Name] = task.Vault 2394 } 2395 2396 if len(tgPolicies) != 0 { 2397 policies[tg.Name] = tgPolicies 2398 } 2399 } 2400 2401 return policies 2402 } 2403 2404 // RequiredSignals returns a mapping of task groups to tasks to their required 2405 // set of signals 2406 func (j *Job) RequiredSignals() map[string]map[string][]string { 2407 signals := make(map[string]map[string][]string) 2408 2409 for _, tg := range j.TaskGroups { 2410 for _, task := range tg.Tasks { 2411 // Use this local one as a set 2412 taskSignals := make(map[string]struct{}) 2413 2414 // Check if the Vault change mode uses signals 2415 if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal { 2416 taskSignals[task.Vault.ChangeSignal] = struct{}{} 2417 } 2418 2419 // If a user has specified a KillSignal, add it to required signals 2420 if task.KillSignal != "" { 2421 taskSignals[task.KillSignal] = struct{}{} 2422 } 2423 2424 // Check if any template change mode uses signals 2425 for _, t := range task.Templates { 2426 if t.ChangeMode != 
TemplateChangeModeSignal { 2427 continue 2428 } 2429 2430 taskSignals[t.ChangeSignal] = struct{}{} 2431 } 2432 2433 // Flatten and sort the signals 2434 l := len(taskSignals) 2435 if l == 0 { 2436 continue 2437 } 2438 2439 flat := make([]string, 0, l) 2440 for sig := range taskSignals { 2441 flat = append(flat, sig) 2442 } 2443 2444 sort.Strings(flat) 2445 tgSignals, ok := signals[tg.Name] 2446 if !ok { 2447 tgSignals = make(map[string][]string) 2448 signals[tg.Name] = tgSignals 2449 } 2450 tgSignals[task.Name] = flat 2451 } 2452 2453 } 2454 2455 return signals 2456 } 2457 2458 // SpecChanged determines if the functional specification has changed between 2459 // two job versions. 2460 func (j *Job) SpecChanged(new *Job) bool { 2461 if j == nil { 2462 return new != nil 2463 } 2464 2465 // Create a copy of the new job 2466 c := new.Copy() 2467 2468 // Update the new job so we can do a reflect 2469 c.Status = j.Status 2470 c.StatusDescription = j.StatusDescription 2471 c.Stable = j.Stable 2472 c.Version = j.Version 2473 c.CreateIndex = j.CreateIndex 2474 c.ModifyIndex = j.ModifyIndex 2475 c.JobModifyIndex = j.JobModifyIndex 2476 c.SubmitTime = j.SubmitTime 2477 2478 // Deep equals the jobs 2479 return !reflect.DeepEqual(j, c) 2480 } 2481 2482 func (j *Job) SetSubmitTime() { 2483 j.SubmitTime = time.Now().UTC().UnixNano() 2484 } 2485 2486 // JobListStub is used to return a subset of job information 2487 // for the job list 2488 type JobListStub struct { 2489 ID string 2490 ParentID string 2491 Name string 2492 Type string 2493 Priority int 2494 Periodic bool 2495 ParameterizedJob bool 2496 Stop bool 2497 Status string 2498 StatusDescription string 2499 JobSummary *JobSummary 2500 CreateIndex uint64 2501 ModifyIndex uint64 2502 JobModifyIndex uint64 2503 SubmitTime int64 2504 } 2505 2506 // JobSummary summarizes the state of the allocations of a job 2507 type JobSummary struct { 2508 // JobID is the ID of the job the summary is for 2509 JobID string 2510 2511 // 
Namespace is the namespace of the job and its summary 2512 Namespace string 2513 2514 // Summary contains the summary per task group for the Job 2515 Summary map[string]TaskGroupSummary 2516 2517 // Children contains a summary for the children of this job. 2518 Children *JobChildrenSummary 2519 2520 // Raft Indexes 2521 CreateIndex uint64 2522 ModifyIndex uint64 2523 } 2524 2525 // Copy returns a new copy of JobSummary 2526 func (js *JobSummary) Copy() *JobSummary { 2527 newJobSummary := new(JobSummary) 2528 *newJobSummary = *js 2529 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary)) 2530 for k, v := range js.Summary { 2531 newTGSummary[k] = v 2532 } 2533 newJobSummary.Summary = newTGSummary 2534 newJobSummary.Children = newJobSummary.Children.Copy() 2535 return newJobSummary 2536 } 2537 2538 // JobChildrenSummary contains the summary of children job statuses 2539 type JobChildrenSummary struct { 2540 Pending int64 2541 Running int64 2542 Dead int64 2543 } 2544 2545 // Copy returns a new copy of a JobChildrenSummary 2546 func (jc *JobChildrenSummary) Copy() *JobChildrenSummary { 2547 if jc == nil { 2548 return nil 2549 } 2550 2551 njc := new(JobChildrenSummary) 2552 *njc = *jc 2553 return njc 2554 } 2555 2556 // TaskGroup summarizes the state of all the allocations of a particular 2557 // TaskGroup 2558 type TaskGroupSummary struct { 2559 Queued int 2560 Complete int 2561 Failed int 2562 Running int 2563 Starting int 2564 Lost int 2565 } 2566 2567 const ( 2568 // Checks uses any registered health check state in combination with task 2569 // states to determine if a allocation is healthy. 2570 UpdateStrategyHealthCheck_Checks = "checks" 2571 2572 // TaskStates uses the task states of an allocation to determine if the 2573 // allocation is healthy. 2574 UpdateStrategyHealthCheck_TaskStates = "task_states" 2575 2576 // Manual allows the operator to manually signal to Nomad when an 2577 // allocations is healthy. 
This allows more advanced health checking that is 2578 // outside of the scope of Nomad. 2579 UpdateStrategyHealthCheck_Manual = "manual" 2580 ) 2581 2582 var ( 2583 // DefaultUpdateStrategy provides a baseline that can be used to upgrade 2584 // jobs with the old policy or for populating field defaults. 2585 DefaultUpdateStrategy = &UpdateStrategy{ 2586 Stagger: 30 * time.Second, 2587 MaxParallel: 1, 2588 HealthCheck: UpdateStrategyHealthCheck_Checks, 2589 MinHealthyTime: 10 * time.Second, 2590 HealthyDeadline: 5 * time.Minute, 2591 ProgressDeadline: 10 * time.Minute, 2592 AutoRevert: false, 2593 Canary: 0, 2594 } 2595 ) 2596 2597 // UpdateStrategy is used to modify how updates are done 2598 type UpdateStrategy struct { 2599 // Stagger is used to determine the rate at which allocations are migrated 2600 // due to down or draining nodes. 2601 Stagger time.Duration 2602 2603 // MaxParallel is how many updates can be done in parallel 2604 MaxParallel int 2605 2606 // HealthCheck specifies the mechanism in which allocations are marked 2607 // healthy or unhealthy as part of a deployment. 2608 HealthCheck string 2609 2610 // MinHealthyTime is the minimum time an allocation must be in the healthy 2611 // state before it is marked as healthy, unblocking more allocations to be 2612 // rolled. 2613 MinHealthyTime time.Duration 2614 2615 // HealthyDeadline is the time in which an allocation must be marked as 2616 // healthy before it is automatically transitioned to unhealthy. This time 2617 // period doesn't count against the MinHealthyTime. 2618 HealthyDeadline time.Duration 2619 2620 // ProgressDeadline is the time in which an allocation as part of the 2621 // deployment must transition to healthy. If no allocation becomes healthy 2622 // after the deadline, the deployment is marked as failed. If the deadline 2623 // is zero, the first failure causes the deployment to fail. 
2624 ProgressDeadline time.Duration 2625 2626 // AutoRevert declares that if a deployment fails because of unhealthy 2627 // allocations, there should be an attempt to auto-revert the job to a 2628 // stable version. 2629 AutoRevert bool 2630 2631 // Canary is the number of canaries to deploy when a change to the task 2632 // group is detected. 2633 Canary int 2634 } 2635 2636 func (u *UpdateStrategy) Copy() *UpdateStrategy { 2637 if u == nil { 2638 return nil 2639 } 2640 2641 copy := new(UpdateStrategy) 2642 *copy = *u 2643 return copy 2644 } 2645 2646 func (u *UpdateStrategy) Validate() error { 2647 if u == nil { 2648 return nil 2649 } 2650 2651 var mErr multierror.Error 2652 switch u.HealthCheck { 2653 case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual: 2654 default: 2655 multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck)) 2656 } 2657 2658 if u.MaxParallel < 1 { 2659 multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than one: %d < 1", u.MaxParallel)) 2660 } 2661 if u.Canary < 0 { 2662 multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary)) 2663 } 2664 if u.MinHealthyTime < 0 { 2665 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime)) 2666 } 2667 if u.HealthyDeadline <= 0 { 2668 multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline)) 2669 } 2670 if u.ProgressDeadline < 0 { 2671 multierror.Append(&mErr, fmt.Errorf("Progress deadline must be zero or greater: %v", u.ProgressDeadline)) 2672 } 2673 if u.MinHealthyTime >= u.HealthyDeadline { 2674 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v > %v", u.MinHealthyTime, u.HealthyDeadline)) 2675 } 2676 if u.ProgressDeadline != 0 && u.HealthyDeadline >= u.ProgressDeadline { 2677 multierror.Append(&mErr, fmt.Errorf("Healthy 
deadline must be less than progress deadline: %v > %v", u.HealthyDeadline, u.ProgressDeadline)) 2678 } 2679 if u.Stagger <= 0 { 2680 multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger)) 2681 } 2682 2683 return mErr.ErrorOrNil() 2684 } 2685 2686 // TODO(alexdadgar): Remove once no longer used by the scheduler. 2687 // Rolling returns if a rolling strategy should be used 2688 func (u *UpdateStrategy) Rolling() bool { 2689 return u.Stagger > 0 && u.MaxParallel > 0 2690 } 2691 2692 const ( 2693 // PeriodicSpecCron is used for a cron spec. 2694 PeriodicSpecCron = "cron" 2695 2696 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 2697 // separated list of unix timestamps at which to launch. 2698 PeriodicSpecTest = "_internal_test" 2699 ) 2700 2701 // Periodic defines the interval a job should be run at. 2702 type PeriodicConfig struct { 2703 // Enabled determines if the job should be run periodically. 2704 Enabled bool 2705 2706 // Spec specifies the interval the job should be run as. It is parsed based 2707 // on the SpecType. 2708 Spec string 2709 2710 // SpecType defines the format of the spec. 2711 SpecType string 2712 2713 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 2714 ProhibitOverlap bool 2715 2716 // TimeZone is the user specified string that determines the time zone to 2717 // launch against. The time zones must be specified from IANA Time Zone 2718 // database, such as "America/New_York". 
2719 // Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 2720 // Reference: https://www.iana.org/time-zones 2721 TimeZone string 2722 2723 // location is the time zone to evaluate the launch time against 2724 location *time.Location 2725 } 2726 2727 func (p *PeriodicConfig) Copy() *PeriodicConfig { 2728 if p == nil { 2729 return nil 2730 } 2731 np := new(PeriodicConfig) 2732 *np = *p 2733 return np 2734 } 2735 2736 func (p *PeriodicConfig) Validate() error { 2737 if !p.Enabled { 2738 return nil 2739 } 2740 2741 var mErr multierror.Error 2742 if p.Spec == "" { 2743 multierror.Append(&mErr, fmt.Errorf("Must specify a spec")) 2744 } 2745 2746 // Check if we got a valid time zone 2747 if p.TimeZone != "" { 2748 if _, err := time.LoadLocation(p.TimeZone); err != nil { 2749 multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err)) 2750 } 2751 } 2752 2753 switch p.SpecType { 2754 case PeriodicSpecCron: 2755 // Validate the cron spec 2756 if _, err := cronexpr.Parse(p.Spec); err != nil { 2757 multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err)) 2758 } 2759 case PeriodicSpecTest: 2760 // No-op 2761 default: 2762 multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType)) 2763 } 2764 2765 return mErr.ErrorOrNil() 2766 } 2767 2768 func (p *PeriodicConfig) Canonicalize() { 2769 // Load the location 2770 l, err := time.LoadLocation(p.TimeZone) 2771 if err != nil { 2772 p.location = time.UTC 2773 } 2774 2775 p.location = l 2776 } 2777 2778 // CronParseNext is a helper that parses the next time for the given expression 2779 // but captures any panic that may occur in the underlying library. 
2780 func CronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) { 2781 defer func() { 2782 if recover() != nil { 2783 t = time.Time{} 2784 err = fmt.Errorf("failed parsing cron expression: %q", spec) 2785 } 2786 }() 2787 2788 return e.Next(fromTime), nil 2789 } 2790 2791 // Next returns the closest time instant matching the spec that is after the 2792 // passed time. If no matching instance exists, the zero value of time.Time is 2793 // returned. The `time.Location` of the returned value matches that of the 2794 // passed time. 2795 func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) { 2796 switch p.SpecType { 2797 case PeriodicSpecCron: 2798 if e, err := cronexpr.Parse(p.Spec); err == nil { 2799 return CronParseNext(e, fromTime, p.Spec) 2800 } 2801 case PeriodicSpecTest: 2802 split := strings.Split(p.Spec, ",") 2803 if len(split) == 1 && split[0] == "" { 2804 return time.Time{}, nil 2805 } 2806 2807 // Parse the times 2808 times := make([]time.Time, len(split)) 2809 for i, s := range split { 2810 unix, err := strconv.Atoi(s) 2811 if err != nil { 2812 return time.Time{}, nil 2813 } 2814 2815 times[i] = time.Unix(int64(unix), 0) 2816 } 2817 2818 // Find the next match 2819 for _, next := range times { 2820 if fromTime.Before(next) { 2821 return next, nil 2822 } 2823 } 2824 } 2825 2826 return time.Time{}, nil 2827 } 2828 2829 // GetLocation returns the location to use for determining the time zone to run 2830 // the periodic job against. 2831 func (p *PeriodicConfig) GetLocation() *time.Location { 2832 // Jobs pre 0.5.5 will not have this 2833 if p.location != nil { 2834 return p.location 2835 } 2836 2837 return time.UTC 2838 } 2839 2840 const ( 2841 // PeriodicLaunchSuffix is the string appended to the periodic jobs ID 2842 // when launching derived instances of it. 2843 PeriodicLaunchSuffix = "/periodic-" 2844 ) 2845 2846 // PeriodicLaunch tracks the last launch time of a periodic job. 
2847 type PeriodicLaunch struct { 2848 ID string // ID of the periodic job. 2849 Namespace string // Namespace of the periodic job 2850 Launch time.Time // The last launch time. 2851 2852 // Raft Indexes 2853 CreateIndex uint64 2854 ModifyIndex uint64 2855 } 2856 2857 const ( 2858 DispatchPayloadForbidden = "forbidden" 2859 DispatchPayloadOptional = "optional" 2860 DispatchPayloadRequired = "required" 2861 2862 // DispatchLaunchSuffix is the string appended to the parameterized job's ID 2863 // when dispatching instances of it. 2864 DispatchLaunchSuffix = "/dispatch-" 2865 ) 2866 2867 // ParameterizedJobConfig is used to configure the parameterized job 2868 type ParameterizedJobConfig struct { 2869 // Payload configure the payload requirements 2870 Payload string 2871 2872 // MetaRequired is metadata keys that must be specified by the dispatcher 2873 MetaRequired []string 2874 2875 // MetaOptional is metadata keys that may be specified by the dispatcher 2876 MetaOptional []string 2877 } 2878 2879 func (d *ParameterizedJobConfig) Validate() error { 2880 var mErr multierror.Error 2881 switch d.Payload { 2882 case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden: 2883 default: 2884 multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload)) 2885 } 2886 2887 // Check that the meta configurations are disjoint sets 2888 disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional) 2889 if !disjoint { 2890 multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. 
Following keys exist in both: %v", offending)) 2891 } 2892 2893 return mErr.ErrorOrNil() 2894 } 2895 2896 func (d *ParameterizedJobConfig) Canonicalize() { 2897 if d.Payload == "" { 2898 d.Payload = DispatchPayloadOptional 2899 } 2900 } 2901 2902 func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig { 2903 if d == nil { 2904 return nil 2905 } 2906 nd := new(ParameterizedJobConfig) 2907 *nd = *d 2908 nd.MetaOptional = helper.CopySliceString(nd.MetaOptional) 2909 nd.MetaRequired = helper.CopySliceString(nd.MetaRequired) 2910 return nd 2911 } 2912 2913 // DispatchedID returns an ID appropriate for a job dispatched against a 2914 // particular parameterized job 2915 func DispatchedID(templateID string, t time.Time) string { 2916 u := uuid.Generate()[:8] 2917 return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u) 2918 } 2919 2920 // DispatchPayloadConfig configures how a task gets its input from a job dispatch 2921 type DispatchPayloadConfig struct { 2922 // File specifies a relative path to where the input data should be written 2923 File string 2924 } 2925 2926 func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig { 2927 if d == nil { 2928 return nil 2929 } 2930 nd := new(DispatchPayloadConfig) 2931 *nd = *d 2932 return nd 2933 } 2934 2935 func (d *DispatchPayloadConfig) Validate() error { 2936 // Verify the destination doesn't escape 2937 escaped, err := PathEscapesAllocDir("task/local/", d.File) 2938 if err != nil { 2939 return fmt.Errorf("invalid destination path: %v", err) 2940 } else if escaped { 2941 return fmt.Errorf("destination escapes allocation directory") 2942 } 2943 2944 return nil 2945 } 2946 2947 var ( 2948 DefaultServiceJobRestartPolicy = RestartPolicy{ 2949 Delay: 15 * time.Second, 2950 Attempts: 2, 2951 Interval: 30 * time.Minute, 2952 Mode: RestartPolicyModeFail, 2953 } 2954 DefaultBatchJobRestartPolicy = RestartPolicy{ 2955 Delay: 15 * time.Second, 2956 Attempts: 3, 2957 Interval: 24 * time.Hour, 2958 Mode: 
RestartPolicyModeFail, 2959 } 2960 ) 2961 2962 var ( 2963 DefaultServiceJobReschedulePolicy = ReschedulePolicy{ 2964 Delay: 30 * time.Second, 2965 DelayFunction: "exponential", 2966 MaxDelay: 1 * time.Hour, 2967 Unlimited: true, 2968 } 2969 DefaultBatchJobReschedulePolicy = ReschedulePolicy{ 2970 Attempts: 1, 2971 Interval: 24 * time.Hour, 2972 Delay: 5 * time.Second, 2973 DelayFunction: "constant", 2974 } 2975 ) 2976 2977 const ( 2978 // RestartPolicyModeDelay causes an artificial delay till the next interval is 2979 // reached when the specified attempts have been reached in the interval. 2980 RestartPolicyModeDelay = "delay" 2981 2982 // RestartPolicyModeFail causes a job to fail if the specified number of 2983 // attempts are reached within an interval. 2984 RestartPolicyModeFail = "fail" 2985 2986 // RestartPolicyMinInterval is the minimum interval that is accepted for a 2987 // restart policy. 2988 RestartPolicyMinInterval = 5 * time.Second 2989 2990 // ReasonWithinPolicy describes restart events that are within policy 2991 ReasonWithinPolicy = "Restart within policy" 2992 ) 2993 2994 // RestartPolicy configures how Tasks are restarted when they crash or fail. 2995 type RestartPolicy struct { 2996 // Attempts is the number of restart that will occur in an interval. 2997 Attempts int 2998 2999 // Interval is a duration in which we can limit the number of restarts 3000 // within. 3001 Interval time.Duration 3002 3003 // Delay is the time between a failure and a restart. 3004 Delay time.Duration 3005 3006 // Mode controls what happens when the task restarts more than attempt times 3007 // in an interval. 
3008 Mode string 3009 } 3010 3011 func (r *RestartPolicy) Copy() *RestartPolicy { 3012 if r == nil { 3013 return nil 3014 } 3015 nrp := new(RestartPolicy) 3016 *nrp = *r 3017 return nrp 3018 } 3019 3020 func (r *RestartPolicy) Validate() error { 3021 var mErr multierror.Error 3022 switch r.Mode { 3023 case RestartPolicyModeDelay, RestartPolicyModeFail: 3024 default: 3025 multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode)) 3026 } 3027 3028 // Check for ambiguous/confusing settings 3029 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 3030 multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts)) 3031 } 3032 3033 if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() { 3034 multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval)) 3035 } 3036 if time.Duration(r.Attempts)*r.Delay > r.Interval { 3037 multierror.Append(&mErr, 3038 fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)) 3039 } 3040 return mErr.ErrorOrNil() 3041 } 3042 3043 func NewRestartPolicy(jobType string) *RestartPolicy { 3044 switch jobType { 3045 case JobTypeService, JobTypeSystem: 3046 rp := DefaultServiceJobRestartPolicy 3047 return &rp 3048 case JobTypeBatch: 3049 rp := DefaultBatchJobRestartPolicy 3050 return &rp 3051 } 3052 return nil 3053 } 3054 3055 const ReschedulePolicyMinInterval = 15 * time.Second 3056 const ReschedulePolicyMinDelay = 5 * time.Second 3057 3058 var RescheduleDelayFunctions = [...]string{"constant", "exponential", "fibonacci"} 3059 3060 // ReschedulePolicy configures how Tasks are rescheduled when they crash or fail. 3061 type ReschedulePolicy struct { 3062 // Attempts limits the number of rescheduling attempts that can occur in an interval. 
3063 Attempts int 3064 3065 // Interval is a duration in which we can limit the number of reschedule attempts. 3066 Interval time.Duration 3067 3068 // Delay is a minimum duration to wait between reschedule attempts. 3069 // The delay function determines how much subsequent reschedule attempts are delayed by. 3070 Delay time.Duration 3071 3072 // DelayFunction determines how the delay progressively changes on subsequent reschedule 3073 // attempts. Valid values are "exponential", "constant", and "fibonacci". 3074 DelayFunction string 3075 3076 // MaxDelay is an upper bound on the delay. 3077 MaxDelay time.Duration 3078 3079 // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set 3080 // between reschedule attempts. 3081 Unlimited bool 3082 } 3083 3084 func (r *ReschedulePolicy) Copy() *ReschedulePolicy { 3085 if r == nil { 3086 return nil 3087 } 3088 nrp := new(ReschedulePolicy) 3089 *nrp = *r 3090 return nrp 3091 } 3092 3093 func (r *ReschedulePolicy) Enabled() bool { 3094 enabled := r != nil && (r.Attempts > 0 || r.Unlimited) 3095 return enabled 3096 } 3097 3098 // Validate uses different criteria to validate the reschedule policy 3099 // Delay must be a minimum of 5 seconds 3100 // Delay Ceiling is ignored if Delay Function is "constant" 3101 // Number of possible attempts is validated, given the interval, delay and delay function 3102 func (r *ReschedulePolicy) Validate() error { 3103 if !r.Enabled() { 3104 return nil 3105 } 3106 var mErr multierror.Error 3107 // Check for ambiguous/confusing settings 3108 if r.Attempts > 0 { 3109 if r.Interval <= 0 { 3110 multierror.Append(&mErr, fmt.Errorf("Interval must be a non zero value if Attempts > 0")) 3111 } 3112 if r.Unlimited { 3113 multierror.Append(&mErr, fmt.Errorf("Reschedule Policy with Attempts = %v, Interval = %v, "+ 3114 "and Unlimited = %v is ambiguous", r.Attempts, r.Interval, r.Unlimited)) 3115 multierror.Append(&mErr, errors.New("If Attempts >0, Unlimited cannot also be set to 
true")) 3116 } 3117 } 3118 3119 delayPreCheck := true 3120 // Delay should be bigger than the default 3121 if r.Delay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 3122 multierror.Append(&mErr, fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 3123 delayPreCheck = false 3124 } 3125 3126 // Must use a valid delay function 3127 if !isValidDelayFunction(r.DelayFunction) { 3128 multierror.Append(&mErr, fmt.Errorf("Invalid delay function %q, must be one of %q", r.DelayFunction, RescheduleDelayFunctions)) 3129 delayPreCheck = false 3130 } 3131 3132 // Validate MaxDelay if not using linear delay progression 3133 if r.DelayFunction != "constant" { 3134 if r.MaxDelay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 3135 multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 3136 delayPreCheck = false 3137 } 3138 if r.MaxDelay < r.Delay { 3139 multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than Delay %v (got %v)", r.Delay, r.MaxDelay)) 3140 delayPreCheck = false 3141 } 3142 3143 } 3144 3145 // Validate Interval and other delay parameters if attempts are limited 3146 if !r.Unlimited { 3147 if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { 3148 multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) 3149 } 3150 if !delayPreCheck { 3151 // We can't cross validate the rest of the delay params if delayPreCheck fails, so return early 3152 return mErr.ErrorOrNil() 3153 } 3154 crossValidationErr := r.validateDelayParams() 3155 if crossValidationErr != nil { 3156 multierror.Append(&mErr, crossValidationErr) 3157 } 3158 } 3159 return mErr.ErrorOrNil() 3160 } 3161 3162 func isValidDelayFunction(delayFunc string) bool { 3163 for _, value := range RescheduleDelayFunctions { 3164 if value == delayFunc { 3165 return true 3166 } 3167 } 3168 return false 3169 } 3170 3171 
// validateDelayParams cross-validates Attempts, Interval, Delay, MaxDelay and
// DelayFunction, returning a descriptive error when the configured interval is
// too small to accommodate the requested number of reschedule attempts.
func (r *ReschedulePolicy) validateDelayParams() error {
	ok, possibleAttempts, recommendedInterval := r.viableAttempts()
	if ok {
		return nil
	}
	var mErr multierror.Error
	// The constant delay function has no ceiling, so its message omits MaxDelay.
	if r.DelayFunction == "constant" {
		multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and "+
			"delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction))
	} else {
		multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+
			"delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay))
	}
	multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts))
	return mErr.ErrorOrNil()
}

// viableAttempts estimates, per delay function, whether r.Attempts reschedule
// attempts fit inside r.Interval. It returns (valid, possibleAttempts,
// recommendedInterval): valid is false when the interval is too small,
// possibleAttempts is how many attempts would fit, and recommendedInterval is
// an interval large enough for all attempts. An unknown delay function yields
// (false, 0, 0).
func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) {
	var possibleAttempts int
	var recommendedInterval time.Duration
	valid := true
	switch r.DelayFunction {
	case "constant":
		// Every attempt waits exactly r.Delay.
		recommendedInterval = time.Duration(r.Attempts) * r.Delay
		if r.Interval < recommendedInterval {
			possibleAttempts = int(r.Interval / r.Delay)
			valid = false
		}
	case "exponential":
		for i := 0; i < r.Attempts; i++ {
			// Delay doubles per attempt (2^i * Delay), capped at MaxDelay.
			nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay
			if nextDelay > r.MaxDelay {
				nextDelay = r.MaxDelay
				recommendedInterval += nextDelay
			} else {
				// NOTE(review): this overwrites rather than accumulates the
				// interval while below the ceiling — confirm this is the
				// intended estimate.
				recommendedInterval = nextDelay
			}
			if recommendedInterval < r.Interval {
				possibleAttempts++
			}
		}
		if possibleAttempts < r.Attempts {
			valid = false
		}
	case "fibonacci":
		// slots[i] is the delay before attempt i; seeded with two r.Delay
		// entries per the Fibonacci recurrence.
		var slots []time.Duration
		slots = append(slots, r.Delay)
		slots = append(slots, r.Delay)
		reachedCeiling := false
		for i := 2; i < r.Attempts; i++ {
			var nextDelay time.Duration
			if reachedCeiling {
				//switch to linear
				nextDelay = slots[i-1] + r.MaxDelay
			} else {
				nextDelay = slots[i-1] + slots[i-2]
				if nextDelay > r.MaxDelay {
					nextDelay = r.MaxDelay
					reachedCeiling = true
				}
			}
			slots = append(slots, nextDelay)
		}
		recommendedInterval = slots[len(slots)-1]
		if r.Interval < recommendedInterval {
			valid = false
			// calculate possible attempts
			for i := 0; i < len(slots); i++ {
				if slots[i] > r.Interval {
					possibleAttempts = i
					break
				}
			}
		}
	default:
		// Unknown delay function; the caller reports that separately.
		return false, 0, 0
	}
	if possibleAttempts < 0 { // can happen if delay is bigger than interval
		possibleAttempts = 0
	}
	return valid, possibleAttempts, recommendedInterval
}

// NewReschedulePolicy returns a copy of the default reschedule policy for the
// given job type, or nil for job types (e.g. system) with no default policy.
func NewReschedulePolicy(jobType string) *ReschedulePolicy {
	switch jobType {
	case JobTypeService:
		// Copy the package-level default so callers cannot mutate it.
		rp := DefaultServiceJobReschedulePolicy
		return &rp
	case JobTypeBatch:
		rp := DefaultBatchJobReschedulePolicy
		return &rp
	}
	return nil
}

const (
	// MigrateStrategyHealthChecks gates migration success on health checks.
	MigrateStrategyHealthChecks = "checks"
	// MigrateStrategyHealthStates gates migration success on task states.
	MigrateStrategyHealthStates = "task_states"
)

// MigrateStrategy controls how allocations are migrated (e.g. off draining
// nodes) for a task group.
type MigrateStrategy struct {
	// MaxParallel is how many allocations may be migrated at once.
	MaxParallel int
	// HealthCheck is one of the MigrateStrategyHealth* constants.
	HealthCheck string
	// MinHealthyTime is how long an allocation must be healthy for the
	// migration to be considered successful.
	MinHealthyTime time.Duration
	// HealthyDeadline is the deadline for an allocation to become healthy.
	HealthyDeadline time.Duration
}

// DefaultMigrateStrategy is used for backwards compat with pre-0.8 Allocations
// that lack an update strategy.
//
// This function should match its counterpart in api/tasks.go
func DefaultMigrateStrategy() *MigrateStrategy {
	return &MigrateStrategy{
		MaxParallel:     1,
		HealthCheck:     MigrateStrategyHealthChecks,
		MinHealthyTime:  10 * time.Second,
		HealthyDeadline: 5 * time.Minute,
	}
}

// Validate returns an error if the migrate strategy is invalid: negative
// MaxParallel, an unknown HealthCheck, negative durations, or a
// MinHealthyTime exceeding HealthyDeadline.
func (m *MigrateStrategy) Validate() error {
	var mErr multierror.Error

	if m.MaxParallel < 0 {
		multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel))
	}

	switch m.HealthCheck {
	case MigrateStrategyHealthChecks, MigrateStrategyHealthStates:
		// ok
	case "":
		// An empty HealthCheck is only acceptable when migrations are
		// effectively disabled (MaxParallel == 0).
		if m.MaxParallel > 0 {
			multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck"))
		}
	default:
		multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck))
	}

	if m.MinHealthyTime < 0 {
		multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime))
	}

	if m.HealthyDeadline < 0 {
		multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline))
	}

	if m.MinHealthyTime > m.HealthyDeadline {
		multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline"))
	}

	return mErr.ErrorOrNil()
}

// TaskGroup is an atomic unit of placement. Each task group belongs to
// a job and may contain any number of tasks. A task group supports running
// in many replicas using the same configuration.
type TaskGroup struct {
	// Name of the task group
	Name string

	// Count is the number of replicas of this task group that should
	// be scheduled.
	Count int

	// Update is used to control the update strategy for this task group
	Update *UpdateStrategy

	// Migrate is used to control the migration strategy for this task group
	Migrate *MigrateStrategy

	// Constraints can be specified at a task group level and apply to
	// all the tasks contained.
	Constraints []*Constraint

	//RestartPolicy of a TaskGroup
	RestartPolicy *RestartPolicy

	// Tasks are the collection of tasks that this task group needs to run
	Tasks []*Task

	// EphemeralDisk is the disk resources that the task group requests
	EphemeralDisk *EphemeralDisk

	// Meta is used to associate arbitrary metadata with this
	// task group. This is opaque to Nomad.
	Meta map[string]string

	// ReschedulePolicy is used to configure how the scheduler should
	// retry failed allocations.
	ReschedulePolicy *ReschedulePolicy

	// Affinities can be specified at the task group level to express
	// scheduling preferences.
	Affinities []*Affinity

	// Spread can be specified at the task group level to express spreading
	// allocations across a desired attribute, such as datacenter
	Spreads []*Spread
}

// Copy returns a deep copy of the task group, or nil if tg is nil.
func (tg *TaskGroup) Copy() *TaskGroup {
	if tg == nil {
		return nil
	}
	// Shallow copy first, then replace every reference-typed field.
	ntg := new(TaskGroup)
	*ntg = *tg
	ntg.Update = ntg.Update.Copy()
	ntg.Constraints = CopySliceConstraints(ntg.Constraints)
	ntg.RestartPolicy = ntg.RestartPolicy.Copy()
	ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy()
	ntg.Affinities = CopySliceAffinities(ntg.Affinities)
	ntg.Spreads = CopySliceSpreads(ntg.Spreads)

	if tg.Tasks != nil {
		tasks := make([]*Task, len(ntg.Tasks))
		for i, t := range ntg.Tasks {
			tasks[i] = t.Copy()
		}
		ntg.Tasks = tasks
	}

	ntg.Meta = helper.CopyMapStringString(ntg.Meta)

	if tg.EphemeralDisk != nil {
		ntg.EphemeralDisk = tg.EphemeralDisk.Copy()
	}
	return ntg
}

// Canonicalize is used to canonicalize fields in the TaskGroup.
func (tg *TaskGroup) Canonicalize(job *Job) {
	// Ensure that an empty and nil map are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
	if len(tg.Meta) == 0 {
		tg.Meta = nil
	}

	// Set the default restart policy.
	if tg.RestartPolicy == nil {
		tg.RestartPolicy = NewRestartPolicy(job.Type)
	}

	if tg.ReschedulePolicy == nil {
		tg.ReschedulePolicy = NewReschedulePolicy(job.Type)
	}

	// Canonicalize Migrate for service jobs
	if job.Type == JobTypeService && tg.Migrate == nil {
		tg.Migrate = DefaultMigrateStrategy()
	}

	// Set a default ephemeral disk object if the user has not requested for one
	if tg.EphemeralDisk == nil {
		tg.EphemeralDisk = DefaultEphemeralDisk()
	}

	for _, task := range tg.Tasks {
		task.Canonicalize(job, tg)
	}

	// Add up the disk resources to EphemeralDisk. This is done so that users
	// are not required to move their disk attribute from resources to
	// EphemeralDisk section of the job spec in Nomad 0.5
	// COMPAT 0.4.1 -> 0.5
	// Remove in 0.6
	var diskMB int
	for _, task := range tg.Tasks {
		diskMB += task.Resources.DiskMB
	}
	if diskMB > 0 {
		tg.EphemeralDisk.SizeMB = diskMB
	}
}

// Validate is used to sanity check a task group
func (tg *TaskGroup) Validate(j *Job) error {
	var mErr multierror.Error
	if tg.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
	}
	if tg.Count < 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative"))
	}
	if len(tg.Tasks) == 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
	}
	for idx, constr := range tg.Constraints {
		if err := constr.Validate(); err != nil {
			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}
	// Affinities are rejected outright for system jobs; otherwise each one
	// is validated individually.
	if j.Type == JobTypeSystem {
		if tg.Affinities != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza"))
		}
	} else {
		for idx, affinity := range tg.Affinities {
			if err := affinity.Validate(); err != nil {
				outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}

	// A restart policy is mandatory (Canonicalize installs a default).
	if tg.RestartPolicy != nil {
		if err := tg.RestartPolicy.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	} else {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
	}

	if j.Type == JobTypeSystem {
		if tg.Spreads != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza"))
		}
	} else {
		for idx, spread := range tg.Spreads {
			if err := spread.Validate(); err != nil {
				outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}

	// System jobs must not carry a reschedule policy; all other job types
	// require one (Canonicalize installs a default).
	if j.Type == JobTypeSystem {
		if tg.ReschedulePolicy != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy"))
		}
	} else {
		if tg.ReschedulePolicy != nil {
			if err := tg.ReschedulePolicy.Validate(); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
		} else {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name))
		}
	}

	if tg.EphemeralDisk != nil {
		if err := tg.EphemeralDisk.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	} else {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name))
	}

	// Validate the update strategy
	if u := tg.Update; u != nil {
		switch j.Type {
		case JobTypeService, JobTypeSystem:
		default:
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type))
		}
		if err := u.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Validate the migration strategy
	switch j.Type {
	case JobTypeService:
		if tg.Migrate != nil {
			if err := tg.Migrate.Validate(); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
		}
	default:
		if tg.Migrate != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow migrate block", j.Type))
		}
	}

	// Check for duplicate tasks, that there is only leader task if any,
	// and no duplicated static ports
	tasks := make(map[string]int)
	staticPorts := make(map[int]string)
	leaderTasks := 0
	for idx, task := range tg.Tasks {
		if task.Name == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
		} else if existing, ok := tasks[task.Name]; ok {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
		} else {
			tasks[task.Name] = idx
		}

		if task.Leader {
			leaderTasks++
		}

		if task.Resources == nil {
			continue
		}

		// Record the first task claiming each static port so later
		// duplicates can name the conflicting owner.
		for _, net := range task.Resources.Networks {
			for _, port := range net.ReservedPorts {
				if other, ok := staticPorts[port.Value]; ok {
					err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other)
					mErr.Errors = append(mErr.Errors, err)
				} else {
					staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label)
				}
			}
		}
	}

	if leaderTasks > 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader"))
	}

	// Validate the tasks
	for _, task := range tg.Tasks {
		if err := task.Validate(tg.EphemeralDisk, j.Type); err != nil {
			outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}
	return mErr.ErrorOrNil()
}

// Warnings returns a list of warnings that may be from
dubious settings or
// deprecation warnings.
func (tg *TaskGroup) Warnings(j *Job) error {
	var mErr multierror.Error

	// Validate the update strategy
	if u := tg.Update; u != nil {
		// Check the counts are appropriate
		if u.MaxParallel > tg.Count {
			mErr.Errors = append(mErr.Errors,
				fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+
					"A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count))
		}
	}

	return mErr.ErrorOrNil()
}

// LookupTask finds a task by name
func (tg *TaskGroup) LookupTask(name string) *Task {
	for _, t := range tg.Tasks {
		if t.Name == name {
			return t
		}
	}
	return nil
}

// GoString implements fmt.GoStringer for readable %#v output.
func (tg *TaskGroup) GoString() string {
	return fmt.Sprintf("*%#v", *tg)
}

// CombinedResources returns the combined resources for the task group
func (tg *TaskGroup) CombinedResources() *Resources {
	// Disk comes from the group-level ephemeral disk; everything else is
	// summed across the tasks.
	r := &Resources{
		DiskMB: tg.EphemeralDisk.SizeMB,
	}
	for _, task := range tg.Tasks {
		r.Add(task.Resources)
	}
	return r
}

// CheckRestart describes if and when a task should be restarted based on
// failing health checks.
type CheckRestart struct {
	Limit          int           // Restart task after this many unhealthy intervals
	Grace          time.Duration // Grace time to give tasks after starting to get healthy
	IgnoreWarnings bool          // If true treat checks in `warning` as passing
}

// Copy returns a shallow copy of the CheckRestart (all fields are values),
// or nil if c is nil.
func (c *CheckRestart) Copy() *CheckRestart {
	if c == nil {
		return nil
	}

	nc := new(CheckRestart)
	*nc = *c
	return nc
}

// Validate returns an error if Limit or Grace is negative. A nil receiver is
// valid and means check-based restarts are disabled.
func (c *CheckRestart) Validate() error {
	if c == nil {
		return nil
	}

	var mErr multierror.Error
	if c.Limit < 0 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit))
	}

	if c.Grace < 0 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("grace period must be greater than or equal to 0 but found %d", c.Grace))
	}

	return mErr.ErrorOrNil()
}

const (
	// Supported Consul service check types.
	ServiceCheckHTTP   = "http"
	ServiceCheckTCP    = "tcp"
	ServiceCheckScript = "script"
	ServiceCheckGRPC   = "grpc"

	// minCheckInterval is the minimum check interval permitted. Consul
	// currently has its MinInterval set to 1s. Mirror that here for
	// consistency.
	minCheckInterval = 1 * time.Second

	// minCheckTimeout is the minimum check timeout permitted for Consul
	// script TTL checks.
3684 minCheckTimeout = 1 * time.Second 3685 ) 3686 3687 // The ServiceCheck data model represents the consul health check that 3688 // Nomad registers for a Task 3689 type ServiceCheck struct { 3690 Name string // Name of the check, defaults to id 3691 Type string // Type of the check - tcp, http, docker and script 3692 Command string // Command is the command to run for script checks 3693 Args []string // Args is a list of arguments for script checks 3694 Path string // path of the health check url for http type check 3695 Protocol string // Protocol to use if check is http, defaults to http 3696 PortLabel string // The port to use for tcp/http checks 3697 AddressMode string // 'host' to use host ip:port or 'driver' to use driver's 3698 Interval time.Duration // Interval of the check 3699 Timeout time.Duration // Timeout of the response from the check before consul fails the check 3700 InitialStatus string // Initial status of the check 3701 TLSSkipVerify bool // Skip TLS verification when Protocol=https 3702 Method string // HTTP Method to use (GET by default) 3703 Header map[string][]string // HTTP Headers for Consul to set when making HTTP checks 3704 CheckRestart *CheckRestart // If and when a task should be restarted based on checks 3705 GRPCService string // Service for GRPC checks 3706 GRPCUseTLS bool // Whether or not to use TLS for GRPC checks 3707 } 3708 3709 func (sc *ServiceCheck) Copy() *ServiceCheck { 3710 if sc == nil { 3711 return nil 3712 } 3713 nsc := new(ServiceCheck) 3714 *nsc = *sc 3715 nsc.Args = helper.CopySliceString(sc.Args) 3716 nsc.Header = helper.CopyMapStringSliceString(sc.Header) 3717 nsc.CheckRestart = sc.CheckRestart.Copy() 3718 return nsc 3719 } 3720 3721 func (sc *ServiceCheck) Canonicalize(serviceName string) { 3722 // Ensure empty maps/slices are treated as null to avoid scheduling 3723 // issues when using DeepEquals. 
3724 if len(sc.Args) == 0 { 3725 sc.Args = nil 3726 } 3727 3728 if len(sc.Header) == 0 { 3729 sc.Header = nil 3730 } else { 3731 for k, v := range sc.Header { 3732 if len(v) == 0 { 3733 sc.Header[k] = nil 3734 } 3735 } 3736 } 3737 3738 if sc.Name == "" { 3739 sc.Name = fmt.Sprintf("service: %q check", serviceName) 3740 } 3741 } 3742 3743 // validate a Service's ServiceCheck 3744 func (sc *ServiceCheck) validate() error { 3745 // Validate Type 3746 switch strings.ToLower(sc.Type) { 3747 case ServiceCheckGRPC: 3748 case ServiceCheckTCP: 3749 case ServiceCheckHTTP: 3750 if sc.Path == "" { 3751 return fmt.Errorf("http type must have a valid http path") 3752 } 3753 url, err := url.Parse(sc.Path) 3754 if err != nil { 3755 return fmt.Errorf("http type must have a valid http path") 3756 } 3757 if url.IsAbs() { 3758 return fmt.Errorf("http type must have a relative http path") 3759 } 3760 3761 case ServiceCheckScript: 3762 if sc.Command == "" { 3763 return fmt.Errorf("script type must have a valid script path") 3764 } 3765 3766 default: 3767 return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type) 3768 } 3769 3770 // Validate interval and timeout 3771 if sc.Interval == 0 { 3772 return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval) 3773 } else if sc.Interval < minCheckInterval { 3774 return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval) 3775 } 3776 3777 if sc.Timeout == 0 { 3778 return fmt.Errorf("missing required value timeout. 
Timeout cannot be less than %v", minCheckInterval) 3779 } else if sc.Timeout < minCheckTimeout { 3780 return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval) 3781 } 3782 3783 // Validate InitialStatus 3784 switch sc.InitialStatus { 3785 case "": 3786 case api.HealthPassing: 3787 case api.HealthWarning: 3788 case api.HealthCritical: 3789 default: 3790 return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical) 3791 3792 } 3793 3794 // Validate AddressMode 3795 switch sc.AddressMode { 3796 case "", AddressModeHost, AddressModeDriver: 3797 // Ok 3798 case AddressModeAuto: 3799 return fmt.Errorf("invalid address_mode %q - %s only valid for services", sc.AddressMode, AddressModeAuto) 3800 default: 3801 return fmt.Errorf("invalid address_mode %q", sc.AddressMode) 3802 } 3803 3804 return sc.CheckRestart.Validate() 3805 } 3806 3807 // RequiresPort returns whether the service check requires the task has a port. 3808 func (sc *ServiceCheck) RequiresPort() bool { 3809 switch sc.Type { 3810 case ServiceCheckGRPC, ServiceCheckHTTP, ServiceCheckTCP: 3811 return true 3812 default: 3813 return false 3814 } 3815 } 3816 3817 // TriggersRestarts returns true if this check should be watched and trigger a restart 3818 // on failure. 3819 func (sc *ServiceCheck) TriggersRestarts() bool { 3820 return sc.CheckRestart != nil && sc.CheckRestart.Limit > 0 3821 } 3822 3823 // Hash all ServiceCheck fields and the check's corresponding service ID to 3824 // create an identifier. The identifier is not guaranteed to be unique as if 3825 // the PortLabel is blank, the Service's PortLabel will be used after Hash is 3826 // called. 
func (sc *ServiceCheck) Hash(serviceID string) string {
	h := sha1.New()
	io.WriteString(h, serviceID)
	io.WriteString(h, sc.Name)
	io.WriteString(h, sc.Type)
	io.WriteString(h, sc.Command)
	io.WriteString(h, strings.Join(sc.Args, ""))
	io.WriteString(h, sc.Path)
	io.WriteString(h, sc.Protocol)
	io.WriteString(h, sc.PortLabel)
	io.WriteString(h, sc.Interval.String())
	io.WriteString(h, sc.Timeout.String())
	io.WriteString(h, sc.Method)
	// Only include TLSSkipVerify if set to maintain ID stability with Nomad <0.6
	if sc.TLSSkipVerify {
		io.WriteString(h, "true")
	}

	// Since map iteration order isn't stable we need to write k/v pairs to
	// a slice and sort it before hashing.
	if len(sc.Header) > 0 {
		headers := make([]string, 0, len(sc.Header))
		for k, v := range sc.Header {
			headers = append(headers, k+strings.Join(v, ""))
		}
		sort.Strings(headers)
		io.WriteString(h, strings.Join(headers, ""))
	}

	// Only include AddressMode if set to maintain ID stability with Nomad <0.7.1
	if len(sc.AddressMode) > 0 {
		io.WriteString(h, sc.AddressMode)
	}

	// Only include GRPC if set to maintain ID stability with Nomad <0.8.4
	if sc.GRPCService != "" {
		io.WriteString(h, sc.GRPCService)
	}
	if sc.GRPCUseTLS {
		io.WriteString(h, "true")
	}

	return fmt.Sprintf("%x", h.Sum(nil))
}

const (
	// Valid AddressMode values for services and checks; see
	// Service.AddressMode.
	AddressModeAuto   = "auto"
	AddressModeHost   = "host"
	AddressModeDriver = "driver"
)

// Service represents a Consul service definition in Nomad
type Service struct {
	// Name of the service registered with Consul. Consul defaults the
	// Name to ServiceID if not specified. The Name if specified is used
	// as one of the seed values when generating a Consul ServiceID.
	Name string

	// PortLabel is either the numeric port number or the `host:port`.
	// To specify the port number using the host's Consul Advertise
	// address, specify an empty host in the PortLabel (e.g. `:port`).
	PortLabel string

	// AddressMode specifies whether or not to use the host ip:port for
	// this service.
	AddressMode string

	Tags       []string        // List of tags for the service
	CanaryTags []string        // List of tags for the service when it is a canary
	Checks     []*ServiceCheck // List of checks associated with the service
}

// Copy returns a deep copy of the service, or nil if s is nil.
func (s *Service) Copy() *Service {
	if s == nil {
		return nil
	}
	ns := new(Service)
	*ns = *s
	ns.Tags = helper.CopySliceString(ns.Tags)
	ns.CanaryTags = helper.CopySliceString(ns.CanaryTags)

	if s.Checks != nil {
		checks := make([]*ServiceCheck, len(ns.Checks))
		for i, c := range ns.Checks {
			checks[i] = c.Copy()
		}
		ns.Checks = checks
	}

	return ns
}

// Canonicalize interpolates values of Job, Task Group and Task in the Service
// Name. This also generates check names, service id and check ids.
3921 func (s *Service) Canonicalize(job string, taskGroup string, task string) { 3922 // Ensure empty lists are treated as null to avoid scheduler issues when 3923 // using DeepEquals 3924 if len(s.Tags) == 0 { 3925 s.Tags = nil 3926 } 3927 if len(s.CanaryTags) == 0 { 3928 s.CanaryTags = nil 3929 } 3930 if len(s.Checks) == 0 { 3931 s.Checks = nil 3932 } 3933 3934 s.Name = args.ReplaceEnv(s.Name, map[string]string{ 3935 "JOB": job, 3936 "TASKGROUP": taskGroup, 3937 "TASK": task, 3938 "BASE": fmt.Sprintf("%s-%s-%s", job, taskGroup, task), 3939 }, 3940 ) 3941 3942 for _, check := range s.Checks { 3943 check.Canonicalize(s.Name) 3944 } 3945 } 3946 3947 // Validate checks if the Check definition is valid 3948 func (s *Service) Validate() error { 3949 var mErr multierror.Error 3950 3951 // Ensure the service name is valid per the below RFCs but make an exception 3952 // for our interpolation syntax by first stripping any environment variables from the name 3953 3954 serviceNameStripped := args.ReplaceEnvWithPlaceHolder(s.Name, "ENV-VAR") 3955 3956 if err := s.ValidateName(serviceNameStripped); err != nil { 3957 mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q", s.Name)) 3958 } 3959 3960 switch s.AddressMode { 3961 case "", AddressModeAuto, AddressModeHost, AddressModeDriver: 3962 // OK 3963 default: 3964 mErr.Errors = append(mErr.Errors, fmt.Errorf("service address_mode must be %q, %q, or %q; not %q", AddressModeAuto, AddressModeHost, AddressModeDriver, s.AddressMode)) 3965 } 3966 3967 for _, c := range s.Checks { 3968 if s.PortLabel == "" && c.PortLabel == "" && c.RequiresPort() { 3969 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but neither check nor service %+q have a port", c.Name, s.Name)) 3970 continue 3971 } 3972 3973 if err := c.validate(); err != nil { 3974 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", 
c.Name, err)) 3975 } 3976 } 3977 3978 return mErr.ErrorOrNil() 3979 } 3980 3981 // ValidateName checks if the services Name is valid and should be called after 3982 // the name has been interpolated 3983 func (s *Service) ValidateName(name string) error { 3984 // Ensure the service name is valid per RFC-952 §1 3985 // (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1 3986 // (https://tools.ietf.org/html/rfc1123), and RFC-2782 3987 // (https://tools.ietf.org/html/rfc2782). 3988 re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`) 3989 if !re.MatchString(name) { 3990 return fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be no longer than 63 characters: %q", name) 3991 } 3992 return nil 3993 } 3994 3995 // Hash returns a base32 encoded hash of a Service's contents excluding checks 3996 // as they're hashed independently. 3997 func (s *Service) Hash(allocID, taskName string, canary bool) string { 3998 h := sha1.New() 3999 io.WriteString(h, allocID) 4000 io.WriteString(h, taskName) 4001 io.WriteString(h, s.Name) 4002 io.WriteString(h, s.PortLabel) 4003 io.WriteString(h, s.AddressMode) 4004 for _, tag := range s.Tags { 4005 io.WriteString(h, tag) 4006 } 4007 for _, tag := range s.CanaryTags { 4008 io.WriteString(h, tag) 4009 } 4010 4011 // Vary ID on whether or not CanaryTags will be used 4012 if canary { 4013 h.Write([]byte("Canary")) 4014 } 4015 4016 // Base32 is used for encoding the hash as sha1 hashes can always be 4017 // encoded without padding, only 4 bytes larger than base64, and saves 4018 // 8 bytes vs hex. Since these hashes are used in Consul URLs it's nice 4019 // to have a reasonably compact URL-safe representation. 4020 return b32.EncodeToString(h.Sum(nil)) 4021 } 4022 4023 const ( 4024 // DefaultKillTimeout is the default timeout between signaling a task it 4025 // will be killed and killing it. 
	DefaultKillTimeout = 5 * time.Second
)

// LogConfig provides configuration for log rotation
type LogConfig struct {
	MaxFiles      int // number of rotated files to keep
	MaxFileSizeMB int // size of each file in megabytes
}

// DefaultLogConfig returns the default LogConfig values.
func DefaultLogConfig() *LogConfig {
	return &LogConfig{
		MaxFiles:      10,
		MaxFileSizeMB: 10,
	}
}

// Validate returns an error if the log config specified are less than
// the minimum allowed.
func (l *LogConfig) Validate() error {
	var mErr multierror.Error
	if l.MaxFiles < 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
	}
	if l.MaxFileSizeMB < 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
	}
	return mErr.ErrorOrNil()
}

// Task is a single process typically that is executed as part of a task group.
type Task struct {
	// Name of the task
	Name string

	// Driver is used to control which driver is used
	Driver string

	// User is used to determine which user will run the task. It defaults to
	// the same user the Nomad client is being run as.
	User string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// List of service definitions exposed by the Task
	Services []*Service

	// Vault is used to define the set of Vault policies that this task should
	// have access to.
	Vault *Vault

	// Templates are the set of templates to be rendered for the task.
	Templates []*Template

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Affinities can be specified at the task level to express
	// scheduling preferences
	Affinities []*Affinity

	// Resources is the resources needed by this task
	Resources *Resources

	// DispatchPayload configures how the task retrieves its input from a dispatch
	DispatchPayload *DispatchPayloadConfig

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string

	// KillTimeout is the time between signaling a task that it will be
	// killed and killing it.
	KillTimeout time.Duration

	// LogConfig provides configuration for log rotation
	LogConfig *LogConfig

	// Artifacts is a list of artifacts to download and extract before running
	// the task.
	Artifacts []*TaskArtifact

	// Leader marks the task as the leader within the group. When the leader
	// task exits, other tasks will be gracefully terminated.
	Leader bool

	// ShutdownDelay is the duration of the delay between deregistering a
	// task from Consul and sending it a signal to shutdown. See #2441
	ShutdownDelay time.Duration

	// KillSignal is the kill signal to use for the task. This is an optional
	// specification and defaults to SIGINT
	KillSignal string
}

// Copy returns a deep copy of the task, or nil if t is nil.
func (t *Task) Copy() *Task {
	if t == nil {
		return nil
	}
	// Shallow copy first, then replace every reference-typed field.
	nt := new(Task)
	*nt = *t
	nt.Env = helper.CopyMapStringString(nt.Env)

	if t.Services != nil {
		services := make([]*Service, len(nt.Services))
		for i, s := range nt.Services {
			services[i] = s.Copy()
		}
		nt.Services = services
	}

	nt.Constraints = CopySliceConstraints(nt.Constraints)
	nt.Affinities = CopySliceAffinities(nt.Affinities)

	nt.Vault = nt.Vault.Copy()
	nt.Resources = nt.Resources.Copy()
	nt.Meta = helper.CopyMapStringString(nt.Meta)
	nt.DispatchPayload = nt.DispatchPayload.Copy()

	if t.Artifacts != nil {
		artifacts := make([]*TaskArtifact, 0, len(t.Artifacts))
		for _, a := range nt.Artifacts {
			artifacts = append(artifacts, a.Copy())
		}
		nt.Artifacts = artifacts
	}

	// Config holds arbitrarily nested driver config, so use copystructure
	// for a true deep copy; a failure indicates an uncopyable value and is
	// treated as a programmer error.
	if i, err := copystructure.Copy(nt.Config); err != nil {
		panic(err.Error())
	} else {
		nt.Config = i.(map[string]interface{})
	}

	if t.Templates != nil {
		templates := make([]*Template, len(t.Templates))
		for i, tmpl := range nt.Templates {
			templates[i] = tmpl.Copy()
		}
		nt.Templates = templates
	}

	return nt
}

// Canonicalize canonicalizes fields in the task.
func (t *Task) Canonicalize(job *Job, tg *TaskGroup) {
	// Ensure that an empty and nil map are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
4181 if len(t.Meta) == 0 { 4182 t.Meta = nil 4183 } 4184 if len(t.Config) == 0 { 4185 t.Config = nil 4186 } 4187 if len(t.Env) == 0 { 4188 t.Env = nil 4189 } 4190 4191 for _, service := range t.Services { 4192 service.Canonicalize(job.Name, tg.Name, t.Name) 4193 } 4194 4195 // If Resources are nil initialize them to defaults, otherwise canonicalize 4196 if t.Resources == nil { 4197 t.Resources = DefaultResources() 4198 } else { 4199 t.Resources.Canonicalize() 4200 } 4201 4202 // Set the default timeout if it is not specified. 4203 if t.KillTimeout == 0 { 4204 t.KillTimeout = DefaultKillTimeout 4205 } 4206 4207 if t.Vault != nil { 4208 t.Vault.Canonicalize() 4209 } 4210 4211 for _, template := range t.Templates { 4212 template.Canonicalize() 4213 } 4214 } 4215 4216 func (t *Task) GoString() string { 4217 return fmt.Sprintf("*%#v", *t) 4218 } 4219 4220 // Validate is used to sanity check a task 4221 func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string) error { 4222 var mErr multierror.Error 4223 if t.Name == "" { 4224 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 4225 } 4226 if strings.ContainsAny(t.Name, `/\`) { 4227 // We enforce this so that when creating the directory on disk it will 4228 // not have any slashes. 4229 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 4230 } 4231 if t.Driver == "" { 4232 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 4233 } 4234 if t.KillTimeout < 0 { 4235 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 4236 } 4237 if t.ShutdownDelay < 0 { 4238 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 4239 } 4240 4241 // Validate the resources. 
4242 if t.Resources == nil { 4243 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 4244 } else { 4245 if err := t.Resources.MeetsMinResources(); err != nil { 4246 mErr.Errors = append(mErr.Errors, err) 4247 } 4248 4249 // Ensure the task isn't asking for disk resources 4250 if t.Resources.DiskMB > 0 { 4251 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 4252 } 4253 } 4254 4255 // Validate the log config 4256 if t.LogConfig == nil { 4257 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 4258 } else if err := t.LogConfig.Validate(); err != nil { 4259 mErr.Errors = append(mErr.Errors, err) 4260 } 4261 4262 for idx, constr := range t.Constraints { 4263 if err := constr.Validate(); err != nil { 4264 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 4265 mErr.Errors = append(mErr.Errors, outer) 4266 } 4267 4268 switch constr.Operand { 4269 case ConstraintDistinctHosts, ConstraintDistinctProperty: 4270 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand) 4271 mErr.Errors = append(mErr.Errors, outer) 4272 } 4273 } 4274 4275 if jobType == JobTypeSystem { 4276 if t.Affinities != nil { 4277 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 4278 } 4279 } else { 4280 for idx, affinity := range t.Affinities { 4281 if err := affinity.Validate(); err != nil { 4282 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 4283 mErr.Errors = append(mErr.Errors, outer) 4284 } 4285 } 4286 } 4287 4288 // Validate Services 4289 if err := validateServices(t); err != nil { 4290 mErr.Errors = append(mErr.Errors, err) 4291 } 4292 4293 if t.LogConfig != nil && ephemeralDisk != nil { 4294 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 4295 if ephemeralDisk.SizeMB <= logUsage { 4296 mErr.Errors = append(mErr.Errors, 4297 
fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 4298 logUsage, ephemeralDisk.SizeMB)) 4299 } 4300 } 4301 4302 for idx, artifact := range t.Artifacts { 4303 if err := artifact.Validate(); err != nil { 4304 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 4305 mErr.Errors = append(mErr.Errors, outer) 4306 } 4307 } 4308 4309 if t.Vault != nil { 4310 if err := t.Vault.Validate(); err != nil { 4311 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 4312 } 4313 } 4314 4315 destinations := make(map[string]int, len(t.Templates)) 4316 for idx, tmpl := range t.Templates { 4317 if err := tmpl.Validate(); err != nil { 4318 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 4319 mErr.Errors = append(mErr.Errors, outer) 4320 } 4321 4322 if other, ok := destinations[tmpl.DestPath]; ok { 4323 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 4324 mErr.Errors = append(mErr.Errors, outer) 4325 } else { 4326 destinations[tmpl.DestPath] = idx + 1 4327 } 4328 } 4329 4330 // Validate the dispatch payload block if there 4331 if t.DispatchPayload != nil { 4332 if err := t.DispatchPayload.Validate(); err != nil { 4333 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err)) 4334 } 4335 } 4336 4337 return mErr.ErrorOrNil() 4338 } 4339 4340 // validateServices takes a task and validates the services within it are valid 4341 // and reference ports that exist. 4342 func validateServices(t *Task) error { 4343 var mErr multierror.Error 4344 4345 // Ensure that services don't ask for nonexistent ports and their names are 4346 // unique. 
4347 servicePorts := make(map[string]map[string]struct{}) 4348 addServicePort := func(label, service string) { 4349 if _, ok := servicePorts[label]; !ok { 4350 servicePorts[label] = map[string]struct{}{} 4351 } 4352 servicePorts[label][service] = struct{}{} 4353 } 4354 knownServices := make(map[string]struct{}) 4355 for i, service := range t.Services { 4356 if err := service.Validate(); err != nil { 4357 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 4358 mErr.Errors = append(mErr.Errors, outer) 4359 } 4360 4361 // Ensure that services with the same name are not being registered for 4362 // the same port 4363 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 4364 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 4365 } 4366 knownServices[service.Name+service.PortLabel] = struct{}{} 4367 4368 if service.PortLabel != "" { 4369 if service.AddressMode == "driver" { 4370 // Numeric port labels are valid for address_mode=driver 4371 _, err := strconv.Atoi(service.PortLabel) 4372 if err != nil { 4373 // Not a numeric port label, add it to list to check 4374 addServicePort(service.PortLabel, service.Name) 4375 } 4376 } else { 4377 addServicePort(service.PortLabel, service.Name) 4378 } 4379 } 4380 4381 // Ensure that check names are unique and have valid ports 4382 knownChecks := make(map[string]struct{}) 4383 for _, check := range service.Checks { 4384 if _, ok := knownChecks[check.Name]; ok { 4385 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 4386 } 4387 knownChecks[check.Name] = struct{}{} 4388 4389 if !check.RequiresPort() { 4390 // No need to continue validating check if it doesn't need a port 4391 continue 4392 } 4393 4394 effectivePort := check.PortLabel 4395 if effectivePort == "" { 4396 // Inherits from service 4397 effectivePort = service.PortLabel 4398 } 4399 4400 if effectivePort == "" { 4401 mErr.Errors = append(mErr.Errors, 
fmt.Errorf("check %q is missing a port", check.Name)) 4402 continue 4403 } 4404 4405 isNumeric := false 4406 portNumber, err := strconv.Atoi(effectivePort) 4407 if err == nil { 4408 isNumeric = true 4409 } 4410 4411 // Numeric ports are fine for address_mode = "driver" 4412 if check.AddressMode == "driver" && isNumeric { 4413 if portNumber <= 0 { 4414 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber)) 4415 } 4416 continue 4417 } 4418 4419 if isNumeric { 4420 mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, check.Name, portNumber)) 4421 continue 4422 } 4423 4424 // PortLabel must exist, report errors by its parent service 4425 addServicePort(effectivePort, service.Name) 4426 } 4427 } 4428 4429 // Get the set of port labels. 4430 portLabels := make(map[string]struct{}) 4431 if t.Resources != nil { 4432 for _, network := range t.Resources.Networks { 4433 ports := network.PortLabels() 4434 for portLabel := range ports { 4435 portLabels[portLabel] = struct{}{} 4436 } 4437 } 4438 } 4439 4440 // Iterate over a sorted list of keys to make error listings stable 4441 keys := make([]string, 0, len(servicePorts)) 4442 for p := range servicePorts { 4443 keys = append(keys, p) 4444 } 4445 sort.Strings(keys) 4446 4447 // Ensure all ports referenced in services exist. 
4448 for _, servicePort := range keys { 4449 services := servicePorts[servicePort] 4450 _, ok := portLabels[servicePort] 4451 if !ok { 4452 names := make([]string, 0, len(services)) 4453 for name := range services { 4454 names = append(names, name) 4455 } 4456 4457 // Keep order deterministic 4458 sort.Strings(names) 4459 joined := strings.Join(names, ", ") 4460 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 4461 mErr.Errors = append(mErr.Errors, err) 4462 } 4463 } 4464 4465 // Ensure address mode is valid 4466 return mErr.ErrorOrNil() 4467 } 4468 4469 const ( 4470 // TemplateChangeModeNoop marks that no action should be taken if the 4471 // template is re-rendered 4472 TemplateChangeModeNoop = "noop" 4473 4474 // TemplateChangeModeSignal marks that the task should be signaled if the 4475 // template is re-rendered 4476 TemplateChangeModeSignal = "signal" 4477 4478 // TemplateChangeModeRestart marks that the task should be restarted if the 4479 // template is re-rendered 4480 TemplateChangeModeRestart = "restart" 4481 ) 4482 4483 var ( 4484 // TemplateChangeModeInvalidError is the error for when an invalid change 4485 // mode is given 4486 TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart") 4487 ) 4488 4489 // Template represents a template configuration to be rendered for a given task 4490 type Template struct { 4491 // SourcePath is the path to the template to be rendered 4492 SourcePath string 4493 4494 // DestPath is the path to where the template should be rendered 4495 DestPath string 4496 4497 // EmbeddedTmpl store the raw template. 
This is useful for smaller templates 4498 // where they are embedded in the job file rather than sent as an artifact 4499 EmbeddedTmpl string 4500 4501 // ChangeMode indicates what should be done if the template is re-rendered 4502 ChangeMode string 4503 4504 // ChangeSignal is the signal that should be sent if the change mode 4505 // requires it. 4506 ChangeSignal string 4507 4508 // Splay is used to avoid coordinated restarts of processes by applying a 4509 // random wait between 0 and the given splay value before signalling the 4510 // application of a change 4511 Splay time.Duration 4512 4513 // Perms is the permission the file should be written out with. 4514 Perms string 4515 4516 // LeftDelim and RightDelim are optional configurations to control what 4517 // delimiter is utilized when parsing the template. 4518 LeftDelim string 4519 RightDelim string 4520 4521 // Envvars enables exposing the template as environment variables 4522 // instead of as a file. The template must be of the form: 4523 // 4524 // VAR_NAME_1={{ key service/my-key }} 4525 // VAR_NAME_2=raw string and {{ env "attr.kernel.name" }} 4526 // 4527 // Lines will be split on the initial "=" with the first part being the 4528 // key name and the second part the value. 4529 // Empty lines and lines starting with # will be ignored, but to avoid 4530 // escaping issues #s within lines will not be treated as comments. 4531 Envvars bool 4532 4533 // VaultGrace is the grace duration between lease renewal and reacquiring a 4534 // secret. If the lease of a secret is less than the grace, a new secret is 4535 // acquired. 4536 VaultGrace time.Duration 4537 } 4538 4539 // DefaultTemplate returns a default template. 
func DefaultTemplate() *Template {
	return &Template{
		ChangeMode: TemplateChangeModeRestart,
		Splay:      5 * time.Second,
		Perms:      "0644",
	}
}

// Copy returns a shallow copy of the template, or nil for a nil receiver.
// All fields are value types, so a shallow copy is a full copy.
func (t *Template) Copy() *Template {
	if t == nil {
		return nil
	}
	copy := new(Template)
	*copy = *t
	return copy
}

// Canonicalize normalizes user input; signal names are upper-cased so
// "sighup" and "SIGHUP" compare equal.
func (t *Template) Canonicalize() {
	if t.ChangeSignal != "" {
		t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
	}
}

// Validate sanity checks the template, accumulating all problems into a
// multierror (nil when valid).
func (t *Template) Validate() error {
	var mErr multierror.Error

	// Verify we have something to render
	if t.SourcePath == "" && t.EmbeddedTmpl == "" {
		multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template"))
	}

	// Verify we can render somewhere
	if t.DestPath == "" {
		multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template"))
	}

	// Verify the destination doesn't escape
	escaped, err := PathEscapesAllocDir("task", t.DestPath)
	if err != nil {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
	} else if escaped {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
	}

	// Verify a proper change mode
	switch t.ChangeMode {
	case TemplateChangeModeNoop, TemplateChangeModeRestart:
	case TemplateChangeModeSignal:
		if t.ChangeSignal == "" {
			multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal"))
		}
		if t.Envvars {
			multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates"))
		}
	default:
		multierror.Append(&mErr, TemplateChangeModeInvalidError)
	}

	// Verify the splay is positive
	if t.Splay < 0 {
		multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value"))
	}

	// Verify the permissions: must parse as an octal file mode. The 12-bit
	// width admits the setuid/setgid/sticky bits on top of rwxrwxrwx.
	if t.Perms != "" {
		if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil {
			multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err))
		}
	}

	if t.VaultGrace.Nanoseconds() < 0 {
		multierror.Append(&mErr, fmt.Errorf("Vault grace must be greater than zero: %v < 0", t.VaultGrace))
	}

	return mErr.ErrorOrNil()
}

// Set of possible states for a task.
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)

// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// The current state of the task.
	State string

	// Failed marks a task as having failed
	Failed bool

	// Restarts is the number of times the task has restarted
	Restarts uint64

	// LastRestart is the time the task last restarted. It is updated each time the
	// task restarts
	LastRestart time.Time

	// StartedAt is the time the task is started. It is updated each time the
	// task starts
	StartedAt time.Time

	// FinishedAt is the time at which the task transitioned to dead and will
	// not be started again.
	FinishedAt time.Time

	// Series of task events that transition the state of the task.
	Events []*TaskEvent
}

// Copy returns a deep copy of the task state, or nil for a nil receiver.
func (ts *TaskState) Copy() *TaskState {
	if ts == nil {
		return nil
	}
	copy := new(TaskState)
	*copy = *ts

	if ts.Events != nil {
		copy.Events = make([]*TaskEvent, len(ts.Events))
		for i, e := range ts.Events {
			copy.Events[i] = e.Copy()
		}
	}
	return copy
}

// Successful returns whether a task finished successfully. This doesn't really
// have meaning on a non-batch allocation because a service and system
// allocation should not finish.
func (ts *TaskState) Successful() bool {
	// Only a dead task with at least one event can have succeeded.
	l := len(ts.Events)
	if ts.State != TaskStateDead || l == 0 {
		return false
	}

	// The final event must be a clean termination (exit code 0).
	e := ts.Events[l-1]
	if e.Type != TaskTerminated {
		return false
	}

	return e.ExitCode == 0
}

const (
	// TaskSetupFailure indicates that the task could not be started due to a
	// setup failure.
	TaskSetupFailure = "Setup Failure"

	// TaskDriverFailure indicates that the task could not be started due to a
	// failure in the driver.
	TaskDriverFailure = "Driver Failure"

	// TaskReceived signals that the task has been pulled by the client at the
	// given timestamp.
	TaskReceived = "Received"

	// TaskFailedValidation indicates the task was invalid and as such was not
	// run.
	TaskFailedValidation = "Failed Validation"

	// TaskStarted signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// TaskTerminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// TaskKilling indicates a kill signal has been sent to the task.
	TaskKilling = "Killing"

	// TaskKilled indicates a user has killed the task.
	TaskKilled = "Killed"

	// TaskRestarting indicates that task terminated and is being restarted.
	TaskRestarting = "Restarting"

	// TaskNotRestarting indicates that the task has failed and is not being
	// restarted because it has exceeded its restart policy.
	TaskNotRestarting = "Not Restarting"

	// TaskRestartSignal indicates that the task has been signalled to be
	// restarted
	TaskRestartSignal = "Restart Signaled"

	// TaskSignaling indicates that the task is being signalled.
	TaskSignaling = "Signaling"

	// TaskDownloadingArtifacts means the task is downloading the artifacts
	// specified in the task.
	TaskDownloadingArtifacts = "Downloading Artifacts"

	// TaskArtifactDownloadFailed indicates that downloading the artifacts
	// failed.
	TaskArtifactDownloadFailed = "Failed Artifact Download"

	// TaskBuildingTaskDir indicates that the task directory/chroot is being
	// built.
	TaskBuildingTaskDir = "Building Task Directory"

	// TaskSetup indicates the task runner is setting up the task environment
	TaskSetup = "Task Setup"

	// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
	// exceeded the requested disk resources.
	TaskDiskExceeded = "Disk Resources Exceeded"

	// TaskSiblingFailed indicates that a sibling task in the task group has
	// failed.
	TaskSiblingFailed = "Sibling Task Failed"

	// TaskDriverMessage is an informational event message emitted by
	// drivers such as when they're performing a long running action like
	// downloading an image.
	TaskDriverMessage = "Driver"

	// TaskLeaderDead indicates that the leader task within the has finished.
	TaskLeaderDead = "Leader Task Dead"
)

// TaskEvent is an event that effects the state of a task and contains meta-data
// appropriate to the events type.
type TaskEvent struct {
	Type string
	Time int64 // Unix Nanosecond timestamp

	Message string // A possible message explaining the termination of the task.

	// DisplayMessage is a human friendly message about the event
	DisplayMessage string

	// Details is a map with annotated info about the event
	Details map[string]string

	// DEPRECATION NOTICE: The following fields are deprecated and will be removed
	// in a future release. Field values are available in the Details map.

	// FailsTask marks whether this event fails the task.
	// Deprecated, use Details["fails_task"] to access this.
	FailsTask bool

	// Restart fields.
	// Deprecated, use Details["restart_reason"] to access this.
	RestartReason string

	// Setup Failure fields.
	// Deprecated, use Details["setup_error"] to access this.
	SetupError string

	// Driver Failure fields.
	// Deprecated, use Details["driver_error"] to access this.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.

	// Deprecated, use Details["exit_code"] to access this.
	ExitCode int // The exit code of the task.

	// Deprecated, use Details["signal"] to access this.
	Signal int // The signal that terminated the task.

	// Killing fields
	// Deprecated, use Details["kill_timeout"] to access this.
	KillTimeout time.Duration

	// Task Killed Fields.
	// Deprecated, use Details["kill_error"] to access this.
	KillError string // Error killing the task.

	// KillReason is the reason the task was killed
	// Deprecated, use Details["kill_reason"] to access this.
	KillReason string

	// TaskRestarting fields.
	// Deprecated, use Details["start_delay"] to access this.
	StartDelay int64 // The sleep period before restarting the task in unix nanoseconds.

	// Artifact Download fields
	// Deprecated, use Details["download_error"] to access this.
	DownloadError string // Error downloading artifacts

	// Validation fields
	// Deprecated, use Details["validation_error"] to access this.
	ValidationError string // Validation error

	// The maximum allowed task disk size.
	// Deprecated, use Details["disk_limit"] to access this.
	DiskLimit int64

	// Name of the sibling task that caused termination of the task that
	// the TaskEvent refers to.
	// Deprecated, use Details["failed_sibling"] to access this.
	FailedSibling string

	// VaultError is the error from token renewal
	// Deprecated, use Details["vault_renewal_error"] to access this.
	VaultError string

	// TaskSignalReason indicates the reason the task is being signalled.
	// Deprecated, use Details["task_signal_reason"] to access this.
	TaskSignalReason string

	// TaskSignal is the signal that was sent to the task
	// Deprecated, use Details["task_signal"] to access this.
	TaskSignal string

	// DriverMessage indicates a driver action being taken.
	// Deprecated, use Details["driver_message"] to access this.
	DriverMessage string

	// GenericSource is the source of a message.
	// Deprecated, is redundant with event type.
	GenericSource string
}

// PopulateEventDisplayMessage fills in DisplayMessage with a human friendly
// description derived from the event type and its deprecated detail fields.
// It is a no-op if DisplayMessage is already set.
func (event *TaskEvent) PopulateEventDisplayMessage() {
	// Build up the description based on the event type.
	if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why?
		return
	}

	if event.DisplayMessage != "" {
		return
	}

	var desc string
	switch event.Type {
	case TaskSetup:
		desc = event.Message
	case TaskStarted:
		desc = "Task started by client"
	case TaskReceived:
		desc = "Task received by client"
	case TaskFailedValidation:
		if event.ValidationError != "" {
			desc = event.ValidationError
		} else {
			desc = "Validation of task failed"
		}
	case TaskSetupFailure:
		if event.SetupError != "" {
			desc = event.SetupError
		} else {
			desc = "Task setup failed"
		}
	case TaskDriverFailure:
		if event.DriverError != "" {
			desc = event.DriverError
		} else {
			desc = "Failed to start task"
		}
	case TaskDownloadingArtifacts:
		desc = "Client is downloading artifacts"
	case TaskArtifactDownloadFailed:
		if event.DownloadError != "" {
			desc = event.DownloadError
		} else {
			desc = "Failed to download artifacts"
		}
	case TaskKilling:
		if event.KillReason != "" {
			desc = event.KillReason
		} else if event.KillTimeout != 0 {
			desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout)
		} else {
			desc = "Sent interrupt"
		}
	case TaskKilled:
		if event.KillError != "" {
			desc = event.KillError
		} else {
			desc = "Task successfully killed"
		}
	case TaskTerminated:
		var parts []string
		parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode))

		if event.Signal != 0 {
			parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal))
		}

		if event.Message != "" {
			parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message))
		}
		desc = strings.Join(parts, ", ")
	case TaskRestarting:
		in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay))
		if event.RestartReason != "" && event.RestartReason != ReasonWithinPolicy {
			desc = fmt.Sprintf("%s - %s", event.RestartReason, in)
		} else {
			desc = in
		}
	case TaskNotRestarting:
		if event.RestartReason != "" {
			desc = event.RestartReason
		} else {
			desc = "Task exceeded restart policy"
		}
	case TaskSiblingFailed:
		if event.FailedSibling != "" {
			desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling)
		} else {
			desc = "Task's sibling failed"
		}
	case TaskSignaling:
		sig := event.TaskSignal
		reason := event.TaskSignalReason

		if sig == "" && reason == "" {
			desc = "Task being sent a signal"
		} else if sig == "" {
			desc = reason
		} else if reason == "" {
			desc = fmt.Sprintf("Task being sent signal %v", sig)
		} else {
			desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason)
		}
	case TaskRestartSignal:
		if event.RestartReason != "" {
			desc = event.RestartReason
		} else {
			desc = "Task signaled to restart"
		}
	case TaskDriverMessage:
		desc = event.DriverMessage
	case TaskLeaderDead:
		desc = "Leader Task in Group dead"
	default:
		desc = event.Message
	}

	event.DisplayMessage = desc
}

// GoString implements fmt.GoStringer with a compact "<time> - <type>" form.
func (te *TaskEvent) GoString() string {
	return fmt.Sprintf("%v - %v", te.Time, te.Type)
}

// SetMessage sets the message of TaskEvent
func (te *TaskEvent) SetMessage(msg string) *TaskEvent {
	te.Message = msg
	te.Details["message"] = msg
	return te
}

// Copy returns a shallow copy of the event, or nil for a nil receiver.
// NOTE(review): the Details map is shared between original and copy —
// presumably events are treated as immutable after construction; verify
// before mutating Details on a copy.
func (te *TaskEvent) Copy() *TaskEvent {
	if te == nil {
		return nil
	}
	copy := new(TaskEvent)
	*copy = *te
	return copy
}

// NewTaskEvent creates a new event of the given type stamped with the current
// time and an empty, non-nil Details map (the Set* builders below write into
// it without nil checks).
func NewTaskEvent(event string) *TaskEvent {
	return &TaskEvent{
		Type:    event,
		Time:    time.Now().UnixNano(),
		Details: make(map[string]string),
	}
}

// SetSetupError is used to store an error that occurred while setting up the
// task
func (e *TaskEvent) SetSetupError(err error) *TaskEvent {
	if err != nil {
		e.SetupError = err.Error()
		e.Details["setup_error"] = err.Error()
	}
	return e
}

// SetFailsTask marks the event as one that fails the task.
func (e *TaskEvent) SetFailsTask() *TaskEvent {
	e.FailsTask = true
	e.Details["fails_task"] = "true"
	return e
}

// SetDriverError records a driver error on the event; nil errors are ignored.
func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
	if err != nil {
		e.DriverError = err.Error()
		e.Details["driver_error"] = err.Error()
	}
	return e
}

// SetExitCode records the task's exit code.
func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
	e.ExitCode = c
	e.Details["exit_code"] = fmt.Sprintf("%d", c)
	return e
}

// SetSignal records the signal that terminated the task.
func (e *TaskEvent) SetSignal(s int) *TaskEvent {
	e.Signal = s
	e.Details["signal"] = fmt.Sprintf("%d", s)
	return e
}

// SetExitMessage records an exit error as the event message; nil is ignored.
func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
	if err != nil {
		e.Message = err.Error()
		e.Details["exit_message"] = err.Error()
	}
	return e
}

// SetKillError records an error encountered while killing the task.
func (e *TaskEvent) SetKillError(err error) *TaskEvent {
	if err != nil {
		e.KillError = err.Error()
		e.Details["kill_error"] = err.Error()
	}
	return e
}

// SetKillReason records why the task was killed.
func (e *TaskEvent) SetKillReason(r string) *TaskEvent {
	e.KillReason = r
	e.Details["kill_reason"] = r
	return e
}

// SetRestartDelay records the delay before the task restarts. The detail
// value is the raw nanosecond count.
func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent {
	e.StartDelay = int64(delay)
	e.Details["start_delay"] = fmt.Sprintf("%d", delay)
	return e
}

// SetRestartReason records why the task is restarting.
func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent {
	e.RestartReason = reason
	e.Details["restart_reason"] = reason
	return e
}

// SetTaskSignalReason records why the task is being signalled.
func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent {
	e.TaskSignalReason = r
	e.Details["task_signal_reason"] = r
	return e
}

// SetTaskSignal records the signal sent to the task.
func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent {
	e.TaskSignal = s.String()
	e.Details["task_signal"] = s.String()
	return e
}

// SetDownloadError records an artifact download error; nil is ignored.
func (e *TaskEvent) SetDownloadError(err error) *TaskEvent {
	if err != nil {
		e.DownloadError = err.Error()
		e.Details["download_error"] = err.Error()
	}
	return e
}

// SetValidationError records a validation error; nil is ignored.
func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
	if err != nil {
		e.ValidationError = err.Error()
		e.Details["validation_error"] = err.Error()
	}
	return e
}

// SetKillTimeout records the grace period given before force killing.
func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent {
	e.KillTimeout = timeout
	e.Details["kill_timeout"] = timeout.String()
	return e
}

// SetDiskLimit records the maximum allowed task disk size in bytes.
func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent {
	e.DiskLimit = limit
	e.Details["disk_limit"] = fmt.Sprintf("%d", limit)
	return e
}

// SetFailedSibling records the name of the sibling task whose failure caused
// this event.
func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent {
	e.FailedSibling = sibling
	e.Details["failed_sibling"] = sibling
	return e
}

// SetVaultRenewalError records a Vault token renewal error; nil is ignored.
func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent {
	if err != nil {
		e.VaultError = err.Error()
		e.Details["vault_renewal_error"] = err.Error()
	}
	return e
}

// SetDriverMessage records an informational message from the driver.
func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent {
	e.DriverMessage = m
	e.Details["driver_message"] = m
	return e
}

// TaskArtifact is an artifact to download before running the task.
type TaskArtifact struct {
	// GetterSource is the source to download an artifact using go-getter
	GetterSource string

	// GetterOptions are options to use when downloading the artifact using
	// go-getter.
	GetterOptions map[string]string

	// GetterMode is the go-getter.ClientMode for fetching resources.
	// Defaults to "any" but can be set to "file" or "dir".
	GetterMode string

	// RelativeDest is the download destination given relative to the task's
	// directory.
	RelativeDest string
}

// Copy returns a deep copy of the artifact, or nil for a nil receiver.
func (ta *TaskArtifact) Copy() *TaskArtifact {
	if ta == nil {
		return nil
	}
	nta := new(TaskArtifact)
	*nta = *ta
	nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions)
	return nta
}

// GoString implements fmt.GoStringer.
func (ta *TaskArtifact) GoString() string {
	return fmt.Sprintf("%+v", ta)
}

// PathEscapesAllocDir returns if the given path escapes the allocation
// directory. The prefix allows adding a prefix if the path will be joined, for
// example a "task/local" prefix may be provided if the path will be joined
// against that prefix.
5169 func PathEscapesAllocDir(prefix, path string) (bool, error) { 5170 // Verify the destination doesn't escape the tasks directory 5171 alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/")) 5172 if err != nil { 5173 return false, err 5174 } 5175 abs, err := filepath.Abs(filepath.Join(alloc, prefix, path)) 5176 if err != nil { 5177 return false, err 5178 } 5179 rel, err := filepath.Rel(alloc, abs) 5180 if err != nil { 5181 return false, err 5182 } 5183 5184 return strings.HasPrefix(rel, ".."), nil 5185 } 5186 5187 func (ta *TaskArtifact) Validate() error { 5188 // Verify the source 5189 var mErr multierror.Error 5190 if ta.GetterSource == "" { 5191 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 5192 } 5193 5194 switch ta.GetterMode { 5195 case "": 5196 // Default to any 5197 ta.GetterMode = GetterModeAny 5198 case GetterModeAny, GetterModeFile, GetterModeDir: 5199 // Ok 5200 default: 5201 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s", 5202 ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir)) 5203 } 5204 5205 escaped, err := PathEscapesAllocDir("task", ta.RelativeDest) 5206 if err != nil { 5207 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 5208 } else if escaped { 5209 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 5210 } 5211 5212 // Verify the checksum 5213 if check, ok := ta.GetterOptions["checksum"]; ok { 5214 check = strings.TrimSpace(check) 5215 if check == "" { 5216 mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value cannot be empty")) 5217 return mErr.ErrorOrNil() 5218 } 5219 5220 parts := strings.Split(check, ":") 5221 if l := len(parts); l != 2 { 5222 mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check)) 5223 return mErr.ErrorOrNil() 5224 } 5225 5226 checksumVal := parts[1] 5227 checksumBytes, err := 
hex.DecodeString(checksumVal) 5228 if err != nil { 5229 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err)) 5230 return mErr.ErrorOrNil() 5231 } 5232 5233 checksumType := parts[0] 5234 expectedLength := 0 5235 switch checksumType { 5236 case "md5": 5237 expectedLength = md5.Size 5238 case "sha1": 5239 expectedLength = sha1.Size 5240 case "sha256": 5241 expectedLength = sha256.Size 5242 case "sha512": 5243 expectedLength = sha512.Size 5244 default: 5245 mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType)) 5246 return mErr.ErrorOrNil() 5247 } 5248 5249 if len(checksumBytes) != expectedLength { 5250 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal)) 5251 return mErr.ErrorOrNil() 5252 } 5253 } 5254 5255 return mErr.ErrorOrNil() 5256 } 5257 5258 const ( 5259 ConstraintDistinctProperty = "distinct_property" 5260 ConstraintDistinctHosts = "distinct_hosts" 5261 ConstraintRegex = "regexp" 5262 ConstraintVersion = "version" 5263 ConstraintSetContains = "set_contains" 5264 ConstraintSetContainsAll = "set_contains_all" 5265 ConstraintSetContaintsAny = "set_contains_any" 5266 ) 5267 5268 // Constraints are used to restrict placement options. 
5269 type Constraint struct { 5270 LTarget string // Left-hand target 5271 RTarget string // Right-hand target 5272 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 5273 str string // Memoized string 5274 } 5275 5276 // Equal checks if two constraints are equal 5277 func (c *Constraint) Equal(o *Constraint) bool { 5278 return c.LTarget == o.LTarget && 5279 c.RTarget == o.RTarget && 5280 c.Operand == o.Operand 5281 } 5282 5283 func (c *Constraint) Copy() *Constraint { 5284 if c == nil { 5285 return nil 5286 } 5287 nc := new(Constraint) 5288 *nc = *c 5289 return nc 5290 } 5291 5292 func (c *Constraint) String() string { 5293 if c.str != "" { 5294 return c.str 5295 } 5296 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 5297 return c.str 5298 } 5299 5300 func (c *Constraint) Validate() error { 5301 var mErr multierror.Error 5302 if c.Operand == "" { 5303 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 5304 } 5305 5306 // requireLtarget specifies whether the constraint requires an LTarget to be 5307 // provided. 
5308 requireLtarget := true 5309 5310 // Perform additional validation based on operand 5311 switch c.Operand { 5312 case ConstraintDistinctHosts: 5313 requireLtarget = false 5314 case ConstraintSetContains: 5315 if c.RTarget == "" { 5316 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget")) 5317 } 5318 case ConstraintRegex: 5319 if _, err := regexp.Compile(c.RTarget); err != nil { 5320 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 5321 } 5322 case ConstraintVersion: 5323 if _, err := version.NewConstraint(c.RTarget); err != nil { 5324 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 5325 } 5326 case ConstraintDistinctProperty: 5327 // If a count is set, make sure it is convertible to a uint64 5328 if c.RTarget != "" { 5329 count, err := strconv.ParseUint(c.RTarget, 10, 64) 5330 if err != nil { 5331 mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err)) 5332 } else if count < 1 { 5333 mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count)) 5334 } 5335 } 5336 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 5337 if c.RTarget == "" { 5338 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand)) 5339 } 5340 default: 5341 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand)) 5342 } 5343 5344 // Ensure we have an LTarget for the constraints that need one 5345 if requireLtarget && c.LTarget == "" { 5346 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint")) 5347 } 5348 5349 return mErr.ErrorOrNil() 5350 } 5351 5352 // Affinity is used to score placement options based on a weight 5353 type Affinity struct { 5354 LTarget string // Left-hand target 5355 RTarget string // Right-hand target 5356 Operand string // 
Affinity operand (<=, <, =, !=, >, >=), set_contains_all, set_contains_any 5357 Weight float64 // Weight applied to nodes that match the affinity. Can be negative 5358 str string // Memoized string 5359 } 5360 5361 // Equal checks if two affinities are equal 5362 func (a *Affinity) Equal(o *Affinity) bool { 5363 return a.LTarget == o.LTarget && 5364 a.RTarget == o.RTarget && 5365 a.Operand == o.Operand && 5366 a.Weight == o.Weight 5367 } 5368 5369 func (a *Affinity) Copy() *Affinity { 5370 if a == nil { 5371 return nil 5372 } 5373 na := new(Affinity) 5374 *na = *a 5375 return na 5376 } 5377 5378 func (a *Affinity) String() string { 5379 if a.str != "" { 5380 return a.str 5381 } 5382 a.str = fmt.Sprintf("%s %s %s %v", a.LTarget, a.Operand, a.RTarget, a.Weight) 5383 return a.str 5384 } 5385 5386 func (a *Affinity) Validate() error { 5387 var mErr multierror.Error 5388 if a.Operand == "" { 5389 mErr.Errors = append(mErr.Errors, errors.New("Missing affinity operand")) 5390 } 5391 5392 // Perform additional validation based on operand 5393 switch a.Operand { 5394 case ConstraintSetContainsAll, ConstraintSetContaintsAny, ConstraintSetContains: 5395 if a.RTarget == "" { 5396 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains operators require an RTarget")) 5397 } 5398 case ConstraintRegex: 5399 if _, err := regexp.Compile(a.RTarget); err != nil { 5400 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 5401 } 5402 case ConstraintVersion: 5403 if _, err := version.NewConstraint(a.RTarget); err != nil { 5404 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version affinity is invalid: %v", err)) 5405 } 5406 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 5407 if a.RTarget == "" { 5408 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", a.Operand)) 5409 } 5410 default: 5411 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown affinity operator %q", a.Operand)) 5412 } 5413 5414 // Ensure we 
have an LTarget 5415 if a.LTarget == "" { 5416 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required")) 5417 } 5418 5419 // Ensure that weight is between -100 and 100, and not zero 5420 if a.Weight == 0 { 5421 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight cannot be zero")) 5422 } 5423 5424 if a.Weight > 100 || a.Weight < -100 { 5425 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight must be within the range [-100,100]")) 5426 } 5427 5428 return mErr.ErrorOrNil() 5429 } 5430 5431 // Spread is used to specify desired distribution of allocations according to weight 5432 type Spread struct { 5433 // Attribute is the node attribute used as the spread criteria 5434 Attribute string 5435 5436 // Weight is the relative weight of this spread, useful when there are multiple 5437 // spread and affinities 5438 Weight int 5439 5440 // SpreadTarget is used to describe desired percentages for each attribute value 5441 SpreadTarget []*SpreadTarget 5442 5443 // Memoized string representation 5444 str string 5445 } 5446 5447 func (s *Spread) Copy() *Spread { 5448 if s == nil { 5449 return nil 5450 } 5451 ns := new(Spread) 5452 *ns = *s 5453 5454 ns.SpreadTarget = CopySliceSpreadTarget(s.SpreadTarget) 5455 return ns 5456 } 5457 5458 func (s *Spread) String() string { 5459 if s.str != "" { 5460 return s.str 5461 } 5462 s.str = fmt.Sprintf("%s %s %v", s.Attribute, s.SpreadTarget, s.Weight) 5463 return s.str 5464 } 5465 5466 func (s *Spread) Validate() error { 5467 var mErr multierror.Error 5468 if s.Attribute == "" { 5469 mErr.Errors = append(mErr.Errors, errors.New("Missing spread attribute")) 5470 } 5471 if s.Weight <= 0 || s.Weight > 100 { 5472 mErr.Errors = append(mErr.Errors, errors.New("Spread stanza must have a positive weight from 0 to 100")) 5473 } 5474 seen := make(map[string]struct{}) 5475 sumPercent := uint32(0) 5476 5477 for _, target := range s.SpreadTarget { 5478 // Make sure there are no duplicates 5479 _, ok := 
seen[target.Value] 5480 if !ok { 5481 seen[target.Value] = struct{}{} 5482 } else { 5483 mErr.Errors = append(mErr.Errors, errors.New(fmt.Sprintf("Spread target value %q already defined", target.Value))) 5484 } 5485 if target.Percent < 0 || target.Percent > 100 { 5486 mErr.Errors = append(mErr.Errors, errors.New(fmt.Sprintf("Spread target percentage for value %q must be between 0 and 100", target.Value))) 5487 } 5488 sumPercent += target.Percent 5489 } 5490 if sumPercent > 100 { 5491 mErr.Errors = append(mErr.Errors, errors.New(fmt.Sprintf("Sum of spread target percentages must not be greater than 100%%; got %d%%", sumPercent))) 5492 } 5493 return mErr.ErrorOrNil() 5494 } 5495 5496 // SpreadTarget is used to specify desired percentages for each attribute value 5497 type SpreadTarget struct { 5498 // Value is a single attribute value, like "dc1" 5499 Value string 5500 5501 // Percent is the desired percentage of allocs 5502 Percent uint32 5503 5504 // Memoized string representation 5505 str string 5506 } 5507 5508 func (s *SpreadTarget) Copy() *SpreadTarget { 5509 if s == nil { 5510 return nil 5511 } 5512 5513 ns := new(SpreadTarget) 5514 *ns = *s 5515 return ns 5516 } 5517 5518 func (s *SpreadTarget) String() string { 5519 if s.str != "" { 5520 return s.str 5521 } 5522 s.str = fmt.Sprintf("%q %v%%", s.Value, s.Percent) 5523 return s.str 5524 } 5525 5526 // EphemeralDisk is an ephemeral disk object 5527 type EphemeralDisk struct { 5528 // Sticky indicates whether the allocation is sticky to a node 5529 Sticky bool 5530 5531 // SizeMB is the size of the local disk 5532 SizeMB int 5533 5534 // Migrate determines if Nomad client should migrate the allocation dir for 5535 // sticky allocations 5536 Migrate bool 5537 } 5538 5539 // DefaultEphemeralDisk returns a EphemeralDisk with default configurations 5540 func DefaultEphemeralDisk() *EphemeralDisk { 5541 return &EphemeralDisk{ 5542 SizeMB: 300, 5543 } 5544 } 5545 5546 // Validate validates EphemeralDisk 5547 func (d 
*EphemeralDisk) Validate() error { 5548 if d.SizeMB < 10 { 5549 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 5550 } 5551 return nil 5552 } 5553 5554 // Copy copies the EphemeralDisk struct and returns a new one 5555 func (d *EphemeralDisk) Copy() *EphemeralDisk { 5556 ld := new(EphemeralDisk) 5557 *ld = *d 5558 return ld 5559 } 5560 5561 var ( 5562 // VaultUnrecoverableError matches unrecoverable errors returned by a Vault 5563 // server 5564 VaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`) 5565 ) 5566 5567 const ( 5568 // VaultChangeModeNoop takes no action when a new token is retrieved. 5569 VaultChangeModeNoop = "noop" 5570 5571 // VaultChangeModeSignal signals the task when a new token is retrieved. 5572 VaultChangeModeSignal = "signal" 5573 5574 // VaultChangeModeRestart restarts the task when a new token is retrieved. 5575 VaultChangeModeRestart = "restart" 5576 ) 5577 5578 // Vault stores the set of permissions a task needs access to from Vault. 5579 type Vault struct { 5580 // Policies is the set of policies that the task needs access to 5581 Policies []string 5582 5583 // Env marks whether the Vault Token should be exposed as an environment 5584 // variable 5585 Env bool 5586 5587 // ChangeMode is used to configure the task's behavior when the Vault 5588 // token changes because the original token could not be renewed in time. 5589 ChangeMode string 5590 5591 // ChangeSignal is the signal sent to the task when a new token is 5592 // retrieved. This is only valid when using the signal change mode. 5593 ChangeSignal string 5594 } 5595 5596 func DefaultVaultBlock() *Vault { 5597 return &Vault{ 5598 Env: true, 5599 ChangeMode: VaultChangeModeRestart, 5600 } 5601 } 5602 5603 // Copy returns a copy of this Vault block. 
5604 func (v *Vault) Copy() *Vault { 5605 if v == nil { 5606 return nil 5607 } 5608 5609 nv := new(Vault) 5610 *nv = *v 5611 return nv 5612 } 5613 5614 func (v *Vault) Canonicalize() { 5615 if v.ChangeSignal != "" { 5616 v.ChangeSignal = strings.ToUpper(v.ChangeSignal) 5617 } 5618 } 5619 5620 // Validate returns if the Vault block is valid. 5621 func (v *Vault) Validate() error { 5622 if v == nil { 5623 return nil 5624 } 5625 5626 var mErr multierror.Error 5627 if len(v.Policies) == 0 { 5628 multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty")) 5629 } 5630 5631 for _, p := range v.Policies { 5632 if p == "root" { 5633 multierror.Append(&mErr, fmt.Errorf("Can not specify \"root\" policy")) 5634 } 5635 } 5636 5637 switch v.ChangeMode { 5638 case VaultChangeModeSignal: 5639 if v.ChangeSignal == "" { 5640 multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal)) 5641 } 5642 case VaultChangeModeNoop, VaultChangeModeRestart: 5643 default: 5644 multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode)) 5645 } 5646 5647 return mErr.ErrorOrNil() 5648 } 5649 5650 const ( 5651 // DeploymentStatuses are the various states a deployment can be be in 5652 DeploymentStatusRunning = "running" 5653 DeploymentStatusPaused = "paused" 5654 DeploymentStatusFailed = "failed" 5655 DeploymentStatusSuccessful = "successful" 5656 DeploymentStatusCancelled = "cancelled" 5657 5658 // DeploymentStatusDescriptions are the various descriptions of the states a 5659 // deployment can be in. 
5660 DeploymentStatusDescriptionRunning = "Deployment is running" 5661 DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires promotion" 5662 DeploymentStatusDescriptionPaused = "Deployment is paused" 5663 DeploymentStatusDescriptionSuccessful = "Deployment completed successfully" 5664 DeploymentStatusDescriptionStoppedJob = "Cancelled because job is stopped" 5665 DeploymentStatusDescriptionNewerJob = "Cancelled due to newer version of job" 5666 DeploymentStatusDescriptionFailedAllocations = "Failed due to unhealthy allocations" 5667 DeploymentStatusDescriptionProgressDeadline = "Failed due to progress deadline" 5668 DeploymentStatusDescriptionFailedByUser = "Deployment marked as failed" 5669 ) 5670 5671 // DeploymentStatusDescriptionRollback is used to get the status description of 5672 // a deployment when rolling back to an older job. 5673 func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string { 5674 return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion) 5675 } 5676 5677 // DeploymentStatusDescriptionRollbackNoop is used to get the status description of 5678 // a deployment when rolling back is not possible because it has the same specification 5679 func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string { 5680 return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion) 5681 } 5682 5683 // DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of 5684 // a deployment when there is no target to rollback to but autorevert is desired. 
5685 func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string { 5686 return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription) 5687 } 5688 5689 // Deployment is the object that represents a job deployment which is used to 5690 // transition a job between versions. 5691 type Deployment struct { 5692 // ID is a generated UUID for the deployment 5693 ID string 5694 5695 // Namespace is the namespace the deployment is created in 5696 Namespace string 5697 5698 // JobID is the job the deployment is created for 5699 JobID string 5700 5701 // JobVersion is the version of the job at which the deployment is tracking 5702 JobVersion uint64 5703 5704 // JobModifyIndex is the ModifyIndex of the job which the deployment is 5705 // tracking. 5706 JobModifyIndex uint64 5707 5708 // JobSpecModifyIndex is the JobModifyIndex of the job which the 5709 // deployment is tracking. 5710 JobSpecModifyIndex uint64 5711 5712 // JobCreateIndex is the create index of the job which the deployment is 5713 // tracking. It is needed so that if the job gets stopped and reran we can 5714 // present the correct list of deployments for the job and not old ones. 5715 JobCreateIndex uint64 5716 5717 // TaskGroups is the set of task groups effected by the deployment and their 5718 // current deployment status. 5719 TaskGroups map[string]*DeploymentState 5720 5721 // The status of the deployment 5722 Status string 5723 5724 // StatusDescription allows a human readable description of the deployment 5725 // status. 5726 StatusDescription string 5727 5728 CreateIndex uint64 5729 ModifyIndex uint64 5730 } 5731 5732 // NewDeployment creates a new deployment given the job. 
5733 func NewDeployment(job *Job) *Deployment { 5734 return &Deployment{ 5735 ID: uuid.Generate(), 5736 Namespace: job.Namespace, 5737 JobID: job.ID, 5738 JobVersion: job.Version, 5739 JobModifyIndex: job.ModifyIndex, 5740 JobSpecModifyIndex: job.JobModifyIndex, 5741 JobCreateIndex: job.CreateIndex, 5742 Status: DeploymentStatusRunning, 5743 StatusDescription: DeploymentStatusDescriptionRunning, 5744 TaskGroups: make(map[string]*DeploymentState, len(job.TaskGroups)), 5745 } 5746 } 5747 5748 func (d *Deployment) Copy() *Deployment { 5749 if d == nil { 5750 return nil 5751 } 5752 5753 c := &Deployment{} 5754 *c = *d 5755 5756 c.TaskGroups = nil 5757 if l := len(d.TaskGroups); d.TaskGroups != nil { 5758 c.TaskGroups = make(map[string]*DeploymentState, l) 5759 for tg, s := range d.TaskGroups { 5760 c.TaskGroups[tg] = s.Copy() 5761 } 5762 } 5763 5764 return c 5765 } 5766 5767 // Active returns whether the deployment is active or terminal. 5768 func (d *Deployment) Active() bool { 5769 switch d.Status { 5770 case DeploymentStatusRunning, DeploymentStatusPaused: 5771 return true 5772 default: 5773 return false 5774 } 5775 } 5776 5777 // GetID is a helper for getting the ID when the object may be nil 5778 func (d *Deployment) GetID() string { 5779 if d == nil { 5780 return "" 5781 } 5782 return d.ID 5783 } 5784 5785 // HasPlacedCanaries returns whether the deployment has placed canaries 5786 func (d *Deployment) HasPlacedCanaries() bool { 5787 if d == nil || len(d.TaskGroups) == 0 { 5788 return false 5789 } 5790 for _, group := range d.TaskGroups { 5791 if len(group.PlacedCanaries) != 0 { 5792 return true 5793 } 5794 } 5795 return false 5796 } 5797 5798 // RequiresPromotion returns whether the deployment requires promotion to 5799 // continue 5800 func (d *Deployment) RequiresPromotion() bool { 5801 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 5802 return false 5803 } 5804 for _, group := range d.TaskGroups { 5805 if group.DesiredCanaries 
> 0 && !group.Promoted { 5806 return true 5807 } 5808 } 5809 return false 5810 } 5811 5812 func (d *Deployment) GoString() string { 5813 base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription) 5814 for group, state := range d.TaskGroups { 5815 base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state) 5816 } 5817 return base 5818 } 5819 5820 // DeploymentState tracks the state of a deployment for a given task group. 5821 type DeploymentState struct { 5822 // AutoRevert marks whether the task group has indicated the job should be 5823 // reverted on failure 5824 AutoRevert bool 5825 5826 // ProgressDeadline is the deadline by which an allocation must transition 5827 // to healthy before the deployment is considered failed. 5828 ProgressDeadline time.Duration 5829 5830 // RequireProgressBy is the time by which an allocation must transition 5831 // to healthy before the deployment is considered failed. 5832 RequireProgressBy time.Time 5833 5834 // Promoted marks whether the canaries have been promoted 5835 Promoted bool 5836 5837 // PlacedCanaries is the set of placed canary allocations 5838 PlacedCanaries []string 5839 5840 // DesiredCanaries is the number of canaries that should be created. 5841 DesiredCanaries int 5842 5843 // DesiredTotal is the total number of allocations that should be created as 5844 // part of the deployment. 5845 DesiredTotal int 5846 5847 // PlacedAllocs is the number of allocations that have been placed 5848 PlacedAllocs int 5849 5850 // HealthyAllocs is the number of allocations that have been marked healthy. 5851 HealthyAllocs int 5852 5853 // UnhealthyAllocs are allocations that have been marked as unhealthy. 
5854 UnhealthyAllocs int 5855 } 5856 5857 func (d *DeploymentState) GoString() string { 5858 base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal) 5859 base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries) 5860 base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries) 5861 base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted) 5862 base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs) 5863 base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs) 5864 base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs) 5865 base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert) 5866 return base 5867 } 5868 5869 func (d *DeploymentState) Copy() *DeploymentState { 5870 c := &DeploymentState{} 5871 *c = *d 5872 c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries) 5873 return c 5874 } 5875 5876 // DeploymentStatusUpdate is used to update the status of a given deployment 5877 type DeploymentStatusUpdate struct { 5878 // DeploymentID is the ID of the deployment to update 5879 DeploymentID string 5880 5881 // Status is the new status of the deployment. 5882 Status string 5883 5884 // StatusDescription is the new status description of the deployment. 
5885 StatusDescription string 5886 } 5887 5888 // RescheduleTracker encapsulates previous reschedule events 5889 type RescheduleTracker struct { 5890 Events []*RescheduleEvent 5891 } 5892 5893 func (rt *RescheduleTracker) Copy() *RescheduleTracker { 5894 if rt == nil { 5895 return nil 5896 } 5897 nt := &RescheduleTracker{} 5898 *nt = *rt 5899 rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events)) 5900 for _, tracker := range rt.Events { 5901 rescheduleEvents = append(rescheduleEvents, tracker.Copy()) 5902 } 5903 nt.Events = rescheduleEvents 5904 return nt 5905 } 5906 5907 // RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation 5908 type RescheduleEvent struct { 5909 // RescheduleTime is the timestamp of a reschedule attempt 5910 RescheduleTime int64 5911 5912 // PrevAllocID is the ID of the previous allocation being restarted 5913 PrevAllocID string 5914 5915 // PrevNodeID is the node ID of the previous allocation 5916 PrevNodeID string 5917 5918 // Delay is the reschedule delay associated with the attempt 5919 Delay time.Duration 5920 } 5921 5922 func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string, delay time.Duration) *RescheduleEvent { 5923 return &RescheduleEvent{RescheduleTime: rescheduleTime, 5924 PrevAllocID: prevAllocID, 5925 PrevNodeID: prevNodeID, 5926 Delay: delay} 5927 } 5928 5929 func (re *RescheduleEvent) Copy() *RescheduleEvent { 5930 if re == nil { 5931 return nil 5932 } 5933 copy := new(RescheduleEvent) 5934 *copy = *re 5935 return copy 5936 } 5937 5938 // DesiredTransition is used to mark an allocation as having a desired state 5939 // transition. This information can be used by the scheduler to make the 5940 // correct decision. 5941 type DesiredTransition struct { 5942 // Migrate is used to indicate that this allocation should be stopped and 5943 // migrated to another node. 
5944 Migrate *bool 5945 5946 // Reschedule is used to indicate that this allocation is eligible to be 5947 // rescheduled. Most allocations are automatically eligible for 5948 // rescheduling, so this field is only required when an allocation is not 5949 // automatically eligible. An example is an allocation that is part of a 5950 // deployment. 5951 Reschedule *bool 5952 5953 // ForceReschedule is used to indicate that this allocation must be rescheduled. 5954 // This field is only used when operators want to force a placement even if 5955 // a failed allocation is not eligible to be rescheduled 5956 ForceReschedule *bool 5957 } 5958 5959 // Merge merges the two desired transitions, preferring the values from the 5960 // passed in object. 5961 func (d *DesiredTransition) Merge(o *DesiredTransition) { 5962 if o.Migrate != nil { 5963 d.Migrate = o.Migrate 5964 } 5965 5966 if o.Reschedule != nil { 5967 d.Reschedule = o.Reschedule 5968 } 5969 5970 if o.ForceReschedule != nil { 5971 d.ForceReschedule = o.ForceReschedule 5972 } 5973 } 5974 5975 // ShouldMigrate returns whether the transition object dictates a migration. 5976 func (d *DesiredTransition) ShouldMigrate() bool { 5977 return d.Migrate != nil && *d.Migrate 5978 } 5979 5980 // ShouldReschedule returns whether the transition object dictates a 5981 // rescheduling. 5982 func (d *DesiredTransition) ShouldReschedule() bool { 5983 return d.Reschedule != nil && *d.Reschedule 5984 } 5985 5986 // ShouldForceReschedule returns whether the transition object dictates a 5987 // forced rescheduling. 
5988 func (d *DesiredTransition) ShouldForceReschedule() bool { 5989 if d == nil { 5990 return false 5991 } 5992 return d.ForceReschedule != nil && *d.ForceReschedule 5993 } 5994 5995 const ( 5996 AllocDesiredStatusRun = "run" // Allocation should run 5997 AllocDesiredStatusStop = "stop" // Allocation should stop 5998 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 5999 ) 6000 6001 const ( 6002 AllocClientStatusPending = "pending" 6003 AllocClientStatusRunning = "running" 6004 AllocClientStatusComplete = "complete" 6005 AllocClientStatusFailed = "failed" 6006 AllocClientStatusLost = "lost" 6007 ) 6008 6009 // Allocation is used to allocate the placement of a task group to a node. 6010 type Allocation struct { 6011 // ID of the allocation (UUID) 6012 ID string 6013 6014 // Namespace is the namespace the allocation is created in 6015 Namespace string 6016 6017 // ID of the evaluation that generated this allocation 6018 EvalID string 6019 6020 // Name is a logical name of the allocation. 6021 Name string 6022 6023 // NodeID is the node this is being placed on 6024 NodeID string 6025 6026 // Job is the parent job of the task group being allocated. 6027 // This is copied at allocation time to avoid issues if the job 6028 // definition is updated. 6029 JobID string 6030 Job *Job 6031 6032 // TaskGroup is the name of the task group that should be run 6033 TaskGroup string 6034 6035 // Resources is the total set of resources allocated as part 6036 // of this allocation of the task group. 6037 Resources *Resources 6038 6039 // SharedResources are the resources that are shared by all the tasks in an 6040 // allocation 6041 SharedResources *Resources 6042 6043 // TaskResources is the set of resources allocated to each 6044 // task. These should sum to the total Resources. 
6045 TaskResources map[string]*Resources 6046 6047 // Metrics associated with this allocation 6048 Metrics *AllocMetric 6049 6050 // Desired Status of the allocation on the client 6051 DesiredStatus string 6052 6053 // DesiredStatusDescription is meant to provide more human useful information 6054 DesiredDescription string 6055 6056 // DesiredTransition is used to indicate that a state transition 6057 // is desired for a given reason. 6058 DesiredTransition DesiredTransition 6059 6060 // Status of the allocation on the client 6061 ClientStatus string 6062 6063 // ClientStatusDescription is meant to provide more human useful information 6064 ClientDescription string 6065 6066 // TaskStates stores the state of each task, 6067 TaskStates map[string]*TaskState 6068 6069 // PreviousAllocation is the allocation that this allocation is replacing 6070 PreviousAllocation string 6071 6072 // NextAllocation is the allocation that this allocation is being replaced by 6073 NextAllocation string 6074 6075 // DeploymentID identifies an allocation as being created from a 6076 // particular deployment 6077 DeploymentID string 6078 6079 // DeploymentStatus captures the status of the allocation as part of the 6080 // given deployment 6081 DeploymentStatus *AllocDeploymentStatus 6082 6083 // RescheduleTrackers captures details of previous reschedule attempts of the allocation 6084 RescheduleTracker *RescheduleTracker 6085 6086 // FollowupEvalID captures a follow up evaluation created to handle a failed allocation 6087 // that can be rescheduled in the future 6088 FollowupEvalID string 6089 6090 // Raft Indexes 6091 CreateIndex uint64 6092 ModifyIndex uint64 6093 6094 // AllocModifyIndex is not updated when the client updates allocations. This 6095 // lets the client pull only the allocs updated by the server. 6096 AllocModifyIndex uint64 6097 6098 // CreateTime is the time the allocation has finished scheduling and been 6099 // verified by the plan applier. 
6100 CreateTime int64 6101 6102 // ModifyTime is the time the allocation was last updated. 6103 ModifyTime int64 6104 } 6105 6106 // Index returns the index of the allocation. If the allocation is from a task 6107 // group with count greater than 1, there will be multiple allocations for it. 6108 func (a *Allocation) Index() uint { 6109 l := len(a.Name) 6110 prefix := len(a.JobID) + len(a.TaskGroup) + 2 6111 if l <= 3 || l <= prefix { 6112 return uint(0) 6113 } 6114 6115 strNum := a.Name[prefix : len(a.Name)-1] 6116 num, _ := strconv.Atoi(strNum) 6117 return uint(num) 6118 } 6119 6120 func (a *Allocation) Copy() *Allocation { 6121 return a.copyImpl(true) 6122 } 6123 6124 // Copy provides a copy of the allocation but doesn't deep copy the job 6125 func (a *Allocation) CopySkipJob() *Allocation { 6126 return a.copyImpl(false) 6127 } 6128 6129 func (a *Allocation) copyImpl(job bool) *Allocation { 6130 if a == nil { 6131 return nil 6132 } 6133 na := new(Allocation) 6134 *na = *a 6135 6136 if job { 6137 na.Job = na.Job.Copy() 6138 } 6139 6140 na.Resources = na.Resources.Copy() 6141 na.SharedResources = na.SharedResources.Copy() 6142 6143 if a.TaskResources != nil { 6144 tr := make(map[string]*Resources, len(na.TaskResources)) 6145 for task, resource := range na.TaskResources { 6146 tr[task] = resource.Copy() 6147 } 6148 na.TaskResources = tr 6149 } 6150 6151 na.Metrics = na.Metrics.Copy() 6152 na.DeploymentStatus = na.DeploymentStatus.Copy() 6153 6154 if a.TaskStates != nil { 6155 ts := make(map[string]*TaskState, len(na.TaskStates)) 6156 for task, state := range na.TaskStates { 6157 ts[task] = state.Copy() 6158 } 6159 na.TaskStates = ts 6160 } 6161 6162 na.RescheduleTracker = a.RescheduleTracker.Copy() 6163 return na 6164 } 6165 6166 // TerminalStatus returns if the desired or actual status is terminal and 6167 // will no longer transition. 
func (a *Allocation) TerminalStatus() bool {
	// First check the desired state and if that isn't terminal, check client
	// state.
	switch a.DesiredStatus {
	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
		return true
	default:
	}

	return a.ClientTerminalStatus()
}

// ClientTerminalStatus returns if the client status is terminal and will no longer transition
func (a *Allocation) ClientTerminalStatus() bool {
	switch a.ClientStatus {
	case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
		return true
	default:
		return false
	}
}

// ShouldReschedule returns if the allocation is eligible to be rescheduled according
// to its status and ReschedulePolicy given its failure time
func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
	// First check the desired state
	switch a.DesiredStatus {
	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
		return false
	default:
	}
	// Only client-failed allocations are candidates for rescheduling.
	switch a.ClientStatus {
	case AllocClientStatusFailed:
		return a.RescheduleEligible(reschedulePolicy, failTime)
	default:
		return false
	}
}

// RescheduleEligible returns if the allocation is eligible to be rescheduled according
// to its ReschedulePolicy and the current state of its reschedule trackers
func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
	if reschedulePolicy == nil {
		return false
	}
	attempts := reschedulePolicy.Attempts
	interval := reschedulePolicy.Interval
	enabled := attempts > 0 || reschedulePolicy.Unlimited
	if !enabled {
		return false
	}
	if reschedulePolicy.Unlimited {
		return true
	}
	// Early return true if there are no attempts yet and the number of allowed attempts is > 0
	if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 {
		return true
	}
	// Count only the reschedule events that happened within the policy's
	// interval, measured back from failTime; older events don't count against
	// the attempt limit.
	attempted := 0
	for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
		lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
		timeDiff := failTime.UTC().UnixNano() - lastAttempt
		if timeDiff < interval.Nanoseconds() {
			attempted += 1
		}
	}
	return attempted < attempts
}

// LastEventTime is the time of the last task event in the allocation.
// It is used to determine allocation failure time. If the FinishedAt field
// is not set, the alloc's modify time is used
func (a *Allocation) LastEventTime() time.Time {
	var lastEventTime time.Time
	if a.TaskStates != nil {
		// Take the latest FinishedAt across all task states.
		for _, s := range a.TaskStates {
			if lastEventTime.IsZero() || s.FinishedAt.After(lastEventTime) {
				lastEventTime = s.FinishedAt
			}
		}
	}

	if lastEventTime.IsZero() {
		return time.Unix(0, a.ModifyTime).UTC()
	}
	return lastEventTime
}

// ReschedulePolicy returns the reschedule policy based on the task group
func (a *Allocation) ReschedulePolicy() *ReschedulePolicy {
	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil {
		return nil
	}
	return tg.ReschedulePolicy
}

// NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled,
// and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules
func (a *Allocation) NextRescheduleTime() (time.Time, bool) {
	failTime := a.LastEventTime()
	reschedulePolicy := a.ReschedulePolicy()
	// Only failed, non-stopped allocations with a policy have a next time.
	if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
		return time.Time{}, false
	}

	nextDelay := a.NextDelay()
	nextRescheduleTime := failTime.Add(nextDelay)
	// With no prior attempts, eligibility is simply unlimited-or-attempts>0.
	rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil)
	if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil {
		// Check for eligibility based on the interval if max attempts is set
		attempted := 0
		for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
			lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
			timeDiff := failTime.UTC().UnixNano() - lastAttempt
			if timeDiff < reschedulePolicy.Interval.Nanoseconds() {
				attempted += 1
			}
		}
		rescheduleEligible = attempted < reschedulePolicy.Attempts && nextDelay < reschedulePolicy.Interval
	}
	return nextRescheduleTime, rescheduleEligible
}

// NextDelay returns a duration after which the allocation can be rescheduled.
// It is calculated according to the delay function and previous reschedule attempts.
func (a *Allocation) NextDelay() time.Duration {
	policy := a.ReschedulePolicy()
	delayDur := policy.Delay
	if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 {
		return delayDur
	}
	events := a.RescheduleTracker.Events
	switch policy.DelayFunction {
	case "exponential":
		// Double the delay used by the most recent attempt.
		delayDur = a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1].Delay * 2
	case "fibonacci":
		if len(events) >= 2 {
			fibN1Delay := events[len(events)-1].Delay
			fibN2Delay := events[len(events)-2].Delay
			// Handle reset of delay ceiling which should cause
			// a new series to start
			if fibN2Delay == policy.MaxDelay && fibN1Delay == policy.Delay {
				delayDur = fibN1Delay
			} else {
				delayDur = fibN1Delay + fibN2Delay
			}
		}
	default:
		return delayDur
	}
	if policy.MaxDelay > 0 && delayDur > policy.MaxDelay {
		delayDur = policy.MaxDelay
		// check if delay needs to be reset

		lastRescheduleEvent :=
a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] 6324 timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime 6325 if timeDiff > delayDur.Nanoseconds() { 6326 delayDur = policy.Delay 6327 } 6328 6329 } 6330 6331 return delayDur 6332 } 6333 6334 // Terminated returns if the allocation is in a terminal state on a client. 6335 func (a *Allocation) Terminated() bool { 6336 if a.ClientStatus == AllocClientStatusFailed || 6337 a.ClientStatus == AllocClientStatusComplete || 6338 a.ClientStatus == AllocClientStatusLost { 6339 return true 6340 } 6341 return false 6342 } 6343 6344 // RanSuccessfully returns whether the client has ran the allocation and all 6345 // tasks finished successfully. Critically this function returns whether the 6346 // allocation has ran to completion and not just that the alloc has converged to 6347 // its desired state. That is to say that a batch allocation must have finished 6348 // with exit code 0 on all task groups. This doesn't really have meaning on a 6349 // non-batch allocation because a service and system allocation should not 6350 // finish. 6351 func (a *Allocation) RanSuccessfully() bool { 6352 // Handle the case the client hasn't started the allocation. 
6353 if len(a.TaskStates) == 0 { 6354 return false 6355 } 6356 6357 // Check to see if all the tasks finished successfully in the allocation 6358 allSuccess := true 6359 for _, state := range a.TaskStates { 6360 allSuccess = allSuccess && state.Successful() 6361 } 6362 6363 return allSuccess 6364 } 6365 6366 // ShouldMigrate returns if the allocation needs data migration 6367 func (a *Allocation) ShouldMigrate() bool { 6368 if a.PreviousAllocation == "" { 6369 return false 6370 } 6371 6372 if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict { 6373 return false 6374 } 6375 6376 tg := a.Job.LookupTaskGroup(a.TaskGroup) 6377 6378 // if the task group is nil or the ephemeral disk block isn't present then 6379 // we won't migrate 6380 if tg == nil || tg.EphemeralDisk == nil { 6381 return false 6382 } 6383 6384 // We won't migrate any data is the user hasn't enabled migration or the 6385 // disk is not marked as sticky 6386 if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky { 6387 return false 6388 } 6389 6390 return true 6391 } 6392 6393 // SetEventDisplayMessage populates the display message if its not already set, 6394 // a temporary fix to handle old allocations that don't have it. 6395 // This method will be removed in a future release. 
func (a *Allocation) SetEventDisplayMessages() {
	setDisplayMsg(a.TaskStates)
}

// Stub returns a list stub for the allocation
// NOTE(review): this reads a.Job.Version, so it assumes a.Job is non-nil —
// confirm callers never pass an allocation with a stripped Job.
func (a *Allocation) Stub() *AllocListStub {
	return &AllocListStub{
		ID:                 a.ID,
		EvalID:             a.EvalID,
		Name:               a.Name,
		NodeID:             a.NodeID,
		JobID:              a.JobID,
		JobVersion:         a.Job.Version,
		TaskGroup:          a.TaskGroup,
		DesiredStatus:      a.DesiredStatus,
		DesiredDescription: a.DesiredDescription,
		ClientStatus:       a.ClientStatus,
		ClientDescription:  a.ClientDescription,
		DesiredTransition:  a.DesiredTransition,
		TaskStates:         a.TaskStates,
		DeploymentStatus:   a.DeploymentStatus,
		FollowupEvalID:     a.FollowupEvalID,
		RescheduleTracker:  a.RescheduleTracker,
		CreateIndex:        a.CreateIndex,
		ModifyIndex:        a.ModifyIndex,
		CreateTime:         a.CreateTime,
		ModifyTime:         a.ModifyTime,
	}
}

// AllocListStub is used to return a subset of alloc information.
// Fields mirror the same-named fields on Allocation.
type AllocListStub struct {
	ID                 string
	EvalID             string
	Name               string
	NodeID             string
	JobID              string
	JobVersion         uint64
	TaskGroup          string
	DesiredStatus      string
	DesiredDescription string
	ClientStatus       string
	ClientDescription  string
	DesiredTransition  DesiredTransition
	TaskStates         map[string]*TaskState
	DeploymentStatus   *AllocDeploymentStatus
	FollowupEvalID     string
	RescheduleTracker  *RescheduleTracker
	CreateIndex        uint64
	ModifyIndex        uint64
	CreateTime         int64
	ModifyTime         int64
}

// SetEventDisplayMessages populates the display message if it's not already
// set, a temporary fix to handle old allocations that don't have it.
// This method will be removed in a future release.
6453 func (a *AllocListStub) SetEventDisplayMessages() { 6454 setDisplayMsg(a.TaskStates) 6455 } 6456 6457 func setDisplayMsg(taskStates map[string]*TaskState) { 6458 if taskStates != nil { 6459 for _, taskState := range taskStates { 6460 for _, event := range taskState.Events { 6461 event.PopulateEventDisplayMessage() 6462 } 6463 } 6464 } 6465 } 6466 6467 // AllocMetric is used to track various metrics while attempting 6468 // to make an allocation. These are used to debug a job, or to better 6469 // understand the pressure within the system. 6470 type AllocMetric struct { 6471 // NodesEvaluated is the number of nodes that were evaluated 6472 NodesEvaluated int 6473 6474 // NodesFiltered is the number of nodes filtered due to a constraint 6475 NodesFiltered int 6476 6477 // NodesAvailable is the number of nodes available for evaluation per DC. 6478 NodesAvailable map[string]int 6479 6480 // ClassFiltered is the number of nodes filtered by class 6481 ClassFiltered map[string]int 6482 6483 // ConstraintFiltered is the number of failures caused by constraint 6484 ConstraintFiltered map[string]int 6485 6486 // NodesExhausted is the number of nodes skipped due to being 6487 // exhausted of at least one resource 6488 NodesExhausted int 6489 6490 // ClassExhausted is the number of nodes exhausted by class 6491 ClassExhausted map[string]int 6492 6493 // DimensionExhausted provides the count by dimension or reason 6494 DimensionExhausted map[string]int 6495 6496 // QuotaExhausted provides the exhausted dimensions 6497 QuotaExhausted []string 6498 6499 // Scores is the scores of the final few nodes remaining 6500 // for placement. The top score is typically selected. 6501 // Deprecated: Replaced by ScoreMetaData in Nomad 0.9 6502 Scores map[string]float64 6503 6504 // ScoreMetaData is a slice of top scoring nodes displayed in the CLI 6505 ScoreMetaData []*NodeScoreMeta 6506 6507 // nodeScoreMeta is used to keep scores for a single node id. 
It is cleared out after 6508 // we receive normalized score during the last step of the scoring stack. 6509 nodeScoreMeta *NodeScoreMeta 6510 6511 // topScores is used to maintain a heap of the top K nodes with 6512 // the highest normalized score 6513 topScores *kheap.ScoreHeap 6514 6515 // AllocationTime is a measure of how long the allocation 6516 // attempt took. This can affect performance and SLAs. 6517 AllocationTime time.Duration 6518 6519 // CoalescedFailures indicates the number of other 6520 // allocations that were coalesced into this failed allocation. 6521 // This is to prevent creating many failed allocations for a 6522 // single task group. 6523 CoalescedFailures int 6524 } 6525 6526 func (a *AllocMetric) Copy() *AllocMetric { 6527 if a == nil { 6528 return nil 6529 } 6530 na := new(AllocMetric) 6531 *na = *a 6532 na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable) 6533 na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered) 6534 na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered) 6535 na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted) 6536 na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted) 6537 na.QuotaExhausted = helper.CopySliceString(na.QuotaExhausted) 6538 na.Scores = helper.CopyMapStringFloat64(na.Scores) 6539 na.ScoreMetaData = CopySliceNodeScoreMeta(na.ScoreMetaData) 6540 return na 6541 } 6542 6543 func (a *AllocMetric) EvaluateNode() { 6544 a.NodesEvaluated += 1 6545 } 6546 6547 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 6548 a.NodesFiltered += 1 6549 if node != nil && node.NodeClass != "" { 6550 if a.ClassFiltered == nil { 6551 a.ClassFiltered = make(map[string]int) 6552 } 6553 a.ClassFiltered[node.NodeClass] += 1 6554 } 6555 if constraint != "" { 6556 if a.ConstraintFiltered == nil { 6557 a.ConstraintFiltered = make(map[string]int) 6558 } 6559 a.ConstraintFiltered[constraint] += 1 6560 } 6561 } 6562 6563 func (a *AllocMetric) ExhaustedNode(node 
*Node, dimension string) { 6564 a.NodesExhausted += 1 6565 if node != nil && node.NodeClass != "" { 6566 if a.ClassExhausted == nil { 6567 a.ClassExhausted = make(map[string]int) 6568 } 6569 a.ClassExhausted[node.NodeClass] += 1 6570 } 6571 if dimension != "" { 6572 if a.DimensionExhausted == nil { 6573 a.DimensionExhausted = make(map[string]int) 6574 } 6575 a.DimensionExhausted[dimension] += 1 6576 } 6577 } 6578 6579 func (a *AllocMetric) ExhaustQuota(dimensions []string) { 6580 if a.QuotaExhausted == nil { 6581 a.QuotaExhausted = make([]string, 0, len(dimensions)) 6582 } 6583 6584 a.QuotaExhausted = append(a.QuotaExhausted, dimensions...) 6585 } 6586 6587 // ScoreNode is used to gather top K scoring nodes in a heap 6588 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 6589 // Create nodeScoreMeta lazily if its the first time or if its a new node 6590 if a.nodeScoreMeta == nil || a.nodeScoreMeta.NodeID != node.ID { 6591 a.nodeScoreMeta = &NodeScoreMeta{ 6592 NodeID: node.ID, 6593 Scores: make(map[string]float64), 6594 } 6595 } 6596 if name == NormScorerName { 6597 a.nodeScoreMeta.NormScore = score 6598 // Once we have the normalized score we can push to the heap 6599 // that tracks top K by normalized score 6600 6601 // Create the heap if its not there already 6602 if a.topScores == nil { 6603 a.topScores = kheap.NewScoreHeap(MaxRetainedNodeScores) 6604 } 6605 heap.Push(a.topScores, a.nodeScoreMeta) 6606 6607 // Clear out this entry because its now in the heap 6608 a.nodeScoreMeta = nil 6609 } else { 6610 a.nodeScoreMeta.Scores[name] = score 6611 } 6612 } 6613 6614 // PopulateScoreMetaData populates a map of scorer to scoring metadata 6615 // The map is populated by popping elements from a heap of top K scores 6616 // maintained per scorer 6617 func (a *AllocMetric) PopulateScoreMetaData() { 6618 if a.topScores == nil { 6619 return 6620 } 6621 6622 if a.ScoreMetaData == nil { 6623 a.ScoreMetaData = make([]*NodeScoreMeta, a.topScores.Len()) 
6624 } 6625 heapItems := a.topScores.GetItemsReverse() 6626 for i, item := range heapItems { 6627 a.ScoreMetaData[i] = item.(*NodeScoreMeta) 6628 } 6629 } 6630 6631 // NodeScoreMeta captures scoring meta data derived from 6632 // different scoring factors. 6633 type NodeScoreMeta struct { 6634 NodeID string 6635 Scores map[string]float64 6636 NormScore float64 6637 } 6638 6639 func (s *NodeScoreMeta) Copy() *NodeScoreMeta { 6640 if s == nil { 6641 return nil 6642 } 6643 ns := new(NodeScoreMeta) 6644 *ns = *s 6645 return ns 6646 } 6647 6648 func (s *NodeScoreMeta) String() string { 6649 return fmt.Sprintf("%s %f %v", s.NodeID, s.NormScore, s.Scores) 6650 } 6651 6652 func (s *NodeScoreMeta) Score() float64 { 6653 return s.NormScore 6654 } 6655 6656 func (s *NodeScoreMeta) Data() interface{} { 6657 return s 6658 } 6659 6660 // AllocDeploymentStatus captures the status of the allocation as part of the 6661 // deployment. This can include things like if the allocation has been marked as 6662 // healthy. 6663 type AllocDeploymentStatus struct { 6664 // Healthy marks whether the allocation has been marked healthy or unhealthy 6665 // as part of a deployment. It can be unset if it has neither been marked 6666 // healthy or unhealthy. 6667 Healthy *bool 6668 6669 // Timestamp is the time at which the health status was set. 6670 Timestamp time.Time 6671 6672 // Canary marks whether the allocation is a canary or not. A canary that has 6673 // been promoted will have this field set to false. 6674 Canary bool 6675 6676 // ModifyIndex is the raft index in which the deployment status was last 6677 // changed. 6678 ModifyIndex uint64 6679 } 6680 6681 // HasHealth returns true if the allocation has its health set. 
func (a *AllocDeploymentStatus) HasHealth() bool {
	return a != nil && a.Healthy != nil
}

// IsHealthy returns if the allocation is marked as healthy as part of a
// deployment
func (a *AllocDeploymentStatus) IsHealthy() bool {
	if a == nil {
		return false
	}

	return a.Healthy != nil && *a.Healthy
}

// IsUnhealthy returns if the allocation is marked as unhealthy as part of a
// deployment
func (a *AllocDeploymentStatus) IsUnhealthy() bool {
	if a == nil {
		return false
	}

	return a.Healthy != nil && !*a.Healthy
}

// IsCanary returns if the allocation is marked as a canary
func (a *AllocDeploymentStatus) IsCanary() bool {
	if a == nil {
		return false
	}

	return a.Canary
}

// Copy returns a deep copy of the deployment status; nil-safe.
func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus {
	if a == nil {
		return nil
	}

	c := new(AllocDeploymentStatus)
	*c = *a

	// Healthy is a pointer, so the shallow copy above would share it.
	if a.Healthy != nil {
		c.Healthy = helper.BoolToPtr(*a.Healthy)
	}

	return c
}

// Evaluation status values.
const (
	EvalStatusBlocked   = "blocked"
	EvalStatusPending   = "pending"
	EvalStatusComplete  = "complete"
	EvalStatusFailed    = "failed"
	EvalStatusCancelled = "canceled"
)

// Evaluation trigger reasons, recorded in Evaluation.TriggeredBy.
const (
	EvalTriggerJobRegister       = "job-register"
	EvalTriggerJobDeregister     = "job-deregister"
	EvalTriggerPeriodicJob       = "periodic-job"
	EvalTriggerNodeDrain         = "node-drain"
	EvalTriggerNodeUpdate        = "node-update"
	EvalTriggerScheduled         = "scheduled"
	EvalTriggerRollingUpdate     = "rolling-update"
	EvalTriggerDeploymentWatcher = "deployment-watcher"
	EvalTriggerFailedFollowUp    = "failed-follow-up"
	EvalTriggerMaxPlans          = "max-plan-attempts"
	EvalTriggerRetryFailedAlloc  = "alloc-failure"
)

const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
	// periodically scan garbage collectible jobs and check if both their
	// evaluations and allocations are terminal. If so, we delete these out of
	// the system.
	CoreJobJobGC = "job-gc"

	// CoreJobDeploymentGC is used for the garbage collection of eligible
	// deployments. We periodically scan garbage collectible deployments and
	// check if they are terminal. If so, we delete these out of the system.
	CoreJobDeploymentGC = "deployment-gc"

	// CoreJobForceGC is used to force garbage collection of all GCable objects.
	CoreJobForceGC = "force-gc"
)

// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Namespace is the namespace the evaluation is created in
	Namespace string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// DeploymentID is the ID of the deployment that triggered the evaluation.
	DeploymentID string

	// Status of the evaluation
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade in versions prior to 0.7.0
	// Deprecated
	Wait time.Duration

	// WaitUntil is the time when this eval should be run. This is used to
	// supported delayed rescheduling of failed allocations
	WaitUntil time.Time

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades, where we need a chain of evaluations.
	PreviousEval string

	// BlockedEval is the evaluation ID for a created blocked eval. A
	// blocked eval will be created if all allocations could not be placed due
	// to constraints or lacking resources.
	BlockedEval string

	// FailedTGAllocs are task groups which have allocations that could not be
	// made, but the metrics are persisted so that the user can use the feedback
	// to determine the cause.
	FailedTGAllocs map[string]*AllocMetric

	// ClassEligibility tracks computed node classes that have been explicitly
	// marked as eligible or ineligible.
	ClassEligibility map[string]bool

	// QuotaLimitReached marks whether a quota limit was reached for the
	// evaluation.
	QuotaLimitReached string

	// EscapedComputedClass marks whether the job has constraints that are not
	// captured by computed node classes.
	EscapedComputedClass bool

	// AnnotatePlan triggers the scheduler to provide additional annotations
	// during the evaluation. This should not be set during normal operations.
	AnnotatePlan bool

	// QueuedAllocations is the number of unplaced allocations at the time the
	// evaluation was processed. The map is keyed by Task Group names.
	QueuedAllocations map[string]int

	// LeaderACL provides the ACL token to when issuing RPCs back to the
	// leader. This will be a valid management token as long as the leader is
	// active. This should not ever be exposed via the API.
	LeaderACL string

	// SnapshotIndex is the Raft index of the snapshot used to process the
	// evaluation. As such it will only be set once it has gone through the
	// scheduler.
	SnapshotIndex uint64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
6892 func (e *Evaluation) TerminalStatus() bool { 6893 switch e.Status { 6894 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 6895 return true 6896 default: 6897 return false 6898 } 6899 } 6900 6901 func (e *Evaluation) GoString() string { 6902 return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace) 6903 } 6904 6905 func (e *Evaluation) Copy() *Evaluation { 6906 if e == nil { 6907 return nil 6908 } 6909 ne := new(Evaluation) 6910 *ne = *e 6911 6912 // Copy ClassEligibility 6913 if e.ClassEligibility != nil { 6914 classes := make(map[string]bool, len(e.ClassEligibility)) 6915 for class, elig := range e.ClassEligibility { 6916 classes[class] = elig 6917 } 6918 ne.ClassEligibility = classes 6919 } 6920 6921 // Copy FailedTGAllocs 6922 if e.FailedTGAllocs != nil { 6923 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 6924 for tg, metric := range e.FailedTGAllocs { 6925 failedTGs[tg] = metric.Copy() 6926 } 6927 ne.FailedTGAllocs = failedTGs 6928 } 6929 6930 // Copy queued allocations 6931 if e.QueuedAllocations != nil { 6932 queuedAllocations := make(map[string]int, len(e.QueuedAllocations)) 6933 for tg, num := range e.QueuedAllocations { 6934 queuedAllocations[tg] = num 6935 } 6936 ne.QueuedAllocations = queuedAllocations 6937 } 6938 6939 return ne 6940 } 6941 6942 // ShouldEnqueue checks if a given evaluation should be enqueued into the 6943 // eval_broker 6944 func (e *Evaluation) ShouldEnqueue() bool { 6945 switch e.Status { 6946 case EvalStatusPending: 6947 return true 6948 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 6949 return false 6950 default: 6951 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 6952 } 6953 } 6954 6955 // ShouldBlock checks if a given evaluation should be entered into the blocked 6956 // eval tracker. 
6957 func (e *Evaluation) ShouldBlock() bool { 6958 switch e.Status { 6959 case EvalStatusBlocked: 6960 return true 6961 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 6962 return false 6963 default: 6964 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 6965 } 6966 } 6967 6968 // MakePlan is used to make a plan from the given evaluation 6969 // for a given Job 6970 func (e *Evaluation) MakePlan(j *Job) *Plan { 6971 p := &Plan{ 6972 EvalID: e.ID, 6973 Priority: e.Priority, 6974 Job: j, 6975 NodeUpdate: make(map[string][]*Allocation), 6976 NodeAllocation: make(map[string][]*Allocation), 6977 } 6978 if j != nil { 6979 p.AllAtOnce = j.AllAtOnce 6980 } 6981 return p 6982 } 6983 6984 // NextRollingEval creates an evaluation to followup this eval for rolling updates 6985 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 6986 return &Evaluation{ 6987 ID: uuid.Generate(), 6988 Namespace: e.Namespace, 6989 Priority: e.Priority, 6990 Type: e.Type, 6991 TriggeredBy: EvalTriggerRollingUpdate, 6992 JobID: e.JobID, 6993 JobModifyIndex: e.JobModifyIndex, 6994 Status: EvalStatusPending, 6995 Wait: wait, 6996 PreviousEval: e.ID, 6997 } 6998 } 6999 7000 // CreateBlockedEval creates a blocked evaluation to followup this eval to place any 7001 // failed allocations. It takes the classes marked explicitly eligible or 7002 // ineligible, whether the job has escaped computed node classes and whether the 7003 // quota limit was reached. 
func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool,
	escaped bool, quotaReached string) *Evaluation {

	return &Evaluation{
		ID:                   uuid.Generate(),
		Namespace:            e.Namespace,
		Priority:             e.Priority,
		Type:                 e.Type,
		TriggeredBy:          e.TriggeredBy,
		JobID:                e.JobID,
		JobModifyIndex:       e.JobModifyIndex,
		Status:               EvalStatusBlocked,
		PreviousEval:         e.ID,
		ClassEligibility:     classEligibility,
		EscapedComputedClass: escaped,
		QuotaLimitReached:    quotaReached,
	}
}

// CreateFailedFollowUpEval creates a follow up evaluation when the current one
// has been marked as failed because it has hit the delivery limit and will not
// be retried by the eval_broker.
func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation {
	return &Evaluation{
		ID:             uuid.Generate(),
		Namespace:      e.Namespace,
		Priority:       e.Priority,
		Type:           e.Type,
		TriggeredBy:    EvalTriggerFailedFollowUp,
		JobID:          e.JobID,
		JobModifyIndex: e.JobModifyIndex,
		Status:         EvalStatusPending,
		Wait:           wait,
		PreviousEval:   e.ID,
	}
}

// Plan is used to submit a commit plan for task allocations. These
// are submitted to the leader which verifies that resources have
// not been overcommitted before admitting the plan.
type Plan struct {
	// EvalID is the evaluation ID this plan is associated with
	EvalID string

	// EvalToken is used to prevent a split-brain processing of
	// an evaluation. There should only be a single scheduler running
	// an Eval at a time, but this could be violated after a leadership
	// transition. This unique token is used to reject plans that are
	// being submitted from a different leader.
	EvalToken string

	// Priority is the priority of the upstream job
	Priority int

	// AllAtOnce is used to control if incremental scheduling of task groups
	// is allowed or if we must do a gang scheduling of the entire job.
	// If this is false, a plan may be partially applied. Otherwise, the
	// entire plan must be able to make progress.
	AllAtOnce bool

	// Job is the parent job of all the allocations in the Plan.
	// Since a Plan only involves a single Job, we can reduce the size
	// of the plan by only including it once.
	Job *Job

	// NodeUpdate contains all the allocations for each node. For each node,
	// this is a list of the allocations to update to either stop or evict.
	NodeUpdate map[string][]*Allocation

	// NodeAllocation contains all the allocations for each node.
	// The evicts must be considered prior to the allocations.
	NodeAllocation map[string][]*Allocation

	// Annotations contains annotations by the scheduler to be used by operators
	// to understand the decisions made by the scheduler.
	Annotations *PlanAnnotations

	// Deployment is the deployment created or updated by the scheduler that
	// should be applied by the planner.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate
}

// AppendUpdate marks the allocation for eviction. The clientStatus of the
// allocation may be optionally set by passing in a non-empty value.
7093 func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) { 7094 newAlloc := new(Allocation) 7095 *newAlloc = *alloc 7096 7097 // If the job is not set in the plan we are deregistering a job so we 7098 // extract the job from the allocation. 7099 if p.Job == nil && newAlloc.Job != nil { 7100 p.Job = newAlloc.Job 7101 } 7102 7103 // Normalize the job 7104 newAlloc.Job = nil 7105 7106 // Strip the resources as it can be rebuilt. 7107 newAlloc.Resources = nil 7108 7109 newAlloc.DesiredStatus = desiredStatus 7110 newAlloc.DesiredDescription = desiredDesc 7111 7112 if clientStatus != "" { 7113 newAlloc.ClientStatus = clientStatus 7114 } 7115 7116 node := alloc.NodeID 7117 existing := p.NodeUpdate[node] 7118 p.NodeUpdate[node] = append(existing, newAlloc) 7119 } 7120 7121 func (p *Plan) PopUpdate(alloc *Allocation) { 7122 existing := p.NodeUpdate[alloc.NodeID] 7123 n := len(existing) 7124 if n > 0 && existing[n-1].ID == alloc.ID { 7125 existing = existing[:n-1] 7126 if len(existing) > 0 { 7127 p.NodeUpdate[alloc.NodeID] = existing 7128 } else { 7129 delete(p.NodeUpdate, alloc.NodeID) 7130 } 7131 } 7132 } 7133 7134 func (p *Plan) AppendAlloc(alloc *Allocation) { 7135 node := alloc.NodeID 7136 existing := p.NodeAllocation[node] 7137 p.NodeAllocation[node] = append(existing, alloc) 7138 } 7139 7140 // IsNoOp checks if this plan would do nothing 7141 func (p *Plan) IsNoOp() bool { 7142 return len(p.NodeUpdate) == 0 && 7143 len(p.NodeAllocation) == 0 && 7144 p.Deployment == nil && 7145 len(p.DeploymentUpdates) == 0 7146 } 7147 7148 // PlanResult is the result of a plan submitted to the leader. 7149 type PlanResult struct { 7150 // NodeUpdate contains all the updates that were committed. 7151 NodeUpdate map[string][]*Allocation 7152 7153 // NodeAllocation contains all the allocations that were committed. 7154 NodeAllocation map[string][]*Allocation 7155 7156 // Deployment is the deployment that was committed. 
7157 Deployment *Deployment 7158 7159 // DeploymentUpdates is the set of deployment updates that were committed. 7160 DeploymentUpdates []*DeploymentStatusUpdate 7161 7162 // RefreshIndex is the index the worker should refresh state up to. 7163 // This allows all evictions and allocations to be materialized. 7164 // If any allocations were rejected due to stale data (node state, 7165 // over committed) this can be used to force a worker refresh. 7166 RefreshIndex uint64 7167 7168 // AllocIndex is the Raft index in which the evictions and 7169 // allocations took place. This is used for the write index. 7170 AllocIndex uint64 7171 } 7172 7173 // IsNoOp checks if this plan result would do nothing 7174 func (p *PlanResult) IsNoOp() bool { 7175 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && 7176 len(p.DeploymentUpdates) == 0 && p.Deployment == nil 7177 } 7178 7179 // FullCommit is used to check if all the allocations in a plan 7180 // were committed as part of the result. Returns if there was 7181 // a match, and the number of expected and actual allocations. 7182 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) { 7183 expected := 0 7184 actual := 0 7185 for name, allocList := range plan.NodeAllocation { 7186 didAlloc, _ := p.NodeAllocation[name] 7187 expected += len(allocList) 7188 actual += len(didAlloc) 7189 } 7190 return actual == expected, expected, actual 7191 } 7192 7193 // PlanAnnotations holds annotations made by the scheduler to give further debug 7194 // information to operators. 7195 type PlanAnnotations struct { 7196 // DesiredTGUpdates is the set of desired updates per task group. 7197 DesiredTGUpdates map[string]*DesiredUpdates 7198 } 7199 7200 // DesiredUpdates is the set of changes the scheduler would like to make given 7201 // sufficient resources and cluster capacity. 
7202 type DesiredUpdates struct { 7203 Ignore uint64 7204 Place uint64 7205 Migrate uint64 7206 Stop uint64 7207 InPlaceUpdate uint64 7208 DestructiveUpdate uint64 7209 Canary uint64 7210 } 7211 7212 func (d *DesiredUpdates) GoString() string { 7213 return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)", 7214 d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary) 7215 } 7216 7217 // msgpackHandle is a shared handle for encoding/decoding of structs 7218 var MsgpackHandle = func() *codec.MsgpackHandle { 7219 h := &codec.MsgpackHandle{RawToString: true} 7220 7221 // Sets the default type for decoding a map into a nil interface{}. 7222 // This is necessary in particular because we store the driver configs as a 7223 // nil interface{}. 7224 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 7225 return h 7226 }() 7227 7228 var ( 7229 // JsonHandle and JsonHandlePretty are the codec handles to JSON encode 7230 // structs. The pretty handle will add indents for easier human consumption. 7231 JsonHandle = &codec.JsonHandle{ 7232 HTMLCharsAsIs: true, 7233 } 7234 JsonHandlePretty = &codec.JsonHandle{ 7235 HTMLCharsAsIs: true, 7236 Indent: 4, 7237 } 7238 ) 7239 7240 // TODO Figure out if we can remove this. This is our fork that is just way 7241 // behind. I feel like its original purpose was to pin at a stable version but 7242 // now we can accomplish this with vendoring. 7243 var HashiMsgpackHandle = func() *hcodec.MsgpackHandle { 7244 h := &hcodec.MsgpackHandle{RawToString: true} 7245 7246 // Sets the default type for decoding a map into a nil interface{}. 7247 // This is necessary in particular because we store the driver configs as a 7248 // nil interface{}. 
7249 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 7250 return h 7251 }() 7252 7253 // Decode is used to decode a MsgPack encoded object 7254 func Decode(buf []byte, out interface{}) error { 7255 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out) 7256 } 7257 7258 // Encode is used to encode a MsgPack object with type prefix 7259 func Encode(t MessageType, msg interface{}) ([]byte, error) { 7260 var buf bytes.Buffer 7261 buf.WriteByte(uint8(t)) 7262 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg) 7263 return buf.Bytes(), err 7264 } 7265 7266 // KeyringResponse is a unified key response and can be used for install, 7267 // remove, use, as well as listing key queries. 7268 type KeyringResponse struct { 7269 Messages map[string]string 7270 Keys map[string]int 7271 NumNodes int 7272 } 7273 7274 // KeyringRequest is request objects for serf key operations. 7275 type KeyringRequest struct { 7276 Key string 7277 } 7278 7279 // RecoverableError wraps an error and marks whether it is recoverable and could 7280 // be retried or it is fatal. 7281 type RecoverableError struct { 7282 Err string 7283 Recoverable bool 7284 } 7285 7286 // NewRecoverableError is used to wrap an error and mark it as recoverable or 7287 // not. 7288 func NewRecoverableError(e error, recoverable bool) error { 7289 if e == nil { 7290 return nil 7291 } 7292 7293 return &RecoverableError{ 7294 Err: e.Error(), 7295 Recoverable: recoverable, 7296 } 7297 } 7298 7299 // WrapRecoverable wraps an existing error in a new RecoverableError with a new 7300 // message. If the error was recoverable before the returned error is as well; 7301 // otherwise it is unrecoverable. 
7302 func WrapRecoverable(msg string, err error) error { 7303 return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)} 7304 } 7305 7306 func (r *RecoverableError) Error() string { 7307 return r.Err 7308 } 7309 7310 func (r *RecoverableError) IsRecoverable() bool { 7311 return r.Recoverable 7312 } 7313 7314 // Recoverable is an interface for errors to implement to indicate whether or 7315 // not they are fatal or recoverable. 7316 type Recoverable interface { 7317 error 7318 IsRecoverable() bool 7319 } 7320 7321 // IsRecoverable returns true if error is a RecoverableError with 7322 // Recoverable=true. Otherwise false is returned. 7323 func IsRecoverable(e error) bool { 7324 if re, ok := e.(Recoverable); ok { 7325 return re.IsRecoverable() 7326 } 7327 return false 7328 } 7329 7330 // WrappedServerError wraps an error and satisfies 7331 // both the Recoverable and the ServerSideError interfaces 7332 type WrappedServerError struct { 7333 Err error 7334 } 7335 7336 // NewWrappedServerError is used to create a wrapped server side error 7337 func NewWrappedServerError(e error) error { 7338 return &WrappedServerError{ 7339 Err: e, 7340 } 7341 } 7342 7343 func (r *WrappedServerError) IsRecoverable() bool { 7344 return IsRecoverable(r.Err) 7345 } 7346 7347 func (r *WrappedServerError) Error() string { 7348 return r.Err.Error() 7349 } 7350 7351 func (r *WrappedServerError) IsServerSide() bool { 7352 return true 7353 } 7354 7355 // ServerSideError is an interface for errors to implement to indicate 7356 // errors occurring after the request makes it to a server 7357 type ServerSideError interface { 7358 error 7359 IsServerSide() bool 7360 } 7361 7362 // IsServerSide returns true if error is a wrapped 7363 // server side error 7364 func IsServerSide(e error) bool { 7365 if se, ok := e.(ServerSideError); ok { 7366 return se.IsServerSide() 7367 } 7368 return false 7369 } 7370 7371 // ACLPolicy is used to represent an ACL policy 7372 type ACLPolicy struct { 7373 Name 
string // Unique name 7374 Description string // Human readable 7375 Rules string // HCL or JSON format 7376 Hash []byte 7377 CreateIndex uint64 7378 ModifyIndex uint64 7379 } 7380 7381 // SetHash is used to compute and set the hash of the ACL policy 7382 func (c *ACLPolicy) SetHash() []byte { 7383 // Initialize a 256bit Blake2 hash (32 bytes) 7384 hash, err := blake2b.New256(nil) 7385 if err != nil { 7386 panic(err) 7387 } 7388 7389 // Write all the user set fields 7390 hash.Write([]byte(c.Name)) 7391 hash.Write([]byte(c.Description)) 7392 hash.Write([]byte(c.Rules)) 7393 7394 // Finalize the hash 7395 hashVal := hash.Sum(nil) 7396 7397 // Set and return the hash 7398 c.Hash = hashVal 7399 return hashVal 7400 } 7401 7402 func (a *ACLPolicy) Stub() *ACLPolicyListStub { 7403 return &ACLPolicyListStub{ 7404 Name: a.Name, 7405 Description: a.Description, 7406 Hash: a.Hash, 7407 CreateIndex: a.CreateIndex, 7408 ModifyIndex: a.ModifyIndex, 7409 } 7410 } 7411 7412 func (a *ACLPolicy) Validate() error { 7413 var mErr multierror.Error 7414 if !validPolicyName.MatchString(a.Name) { 7415 err := fmt.Errorf("invalid name '%s'", a.Name) 7416 mErr.Errors = append(mErr.Errors, err) 7417 } 7418 if _, err := acl.Parse(a.Rules); err != nil { 7419 err = fmt.Errorf("failed to parse rules: %v", err) 7420 mErr.Errors = append(mErr.Errors, err) 7421 } 7422 if len(a.Description) > maxPolicyDescriptionLength { 7423 err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength) 7424 mErr.Errors = append(mErr.Errors, err) 7425 } 7426 return mErr.ErrorOrNil() 7427 } 7428 7429 // ACLPolicyListStub is used to for listing ACL policies 7430 type ACLPolicyListStub struct { 7431 Name string 7432 Description string 7433 Hash []byte 7434 CreateIndex uint64 7435 ModifyIndex uint64 7436 } 7437 7438 // ACLPolicyListRequest is used to request a list of policies 7439 type ACLPolicyListRequest struct { 7440 QueryOptions 7441 } 7442 7443 // ACLPolicySpecificRequest is used to query a specific 
policy 7444 type ACLPolicySpecificRequest struct { 7445 Name string 7446 QueryOptions 7447 } 7448 7449 // ACLPolicySetRequest is used to query a set of policies 7450 type ACLPolicySetRequest struct { 7451 Names []string 7452 QueryOptions 7453 } 7454 7455 // ACLPolicyListResponse is used for a list request 7456 type ACLPolicyListResponse struct { 7457 Policies []*ACLPolicyListStub 7458 QueryMeta 7459 } 7460 7461 // SingleACLPolicyResponse is used to return a single policy 7462 type SingleACLPolicyResponse struct { 7463 Policy *ACLPolicy 7464 QueryMeta 7465 } 7466 7467 // ACLPolicySetResponse is used to return a set of policies 7468 type ACLPolicySetResponse struct { 7469 Policies map[string]*ACLPolicy 7470 QueryMeta 7471 } 7472 7473 // ACLPolicyDeleteRequest is used to delete a set of policies 7474 type ACLPolicyDeleteRequest struct { 7475 Names []string 7476 WriteRequest 7477 } 7478 7479 // ACLPolicyUpsertRequest is used to upsert a set of policies 7480 type ACLPolicyUpsertRequest struct { 7481 Policies []*ACLPolicy 7482 WriteRequest 7483 } 7484 7485 // ACLToken represents a client token which is used to Authenticate 7486 type ACLToken struct { 7487 AccessorID string // Public Accessor ID (UUID) 7488 SecretID string // Secret ID, private (UUID) 7489 Name string // Human friendly name 7490 Type string // Client or Management 7491 Policies []string // Policies this token ties to 7492 Global bool // Global or Region local 7493 Hash []byte 7494 CreateTime time.Time // Time of creation 7495 CreateIndex uint64 7496 ModifyIndex uint64 7497 } 7498 7499 var ( 7500 // AnonymousACLToken is used no SecretID is provided, and the 7501 // request is made anonymously. 
7502 AnonymousACLToken = &ACLToken{ 7503 AccessorID: "anonymous", 7504 Name: "Anonymous Token", 7505 Type: ACLClientToken, 7506 Policies: []string{"anonymous"}, 7507 Global: false, 7508 } 7509 ) 7510 7511 type ACLTokenListStub struct { 7512 AccessorID string 7513 Name string 7514 Type string 7515 Policies []string 7516 Global bool 7517 Hash []byte 7518 CreateTime time.Time 7519 CreateIndex uint64 7520 ModifyIndex uint64 7521 } 7522 7523 // SetHash is used to compute and set the hash of the ACL token 7524 func (a *ACLToken) SetHash() []byte { 7525 // Initialize a 256bit Blake2 hash (32 bytes) 7526 hash, err := blake2b.New256(nil) 7527 if err != nil { 7528 panic(err) 7529 } 7530 7531 // Write all the user set fields 7532 hash.Write([]byte(a.Name)) 7533 hash.Write([]byte(a.Type)) 7534 for _, policyName := range a.Policies { 7535 hash.Write([]byte(policyName)) 7536 } 7537 if a.Global { 7538 hash.Write([]byte("global")) 7539 } else { 7540 hash.Write([]byte("local")) 7541 } 7542 7543 // Finalize the hash 7544 hashVal := hash.Sum(nil) 7545 7546 // Set and return the hash 7547 a.Hash = hashVal 7548 return hashVal 7549 } 7550 7551 func (a *ACLToken) Stub() *ACLTokenListStub { 7552 return &ACLTokenListStub{ 7553 AccessorID: a.AccessorID, 7554 Name: a.Name, 7555 Type: a.Type, 7556 Policies: a.Policies, 7557 Global: a.Global, 7558 Hash: a.Hash, 7559 CreateTime: a.CreateTime, 7560 CreateIndex: a.CreateIndex, 7561 ModifyIndex: a.ModifyIndex, 7562 } 7563 } 7564 7565 // Validate is used to sanity check a token 7566 func (a *ACLToken) Validate() error { 7567 var mErr multierror.Error 7568 if len(a.Name) > maxTokenNameLength { 7569 mErr.Errors = append(mErr.Errors, fmt.Errorf("token name too long")) 7570 } 7571 switch a.Type { 7572 case ACLClientToken: 7573 if len(a.Policies) == 0 { 7574 mErr.Errors = append(mErr.Errors, fmt.Errorf("client token missing policies")) 7575 } 7576 case ACLManagementToken: 7577 if len(a.Policies) != 0 { 7578 mErr.Errors = append(mErr.Errors, 
fmt.Errorf("management token cannot be associated with policies")) 7579 } 7580 default: 7581 mErr.Errors = append(mErr.Errors, fmt.Errorf("token type must be client or management")) 7582 } 7583 return mErr.ErrorOrNil() 7584 } 7585 7586 // PolicySubset checks if a given set of policies is a subset of the token 7587 func (a *ACLToken) PolicySubset(policies []string) bool { 7588 // Hot-path the management tokens, superset of all policies. 7589 if a.Type == ACLManagementToken { 7590 return true 7591 } 7592 associatedPolicies := make(map[string]struct{}, len(a.Policies)) 7593 for _, policy := range a.Policies { 7594 associatedPolicies[policy] = struct{}{} 7595 } 7596 for _, policy := range policies { 7597 if _, ok := associatedPolicies[policy]; !ok { 7598 return false 7599 } 7600 } 7601 return true 7602 } 7603 7604 // ACLTokenListRequest is used to request a list of tokens 7605 type ACLTokenListRequest struct { 7606 GlobalOnly bool 7607 QueryOptions 7608 } 7609 7610 // ACLTokenSpecificRequest is used to query a specific token 7611 type ACLTokenSpecificRequest struct { 7612 AccessorID string 7613 QueryOptions 7614 } 7615 7616 // ACLTokenSetRequest is used to query a set of tokens 7617 type ACLTokenSetRequest struct { 7618 AccessorIDS []string 7619 QueryOptions 7620 } 7621 7622 // ACLTokenListResponse is used for a list request 7623 type ACLTokenListResponse struct { 7624 Tokens []*ACLTokenListStub 7625 QueryMeta 7626 } 7627 7628 // SingleACLTokenResponse is used to return a single token 7629 type SingleACLTokenResponse struct { 7630 Token *ACLToken 7631 QueryMeta 7632 } 7633 7634 // ACLTokenSetResponse is used to return a set of token 7635 type ACLTokenSetResponse struct { 7636 Tokens map[string]*ACLToken // Keyed by Accessor ID 7637 QueryMeta 7638 } 7639 7640 // ResolveACLTokenRequest is used to resolve a specific token 7641 type ResolveACLTokenRequest struct { 7642 SecretID string 7643 QueryOptions 7644 } 7645 7646 // ResolveACLTokenResponse is used to resolve a single 
// token
type ResolveACLTokenResponse struct {
	Token *ACLToken
	QueryMeta
}

// ACLTokenDeleteRequest is used to delete a set of tokens
type ACLTokenDeleteRequest struct {
	// AccessorIDs are the public accessor IDs of the tokens to delete.
	AccessorIDs []string
	WriteRequest
}

// ACLTokenBootstrapRequest is used to bootstrap ACLs
type ACLTokenBootstrapRequest struct {
	Token      *ACLToken // Not client specifiable
	ResetIndex uint64    // Reset index is used to clear the bootstrap token
	WriteRequest
}

// ACLTokenUpsertRequest is used to upsert a set of tokens
type ACLTokenUpsertRequest struct {
	Tokens []*ACLToken
	WriteRequest
}

// ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest
type ACLTokenUpsertResponse struct {
	// Tokens are the stored tokens, as persisted by the upsert.
	Tokens []*ACLToken
	WriteMeta
}