github.com/hspak/nomad@v0.7.2-0.20180309000617-bc4ae22a39a5/nomad/structs/structs.go (about) 1 package structs 2 3 import ( 4 "bytes" 5 "crypto/md5" 6 "crypto/sha1" 7 "crypto/sha256" 8 "crypto/sha512" 9 "encoding/base32" 10 "encoding/hex" 11 "errors" 12 "fmt" 13 "io" 14 "net" 15 "net/url" 16 "os" 17 "path/filepath" 18 "reflect" 19 "regexp" 20 "sort" 21 "strconv" 22 "strings" 23 "time" 24 25 "golang.org/x/crypto/blake2b" 26 27 "github.com/gorhill/cronexpr" 28 "github.com/hashicorp/consul/api" 29 multierror "github.com/hashicorp/go-multierror" 30 "github.com/hashicorp/go-version" 31 "github.com/hashicorp/nomad/acl" 32 "github.com/hashicorp/nomad/helper" 33 "github.com/hashicorp/nomad/helper/args" 34 "github.com/hashicorp/nomad/helper/uuid" 35 "github.com/mitchellh/copystructure" 36 "github.com/ugorji/go/codec" 37 38 hcodec "github.com/hashicorp/go-msgpack/codec" 39 ) 40 41 var ( 42 // validPolicyName is used to validate a policy name 43 validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$") 44 45 // b32 is a lowercase base32 encoding for use in URL friendly service hashes 46 b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567")) 47 ) 48 49 type MessageType uint8 50 51 const ( 52 NodeRegisterRequestType MessageType = iota 53 NodeDeregisterRequestType 54 NodeUpdateStatusRequestType 55 NodeUpdateDrainRequestType 56 JobRegisterRequestType 57 JobDeregisterRequestType 58 EvalUpdateRequestType 59 EvalDeleteRequestType 60 AllocUpdateRequestType 61 AllocClientUpdateRequestType 62 ReconcileJobSummariesRequestType 63 VaultAccessorRegisterRequestType 64 VaultAccessorDegisterRequestType 65 ApplyPlanResultsRequestType 66 DeploymentStatusUpdateRequestType 67 DeploymentPromoteRequestType 68 DeploymentAllocHealthRequestType 69 DeploymentDeleteRequestType 70 JobStabilityRequestType 71 ACLPolicyUpsertRequestType 72 ACLPolicyDeleteRequestType 73 ACLTokenUpsertRequestType 74 ACLTokenDeleteRequestType 75 ACLTokenBootstrapRequestType 76 AutopilotRequestType 77 ) 78 79 const ( 80 // IgnoreUnknownTypeFlag is set along with a MessageType 81 // to indicate that the message type can be safely ignored 82 // if it is not recognized. This is for future proofing, so 83 // that new commands can be added in a way that won't cause 84 // old servers to crash when the FSM attempts to process them. 85 IgnoreUnknownTypeFlag MessageType = 128 86 87 // ApiMajorVersion is returned as part of the Status.Version request. 88 // It should be incremented anytime the APIs are changed in a way 89 // that would break clients for sane client versioning. 90 ApiMajorVersion = 1 91 92 // ApiMinorVersion is returned as part of the Status.Version request. 93 // It should be incremented anytime the APIs are changed to allow 94 // for sane client versioning. Minor changes should be compatible 95 // within the major version. 96 ApiMinorVersion = 1 97 98 ProtocolVersion = "protocol" 99 APIMajorVersion = "api.major" 100 APIMinorVersion = "api.minor" 101 102 GetterModeAny = "any" 103 GetterModeFile = "file" 104 GetterModeDir = "dir" 105 106 // maxPolicyDescriptionLength limits a policy description length 107 maxPolicyDescriptionLength = 256 108 109 // maxTokenNameLength limits a ACL token name length 110 maxTokenNameLength = 64 111 112 // ACLClientToken and ACLManagementToken are the only types of tokens 113 ACLClientToken = "client" 114 ACLManagementToken = "management" 115 116 // DefaultNamespace is the default namespace. 
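	// Requests that omit a namespace fall back to this value; see the
	// RequestNamespace helpers on QueryOptions and WriteRequest below.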
	DefaultNamespace            = "default"
	DefaultNamespaceDescription = "Default shared namespace"

	// JitterFraction is the limit to the amount of jitter we apply
	// to a user specified MaxQueryTime. We divide the specified time by
	// the fraction. So 16 == 6.25% limit of jitter. This jitter is also
	// applied to RPCHoldTimeout.
	JitterFraction = 16
)

// Context defines the scope in which a search for a Nomad object operates, and
// is also used to query the matching index value for this context
type Context string

const (
	Allocs      Context = "allocs"
	Deployments Context = "deployment"
	Evals       Context = "evals"
	Jobs        Context = "jobs"
	Nodes       Context = "nodes"
	Namespaces  Context = "namespaces"
	Quotas      Context = "quotas"
	All         Context = "all"
)

// NamespacedID is a tuple of an ID and a namespace
type NamespacedID struct {
	ID        string
	Namespace string
}

// RPCInfo is used to describe common information about a query
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// Namespace is the target namespace for the query.
	Namespace string

	// If set, wait until query exceeds given index. Must be provided
	// with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
	AllowStale bool

	// If set, used as prefix for resource list searches
	Prefix string

	// AuthToken is the secret portion of the ACL token used for the request
	AuthToken string
}

func (q QueryOptions) RequestRegion() string {
	return q.Region
}

func (q QueryOptions) RequestNamespace() string {
	if q.Namespace == "" {
		return DefaultNamespace
	}
	return q.Namespace
}

// QueryOptions only applies to reads, so always true
func (q QueryOptions) IsRead() bool {
	return true
}

func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}

type WriteRequest struct {
	// The target region for this write
	Region string

	// Namespace is the target namespace for the write.
	Namespace string

	// AuthToken is the secret portion of the ACL token used for the request
	AuthToken string
}

func (w WriteRequest) RequestRegion() string {
	// The target region for this request
	return w.Region
}

func (w WriteRequest) RequestNamespace() string {
	if w.Namespace == "" {
		return DefaultNamespace
	}
	return w.Namespace
}

// WriteRequest only applies to writes, so always false
func (w WriteRequest) IsRead() bool {
	return false
}

func (w WriteRequest) AllowStaleRead() bool {
	return false
}

// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is the time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
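	// A minimal sketch of how a caller might act on this value (meta is an
	// assumed QueryMeta from a stale read, not part of the original source):
	//
	//	if meta.LastContact > 30*time.Second {
	//		// too stale for our purposes; retry without AllowStale
	//	}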
242 LastContact time.Duration 243 244 // Used to indicate if there is a known leader node 245 KnownLeader bool 246 } 247 248 // WriteMeta allows a write response to include potentially 249 // useful metadata about the write 250 type WriteMeta struct { 251 // This is the index associated with the write 252 Index uint64 253 } 254 255 // NodeRegisterRequest is used for Node.Register endpoint 256 // to register a node as being a schedulable entity. 257 type NodeRegisterRequest struct { 258 Node *Node 259 WriteRequest 260 } 261 262 // NodeDeregisterRequest is used for Node.Deregister endpoint 263 // to deregister a node as being a schedulable entity. 264 type NodeDeregisterRequest struct { 265 NodeID string 266 WriteRequest 267 } 268 269 // NodeServerInfo is used to in NodeUpdateResponse to return Nomad server 270 // information used in RPC server lists. 271 type NodeServerInfo struct { 272 // RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to 273 // be contacted at for RPCs. 274 RPCAdvertiseAddr string 275 276 // RpcMajorVersion is the major version number the Nomad Server 277 // supports 278 RPCMajorVersion int32 279 280 // RpcMinorVersion is the minor version number the Nomad Server 281 // supports 282 RPCMinorVersion int32 283 284 // Datacenter is the datacenter that a Nomad server belongs to 285 Datacenter string 286 } 287 288 // NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint 289 // to update the status of a node. 290 type NodeUpdateStatusRequest struct { 291 NodeID string 292 Status string 293 WriteRequest 294 } 295 296 // NodeUpdateDrainRequest is used for updating the drain status 297 type NodeUpdateDrainRequest struct { 298 NodeID string 299 Drain bool 300 WriteRequest 301 } 302 303 // NodeEvaluateRequest is used to re-evaluate the node 304 type NodeEvaluateRequest struct { 305 NodeID string 306 WriteRequest 307 } 308 309 // NodeSpecificRequest is used when we just need to specify a target node 310 type NodeSpecificRequest struct { 311 NodeID string 312 SecretID string 313 QueryOptions 314 } 315 316 // SearchResponse is used to return matches and information about whether 317 // the match list is truncated specific to each type of context. 318 type SearchResponse struct { 319 // Map of context types to ids which match a specified prefix 320 Matches map[Context][]string 321 322 // Truncations indicates whether the matches for a particular context have 323 // been truncated 324 Truncations map[Context]bool 325 326 QueryMeta 327 } 328 329 // SearchRequest is used to parameterize a request, and returns a 330 // list of matches made up of jobs, allocations, evaluations, and/or nodes, 331 // along with whether or not the information returned is truncated. 332 type SearchRequest struct { 333 // Prefix is what ids are matched to. I.e, if the given prefix were 334 // "a", potential matches might be "abcd" or "aabb" 335 Prefix string 336 337 // Context is the type that can be matched against. A context can be a job, 338 // node, evaluation, allocation, or empty (indicated every context should be 339 // matched) 340 Context Context 341 342 QueryOptions 343 } 344 345 // JobRegisterRequest is used for Job.Register endpoint 346 // to register a job as being a schedulable entity. 347 type JobRegisterRequest struct { 348 Job *Job 349 350 // If EnforceIndex is set then the job will only be registered if the passed 351 // JobModifyIndex matches the current Jobs index. If the index is zero, the 352 // register only occurs if the job is new. 
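	// An illustrative check-and-set registration, assuming job is an
	// already-built *Job (a sketch, not taken from the original source):
	//
	//	req := &JobRegisterRequest{
	//		Job:            job,
	//		EnforceIndex:   true,
	//		JobModifyIndex: 0, // only registers if no job with this ID exists yet
	//	}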
353 EnforceIndex bool 354 JobModifyIndex uint64 355 356 // PolicyOverride is set when the user is attempting to override any policies 357 PolicyOverride bool 358 359 WriteRequest 360 } 361 362 // JobDeregisterRequest is used for Job.Deregister endpoint 363 // to deregister a job as being a schedulable entity. 364 type JobDeregisterRequest struct { 365 JobID string 366 367 // Purge controls whether the deregister purges the job from the system or 368 // whether the job is just marked as stopped and will be removed by the 369 // garbage collector 370 Purge bool 371 372 WriteRequest 373 } 374 375 // JobEvaluateRequest is used when we just need to re-evaluate a target job 376 type JobEvaluateRequest struct { 377 JobID string 378 WriteRequest 379 } 380 381 // JobSpecificRequest is used when we just need to specify a target job 382 type JobSpecificRequest struct { 383 JobID string 384 AllAllocs bool 385 QueryOptions 386 } 387 388 // JobListRequest is used to parameterize a list request 389 type JobListRequest struct { 390 QueryOptions 391 } 392 393 // JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run 394 // evaluation of the Job. 395 type JobPlanRequest struct { 396 Job *Job 397 Diff bool // Toggles an annotated diff 398 // PolicyOverride is set when the user is attempting to override any policies 399 PolicyOverride bool 400 WriteRequest 401 } 402 403 // JobSummaryRequest is used when we just need to get a specific job summary 404 type JobSummaryRequest struct { 405 JobID string 406 QueryOptions 407 } 408 409 // JobDispatchRequest is used to dispatch a job based on a parameterized job 410 type JobDispatchRequest struct { 411 JobID string 412 Payload []byte 413 Meta map[string]string 414 WriteRequest 415 } 416 417 // JobValidateRequest is used to validate a job 418 type JobValidateRequest struct { 419 Job *Job 420 WriteRequest 421 } 422 423 // JobRevertRequest is used to revert a job to a prior version. 424 type JobRevertRequest struct { 425 // JobID is the ID of the job being reverted 426 JobID string 427 428 // JobVersion the version to revert to. 429 JobVersion uint64 430 431 // EnforcePriorVersion if set will enforce that the job is at the given 432 // version before reverting. 433 EnforcePriorVersion *uint64 434 435 WriteRequest 436 } 437 438 // JobStabilityRequest is used to marked a job as stable. 439 type JobStabilityRequest struct { 440 // Job to set the stability on 441 JobID string 442 JobVersion uint64 443 444 // Set the stability 445 Stable bool 446 WriteRequest 447 } 448 449 // JobStabilityResponse is the response when marking a job as stable. 450 type JobStabilityResponse struct { 451 WriteMeta 452 } 453 454 // NodeListRequest is used to parameterize a list request 455 type NodeListRequest struct { 456 QueryOptions 457 } 458 459 // EvalUpdateRequest is used for upserting evaluations. 460 type EvalUpdateRequest struct { 461 Evals []*Evaluation 462 EvalToken string 463 WriteRequest 464 } 465 466 // EvalDeleteRequest is used for deleting an evaluation. 
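// It carries batches of evaluation and allocation IDs; batching lets callers
// (typically the garbage collection core jobs) reap both in one request.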
467 type EvalDeleteRequest struct { 468 Evals []string 469 Allocs []string 470 WriteRequest 471 } 472 473 // EvalSpecificRequest is used when we just need to specify a target evaluation 474 type EvalSpecificRequest struct { 475 EvalID string 476 QueryOptions 477 } 478 479 // EvalAckRequest is used to Ack/Nack a specific evaluation 480 type EvalAckRequest struct { 481 EvalID string 482 Token string 483 WriteRequest 484 } 485 486 // EvalDequeueRequest is used when we want to dequeue an evaluation 487 type EvalDequeueRequest struct { 488 Schedulers []string 489 Timeout time.Duration 490 SchedulerVersion uint16 491 WriteRequest 492 } 493 494 // EvalListRequest is used to list the evaluations 495 type EvalListRequest struct { 496 QueryOptions 497 } 498 499 // PlanRequest is used to submit an allocation plan to the leader 500 type PlanRequest struct { 501 Plan *Plan 502 WriteRequest 503 } 504 505 // ApplyPlanResultsRequest is used by the planner to apply a Raft transaction 506 // committing the result of a plan. 507 type ApplyPlanResultsRequest struct { 508 // AllocUpdateRequest holds the allocation updates to be made by the 509 // scheduler. 510 AllocUpdateRequest 511 512 // Deployment is the deployment created or updated as a result of a 513 // scheduling event. 514 Deployment *Deployment 515 516 // DeploymentUpdates is a set of status updates to apply to the given 517 // deployments. This allows the scheduler to cancel any unneeded deployment 518 // because the job is stopped or the update block is removed. 519 DeploymentUpdates []*DeploymentStatusUpdate 520 521 // EvalID is the eval ID of the plan being applied. The modify index of the 522 // evaluation is updated as part of applying the plan to ensure that subsequent 523 // scheduling events for the same job will wait for the index that last produced 524 // state changes. This is necessary for blocked evaluations since they can be 525 // processed many times, potentially making state updates, without the state of 526 // the evaluation itself being updated. 527 EvalID string 528 } 529 530 // AllocUpdateRequest is used to submit changes to allocations, either 531 // to cause evictions or to assign new allocaitons. Both can be done 532 // within a single transaction 533 type AllocUpdateRequest struct { 534 // Alloc is the list of new allocations to assign 535 Alloc []*Allocation 536 537 // Evals is the list of new evaluations to create 538 // Evals are valid only when used in the Raft RPC 539 Evals []*Evaluation 540 541 // Job is the shared parent job of the allocations. 542 // It is pulled out since it is common to reduce payload size. 543 Job *Job 544 545 WriteRequest 546 } 547 548 // AllocListRequest is used to request a list of allocations 549 type AllocListRequest struct { 550 QueryOptions 551 } 552 553 // AllocSpecificRequest is used to query a specific allocation 554 type AllocSpecificRequest struct { 555 AllocID string 556 QueryOptions 557 } 558 559 // AllocsGetRequest is used to query a set of allocations 560 type AllocsGetRequest struct { 561 AllocIDs []string 562 QueryOptions 563 } 564 565 // PeriodicForceReqeuest is used to force a specific periodic job. 
566 type PeriodicForceRequest struct { 567 JobID string 568 WriteRequest 569 } 570 571 // ServerMembersResponse has the list of servers in a cluster 572 type ServerMembersResponse struct { 573 ServerName string 574 ServerRegion string 575 ServerDC string 576 Members []*ServerMember 577 } 578 579 // ServerMember holds information about a Nomad server agent in a cluster 580 type ServerMember struct { 581 Name string 582 Addr net.IP 583 Port uint16 584 Tags map[string]string 585 Status string 586 ProtocolMin uint8 587 ProtocolMax uint8 588 ProtocolCur uint8 589 DelegateMin uint8 590 DelegateMax uint8 591 DelegateCur uint8 592 } 593 594 // DeriveVaultTokenRequest is used to request wrapped Vault tokens for the 595 // following tasks in the given allocation 596 type DeriveVaultTokenRequest struct { 597 NodeID string 598 SecretID string 599 AllocID string 600 Tasks []string 601 QueryOptions 602 } 603 604 // VaultAccessorsRequest is used to operate on a set of Vault accessors 605 type VaultAccessorsRequest struct { 606 Accessors []*VaultAccessor 607 } 608 609 // VaultAccessor is a reference to a created Vault token on behalf of 610 // an allocation's task. 611 type VaultAccessor struct { 612 AllocID string 613 Task string 614 NodeID string 615 Accessor string 616 CreationTTL int 617 618 // Raft Indexes 619 CreateIndex uint64 620 } 621 622 // DeriveVaultTokenResponse returns the wrapped tokens for each requested task 623 type DeriveVaultTokenResponse struct { 624 // Tasks is a mapping between the task name and the wrapped token 625 Tasks map[string]string 626 627 // Error stores any error that occurred. Errors are stored here so we can 628 // communicate whether it is retriable 629 Error *RecoverableError 630 631 QueryMeta 632 } 633 634 // GenericRequest is used to request where no 635 // specific information is needed. 636 type GenericRequest struct { 637 QueryOptions 638 } 639 640 // DeploymentListRequest is used to list the deployments 641 type DeploymentListRequest struct { 642 QueryOptions 643 } 644 645 // DeploymentDeleteRequest is used for deleting deployments. 646 type DeploymentDeleteRequest struct { 647 Deployments []string 648 WriteRequest 649 } 650 651 // DeploymentStatusUpdateRequest is used to update the status of a deployment as 652 // well as optionally creating an evaluation atomically. 653 type DeploymentStatusUpdateRequest struct { 654 // Eval, if set, is used to create an evaluation at the same time as 655 // updating the status of a deployment. 656 Eval *Evaluation 657 658 // DeploymentUpdate is a status update to apply to the given 659 // deployment. 660 DeploymentUpdate *DeploymentStatusUpdate 661 662 // Job is used to optionally upsert a job. This is used when setting the 663 // allocation health results in a deployment failure and the deployment 664 // auto-reverts to the latest stable job. 665 Job *Job 666 } 667 668 // DeploymentAllocHealthRequest is used to set the health of a set of 669 // allocations as part of a deployment. 670 type DeploymentAllocHealthRequest struct { 671 DeploymentID string 672 673 // Marks these allocations as healthy, allow further allocations 674 // to be rolled. 
675 HealthyAllocationIDs []string 676 677 // Any unhealthy allocations fail the deployment 678 UnhealthyAllocationIDs []string 679 680 WriteRequest 681 } 682 683 // ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft 684 type ApplyDeploymentAllocHealthRequest struct { 685 DeploymentAllocHealthRequest 686 687 // An optional field to update the status of a deployment 688 DeploymentUpdate *DeploymentStatusUpdate 689 690 // Job is used to optionally upsert a job. This is used when setting the 691 // allocation health results in a deployment failure and the deployment 692 // auto-reverts to the latest stable job. 693 Job *Job 694 695 // An optional evaluation to create after promoting the canaries 696 Eval *Evaluation 697 } 698 699 // DeploymentPromoteRequest is used to promote task groups in a deployment 700 type DeploymentPromoteRequest struct { 701 DeploymentID string 702 703 // All is to promote all task groups 704 All bool 705 706 // Groups is used to set the promotion status per task group 707 Groups []string 708 709 WriteRequest 710 } 711 712 // ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft 713 type ApplyDeploymentPromoteRequest struct { 714 DeploymentPromoteRequest 715 716 // An optional evaluation to create after promoting the canaries 717 Eval *Evaluation 718 } 719 720 // DeploymentPauseRequest is used to pause a deployment 721 type DeploymentPauseRequest struct { 722 DeploymentID string 723 724 // Pause sets the pause status 725 Pause bool 726 727 WriteRequest 728 } 729 730 // DeploymentSpecificRequest is used to make a request specific to a particular 731 // deployment 732 type DeploymentSpecificRequest struct { 733 DeploymentID string 734 QueryOptions 735 } 736 737 // DeploymentFailRequest is used to fail a particular deployment 738 type DeploymentFailRequest struct { 739 DeploymentID string 740 WriteRequest 741 } 742 743 // SingleDeploymentResponse is used to respond with a single deployment 744 type SingleDeploymentResponse struct { 745 Deployment *Deployment 746 QueryMeta 747 } 748 749 // GenericResponse is used to respond to a request where no 750 // specific response information is needed. 751 type GenericResponse struct { 752 WriteMeta 753 } 754 755 // VersionResponse is used for the Status.Version reseponse 756 type VersionResponse struct { 757 Build string 758 Versions map[string]int 759 QueryMeta 760 } 761 762 // JobRegisterResponse is used to respond to a job registration 763 type JobRegisterResponse struct { 764 EvalID string 765 EvalCreateIndex uint64 766 JobModifyIndex uint64 767 768 // Warnings contains any warnings about the given job. These may include 769 // deprecation warnings. 770 Warnings string 771 772 QueryMeta 773 } 774 775 // JobDeregisterResponse is used to respond to a job deregistration 776 type JobDeregisterResponse struct { 777 EvalID string 778 EvalCreateIndex uint64 779 JobModifyIndex uint64 780 QueryMeta 781 } 782 783 // JobValidateResponse is the response from validate request 784 type JobValidateResponse struct { 785 // DriverConfigValidated indicates whether the agent validated the driver 786 // config 787 DriverConfigValidated bool 788 789 // ValidationErrors is a list of validation errors 790 ValidationErrors []string 791 792 // Error is a string version of any error that may have occurred 793 Error string 794 795 // Warnings contains any warnings about the given job. These may include 796 // deprecation warnings. 
797 Warnings string 798 } 799 800 // NodeUpdateResponse is used to respond to a node update 801 type NodeUpdateResponse struct { 802 HeartbeatTTL time.Duration 803 EvalIDs []string 804 EvalCreateIndex uint64 805 NodeModifyIndex uint64 806 807 // LeaderRPCAddr is the RPC address of the current Raft Leader. If 808 // empty, the current Nomad Server is in the minority of a partition. 809 LeaderRPCAddr string 810 811 // NumNodes is the number of Nomad nodes attached to this quorum of 812 // Nomad Servers at the time of the response. This value can 813 // fluctuate based on the health of the cluster between heartbeats. 814 NumNodes int32 815 816 // Servers is the full list of known Nomad servers in the local 817 // region. 818 Servers []*NodeServerInfo 819 820 QueryMeta 821 } 822 823 // NodeDrainUpdateResponse is used to respond to a node drain update 824 type NodeDrainUpdateResponse struct { 825 EvalIDs []string 826 EvalCreateIndex uint64 827 NodeModifyIndex uint64 828 QueryMeta 829 } 830 831 // NodeAllocsResponse is used to return allocs for a single node 832 type NodeAllocsResponse struct { 833 Allocs []*Allocation 834 QueryMeta 835 } 836 837 // NodeClientAllocsResponse is used to return allocs meta data for a single node 838 type NodeClientAllocsResponse struct { 839 Allocs map[string]uint64 840 841 // MigrateTokens are used when ACLs are enabled to allow cross node, 842 // authenticated access to sticky volumes 843 MigrateTokens map[string]string 844 845 QueryMeta 846 } 847 848 // SingleNodeResponse is used to return a single node 849 type SingleNodeResponse struct { 850 Node *Node 851 QueryMeta 852 } 853 854 // NodeListResponse is used for a list request 855 type NodeListResponse struct { 856 Nodes []*NodeListStub 857 QueryMeta 858 } 859 860 // SingleJobResponse is used to return a single job 861 type SingleJobResponse struct { 862 Job *Job 863 QueryMeta 864 } 865 866 // JobSummaryResponse is used to return a single job summary 867 type JobSummaryResponse struct { 868 JobSummary *JobSummary 869 QueryMeta 870 } 871 872 type JobDispatchResponse struct { 873 DispatchedJobID string 874 EvalID string 875 EvalCreateIndex uint64 876 JobCreateIndex uint64 877 WriteMeta 878 } 879 880 // JobListResponse is used for a list request 881 type JobListResponse struct { 882 Jobs []*JobListStub 883 QueryMeta 884 } 885 886 // JobVersionsRequest is used to get a jobs versions 887 type JobVersionsRequest struct { 888 JobID string 889 Diffs bool 890 QueryOptions 891 } 892 893 // JobVersionsResponse is used for a job get versions request 894 type JobVersionsResponse struct { 895 Versions []*Job 896 Diffs []*JobDiff 897 QueryMeta 898 } 899 900 // JobPlanResponse is used to respond to a job plan request 901 type JobPlanResponse struct { 902 // Annotations stores annotations explaining decisions the scheduler made. 903 Annotations *PlanAnnotations 904 905 // FailedTGAllocs is the placement failures per task group. 906 FailedTGAllocs map[string]*AllocMetric 907 908 // JobModifyIndex is the modification index of the job. The value can be 909 // used when running `nomad run` to ensure that the Job wasn’t modified 910 // since the last plan. If the job is being created, the value is zero. 911 JobModifyIndex uint64 912 913 // CreatedEvals is the set of evaluations created by the scheduler. The 914 // reasons for this can be rolling-updates or blocked evals. 
915 CreatedEvals []*Evaluation 916 917 // Diff contains the diff of the job and annotations on whether the change 918 // causes an in-place update or create/destroy 919 Diff *JobDiff 920 921 // NextPeriodicLaunch is the time duration till the job would be launched if 922 // submitted. 923 NextPeriodicLaunch time.Time 924 925 // Warnings contains any warnings about the given job. These may include 926 // deprecation warnings. 927 Warnings string 928 929 WriteMeta 930 } 931 932 // SingleAllocResponse is used to return a single allocation 933 type SingleAllocResponse struct { 934 Alloc *Allocation 935 QueryMeta 936 } 937 938 // AllocsGetResponse is used to return a set of allocations 939 type AllocsGetResponse struct { 940 Allocs []*Allocation 941 QueryMeta 942 } 943 944 // JobAllocationsResponse is used to return the allocations for a job 945 type JobAllocationsResponse struct { 946 Allocations []*AllocListStub 947 QueryMeta 948 } 949 950 // JobEvaluationsResponse is used to return the evaluations for a job 951 type JobEvaluationsResponse struct { 952 Evaluations []*Evaluation 953 QueryMeta 954 } 955 956 // SingleEvalResponse is used to return a single evaluation 957 type SingleEvalResponse struct { 958 Eval *Evaluation 959 QueryMeta 960 } 961 962 // EvalDequeueResponse is used to return from a dequeue 963 type EvalDequeueResponse struct { 964 Eval *Evaluation 965 Token string 966 967 // WaitIndex is the Raft index the worker should wait until invoking the 968 // scheduler. 969 WaitIndex uint64 970 971 QueryMeta 972 } 973 974 // GetWaitIndex is used to retrieve the Raft index in which state should be at 975 // or beyond before invoking the scheduler. 976 func (e *EvalDequeueResponse) GetWaitIndex() uint64 { 977 // Prefer the wait index sent. This will be populated on all responses from 978 // 0.7.0 and above 979 if e.WaitIndex != 0 { 980 return e.WaitIndex 981 } else if e.Eval != nil { 982 return e.Eval.ModifyIndex 983 } 984 985 // This should never happen 986 return 1 987 } 988 989 // PlanResponse is used to return from a PlanRequest 990 type PlanResponse struct { 991 Result *PlanResult 992 WriteMeta 993 } 994 995 // AllocListResponse is used for a list request 996 type AllocListResponse struct { 997 Allocations []*AllocListStub 998 QueryMeta 999 } 1000 1001 // DeploymentListResponse is used for a list request 1002 type DeploymentListResponse struct { 1003 Deployments []*Deployment 1004 QueryMeta 1005 } 1006 1007 // EvalListResponse is used for a list request 1008 type EvalListResponse struct { 1009 Evaluations []*Evaluation 1010 QueryMeta 1011 } 1012 1013 // EvalAllocationsResponse is used to return the allocations for an evaluation 1014 type EvalAllocationsResponse struct { 1015 Allocations []*AllocListStub 1016 QueryMeta 1017 } 1018 1019 // PeriodicForceResponse is used to respond to a periodic job force launch 1020 type PeriodicForceResponse struct { 1021 EvalID string 1022 EvalCreateIndex uint64 1023 WriteMeta 1024 } 1025 1026 // DeploymentUpdateResponse is used to respond to a deployment change. The 1027 // response will include the modify index of the deployment as well as details 1028 // of any triggered evaluation. 1029 type DeploymentUpdateResponse struct { 1030 EvalID string 1031 EvalCreateIndex uint64 1032 DeploymentModifyIndex uint64 1033 1034 // RevertedJobVersion is the version the job was reverted to. 
If unset, the 1035 // job wasn't reverted 1036 RevertedJobVersion *uint64 1037 1038 WriteMeta 1039 } 1040 1041 // NodeConnQueryResponse is used to respond to a query of whether a server has 1042 // a connection to a specific Node 1043 type NodeConnQueryResponse struct { 1044 // Connected indicates whether a connection to the Client exists 1045 Connected bool 1046 1047 // Established marks the time at which the connection was established 1048 Established time.Time 1049 1050 QueryMeta 1051 } 1052 1053 const ( 1054 NodeStatusInit = "initializing" 1055 NodeStatusReady = "ready" 1056 NodeStatusDown = "down" 1057 ) 1058 1059 // ShouldDrainNode checks if a given node status should trigger an 1060 // evaluation. Some states don't require any further action. 1061 func ShouldDrainNode(status string) bool { 1062 switch status { 1063 case NodeStatusInit, NodeStatusReady: 1064 return false 1065 case NodeStatusDown: 1066 return true 1067 default: 1068 panic(fmt.Sprintf("unhandled node status %s", status)) 1069 } 1070 } 1071 1072 // ValidNodeStatus is used to check if a node status is valid 1073 func ValidNodeStatus(status string) bool { 1074 switch status { 1075 case NodeStatusInit, NodeStatusReady, NodeStatusDown: 1076 return true 1077 default: 1078 return false 1079 } 1080 } 1081 1082 // Node is a representation of a schedulable client node 1083 type Node struct { 1084 // ID is a unique identifier for the node. It can be constructed 1085 // by doing a concatenation of the Name and Datacenter as a simple 1086 // approach. Alternatively a UUID may be used. 1087 ID string 1088 1089 // SecretID is an ID that is only known by the Node and the set of Servers. 1090 // It is not accessible via the API and is used to authenticate nodes 1091 // conducting privileged activities. 1092 SecretID string 1093 1094 // Datacenter for this node 1095 Datacenter string 1096 1097 // Node name 1098 Name string 1099 1100 // HTTPAddr is the address on which the Nomad client is listening for http 1101 // requests 1102 HTTPAddr string 1103 1104 // TLSEnabled indicates if the Agent has TLS enabled for the HTTP API 1105 TLSEnabled bool 1106 1107 // Attributes is an arbitrary set of key/value 1108 // data that can be used for constraints. Examples 1109 // include "kernel.name=linux", "arch=386", "driver.docker=1", 1110 // "docker.runtime=1.8.3" 1111 Attributes map[string]string 1112 1113 // Resources is the available resources on the client. 1114 // For example 'cpu=2' 'memory=2048' 1115 Resources *Resources 1116 1117 // Reserved is the set of resources that are reserved, 1118 // and should be subtracted from the total resources for 1119 // the purposes of scheduling. This may be provide certain 1120 // high-watermark tolerances or because of external schedulers 1121 // consuming resources. 1122 Reserved *Resources 1123 1124 // Links are used to 'link' this client to external 1125 // systems. For example 'consul=foo.dc1' 'aws=i-83212' 1126 // 'ami=ami-123' 1127 Links map[string]string 1128 1129 // Meta is used to associate arbitrary metadata with this 1130 // client. This is opaque to Nomad. 1131 Meta map[string]string 1132 1133 // NodeClass is an opaque identifier used to group nodes 1134 // together for the purpose of determining scheduling pressure. 1135 NodeClass string 1136 1137 // ComputedClass is a unique id that identifies nodes with a common set of 1138 // attributes and capabilities. 1139 ComputedClass string 1140 1141 // Drain is controlled by the servers, and not the client. 
1142 // If true, no jobs will be scheduled to this node, and existing 1143 // allocations will be drained. 1144 Drain bool 1145 1146 // Status of this node 1147 Status string 1148 1149 // StatusDescription is meant to provide more human useful information 1150 StatusDescription string 1151 1152 // StatusUpdatedAt is the time stamp at which the state of the node was 1153 // updated 1154 StatusUpdatedAt int64 1155 1156 // Raft Indexes 1157 CreateIndex uint64 1158 ModifyIndex uint64 1159 } 1160 1161 // Ready returns if the node is ready for running allocations 1162 func (n *Node) Ready() bool { 1163 return n.Status == NodeStatusReady && !n.Drain 1164 } 1165 1166 func (n *Node) Copy() *Node { 1167 if n == nil { 1168 return nil 1169 } 1170 nn := new(Node) 1171 *nn = *n 1172 nn.Attributes = helper.CopyMapStringString(nn.Attributes) 1173 nn.Resources = nn.Resources.Copy() 1174 nn.Reserved = nn.Reserved.Copy() 1175 nn.Links = helper.CopyMapStringString(nn.Links) 1176 nn.Meta = helper.CopyMapStringString(nn.Meta) 1177 return nn 1178 } 1179 1180 // TerminalStatus returns if the current status is terminal and 1181 // will no longer transition. 1182 func (n *Node) TerminalStatus() bool { 1183 switch n.Status { 1184 case NodeStatusDown: 1185 return true 1186 default: 1187 return false 1188 } 1189 } 1190 1191 // Stub returns a summarized version of the node 1192 func (n *Node) Stub() *NodeListStub { 1193 1194 addr, _, _ := net.SplitHostPort(n.HTTPAddr) 1195 1196 return &NodeListStub{ 1197 Address: addr, 1198 ID: n.ID, 1199 Datacenter: n.Datacenter, 1200 Name: n.Name, 1201 NodeClass: n.NodeClass, 1202 Version: n.Attributes["nomad.version"], 1203 Drain: n.Drain, 1204 Status: n.Status, 1205 StatusDescription: n.StatusDescription, 1206 CreateIndex: n.CreateIndex, 1207 ModifyIndex: n.ModifyIndex, 1208 } 1209 } 1210 1211 // NodeListStub is used to return a subset of job information 1212 // for the job list 1213 type NodeListStub struct { 1214 Address string 1215 ID string 1216 Datacenter string 1217 Name string 1218 NodeClass string 1219 Version string 1220 Drain bool 1221 Status string 1222 StatusDescription string 1223 CreateIndex uint64 1224 ModifyIndex uint64 1225 } 1226 1227 // Networks defined for a task on the Resources struct. 1228 type Networks []*NetworkResource 1229 1230 // Port assignment and IP for the given label or empty values. 1231 func (ns Networks) Port(label string) (string, int) { 1232 for _, n := range ns { 1233 for _, p := range n.ReservedPorts { 1234 if p.Label == label { 1235 return n.IP, p.Value 1236 } 1237 } 1238 for _, p := range n.DynamicPorts { 1239 if p.Label == label { 1240 return n.IP, p.Value 1241 } 1242 } 1243 } 1244 return "", 0 1245 } 1246 1247 // Resources is used to define the resources available 1248 // on a client 1249 type Resources struct { 1250 CPU int 1251 MemoryMB int 1252 DiskMB int 1253 IOPS int 1254 Networks Networks 1255 } 1256 1257 const ( 1258 BytesInMegabyte = 1024 * 1024 1259 ) 1260 1261 // DefaultResources is a small resources object that contains the 1262 // default resources requests that we will provide to an object. 1263 // --- THIS FUNCTION IS REPLICATED IN api/resources.go and should 1264 // be kept in sync. 1265 func DefaultResources() *Resources { 1266 return &Resources{ 1267 CPU: 100, 1268 MemoryMB: 300, 1269 IOPS: 0, 1270 } 1271 } 1272 1273 // MinResources is a small resources object that contains the 1274 // absolute minimum resources that we will provide to an object. 
1275 // This should not be confused with the defaults which are 1276 // provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN 1277 // api/resources.go and should be kept in sync. 1278 func MinResources() *Resources { 1279 return &Resources{ 1280 CPU: 20, 1281 MemoryMB: 10, 1282 IOPS: 0, 1283 } 1284 } 1285 1286 // DiskInBytes returns the amount of disk resources in bytes. 1287 func (r *Resources) DiskInBytes() int64 { 1288 return int64(r.DiskMB * BytesInMegabyte) 1289 } 1290 1291 // Merge merges this resource with another resource. 1292 func (r *Resources) Merge(other *Resources) { 1293 if other.CPU != 0 { 1294 r.CPU = other.CPU 1295 } 1296 if other.MemoryMB != 0 { 1297 r.MemoryMB = other.MemoryMB 1298 } 1299 if other.DiskMB != 0 { 1300 r.DiskMB = other.DiskMB 1301 } 1302 if other.IOPS != 0 { 1303 r.IOPS = other.IOPS 1304 } 1305 if len(other.Networks) != 0 { 1306 r.Networks = other.Networks 1307 } 1308 } 1309 1310 func (r *Resources) Canonicalize() { 1311 // Ensure that an empty and nil slices are treated the same to avoid scheduling 1312 // problems since we use reflect DeepEquals. 1313 if len(r.Networks) == 0 { 1314 r.Networks = nil 1315 } 1316 1317 for _, n := range r.Networks { 1318 n.Canonicalize() 1319 } 1320 } 1321 1322 // MeetsMinResources returns an error if the resources specified are less than 1323 // the minimum allowed. 1324 // This is based on the minimums defined in the Resources type 1325 func (r *Resources) MeetsMinResources() error { 1326 var mErr multierror.Error 1327 minResources := MinResources() 1328 if r.CPU < minResources.CPU { 1329 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU)) 1330 } 1331 if r.MemoryMB < minResources.MemoryMB { 1332 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB)) 1333 } 1334 if r.IOPS < minResources.IOPS { 1335 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is %d; got %d", minResources.IOPS, r.IOPS)) 1336 } 1337 for i, n := range r.Networks { 1338 if err := n.MeetsMinResources(); err != nil { 1339 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 1340 } 1341 } 1342 1343 return mErr.ErrorOrNil() 1344 } 1345 1346 // Copy returns a deep copy of the resources 1347 func (r *Resources) Copy() *Resources { 1348 if r == nil { 1349 return nil 1350 } 1351 newR := new(Resources) 1352 *newR = *r 1353 if r.Networks != nil { 1354 n := len(r.Networks) 1355 newR.Networks = make([]*NetworkResource, n) 1356 for i := 0; i < n; i++ { 1357 newR.Networks[i] = r.Networks[i].Copy() 1358 } 1359 } 1360 return newR 1361 } 1362 1363 // NetIndex finds the matching net index using device name 1364 func (r *Resources) NetIndex(n *NetworkResource) int { 1365 for idx, net := range r.Networks { 1366 if net.Device == n.Device { 1367 return idx 1368 } 1369 } 1370 return -1 1371 } 1372 1373 // Superset checks if one set of resources is a superset 1374 // of another. This ignores network resources, and the NetworkIndex 1375 // should be used for that. 
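// An illustrative use of the dimension returned on failure (a sketch, not
// part of the original source):
//
//	node := &Resources{CPU: 1000, MemoryMB: 2048, DiskMB: 10000}
//	ask := &Resources{CPU: 250, MemoryMB: 256}
//	if ok, dim := node.Superset(ask); !ok {
//		fmt.Printf("resources exhausted on dimension %q\n", dim)
//	}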
1376 func (r *Resources) Superset(other *Resources) (bool, string) { 1377 if r.CPU < other.CPU { 1378 return false, "cpu" 1379 } 1380 if r.MemoryMB < other.MemoryMB { 1381 return false, "memory" 1382 } 1383 if r.DiskMB < other.DiskMB { 1384 return false, "disk" 1385 } 1386 if r.IOPS < other.IOPS { 1387 return false, "iops" 1388 } 1389 return true, "" 1390 } 1391 1392 // Add adds the resources of the delta to this, potentially 1393 // returning an error if not possible. 1394 func (r *Resources) Add(delta *Resources) error { 1395 if delta == nil { 1396 return nil 1397 } 1398 r.CPU += delta.CPU 1399 r.MemoryMB += delta.MemoryMB 1400 r.DiskMB += delta.DiskMB 1401 r.IOPS += delta.IOPS 1402 1403 for _, n := range delta.Networks { 1404 // Find the matching interface by IP or CIDR 1405 idx := r.NetIndex(n) 1406 if idx == -1 { 1407 r.Networks = append(r.Networks, n.Copy()) 1408 } else { 1409 r.Networks[idx].Add(n) 1410 } 1411 } 1412 return nil 1413 } 1414 1415 func (r *Resources) GoString() string { 1416 return fmt.Sprintf("*%#v", *r) 1417 } 1418 1419 type Port struct { 1420 Label string 1421 Value int 1422 } 1423 1424 // NetworkResource is used to represent available network 1425 // resources 1426 type NetworkResource struct { 1427 Device string // Name of the device 1428 CIDR string // CIDR block of addresses 1429 IP string // Host IP address 1430 MBits int // Throughput 1431 ReservedPorts []Port // Host Reserved ports 1432 DynamicPorts []Port // Host Dynamically assigned ports 1433 } 1434 1435 func (nr *NetworkResource) Equals(other *NetworkResource) bool { 1436 if nr.Device != other.Device { 1437 return false 1438 } 1439 1440 if nr.CIDR != other.CIDR { 1441 return false 1442 } 1443 1444 if nr.IP != other.IP { 1445 return false 1446 } 1447 1448 if nr.MBits != other.MBits { 1449 return false 1450 } 1451 1452 if len(nr.ReservedPorts) != len(other.ReservedPorts) { 1453 return false 1454 } 1455 1456 for i, port := range nr.ReservedPorts { 1457 if len(other.ReservedPorts) <= i { 1458 return false 1459 } 1460 if port != other.ReservedPorts[i] { 1461 return false 1462 } 1463 } 1464 1465 if len(nr.DynamicPorts) != len(other.DynamicPorts) { 1466 return false 1467 } 1468 for i, port := range nr.DynamicPorts { 1469 if len(other.DynamicPorts) <= i { 1470 return false 1471 } 1472 if port != other.DynamicPorts[i] { 1473 return false 1474 } 1475 } 1476 return true 1477 } 1478 1479 func (n *NetworkResource) Canonicalize() { 1480 // Ensure that an empty and nil slices are treated the same to avoid scheduling 1481 // problems since we use reflect DeepEquals. 1482 if len(n.ReservedPorts) == 0 { 1483 n.ReservedPorts = nil 1484 } 1485 if len(n.DynamicPorts) == 0 { 1486 n.DynamicPorts = nil 1487 } 1488 } 1489 1490 // MeetsMinResources returns an error if the resources specified are less than 1491 // the minimum allowed. 
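// For a network resource the only enforced minimum is throughput; an
// illustrative failing case (a sketch, not part of the original source):
//
//	n := &NetworkResource{Device: "eth0", MBits: 0}
//	err := n.MeetsMinResources() // non-nil: minimum MBits value is 1; got 0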
func (n *NetworkResource) MeetsMinResources() error {
	var mErr multierror.Error
	if n.MBits < 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits))
	}
	return mErr.ErrorOrNil()
}

// Copy returns a deep copy of the network resource
func (n *NetworkResource) Copy() *NetworkResource {
	if n == nil {
		return nil
	}
	newR := new(NetworkResource)
	*newR = *n
	if n.ReservedPorts != nil {
		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
		copy(newR.ReservedPorts, n.ReservedPorts)
	}
	if n.DynamicPorts != nil {
		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
		copy(newR.DynamicPorts, n.DynamicPorts)
	}
	return newR
}

// Add adds the resources of the delta to this.
func (n *NetworkResource) Add(delta *NetworkResource) {
	if len(delta.ReservedPorts) > 0 {
		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
	}
	n.MBits += delta.MBits
	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
}

func (n *NetworkResource) GoString() string {
	return fmt.Sprintf("*%#v", *n)
}

// PortLabels returns a map of port labels to their assigned host ports.
func (n *NetworkResource) PortLabels() map[string]int {
	num := len(n.ReservedPorts) + len(n.DynamicPorts)
	labelValues := make(map[string]int, num)
	for _, port := range n.ReservedPorts {
		labelValues[port.Label] = port.Value
	}
	for _, port := range n.DynamicPorts {
		labelValues[port.Label] = port.Value
	}
	return labelValues
}

const (
	// JobTypeCore is reserved for internal system tasks and is
	// always handled by the CoreScheduler.
	JobTypeCore    = "_core"
	JobTypeService = "service"
	JobTypeBatch   = "batch"
	JobTypeSystem  = "system"
)

const (
	JobStatusPending = "pending" // Pending means the job is waiting on scheduling
	JobStatusRunning = "running" // Running means the job has non-terminal allocations
	JobStatusDead    = "dead"    // Dead means all evaluations and allocations are terminal
)

const (
	// JobMinPriority is the minimum allowed priority
	JobMinPriority = 1

	// JobDefaultPriority is the default priority if not
	// specified.
	JobDefaultPriority = 50

	// JobMaxPriority is the maximum allowed priority
	JobMaxPriority = 100

	// Ensure CoreJobPriority is higher than any user
	// specified job so that it gets priority. This is important
	// for the system to remain healthy.
	CoreJobPriority = JobMaxPriority * 2

	// JobTrackedVersions is the number of historic job versions that are
	// kept.
	JobTrackedVersions = 6
)

// Job is the scope of a scheduling request to Nomad. It is the largest
// scoped object, and is a named collection of task groups. Each task group
// is further composed of tasks. A task group (TG) is, however, the unit of
// scheduling.
type Job struct {
	// Stop marks whether the user has stopped the job. A stopped job will
	// have all created allocations stopped and acts as a way to stop a job
	// without purging it from the system. This allows existing allocs to be
	// queried and the job to be inspected as it is being killed.
1590 Stop bool 1591 1592 // Region is the Nomad region that handles scheduling this job 1593 Region string 1594 1595 // Namespace is the namespace the job is submitted into. 1596 Namespace string 1597 1598 // ID is a unique identifier for the job per region. It can be 1599 // specified hierarchically like LineOfBiz/OrgName/Team/Project 1600 ID string 1601 1602 // ParentID is the unique identifier of the job that spawned this job. 1603 ParentID string 1604 1605 // Name is the logical name of the job used to refer to it. This is unique 1606 // per region, but not unique globally. 1607 Name string 1608 1609 // Type is used to control various behaviors about the job. Most jobs 1610 // are service jobs, meaning they are expected to be long lived. 1611 // Some jobs are batch oriented meaning they run and then terminate. 1612 // This can be extended in the future to support custom schedulers. 1613 Type string 1614 1615 // Priority is used to control scheduling importance and if this job 1616 // can preempt other jobs. 1617 Priority int 1618 1619 // AllAtOnce is used to control if incremental scheduling of task groups 1620 // is allowed or if we must do a gang scheduling of the entire job. This 1621 // can slow down larger jobs if resources are not available. 1622 AllAtOnce bool 1623 1624 // Datacenters contains all the datacenters this job is allowed to span 1625 Datacenters []string 1626 1627 // Constraints can be specified at a job level and apply to 1628 // all the task groups and tasks. 1629 Constraints []*Constraint 1630 1631 // TaskGroups are the collections of task groups that this job needs 1632 // to run. Each task group is an atomic unit of scheduling and placement. 1633 TaskGroups []*TaskGroup 1634 1635 // COMPAT: Remove in 0.7.0. Stagger is deprecated in 0.6.0. 1636 Update UpdateStrategy 1637 1638 // Periodic is used to define the interval the job is run at. 1639 Periodic *PeriodicConfig 1640 1641 // ParameterizedJob is used to specify the job as a parameterized job 1642 // for dispatching. 1643 ParameterizedJob *ParameterizedJobConfig 1644 1645 // Payload is the payload supplied when the job was dispatched. 1646 Payload []byte 1647 1648 // Meta is used to associate arbitrary metadata with this 1649 // job. This is opaque to Nomad. 1650 Meta map[string]string 1651 1652 // VaultToken is the Vault token that proves the submitter of the job has 1653 // access to the specified Vault policies. This field is only used to 1654 // transfer the token and is not stored after Job submission. 1655 VaultToken string 1656 1657 // Job status 1658 Status string 1659 1660 // StatusDescription is meant to provide more human useful information 1661 StatusDescription string 1662 1663 // Stable marks a job as stable. Stability is only defined on "service" and 1664 // "system" jobs. The stability of a job will be set automatically as part 1665 // of a deployment and can be manually set via APIs. 1666 Stable bool 1667 1668 // Version is a monitonically increasing version number that is incremened 1669 // on each job register. 1670 Version uint64 1671 1672 // SubmitTime is the time at which the job was submitted as a UnixNano in 1673 // UTC 1674 SubmitTime int64 1675 1676 // Raft Indexes 1677 CreateIndex uint64 1678 ModifyIndex uint64 1679 JobModifyIndex uint64 1680 } 1681 1682 // Canonicalize is used to canonicalize fields in the Job. This should be called 1683 // when registering a Job. A set of warnings are returned if the job was changed 1684 // in anyway that the user should be made aware of. 
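// A sketch of the assumed call order during registration (illustrative,
// not taken verbatim from the original source):
//
//	if warn := job.Canonicalize(); warn != nil {
//		// surface warnings to the submitter, then continue
//	}
//	if err := job.Validate(); err != nil {
//		return err
//	}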
1685 func (j *Job) Canonicalize() (warnings error) { 1686 if j == nil { 1687 return nil 1688 } 1689 1690 var mErr multierror.Error 1691 // Ensure that an empty and nil map are treated the same to avoid scheduling 1692 // problems since we use reflect DeepEquals. 1693 if len(j.Meta) == 0 { 1694 j.Meta = nil 1695 } 1696 1697 // Ensure the job is in a namespace. 1698 if j.Namespace == "" { 1699 j.Namespace = DefaultNamespace 1700 } 1701 1702 for _, tg := range j.TaskGroups { 1703 tg.Canonicalize(j) 1704 } 1705 1706 if j.ParameterizedJob != nil { 1707 j.ParameterizedJob.Canonicalize() 1708 } 1709 1710 if j.Periodic != nil { 1711 j.Periodic.Canonicalize() 1712 } 1713 1714 // COMPAT: Remove in 0.7.0 1715 // Rewrite any job that has an update block with pre 0.6.0 syntax. 1716 jobHasOldUpdate := j.Update.Stagger > 0 && j.Update.MaxParallel > 0 1717 if jobHasOldUpdate && j.Type != JobTypeBatch { 1718 // Build an appropriate update block and copy it down to each task group 1719 base := DefaultUpdateStrategy.Copy() 1720 base.MaxParallel = j.Update.MaxParallel 1721 base.MinHealthyTime = j.Update.Stagger 1722 1723 // Add to each task group, modifying as needed 1724 upgraded := false 1725 l := len(j.TaskGroups) 1726 for _, tg := range j.TaskGroups { 1727 // The task group doesn't need upgrading if it has an update block with the new syntax 1728 u := tg.Update 1729 if u != nil && u.Stagger > 0 && u.MaxParallel > 0 && 1730 u.HealthCheck != "" && u.MinHealthyTime > 0 && u.HealthyDeadline > 0 { 1731 continue 1732 } 1733 1734 upgraded = true 1735 1736 // The MaxParallel for the job should be 10% of the total count 1737 // unless there is just one task group then we can infer the old 1738 // max parallel should be the new 1739 tgu := base.Copy() 1740 if l != 1 { 1741 // RoundTo 10% 1742 var percent float64 = float64(tg.Count) * 0.1 1743 tgu.MaxParallel = int(percent + 0.5) 1744 } 1745 1746 // Safety guards 1747 if tgu.MaxParallel == 0 { 1748 tgu.MaxParallel = 1 1749 } else if tgu.MaxParallel > tg.Count { 1750 tgu.MaxParallel = tg.Count 1751 } 1752 1753 tg.Update = tgu 1754 } 1755 1756 if upgraded { 1757 w := "A best effort conversion to new update stanza introduced in v0.6.0 applied. " + 1758 "Please update upgrade stanza before v0.7.0." 1759 multierror.Append(&mErr, fmt.Errorf(w)) 1760 } 1761 } 1762 1763 // Ensure that the batch job doesn't have new style or old style update 1764 // stanza. Unfortunately are scanning here because we have to deprecate over 1765 // a release so we can't check in the task group since that may be new style 1766 // but wouldn't capture the old style and we don't want to have duplicate 1767 // warnings. 1768 if j.Type == JobTypeBatch { 1769 displayWarning := jobHasOldUpdate 1770 j.Update.Stagger = 0 1771 j.Update.MaxParallel = 0 1772 j.Update.HealthCheck = "" 1773 j.Update.MinHealthyTime = 0 1774 j.Update.HealthyDeadline = 0 1775 j.Update.AutoRevert = false 1776 j.Update.Canary = 0 1777 1778 // Remove any update spec from the task groups 1779 for _, tg := range j.TaskGroups { 1780 if tg.Update != nil { 1781 displayWarning = true 1782 tg.Update = nil 1783 } 1784 } 1785 1786 if displayWarning { 1787 w := "Update stanza is disallowed for batch jobs since v0.6.0. " + 1788 "The update block has automatically been removed" 1789 multierror.Append(&mErr, fmt.Errorf(w)) 1790 } 1791 } 1792 1793 return mErr.ErrorOrNil() 1794 } 1795 1796 // Copy returns a deep copy of the Job. It is expected that callers use recover. 1797 // This job can panic if the deep copy failed as it uses reflection. 
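// An illustrative defensive wrapper; safeCopy is a hypothetical helper and
// not part of the original source:
//
//	func safeCopy(j *Job) (c *Job, err error) {
//		defer func() {
//			if r := recover(); r != nil {
//				err = fmt.Errorf("job copy panicked: %v", r)
//			}
//		}()
//		return j.Copy(), nil
//	}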
1798 func (j *Job) Copy() *Job { 1799 if j == nil { 1800 return nil 1801 } 1802 nj := new(Job) 1803 *nj = *j 1804 nj.Datacenters = helper.CopySliceString(nj.Datacenters) 1805 nj.Constraints = CopySliceConstraints(nj.Constraints) 1806 1807 if j.TaskGroups != nil { 1808 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 1809 for i, tg := range nj.TaskGroups { 1810 tgs[i] = tg.Copy() 1811 } 1812 nj.TaskGroups = tgs 1813 } 1814 1815 nj.Periodic = nj.Periodic.Copy() 1816 nj.Meta = helper.CopyMapStringString(nj.Meta) 1817 nj.ParameterizedJob = nj.ParameterizedJob.Copy() 1818 return nj 1819 } 1820 1821 // Validate is used to sanity check a job input 1822 func (j *Job) Validate() error { 1823 var mErr multierror.Error 1824 1825 if j.Region == "" { 1826 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 1827 } 1828 if j.ID == "" { 1829 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 1830 } else if strings.Contains(j.ID, " ") { 1831 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 1832 } 1833 if j.Name == "" { 1834 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 1835 } 1836 if j.Namespace == "" { 1837 mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace")) 1838 } 1839 switch j.Type { 1840 case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem: 1841 case "": 1842 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 1843 default: 1844 mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type)) 1845 } 1846 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 1847 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 1848 } 1849 if len(j.Datacenters) == 0 { 1850 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 1851 } 1852 if len(j.TaskGroups) == 0 { 1853 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 1854 } 1855 for idx, constr := range j.Constraints { 1856 if err := constr.Validate(); err != nil { 1857 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1858 mErr.Errors = append(mErr.Errors, outer) 1859 } 1860 } 1861 1862 // Check for duplicate task groups 1863 taskGroups := make(map[string]int) 1864 for idx, tg := range j.TaskGroups { 1865 if tg.Name == "" { 1866 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 1867 } else if existing, ok := taskGroups[tg.Name]; ok { 1868 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 1869 } else { 1870 taskGroups[tg.Name] = idx 1871 } 1872 1873 if j.Type == "system" && tg.Count > 1 { 1874 mErr.Errors = append(mErr.Errors, 1875 fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler", 1876 tg.Name, tg.Count)) 1877 } 1878 } 1879 1880 // Validate the task group 1881 for _, tg := range j.TaskGroups { 1882 if err := tg.Validate(j); err != nil { 1883 outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err) 1884 mErr.Errors = append(mErr.Errors, outer) 1885 } 1886 } 1887 1888 // Validate periodic is only used with batch jobs. 
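	// Note that a periodic block that is present but disabled does not
	// constrain the job type; only an enabled block is checked here.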
1889 if j.IsPeriodic() && j.Periodic.Enabled { 1890 if j.Type != JobTypeBatch { 1891 mErr.Errors = append(mErr.Errors, 1892 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 1893 } 1894 1895 if err := j.Periodic.Validate(); err != nil { 1896 mErr.Errors = append(mErr.Errors, err) 1897 } 1898 } 1899 1900 if j.IsParameterized() { 1901 if j.Type != JobTypeBatch { 1902 mErr.Errors = append(mErr.Errors, 1903 fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch)) 1904 } 1905 1906 if err := j.ParameterizedJob.Validate(); err != nil { 1907 mErr.Errors = append(mErr.Errors, err) 1908 } 1909 } 1910 1911 return mErr.ErrorOrNil() 1912 } 1913 1914 // Warnings returns a list of warnings that may be from dubious settings or 1915 // deprecation warnings. 1916 func (j *Job) Warnings() error { 1917 var mErr multierror.Error 1918 1919 // Check the groups 1920 for _, tg := range j.TaskGroups { 1921 if err := tg.Warnings(j); err != nil { 1922 outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err) 1923 mErr.Errors = append(mErr.Errors, outer) 1924 } 1925 } 1926 1927 return mErr.ErrorOrNil() 1928 } 1929 1930 // LookupTaskGroup finds a task group by name 1931 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 1932 for _, tg := range j.TaskGroups { 1933 if tg.Name == name { 1934 return tg 1935 } 1936 } 1937 return nil 1938 } 1939 1940 // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined 1941 // meta data for the task. When joining Job, Group and Task Meta, the precedence 1942 // is by deepest scope (Task > Group > Job). 1943 func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string { 1944 group := j.LookupTaskGroup(groupName) 1945 if group == nil { 1946 return nil 1947 } 1948 1949 task := group.LookupTask(taskName) 1950 if task == nil { 1951 return nil 1952 } 1953 1954 meta := helper.CopyMapStringString(task.Meta) 1955 if meta == nil { 1956 meta = make(map[string]string, len(group.Meta)+len(j.Meta)) 1957 } 1958 1959 // Add the group specific meta 1960 for k, v := range group.Meta { 1961 if _, ok := meta[k]; !ok { 1962 meta[k] = v 1963 } 1964 } 1965 1966 // Add the job specific meta 1967 for k, v := range j.Meta { 1968 if _, ok := meta[k]; !ok { 1969 meta[k] = v 1970 } 1971 } 1972 1973 return meta 1974 } 1975 1976 // Stopped returns if a job is stopped. 1977 func (j *Job) Stopped() bool { 1978 return j == nil || j.Stop 1979 } 1980 1981 // HasUpdateStrategy returns if any task group in the job has an update strategy 1982 func (j *Job) HasUpdateStrategy() bool { 1983 for _, tg := range j.TaskGroups { 1984 if tg.Update != nil { 1985 return true 1986 } 1987 } 1988 1989 return false 1990 } 1991 1992 // Stub is used to return a summary of the job 1993 func (j *Job) Stub(summary *JobSummary) *JobListStub { 1994 return &JobListStub{ 1995 ID: j.ID, 1996 ParentID: j.ParentID, 1997 Name: j.Name, 1998 Type: j.Type, 1999 Priority: j.Priority, 2000 Periodic: j.IsPeriodic(), 2001 ParameterizedJob: j.IsParameterized(), 2002 Stop: j.Stop, 2003 Status: j.Status, 2004 StatusDescription: j.StatusDescription, 2005 CreateIndex: j.CreateIndex, 2006 ModifyIndex: j.ModifyIndex, 2007 JobModifyIndex: j.JobModifyIndex, 2008 SubmitTime: j.SubmitTime, 2009 JobSummary: summary, 2010 } 2011 } 2012 2013 // IsPeriodic returns whether a job is periodic. 
2014 func (j *Job) IsPeriodic() bool { 2015 return j.Periodic != nil 2016 } 2017 2018 // IsPeriodicActive returns whether the job is an active periodic job that will 2019 // create child jobs 2020 func (j *Job) IsPeriodicActive() bool { 2021 return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized() 2022 } 2023 2024 // IsParameterized returns whether a job is parameterized job. 2025 func (j *Job) IsParameterized() bool { 2026 return j.ParameterizedJob != nil 2027 } 2028 2029 // VaultPolicies returns the set of Vault policies per task group, per task 2030 func (j *Job) VaultPolicies() map[string]map[string]*Vault { 2031 policies := make(map[string]map[string]*Vault, len(j.TaskGroups)) 2032 2033 for _, tg := range j.TaskGroups { 2034 tgPolicies := make(map[string]*Vault, len(tg.Tasks)) 2035 2036 for _, task := range tg.Tasks { 2037 if task.Vault == nil { 2038 continue 2039 } 2040 2041 tgPolicies[task.Name] = task.Vault 2042 } 2043 2044 if len(tgPolicies) != 0 { 2045 policies[tg.Name] = tgPolicies 2046 } 2047 } 2048 2049 return policies 2050 } 2051 2052 // RequiredSignals returns a mapping of task groups to tasks to their required 2053 // set of signals 2054 func (j *Job) RequiredSignals() map[string]map[string][]string { 2055 signals := make(map[string]map[string][]string) 2056 2057 for _, tg := range j.TaskGroups { 2058 for _, task := range tg.Tasks { 2059 // Use this local one as a set 2060 taskSignals := make(map[string]struct{}) 2061 2062 // Check if the Vault change mode uses signals 2063 if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal { 2064 taskSignals[task.Vault.ChangeSignal] = struct{}{} 2065 } 2066 2067 // If a user has specified a KillSignal, add it to required signals 2068 if task.KillSignal != "" { 2069 taskSignals[task.KillSignal] = struct{}{} 2070 } 2071 2072 // Check if any template change mode uses signals 2073 for _, t := range task.Templates { 2074 if t.ChangeMode != TemplateChangeModeSignal { 2075 continue 2076 } 2077 2078 taskSignals[t.ChangeSignal] = struct{}{} 2079 } 2080 2081 // Flatten and sort the signals 2082 l := len(taskSignals) 2083 if l == 0 { 2084 continue 2085 } 2086 2087 flat := make([]string, 0, l) 2088 for sig := range taskSignals { 2089 flat = append(flat, sig) 2090 } 2091 2092 sort.Strings(flat) 2093 tgSignals, ok := signals[tg.Name] 2094 if !ok { 2095 tgSignals = make(map[string][]string) 2096 signals[tg.Name] = tgSignals 2097 } 2098 tgSignals[task.Name] = flat 2099 } 2100 2101 } 2102 2103 return signals 2104 } 2105 2106 // SpecChanged determines if the functional specification has changed between 2107 // two job versions. 
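// For illustration (not part of the original source): re-registering an
// unchanged job spec reports SpecChanged == false even though server-managed
// fields such as Version, ModifyIndex, or SubmitTime differ, because those
// fields are copied from the old job onto a copy of the new one before the
// reflect.DeepEqual comparison below.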
2108 func (j *Job) SpecChanged(new *Job) bool { 2109 if j == nil { 2110 return new != nil 2111 } 2112 2113 // Create a copy of the new job 2114 c := new.Copy() 2115 2116 // Update the new job so we can do a reflect 2117 c.Status = j.Status 2118 c.StatusDescription = j.StatusDescription 2119 c.Stable = j.Stable 2120 c.Version = j.Version 2121 c.CreateIndex = j.CreateIndex 2122 c.ModifyIndex = j.ModifyIndex 2123 c.JobModifyIndex = j.JobModifyIndex 2124 c.SubmitTime = j.SubmitTime 2125 2126 // Deep equals the jobs 2127 return !reflect.DeepEqual(j, c) 2128 } 2129 2130 func (j *Job) SetSubmitTime() { 2131 j.SubmitTime = time.Now().UTC().UnixNano() 2132 } 2133 2134 // JobListStub is used to return a subset of job information 2135 // for the job list 2136 type JobListStub struct { 2137 ID string 2138 ParentID string 2139 Name string 2140 Type string 2141 Priority int 2142 Periodic bool 2143 ParameterizedJob bool 2144 Stop bool 2145 Status string 2146 StatusDescription string 2147 JobSummary *JobSummary 2148 CreateIndex uint64 2149 ModifyIndex uint64 2150 JobModifyIndex uint64 2151 SubmitTime int64 2152 } 2153 2154 // JobSummary summarizes the state of the allocations of a job 2155 type JobSummary struct { 2156 // JobID is the ID of the job the summary is for 2157 JobID string 2158 2159 // Namespace is the namespace of the job and its summary 2160 Namespace string 2161 2162 // Summary contains the summary per task group for the Job 2163 Summary map[string]TaskGroupSummary 2164 2165 // Children contains a summary for the children of this job. 2166 Children *JobChildrenSummary 2167 2168 // Raft Indexes 2169 CreateIndex uint64 2170 ModifyIndex uint64 2171 } 2172 2173 // Copy returns a new copy of JobSummary 2174 func (js *JobSummary) Copy() *JobSummary { 2175 newJobSummary := new(JobSummary) 2176 *newJobSummary = *js 2177 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary)) 2178 for k, v := range js.Summary { 2179 newTGSummary[k] = v 2180 } 2181 newJobSummary.Summary = newTGSummary 2182 newJobSummary.Children = newJobSummary.Children.Copy() 2183 return newJobSummary 2184 } 2185 2186 // JobChildrenSummary contains the summary of children job statuses 2187 type JobChildrenSummary struct { 2188 Pending int64 2189 Running int64 2190 Dead int64 2191 } 2192 2193 // Copy returns a new copy of a JobChildrenSummary 2194 func (jc *JobChildrenSummary) Copy() *JobChildrenSummary { 2195 if jc == nil { 2196 return nil 2197 } 2198 2199 njc := new(JobChildrenSummary) 2200 *njc = *jc 2201 return njc 2202 } 2203 2204 // TaskGroupSummary summarizes the state of all the allocations of a particular 2205 // TaskGroup 2206 type TaskGroupSummary struct { 2207 Queued int 2208 Complete int 2209 Failed int 2210 Running int 2211 Starting int 2212 Lost int 2213 } 2214 2215 const ( 2216 // Checks uses any registered health check state in combination with task 2217 // states to determine if an allocation is healthy. 2218 UpdateStrategyHealthCheck_Checks = "checks" 2219 2220 // TaskStates uses the task states of an allocation to determine if the 2221 // allocation is healthy. 2222 UpdateStrategyHealthCheck_TaskStates = "task_states" 2223 2224 // Manual allows the operator to manually signal to Nomad when an 2225 // allocation is healthy. This allows more advanced health checking that is 2226 // outside of the scope of Nomad.
2227 UpdateStrategyHealthCheck_Manual = "manual" 2228 ) 2229 2230 var ( 2231 // DefaultUpdateStrategy provides a baseline that can be used to upgrade 2232 // jobs with the old policy or for populating field defaults. 2233 DefaultUpdateStrategy = &UpdateStrategy{ 2234 Stagger: 30 * time.Second, 2235 MaxParallel: 1, 2236 HealthCheck: UpdateStrategyHealthCheck_Checks, 2237 MinHealthyTime: 10 * time.Second, 2238 HealthyDeadline: 5 * time.Minute, 2239 AutoRevert: false, 2240 Canary: 0, 2241 } 2242 ) 2243 2244 // UpdateStrategy is used to modify how updates are done 2245 type UpdateStrategy struct { 2246 // Stagger is used to determine the rate at which allocations are migrated 2247 // due to down or draining nodes. 2248 Stagger time.Duration 2249 2250 // MaxParallel is how many updates can be done in parallel 2251 MaxParallel int 2252 2253 // HealthCheck specifies the mechanism in which allocations are marked 2254 // healthy or unhealthy as part of a deployment. 2255 HealthCheck string 2256 2257 // MinHealthyTime is the minimum time an allocation must be in the healthy 2258 // state before it is marked as healthy, unblocking more allocations to be 2259 // rolled. 2260 MinHealthyTime time.Duration 2261 2262 // HealthyDeadline is the time in which an allocation must be marked as 2263 // healthy before it is automatically transitioned to unhealthy. This time 2264 // period doesn't count against the MinHealthyTime. 2265 HealthyDeadline time.Duration 2266 2267 // AutoRevert declares that if a deployment fails because of unhealthy 2268 // allocations, there should be an attempt to auto-revert the job to a 2269 // stable version. 2270 AutoRevert bool 2271 2272 // Canary is the number of canaries to deploy when a change to the task 2273 // group is detected. 2274 Canary int 2275 } 2276 2277 func (u *UpdateStrategy) Copy() *UpdateStrategy { 2278 if u == nil { 2279 return nil 2280 } 2281 2282 copy := new(UpdateStrategy) 2283 *copy = *u 2284 return copy 2285 } 2286 2287 func (u *UpdateStrategy) Validate() error { 2288 if u == nil { 2289 return nil 2290 } 2291 2292 var mErr multierror.Error 2293 switch u.HealthCheck { 2294 case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual: 2295 default: 2296 multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck)) 2297 } 2298 2299 if u.MaxParallel < 1 { 2300 multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than one: %d < 1", u.MaxParallel)) 2301 } 2302 if u.Canary < 0 { 2303 multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary)) 2304 } 2305 if u.MinHealthyTime < 0 { 2306 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime)) 2307 } 2308 if u.HealthyDeadline <= 0 { 2309 multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline)) 2310 } 2311 if u.MinHealthyTime >= u.HealthyDeadline { 2312 multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v > %v", u.MinHealthyTime, u.HealthyDeadline)) 2313 } 2314 if u.Stagger <= 0 { 2315 multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger)) 2316 } 2317 2318 return mErr.ErrorOrNil() 2319 } 2320 2321 // TODO(alexdadgar): Remove once no longer used by the scheduler.
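// Illustrative sketch (not part of the original source), using the
// DefaultUpdateStrategy values above: MaxParallel = 1, Canary = 0,
// MinHealthyTime = 10s, HealthyDeadline = 5m, and Stagger = 30s satisfy every
// rule in Validate, whereas raising MinHealthyTime to 10m while leaving
// HealthyDeadline at 5m would fail the "Minimum healthy time must be less
// than healthy deadline" check.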
2322 // Rolling returns if a rolling strategy should be used 2323 func (u *UpdateStrategy) Rolling() bool { 2324 return u.Stagger > 0 && u.MaxParallel > 0 2325 } 2326 2327 const ( 2328 // PeriodicSpecCron is used for a cron spec. 2329 PeriodicSpecCron = "cron" 2330 2331 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 2332 // separated list of unix timestamps at which to launch. 2333 PeriodicSpecTest = "_internal_test" 2334 ) 2335 2336 // PeriodicConfig defines the interval at which a job should be run. 2337 type PeriodicConfig struct { 2338 // Enabled determines if the job should be run periodically. 2339 Enabled bool 2340 2341 // Spec specifies the interval at which the job should be run. It is parsed based 2342 // on the SpecType. 2343 Spec string 2344 2345 // SpecType defines the format of the spec. 2346 SpecType string 2347 2348 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 2349 ProhibitOverlap bool 2350 2351 // TimeZone is the user specified string that determines the time zone to 2352 // launch against. The time zones must be specified from IANA Time Zone 2353 // database, such as "America/New_York". 2354 // Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 2355 // Reference: https://www.iana.org/time-zones 2356 TimeZone string 2357 2358 // location is the time zone to evaluate the launch time against 2359 location *time.Location 2360 } 2361 2362 func (p *PeriodicConfig) Copy() *PeriodicConfig { 2363 if p == nil { 2364 return nil 2365 } 2366 np := new(PeriodicConfig) 2367 *np = *p 2368 return np 2369 } 2370 2371 func (p *PeriodicConfig) Validate() error { 2372 if !p.Enabled { 2373 return nil 2374 } 2375 2376 var mErr multierror.Error 2377 if p.Spec == "" { 2378 multierror.Append(&mErr, fmt.Errorf("Must specify a spec")) 2379 } 2380 2381 // Check if we got a valid time zone 2382 if p.TimeZone != "" { 2383 if _, err := time.LoadLocation(p.TimeZone); err != nil { 2384 multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err)) 2385 } 2386 } 2387 2388 switch p.SpecType { 2389 case PeriodicSpecCron: 2390 // Validate the cron spec 2391 if _, err := cronexpr.Parse(p.Spec); err != nil { 2392 multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err)) 2393 } 2394 case PeriodicSpecTest: 2395 // No-op 2396 default: 2397 multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType)) 2398 } 2399 2400 return mErr.ErrorOrNil() 2401 } 2402 2403 func (p *PeriodicConfig) Canonicalize() { 2404 // Load the location; fall back to UTC if the time zone cannot be loaded 2405 l, err := time.LoadLocation(p.TimeZone) 2406 if err != nil { 2407 l = time.UTC 2408 } 2409 2410 p.location = l 2411 } 2412 2413 // Next returns the closest time instant matching the spec that is after the 2414 // passed time. If no matching instance exists, the zero value of time.Time is 2415 // returned. The `time.Location` of the returned value matches that of the 2416 // passed time.
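// Illustrative usage (not part of the original source), assuming an hourly
// cron spec:
//
//	p := &PeriodicConfig{Enabled: true, SpecType: PeriodicSpecCron, Spec: "0 * * * *"}
//	p.Canonicalize()
//	next := p.Next(time.Now())
//
// next is the start of the next hour in the same time.Location as the passed
// time; an unrecognized SpecType falls through and yields the zero time.Time.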
2417 func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { 2418 switch p.SpecType { 2419 case PeriodicSpecCron: 2420 if e, err := cronexpr.Parse(p.Spec); err == nil { 2421 return e.Next(fromTime) 2422 } 2423 case PeriodicSpecTest: 2424 split := strings.Split(p.Spec, ",") 2425 if len(split) == 1 && split[0] == "" { 2426 return time.Time{} 2427 } 2428 2429 // Parse the times 2430 times := make([]time.Time, len(split)) 2431 for i, s := range split { 2432 unix, err := strconv.Atoi(s) 2433 if err != nil { 2434 return time.Time{} 2435 } 2436 2437 times[i] = time.Unix(int64(unix), 0) 2438 } 2439 2440 // Find the next match 2441 for _, next := range times { 2442 if fromTime.Before(next) { 2443 return next 2444 } 2445 } 2446 } 2447 2448 return time.Time{} 2449 } 2450 2451 // GetLocation returns the location to use for determining the time zone to run 2452 // the periodic job against. 2453 func (p *PeriodicConfig) GetLocation() *time.Location { 2454 // Jobs pre 0.5.5 will not have this 2455 if p.location != nil { 2456 return p.location 2457 } 2458 2459 return time.UTC 2460 } 2461 2462 const ( 2463 // PeriodicLaunchSuffix is the string appended to the periodic jobs ID 2464 // when launching derived instances of it. 2465 PeriodicLaunchSuffix = "/periodic-" 2466 ) 2467 2468 // PeriodicLaunch tracks the last launch time of a periodic job. 2469 type PeriodicLaunch struct { 2470 ID string // ID of the periodic job. 2471 Namespace string // Namespace of the periodic job 2472 Launch time.Time // The last launch time. 2473 2474 // Raft Indexes 2475 CreateIndex uint64 2476 ModifyIndex uint64 2477 } 2478 2479 const ( 2480 DispatchPayloadForbidden = "forbidden" 2481 DispatchPayloadOptional = "optional" 2482 DispatchPayloadRequired = "required" 2483 2484 // DispatchLaunchSuffix is the string appended to the parameterized job's ID 2485 // when dispatching instances of it. 2486 DispatchLaunchSuffix = "/dispatch-" 2487 ) 2488 2489 // ParameterizedJobConfig is used to configure the parameterized job 2490 type ParameterizedJobConfig struct { 2491 // Payload configure the payload requirements 2492 Payload string 2493 2494 // MetaRequired is metadata keys that must be specified by the dispatcher 2495 MetaRequired []string 2496 2497 // MetaOptional is metadata keys that may be specified by the dispatcher 2498 MetaOptional []string 2499 } 2500 2501 func (d *ParameterizedJobConfig) Validate() error { 2502 var mErr multierror.Error 2503 switch d.Payload { 2504 case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden: 2505 default: 2506 multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload)) 2507 } 2508 2509 // Check that the meta configurations are disjoint sets 2510 disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional) 2511 if !disjoint { 2512 multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. 
Following keys exist in both: %v", offending)) 2513 } 2514 2515 return mErr.ErrorOrNil() 2516 } 2517 2518 func (d *ParameterizedJobConfig) Canonicalize() { 2519 if d.Payload == "" { 2520 d.Payload = DispatchPayloadOptional 2521 } 2522 } 2523 2524 func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig { 2525 if d == nil { 2526 return nil 2527 } 2528 nd := new(ParameterizedJobConfig) 2529 *nd = *d 2530 nd.MetaOptional = helper.CopySliceString(nd.MetaOptional) 2531 nd.MetaRequired = helper.CopySliceString(nd.MetaRequired) 2532 return nd 2533 } 2534 2535 // DispatchedID returns an ID appropriate for a job dispatched against a 2536 // particular parameterized job 2537 func DispatchedID(templateID string, t time.Time) string { 2538 u := uuid.Generate()[:8] 2539 return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u) 2540 } 2541 2542 // DispatchPayloadConfig configures how a task gets its input from a job dispatch 2543 type DispatchPayloadConfig struct { 2544 // File specifies a relative path to where the input data should be written 2545 File string 2546 } 2547 2548 func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig { 2549 if d == nil { 2550 return nil 2551 } 2552 nd := new(DispatchPayloadConfig) 2553 *nd = *d 2554 return nd 2555 } 2556 2557 func (d *DispatchPayloadConfig) Validate() error { 2558 // Verify the destination doesn't escape 2559 escaped, err := PathEscapesAllocDir("task/local/", d.File) 2560 if err != nil { 2561 return fmt.Errorf("invalid destination path: %v", err) 2562 } else if escaped { 2563 return fmt.Errorf("destination escapes allocation directory") 2564 } 2565 2566 return nil 2567 } 2568 2569 var ( 2570 DefaultServiceJobRestartPolicy = RestartPolicy{ 2571 Delay: 15 * time.Second, 2572 Attempts: 2, 2573 Interval: 30 * time.Minute, 2574 Mode: RestartPolicyModeFail, 2575 } 2576 DefaultBatchJobRestartPolicy = RestartPolicy{ 2577 Delay: 15 * time.Second, 2578 Attempts: 3, 2579 Interval: 24 * time.Hour, 2580 Mode: RestartPolicyModeFail, 2581 } 2582 ) 2583 2584 var ( 2585 DefaultServiceJobReschedulePolicy = ReschedulePolicy{ 2586 Attempts: 2, 2587 Interval: 1 * time.Hour, 2588 } 2589 DefaultBatchJobReschedulePolicy = ReschedulePolicy{ 2590 Attempts: 1, 2591 Interval: 24 * time.Hour, 2592 } 2593 ) 2594 2595 const ( 2596 // RestartPolicyModeDelay causes an artificial delay till the next interval is 2597 // reached when the specified attempts have been reached in the interval. 2598 RestartPolicyModeDelay = "delay" 2599 2600 // RestartPolicyModeFail causes a job to fail if the specified number of 2601 // attempts are reached within an interval. 2602 RestartPolicyModeFail = "fail" 2603 2604 // RestartPolicyMinInterval is the minimum interval that is accepted for a 2605 // restart policy. 2606 RestartPolicyMinInterval = 5 * time.Second 2607 2608 // ReasonWithinPolicy describes restart events that are within policy 2609 ReasonWithinPolicy = "Restart within policy" 2610 ) 2611 2612 // RestartPolicy configures how Tasks are restarted when they crash or fail. 2613 type RestartPolicy struct { 2614 // Attempts is the number of restart that will occur in an interval. 2615 Attempts int 2616 2617 // Interval is a duration in which we can limit the number of restarts 2618 // within. 2619 Interval time.Duration 2620 2621 // Delay is the time between a failure and a restart. 2622 Delay time.Duration 2623 2624 // Mode controls what happens when the task restarts more than attempt times 2625 // in an interval. 
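// (Illustrative note, not part of the original source: with Attempts = 2,
// Interval = 30m, and Delay = 15s, a third failure inside the 30 minute
// window either waits out the remainder of the interval before restarting
// when Mode is "delay", or stops restarting and fails the allocation when
// Mode is "fail".)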
2626 Mode string 2627 } 2628 2629 func (r *RestartPolicy) Copy() *RestartPolicy { 2630 if r == nil { 2631 return nil 2632 } 2633 nrp := new(RestartPolicy) 2634 *nrp = *r 2635 return nrp 2636 } 2637 2638 func (r *RestartPolicy) Validate() error { 2639 var mErr multierror.Error 2640 switch r.Mode { 2641 case RestartPolicyModeDelay, RestartPolicyModeFail: 2642 default: 2643 multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode)) 2644 } 2645 2646 // Check for ambiguous/confusing settings 2647 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 2648 multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts)) 2649 } 2650 2651 if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() { 2652 multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval)) 2653 } 2654 if time.Duration(r.Attempts)*r.Delay > r.Interval { 2655 multierror.Append(&mErr, 2656 fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)) 2657 } 2658 return mErr.ErrorOrNil() 2659 } 2660 2661 func NewRestartPolicy(jobType string) *RestartPolicy { 2662 switch jobType { 2663 case JobTypeService, JobTypeSystem: 2664 rp := DefaultServiceJobRestartPolicy 2665 return &rp 2666 case JobTypeBatch: 2667 rp := DefaultBatchJobRestartPolicy 2668 return &rp 2669 } 2670 return nil 2671 } 2672 2673 const ReschedulePolicyMinInterval = 15 * time.Second 2674 2675 // ReschedulePolicy configures how Tasks are rescheduled when they crash or fail. 2676 type ReschedulePolicy struct { 2677 // Attempts limits the number of rescheduling attempts that can occur in an interval. 2678 Attempts int 2679 2680 // Interval is a duration in which we can limit the number of reschedule attempts. 2681 Interval time.Duration 2682 2683 //TODO delay 2684 } 2685 2686 func (r *ReschedulePolicy) Copy() *ReschedulePolicy { 2687 if r == nil { 2688 return nil 2689 } 2690 nrp := new(ReschedulePolicy) 2691 *nrp = *r 2692 return nrp 2693 } 2694 2695 func (r *ReschedulePolicy) Validate() error { 2696 if r != nil && r.Attempts > 0 { 2697 var mErr multierror.Error 2698 // Check for ambiguous/confusing settings 2699 if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { 2700 multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) 2701 } 2702 2703 return mErr.ErrorOrNil() 2704 } 2705 return nil 2706 } 2707 2708 func NewReshedulePolicy(jobType string) *ReschedulePolicy { 2709 switch jobType { 2710 case JobTypeService: 2711 rp := DefaultServiceJobReschedulePolicy 2712 return &rp 2713 case JobTypeBatch: 2714 rp := DefaultBatchJobReschedulePolicy 2715 return &rp 2716 } 2717 return nil 2718 } 2719 2720 // TaskGroup is an atomic unit of placement. Each task group belongs to 2721 // a job and may contain any number of tasks. A task group supports running 2722 // in many replicas using the same configuration. 2723 type TaskGroup struct { 2724 // Name of the task group 2725 Name string 2726 2727 // Count is the number of replicas of this task group that should 2728 // be scheduled. 2729 Count int 2730 2731 // Update is used to control the update strategy for this task group 2732 Update *UpdateStrategy 2733 2734 // Constraints can be specified at a task group level and apply to 2735 // all the tasks contained.
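// (Illustrative example, not part of the original source: a group-level
// constraint such as attribute "${attr.kernel.name}" set to "linux" is
// inherited by every task in the group, in addition to any constraints
// declared on the individual tasks.)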
2736 Constraints []*Constraint 2737 2738 //RestartPolicy of a TaskGroup 2739 RestartPolicy *RestartPolicy 2740 2741 // Tasks are the collection of tasks that this task group needs to run 2742 Tasks []*Task 2743 2744 // EphemeralDisk is the disk resources that the task group requests 2745 EphemeralDisk *EphemeralDisk 2746 2747 // Meta is used to associate arbitrary metadata with this 2748 // task group. This is opaque to Nomad. 2749 Meta map[string]string 2750 2751 // ReschedulePolicy is used to configure how the scheduler should 2752 // retry failed allocations. 2753 ReschedulePolicy *ReschedulePolicy 2754 } 2755 2756 func (tg *TaskGroup) Copy() *TaskGroup { 2757 if tg == nil { 2758 return nil 2759 } 2760 ntg := new(TaskGroup) 2761 *ntg = *tg 2762 ntg.Update = ntg.Update.Copy() 2763 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 2764 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 2765 ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy() 2766 2767 if tg.Tasks != nil { 2768 tasks := make([]*Task, len(ntg.Tasks)) 2769 for i, t := range ntg.Tasks { 2770 tasks[i] = t.Copy() 2771 } 2772 ntg.Tasks = tasks 2773 } 2774 2775 ntg.Meta = helper.CopyMapStringString(ntg.Meta) 2776 2777 if tg.EphemeralDisk != nil { 2778 ntg.EphemeralDisk = tg.EphemeralDisk.Copy() 2779 } 2780 return ntg 2781 } 2782 2783 // Canonicalize is used to canonicalize fields in the TaskGroup. 2784 func (tg *TaskGroup) Canonicalize(job *Job) { 2785 // Ensure that an empty and nil map are treated the same to avoid scheduling 2786 // problems since we use reflect DeepEquals. 2787 if len(tg.Meta) == 0 { 2788 tg.Meta = nil 2789 } 2790 2791 // Set the default restart policy. 2792 if tg.RestartPolicy == nil { 2793 tg.RestartPolicy = NewRestartPolicy(job.Type) 2794 } 2795 2796 if tg.ReschedulePolicy == nil { 2797 tg.ReschedulePolicy = NewReshedulePolicy(job.Type) 2798 } 2799 2800 // Set a default ephemeral disk object if the user has not requested for one 2801 if tg.EphemeralDisk == nil { 2802 tg.EphemeralDisk = DefaultEphemeralDisk() 2803 } 2804 2805 for _, task := range tg.Tasks { 2806 task.Canonicalize(job, tg) 2807 } 2808 2809 // Add up the disk resources to EphemeralDisk. 
This is done so that users 2810 // are not required to move their disk attribute from resources to 2811 // EphemeralDisk section of the job spec in Nomad 0.5 2812 // COMPAT 0.4.1 -> 0.5 2813 // Remove in 0.6 2814 var diskMB int 2815 for _, task := range tg.Tasks { 2816 diskMB += task.Resources.DiskMB 2817 } 2818 if diskMB > 0 { 2819 tg.EphemeralDisk.SizeMB = diskMB 2820 } 2821 } 2822 2823 // Validate is used to sanity check a task group 2824 func (tg *TaskGroup) Validate(j *Job) error { 2825 var mErr multierror.Error 2826 if tg.Name == "" { 2827 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 2828 } 2829 if tg.Count < 0 { 2830 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative")) 2831 } 2832 if len(tg.Tasks) == 0 { 2833 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 2834 } 2835 for idx, constr := range tg.Constraints { 2836 if err := constr.Validate(); err != nil { 2837 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 2838 mErr.Errors = append(mErr.Errors, outer) 2839 } 2840 } 2841 2842 if tg.RestartPolicy != nil { 2843 if err := tg.RestartPolicy.Validate(); err != nil { 2844 mErr.Errors = append(mErr.Errors, err) 2845 } 2846 } else { 2847 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 2848 } 2849 2850 if tg.ReschedulePolicy != nil { 2851 if err := tg.ReschedulePolicy.Validate(); err != nil { 2852 mErr.Errors = append(mErr.Errors, err) 2853 } 2854 } else { 2855 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name)) 2856 } 2857 2858 if tg.EphemeralDisk != nil { 2859 if err := tg.EphemeralDisk.Validate(); err != nil { 2860 mErr.Errors = append(mErr.Errors, err) 2861 } 2862 } else { 2863 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name)) 2864 } 2865 2866 // Validate the update strategy 2867 if u := tg.Update; u != nil { 2868 switch j.Type { 2869 case JobTypeService, JobTypeSystem: 2870 default: 2871 // COMPAT: Enable in 0.7.0 2872 //mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type)) 2873 } 2874 if err := u.Validate(); err != nil { 2875 mErr.Errors = append(mErr.Errors, err) 2876 } 2877 } 2878 2879 // Check for duplicate tasks, that there is only leader task if any, 2880 // and no duplicated static ports 2881 tasks := make(map[string]int) 2882 staticPorts := make(map[int]string) 2883 leaderTasks := 0 2884 for idx, task := range tg.Tasks { 2885 if task.Name == "" { 2886 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 2887 } else if existing, ok := tasks[task.Name]; ok { 2888 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 2889 } else { 2890 tasks[task.Name] = idx 2891 } 2892 2893 if task.Leader { 2894 leaderTasks++ 2895 } 2896 2897 if task.Resources == nil { 2898 continue 2899 } 2900 2901 for _, net := range task.Resources.Networks { 2902 for _, port := range net.ReservedPorts { 2903 if other, ok := staticPorts[port.Value]; ok { 2904 err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other) 2905 mErr.Errors = append(mErr.Errors, err) 2906 } else { 2907 staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label) 2908 } 2909 } 2910 } 2911 } 2912 2913 if leaderTasks > 1 { 2914 mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader")) 
2915 } 2916 2917 // Validate the tasks 2918 for _, task := range tg.Tasks { 2919 if err := task.Validate(tg.EphemeralDisk); err != nil { 2920 outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err) 2921 mErr.Errors = append(mErr.Errors, outer) 2922 } 2923 } 2924 return mErr.ErrorOrNil() 2925 } 2926 2927 // Warnings returns a list of warnings that may be from dubious settings or 2928 // deprecation warnings. 2929 func (tg *TaskGroup) Warnings(j *Job) error { 2930 var mErr multierror.Error 2931 2932 // Validate the update strategy 2933 if u := tg.Update; u != nil { 2934 // Check the counts are appropriate 2935 if u.MaxParallel > tg.Count { 2936 mErr.Errors = append(mErr.Errors, 2937 fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+ 2938 "A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count)) 2939 } 2940 } 2941 2942 return mErr.ErrorOrNil() 2943 } 2944 2945 // LookupTask finds a task by name 2946 func (tg *TaskGroup) LookupTask(name string) *Task { 2947 for _, t := range tg.Tasks { 2948 if t.Name == name { 2949 return t 2950 } 2951 } 2952 return nil 2953 } 2954 2955 func (tg *TaskGroup) GoString() string { 2956 return fmt.Sprintf("*%#v", *tg) 2957 } 2958 2959 // CombinedResources returns the combined resources for the task group 2960 func (tg *TaskGroup) CombinedResources() *Resources { 2961 r := &Resources{ 2962 DiskMB: tg.EphemeralDisk.SizeMB, 2963 } 2964 for _, task := range tg.Tasks { 2965 r.Add(task.Resources) 2966 } 2967 return r 2968 } 2969 2970 // CheckRestart describes if and when a task should be restarted based on 2971 // failing health checks. 2972 type CheckRestart struct { 2973 Limit int // Restart task after this many unhealthy intervals 2974 Grace time.Duration // Grace time to give tasks after starting to get healthy 2975 IgnoreWarnings bool // If true treat checks in `warning` as passing 2976 } 2977 2978 func (c *CheckRestart) Copy() *CheckRestart { 2979 if c == nil { 2980 return nil 2981 } 2982 2983 nc := new(CheckRestart) 2984 *nc = *c 2985 return nc 2986 } 2987 2988 func (c *CheckRestart) Validate() error { 2989 if c == nil { 2990 return nil 2991 } 2992 2993 var mErr multierror.Error 2994 if c.Limit < 0 { 2995 mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit)) 2996 } 2997 2998 if c.Grace < 0 { 2999 mErr.Errors = append(mErr.Errors, fmt.Errorf("grace period must be greater than or equal to 0 but found %d", c.Grace)) 3000 } 3001 3002 return mErr.ErrorOrNil() 3003 } 3004 3005 const ( 3006 ServiceCheckHTTP = "http" 3007 ServiceCheckTCP = "tcp" 3008 ServiceCheckScript = "script" 3009 3010 // minCheckInterval is the minimum check interval permitted. Consul 3011 // currently has its MinInterval set to 1s. Mirror that here for 3012 // consistency. 3013 minCheckInterval = 1 * time.Second 3014 3015 // minCheckTimeout is the minimum check timeout permitted for Consul 3016 // script TTL checks. 
3017 minCheckTimeout = 1 * time.Second 3018 ) 3019 3020 // The ServiceCheck data model represents the consul health check that 3021 // Nomad registers for a Task 3022 type ServiceCheck struct { 3023 Name string // Name of the check, defaults to id 3024 Type string // Type of the check - tcp, http, docker and script 3025 Command string // Command is the command to run for script checks 3026 Args []string // Args is a list of argumes for script checks 3027 Path string // path of the health check url for http type check 3028 Protocol string // Protocol to use if check is http, defaults to http 3029 PortLabel string // The port to use for tcp/http checks 3030 AddressMode string // 'host' to use host ip:port or 'driver' to use driver's 3031 Interval time.Duration // Interval of the check 3032 Timeout time.Duration // Timeout of the response from the check before consul fails the check 3033 InitialStatus string // Initial status of the check 3034 TLSSkipVerify bool // Skip TLS verification when Protocol=https 3035 Method string // HTTP Method to use (GET by default) 3036 Header map[string][]string // HTTP Headers for Consul to set when making HTTP checks 3037 CheckRestart *CheckRestart // If and when a task should be restarted based on checks 3038 } 3039 3040 func (sc *ServiceCheck) Copy() *ServiceCheck { 3041 if sc == nil { 3042 return nil 3043 } 3044 nsc := new(ServiceCheck) 3045 *nsc = *sc 3046 nsc.Args = helper.CopySliceString(sc.Args) 3047 nsc.Header = helper.CopyMapStringSliceString(sc.Header) 3048 nsc.CheckRestart = sc.CheckRestart.Copy() 3049 return nsc 3050 } 3051 3052 func (sc *ServiceCheck) Canonicalize(serviceName string) { 3053 // Ensure empty maps/slices are treated as null to avoid scheduling 3054 // issues when using DeepEquals. 3055 if len(sc.Args) == 0 { 3056 sc.Args = nil 3057 } 3058 3059 if len(sc.Header) == 0 { 3060 sc.Header = nil 3061 } else { 3062 for k, v := range sc.Header { 3063 if len(v) == 0 { 3064 sc.Header[k] = nil 3065 } 3066 } 3067 } 3068 3069 if sc.Name == "" { 3070 sc.Name = fmt.Sprintf("service: %q check", serviceName) 3071 } 3072 } 3073 3074 // validate a Service's ServiceCheck 3075 func (sc *ServiceCheck) validate() error { 3076 // Validate Type 3077 switch strings.ToLower(sc.Type) { 3078 case ServiceCheckTCP: 3079 case ServiceCheckHTTP: 3080 if sc.Path == "" { 3081 return fmt.Errorf("http type must have a valid http path") 3082 } 3083 url, err := url.Parse(sc.Path) 3084 if err != nil { 3085 return fmt.Errorf("http type must have a valid http path") 3086 } 3087 if url.IsAbs() { 3088 return fmt.Errorf("http type must have a relative http path") 3089 } 3090 3091 case ServiceCheckScript: 3092 if sc.Command == "" { 3093 return fmt.Errorf("script type must have a valid script path") 3094 } 3095 default: 3096 return fmt.Errorf(`invalid type (%+q), must be one of "http", "tcp", or "script" type`, sc.Type) 3097 } 3098 3099 // Validate interval and timeout 3100 if sc.Interval == 0 { 3101 return fmt.Errorf("missing required value interval. Interval cannot be less than %v", minCheckInterval) 3102 } else if sc.Interval < minCheckInterval { 3103 return fmt.Errorf("interval (%v) cannot be lower than %v", sc.Interval, minCheckInterval) 3104 } 3105 3106 if sc.Timeout == 0 { 3107 return fmt.Errorf("missing required value timeout. 
Timeout cannot be less than %v", minCheckInterval) 3108 } else if sc.Timeout < minCheckTimeout { 3109 return fmt.Errorf("timeout (%v) is lower than required minimum timeout %v", sc.Timeout, minCheckInterval) 3110 } 3111 3112 // Validate InitialStatus 3113 switch sc.InitialStatus { 3114 case "": 3115 case api.HealthPassing: 3116 case api.HealthWarning: 3117 case api.HealthCritical: 3118 default: 3119 return fmt.Errorf(`invalid initial check state (%s), must be one of %q, %q, %q or empty`, sc.InitialStatus, api.HealthPassing, api.HealthWarning, api.HealthCritical) 3120 3121 } 3122 3123 // Validate AddressMode 3124 switch sc.AddressMode { 3125 case "", AddressModeHost, AddressModeDriver: 3126 // Ok 3127 case AddressModeAuto: 3128 return fmt.Errorf("invalid address_mode %q - %s only valid for services", sc.AddressMode, AddressModeAuto) 3129 default: 3130 return fmt.Errorf("invalid address_mode %q", sc.AddressMode) 3131 } 3132 3133 return sc.CheckRestart.Validate() 3134 } 3135 3136 // RequiresPort returns whether the service check requires the task has a port. 3137 func (sc *ServiceCheck) RequiresPort() bool { 3138 switch sc.Type { 3139 case ServiceCheckHTTP, ServiceCheckTCP: 3140 return true 3141 default: 3142 return false 3143 } 3144 } 3145 3146 // TriggersRestarts returns true if this check should be watched and trigger a restart 3147 // on failure. 3148 func (sc *ServiceCheck) TriggersRestarts() bool { 3149 return sc.CheckRestart != nil && sc.CheckRestart.Limit > 0 3150 } 3151 3152 // Hash all ServiceCheck fields and the check's corresponding service ID to 3153 // create an identifier. The identifier is not guaranteed to be unique as if 3154 // the PortLabel is blank, the Service's PortLabel will be used after Hash is 3155 // called. 3156 func (sc *ServiceCheck) Hash(serviceID string) string { 3157 h := sha1.New() 3158 io.WriteString(h, serviceID) 3159 io.WriteString(h, sc.Name) 3160 io.WriteString(h, sc.Type) 3161 io.WriteString(h, sc.Command) 3162 io.WriteString(h, strings.Join(sc.Args, "")) 3163 io.WriteString(h, sc.Path) 3164 io.WriteString(h, sc.Protocol) 3165 io.WriteString(h, sc.PortLabel) 3166 io.WriteString(h, sc.Interval.String()) 3167 io.WriteString(h, sc.Timeout.String()) 3168 io.WriteString(h, sc.Method) 3169 // Only include TLSSkipVerify if set to maintain ID stability with Nomad <0.6 3170 if sc.TLSSkipVerify { 3171 io.WriteString(h, "true") 3172 } 3173 3174 // Since map iteration order isn't stable we need to write k/v pairs to 3175 // a slice and sort it before hashing. 3176 if len(sc.Header) > 0 { 3177 headers := make([]string, 0, len(sc.Header)) 3178 for k, v := range sc.Header { 3179 headers = append(headers, k+strings.Join(v, "")) 3180 } 3181 sort.Strings(headers) 3182 io.WriteString(h, strings.Join(headers, "")) 3183 } 3184 3185 // Only include AddressMode if set to maintain ID stability with Nomad <0.7.1 3186 if len(sc.AddressMode) > 0 { 3187 io.WriteString(h, sc.AddressMode) 3188 } 3189 3190 return fmt.Sprintf("%x", h.Sum(nil)) 3191 } 3192 3193 const ( 3194 AddressModeAuto = "auto" 3195 AddressModeHost = "host" 3196 AddressModeDriver = "driver" 3197 ) 3198 3199 // Service represents a Consul service definition in Nomad 3200 type Service struct { 3201 // Name of the service registered with Consul. Consul defaults the 3202 // Name to ServiceID if not specified. The Name if specified is used 3203 // as one of the seed values when generating a Consul ServiceID. 3204 Name string 3205 3206 // PortLabel is either the numeric port number or the `host:port`. 
3207 // To specify the port number using the host's Consul Advertise 3208 // address, specify an empty host in the PortLabel (e.g. `:port`). 3209 PortLabel string 3210 3211 // AddressMode specifies whether or not to use the host ip:port for 3212 // this service. 3213 AddressMode string 3214 3215 Tags []string // List of tags for the service 3216 Checks []*ServiceCheck // List of checks associated with the service 3217 } 3218 3219 func (s *Service) Copy() *Service { 3220 if s == nil { 3221 return nil 3222 } 3223 ns := new(Service) 3224 *ns = *s 3225 ns.Tags = helper.CopySliceString(ns.Tags) 3226 3227 if s.Checks != nil { 3228 checks := make([]*ServiceCheck, len(ns.Checks)) 3229 for i, c := range ns.Checks { 3230 checks[i] = c.Copy() 3231 } 3232 ns.Checks = checks 3233 } 3234 3235 return ns 3236 } 3237 3238 // Canonicalize interpolates values of Job, Task Group and Task in the Service 3239 // Name. This also generates check names, service id and check ids. 3240 func (s *Service) Canonicalize(job string, taskGroup string, task string) { 3241 // Ensure empty lists are treated as null to avoid scheduler issues when 3242 // using DeepEquals 3243 if len(s.Tags) == 0 { 3244 s.Tags = nil 3245 } 3246 if len(s.Checks) == 0 { 3247 s.Checks = nil 3248 } 3249 3250 s.Name = args.ReplaceEnv(s.Name, map[string]string{ 3251 "JOB": job, 3252 "TASKGROUP": taskGroup, 3253 "TASK": task, 3254 "BASE": fmt.Sprintf("%s-%s-%s", job, taskGroup, task), 3255 }, 3256 ) 3257 3258 for _, check := range s.Checks { 3259 check.Canonicalize(s.Name) 3260 } 3261 } 3262 3263 // Validate checks if the Check definition is valid 3264 func (s *Service) Validate() error { 3265 var mErr multierror.Error 3266 3267 // Ensure the service name is valid per the below RFCs but make an exception 3268 // for our interpolation syntax by first stripping any environment variables from the name 3269 3270 serviceNameStripped := args.ReplaceEnvWithPlaceHolder(s.Name, "ENV-VAR") 3271 3272 if err := s.ValidateName(serviceNameStripped); err != nil { 3273 mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes: %q", s.Name)) 3274 } 3275 3276 switch s.AddressMode { 3277 case "", AddressModeAuto, AddressModeHost, AddressModeDriver: 3278 // OK 3279 default: 3280 mErr.Errors = append(mErr.Errors, fmt.Errorf("service address_mode must be %q, %q, or %q; not %q", AddressModeAuto, AddressModeHost, AddressModeDriver, s.AddressMode)) 3281 } 3282 3283 for _, c := range s.Checks { 3284 if s.PortLabel == "" && c.PortLabel == "" && c.RequiresPort() { 3285 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: check requires a port but neither check nor service %+q have a port", c.Name, s.Name)) 3286 continue 3287 } 3288 3289 if err := c.validate(); err != nil { 3290 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %s invalid: %v", c.Name, err)) 3291 } 3292 } 3293 3294 return mErr.ErrorOrNil() 3295 } 3296 3297 // ValidateName checks if the services Name is valid and should be called after 3298 // the name has been interpolated 3299 func (s *Service) ValidateName(name string) error { 3300 // Ensure the service name is valid per RFC-952 §1 3301 // (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1 3302 // (https://tools.ietf.org/html/rfc1123), and RFC-2782 3303 // (https://tools.ietf.org/html/rfc2782). 
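// For illustration (not part of the original source): interpolated names such
// as "redis-cache" or "web1" match the pattern below, while names containing
// underscores or dots (e.g. "my_service") or names longer than 63 characters
// are rejected.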
3304 re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`) 3305 if !re.MatchString(name) { 3306 return fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be no longer than 63 characters: %q", name) 3307 } 3308 return nil 3309 } 3310 3311 // Hash returns a base32 encoded hash of a Service's contents excluding checks 3312 // as they're hashed independently. 3313 func (s *Service) Hash(allocID, taskName string) string { 3314 h := sha1.New() 3315 io.WriteString(h, allocID) 3316 io.WriteString(h, taskName) 3317 io.WriteString(h, s.Name) 3318 io.WriteString(h, s.PortLabel) 3319 io.WriteString(h, s.AddressMode) 3320 for _, tag := range s.Tags { 3321 io.WriteString(h, tag) 3322 } 3323 3324 // Base32 is used for encoding the hash as sha1 hashes can always be 3325 // encoded without padding, only 4 bytes larger than base64, and saves 3326 // 8 bytes vs hex. Since these hashes are used in Consul URLs it's nice 3327 // to have a reasonably compact URL-safe representation. 3328 return b32.EncodeToString(h.Sum(nil)) 3329 } 3330 3331 const ( 3332 // DefaultKillTimeout is the default timeout between signaling a task it 3333 // will be killed and killing it. 3334 DefaultKillTimeout = 5 * time.Second 3335 ) 3336 3337 // LogConfig provides configuration for log rotation 3338 type LogConfig struct { 3339 MaxFiles int 3340 MaxFileSizeMB int 3341 } 3342 3343 // DefaultLogConfig returns the default LogConfig values. 3344 func DefaultLogConfig() *LogConfig { 3345 return &LogConfig{ 3346 MaxFiles: 10, 3347 MaxFileSizeMB: 10, 3348 } 3349 } 3350 3351 // Validate returns an error if the log config specified are less than 3352 // the minimum allowed. 3353 func (l *LogConfig) Validate() error { 3354 var mErr multierror.Error 3355 if l.MaxFiles < 1 { 3356 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles)) 3357 } 3358 if l.MaxFileSizeMB < 1 { 3359 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB)) 3360 } 3361 return mErr.ErrorOrNil() 3362 } 3363 3364 // Task is a single process typically that is executed as part of a task group. 3365 type Task struct { 3366 // Name of the task 3367 Name string 3368 3369 // Driver is used to control which driver is used 3370 Driver string 3371 3372 // User is used to determine which user will run the task. It defaults to 3373 // the same user the Nomad client is being run as. 3374 User string 3375 3376 // Config is provided to the driver to initialize 3377 Config map[string]interface{} 3378 3379 // Map of environment variables to be used by the driver 3380 Env map[string]string 3381 3382 // List of service definitions exposed by the Task 3383 Services []*Service 3384 3385 // Vault is used to define the set of Vault policies that this task should 3386 // have access to. 3387 Vault *Vault 3388 3389 // Templates are the set of templates to be rendered for the task. 3390 Templates []*Template 3391 3392 // Constraints can be specified at a task level and apply only to 3393 // the particular task. 3394 Constraints []*Constraint 3395 3396 // Resources is the resources needed by this task 3397 Resources *Resources 3398 3399 // DispatchPayload configures how the task retrieves its input from a dispatch 3400 DispatchPayload *DispatchPayloadConfig 3401 3402 // Meta is used to associate arbitrary metadata with this 3403 // task. This is opaque to Nomad. 
3404 Meta map[string]string 3405 3406 // KillTimeout is the time between signaling a task that it will be 3407 // killed and killing it. 3408 KillTimeout time.Duration 3409 3410 // LogConfig provides configuration for log rotation 3411 LogConfig *LogConfig 3412 3413 // Artifacts is a list of artifacts to download and extract before running 3414 // the task. 3415 Artifacts []*TaskArtifact 3416 3417 // Leader marks the task as the leader within the group. When the leader 3418 // task exits, other tasks will be gracefully terminated. 3419 Leader bool 3420 3421 // ShutdownDelay is the duration of the delay between deregistering a 3422 // task from Consul and sending it a signal to shutdown. See #2441 3423 ShutdownDelay time.Duration 3424 3425 // The kill signal to use for the task. This is an optional specification, 3426 3427 // KillSignal is the kill signal to use for the task. This is an optional 3428 // specification and defaults to SIGINT 3429 KillSignal string 3430 } 3431 3432 func (t *Task) Copy() *Task { 3433 if t == nil { 3434 return nil 3435 } 3436 nt := new(Task) 3437 *nt = *t 3438 nt.Env = helper.CopyMapStringString(nt.Env) 3439 3440 if t.Services != nil { 3441 services := make([]*Service, len(nt.Services)) 3442 for i, s := range nt.Services { 3443 services[i] = s.Copy() 3444 } 3445 nt.Services = services 3446 } 3447 3448 nt.Constraints = CopySliceConstraints(nt.Constraints) 3449 3450 nt.Vault = nt.Vault.Copy() 3451 nt.Resources = nt.Resources.Copy() 3452 nt.Meta = helper.CopyMapStringString(nt.Meta) 3453 nt.DispatchPayload = nt.DispatchPayload.Copy() 3454 3455 if t.Artifacts != nil { 3456 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 3457 for _, a := range nt.Artifacts { 3458 artifacts = append(artifacts, a.Copy()) 3459 } 3460 nt.Artifacts = artifacts 3461 } 3462 3463 if i, err := copystructure.Copy(nt.Config); err != nil { 3464 panic(err.Error()) 3465 } else { 3466 nt.Config = i.(map[string]interface{}) 3467 } 3468 3469 if t.Templates != nil { 3470 templates := make([]*Template, len(t.Templates)) 3471 for i, tmpl := range nt.Templates { 3472 templates[i] = tmpl.Copy() 3473 } 3474 nt.Templates = templates 3475 } 3476 3477 return nt 3478 } 3479 3480 // Canonicalize canonicalizes fields in the task. 3481 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 3482 // Ensure that an empty and nil map are treated the same to avoid scheduling 3483 // problems since we use reflect DeepEquals. 3484 if len(t.Meta) == 0 { 3485 t.Meta = nil 3486 } 3487 if len(t.Config) == 0 { 3488 t.Config = nil 3489 } 3490 if len(t.Env) == 0 { 3491 t.Env = nil 3492 } 3493 3494 for _, service := range t.Services { 3495 service.Canonicalize(job.Name, tg.Name, t.Name) 3496 } 3497 3498 // If Resources are nil initialize them to defaults, otherwise canonicalize 3499 if t.Resources == nil { 3500 t.Resources = DefaultResources() 3501 } else { 3502 t.Resources.Canonicalize() 3503 } 3504 3505 // Set the default timeout if it is not specified. 
3506 if t.KillTimeout == 0 { 3507 t.KillTimeout = DefaultKillTimeout 3508 } 3509 3510 if t.Vault != nil { 3511 t.Vault.Canonicalize() 3512 } 3513 3514 for _, template := range t.Templates { 3515 template.Canonicalize() 3516 } 3517 } 3518 3519 func (t *Task) GoString() string { 3520 return fmt.Sprintf("*%#v", *t) 3521 } 3522 3523 // Validate is used to sanity check a task 3524 func (t *Task) Validate(ephemeralDisk *EphemeralDisk) error { 3525 var mErr multierror.Error 3526 if t.Name == "" { 3527 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 3528 } 3529 if strings.ContainsAny(t.Name, `/\`) { 3530 // We enforce this so that when creating the directory on disk it will 3531 // not have any slashes. 3532 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 3533 } 3534 if t.Driver == "" { 3535 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 3536 } 3537 if t.KillTimeout < 0 { 3538 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 3539 } 3540 if t.ShutdownDelay < 0 { 3541 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 3542 } 3543 3544 // Validate the resources. 3545 if t.Resources == nil { 3546 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 3547 } else { 3548 if err := t.Resources.MeetsMinResources(); err != nil { 3549 mErr.Errors = append(mErr.Errors, err) 3550 } 3551 3552 // Ensure the task isn't asking for disk resources 3553 if t.Resources.DiskMB > 0 { 3554 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 3555 } 3556 } 3557 3558 // Validate the log config 3559 if t.LogConfig == nil { 3560 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 3561 } else if err := t.LogConfig.Validate(); err != nil { 3562 mErr.Errors = append(mErr.Errors, err) 3563 } 3564 3565 for idx, constr := range t.Constraints { 3566 if err := constr.Validate(); err != nil { 3567 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 3568 mErr.Errors = append(mErr.Errors, outer) 3569 } 3570 3571 switch constr.Operand { 3572 case ConstraintDistinctHosts, ConstraintDistinctProperty: 3573 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand) 3574 mErr.Errors = append(mErr.Errors, outer) 3575 } 3576 } 3577 3578 // Validate Services 3579 if err := validateServices(t); err != nil { 3580 mErr.Errors = append(mErr.Errors, err) 3581 } 3582 3583 if t.LogConfig != nil && ephemeralDisk != nil { 3584 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 3585 if ephemeralDisk.SizeMB <= logUsage { 3586 mErr.Errors = append(mErr.Errors, 3587 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 3588 logUsage, ephemeralDisk.SizeMB)) 3589 } 3590 } 3591 3592 for idx, artifact := range t.Artifacts { 3593 if err := artifact.Validate(); err != nil { 3594 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 3595 mErr.Errors = append(mErr.Errors, outer) 3596 } 3597 } 3598 3599 if t.Vault != nil { 3600 if err := t.Vault.Validate(); err != nil { 3601 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 3602 } 3603 } 3604 3605 destinations := make(map[string]int, len(t.Templates)) 3606 for idx, tmpl := range t.Templates { 3607 if err := tmpl.Validate(); err != nil { 3608 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 
3609 mErr.Errors = append(mErr.Errors, outer) 3610 } 3611 3612 if other, ok := destinations[tmpl.DestPath]; ok { 3613 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 3614 mErr.Errors = append(mErr.Errors, outer) 3615 } else { 3616 destinations[tmpl.DestPath] = idx + 1 3617 } 3618 } 3619 3620 // Validate the dispatch payload block if there 3621 if t.DispatchPayload != nil { 3622 if err := t.DispatchPayload.Validate(); err != nil { 3623 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err)) 3624 } 3625 } 3626 3627 return mErr.ErrorOrNil() 3628 } 3629 3630 // validateServices takes a task and validates the services within it are valid 3631 // and reference ports that exist. 3632 func validateServices(t *Task) error { 3633 var mErr multierror.Error 3634 3635 // Ensure that services don't ask for non-existent ports and their names are 3636 // unique. 3637 servicePorts := make(map[string]map[string]struct{}) 3638 addServicePort := func(label, service string) { 3639 if _, ok := servicePorts[label]; !ok { 3640 servicePorts[label] = map[string]struct{}{} 3641 } 3642 servicePorts[label][service] = struct{}{} 3643 } 3644 knownServices := make(map[string]struct{}) 3645 for i, service := range t.Services { 3646 if err := service.Validate(); err != nil { 3647 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 3648 mErr.Errors = append(mErr.Errors, outer) 3649 } 3650 3651 // Ensure that services with the same name are not being registered for 3652 // the same port 3653 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 3654 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 3655 } 3656 knownServices[service.Name+service.PortLabel] = struct{}{} 3657 3658 if service.PortLabel != "" { 3659 if service.AddressMode == "driver" { 3660 // Numeric port labels are valid for address_mode=driver 3661 _, err := strconv.Atoi(service.PortLabel) 3662 if err != nil { 3663 // Not a numeric port label, add it to list to check 3664 addServicePort(service.PortLabel, service.Name) 3665 } 3666 } else { 3667 addServicePort(service.PortLabel, service.Name) 3668 } 3669 } 3670 3671 // Ensure that check names are unique and have valid ports 3672 knownChecks := make(map[string]struct{}) 3673 for _, check := range service.Checks { 3674 if _, ok := knownChecks[check.Name]; ok { 3675 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 3676 } 3677 knownChecks[check.Name] = struct{}{} 3678 3679 if !check.RequiresPort() { 3680 // No need to continue validating check if it doesn't need a port 3681 continue 3682 } 3683 3684 effectivePort := check.PortLabel 3685 if effectivePort == "" { 3686 // Inherits from service 3687 effectivePort = service.PortLabel 3688 } 3689 3690 if effectivePort == "" { 3691 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is missing a port", check.Name)) 3692 continue 3693 } 3694 3695 isNumeric := false 3696 portNumber, err := strconv.Atoi(effectivePort) 3697 if err == nil { 3698 isNumeric = true 3699 } 3700 3701 // Numeric ports are fine for address_mode = "driver" 3702 if check.AddressMode == "driver" && isNumeric { 3703 if portNumber <= 0 { 3704 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber)) 3705 } 3706 continue 3707 } 3708 3709 if isNumeric { 3710 mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, 
check.Name, portNumber)) 3711 continue 3712 } 3713 3714 // PortLabel must exist, report errors by its parent service 3715 addServicePort(effectivePort, service.Name) 3716 } 3717 } 3718 3719 // Get the set of port labels. 3720 portLabels := make(map[string]struct{}) 3721 if t.Resources != nil { 3722 for _, network := range t.Resources.Networks { 3723 ports := network.PortLabels() 3724 for portLabel := range ports { 3725 portLabels[portLabel] = struct{}{} 3726 } 3727 } 3728 } 3729 3730 // Iterate over a sorted list of keys to make error listings stable 3731 keys := make([]string, 0, len(servicePorts)) 3732 for p := range servicePorts { 3733 keys = append(keys, p) 3734 } 3735 sort.Strings(keys) 3736 3737 // Ensure all ports referenced in services exist. 3738 for _, servicePort := range keys { 3739 services := servicePorts[servicePort] 3740 _, ok := portLabels[servicePort] 3741 if !ok { 3742 names := make([]string, 0, len(services)) 3743 for name := range services { 3744 names = append(names, name) 3745 } 3746 3747 // Keep order deterministic 3748 sort.Strings(names) 3749 joined := strings.Join(names, ", ") 3750 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 3751 mErr.Errors = append(mErr.Errors, err) 3752 } 3753 } 3754 3755 // Ensure address mode is valid 3756 return mErr.ErrorOrNil() 3757 } 3758 3759 const ( 3760 // TemplateChangeModeNoop marks that no action should be taken if the 3761 // template is re-rendered 3762 TemplateChangeModeNoop = "noop" 3763 3764 // TemplateChangeModeSignal marks that the task should be signaled if the 3765 // template is re-rendered 3766 TemplateChangeModeSignal = "signal" 3767 3768 // TemplateChangeModeRestart marks that the task should be restarted if the 3769 // template is re-rendered 3770 TemplateChangeModeRestart = "restart" 3771 ) 3772 3773 var ( 3774 // TemplateChangeModeInvalidError is the error for when an invalid change 3775 // mode is given 3776 TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart") 3777 ) 3778 3779 // Template represents a template configuration to be rendered for a given task 3780 type Template struct { 3781 // SourcePath is the path to the template to be rendered 3782 SourcePath string 3783 3784 // DestPath is the path to where the template should be rendered 3785 DestPath string 3786 3787 // EmbeddedTmpl store the raw template. This is useful for smaller templates 3788 // where they are embedded in the job file rather than sent as an artifact 3789 EmbeddedTmpl string 3790 3791 // ChangeMode indicates what should be done if the template is re-rendered 3792 ChangeMode string 3793 3794 // ChangeSignal is the signal that should be sent if the change mode 3795 // requires it. 3796 ChangeSignal string 3797 3798 // Splay is used to avoid coordinated restarts of processes by applying a 3799 // random wait between 0 and the given splay value before signalling the 3800 // application of a change 3801 Splay time.Duration 3802 3803 // Perms is the permission the file should be written out with. 3804 Perms string 3805 3806 // LeftDelim and RightDelim are optional configurations to control what 3807 // delimiter is utilized when parsing the template. 3808 LeftDelim string 3809 RightDelim string 3810 3811 // Envvars enables exposing the template as environment variables 3812 // instead of as a file. 
The template must be of the form: 3813 // 3814 // VAR_NAME_1={{ key service/my-key }} 3815 // VAR_NAME_2=raw string and {{ env "attr.kernel.name" }} 3816 // 3817 // Lines will be split on the initial "=" with the first part being the 3818 // key name and the second part the value. 3819 // Empty lines and lines starting with # will be ignored, but to avoid 3820 // escaping issues #s within lines will not be treated as comments. 3821 Envvars bool 3822 3823 // VaultGrace is the grace duration between lease renewal and reacquiring a 3824 // secret. If the lease of a secret is less than the grace, a new secret is 3825 // acquired. 3826 VaultGrace time.Duration 3827 } 3828 3829 // DefaultTemplate returns a default template. 3830 func DefaultTemplate() *Template { 3831 return &Template{ 3832 ChangeMode: TemplateChangeModeRestart, 3833 Splay: 5 * time.Second, 3834 Perms: "0644", 3835 } 3836 } 3837 3838 func (t *Template) Copy() *Template { 3839 if t == nil { 3840 return nil 3841 } 3842 copy := new(Template) 3843 *copy = *t 3844 return copy 3845 } 3846 3847 func (t *Template) Canonicalize() { 3848 if t.ChangeSignal != "" { 3849 t.ChangeSignal = strings.ToUpper(t.ChangeSignal) 3850 } 3851 } 3852 3853 func (t *Template) Validate() error { 3854 var mErr multierror.Error 3855 3856 // Verify we have something to render 3857 if t.SourcePath == "" && t.EmbeddedTmpl == "" { 3858 multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template")) 3859 } 3860 3861 // Verify we can render somewhere 3862 if t.DestPath == "" { 3863 multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template")) 3864 } 3865 3866 // Verify the destination doesn't escape 3867 escaped, err := PathEscapesAllocDir("task", t.DestPath) 3868 if err != nil { 3869 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 3870 } else if escaped { 3871 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 3872 } 3873 3874 // Verify a proper change mode 3875 switch t.ChangeMode { 3876 case TemplateChangeModeNoop, TemplateChangeModeRestart: 3877 case TemplateChangeModeSignal: 3878 if t.ChangeSignal == "" { 3879 multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal")) 3880 } 3881 if t.Envvars { 3882 multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates")) 3883 } 3884 default: 3885 multierror.Append(&mErr, TemplateChangeModeInvalidError) 3886 } 3887 3888 // Verify the splay is positive 3889 if t.Splay < 0 { 3890 multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value")) 3891 } 3892 3893 // Verify the permissions 3894 if t.Perms != "" { 3895 if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil { 3896 multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err)) 3897 } 3898 } 3899 3900 if t.VaultGrace.Nanoseconds() < 0 { 3901 multierror.Append(&mErr, fmt.Errorf("Vault grace must be greater than zero: %v < 0", t.VaultGrace)) 3902 } 3903 3904 return mErr.ErrorOrNil() 3905 } 3906 3907 // Set of possible states for a task. 3908 const ( 3909 TaskStatePending = "pending" // The task is waiting to be run. 3910 TaskStateRunning = "running" // The task is currently running. 3911 TaskStateDead = "dead" // Terminal state of task. 3912 ) 3913 3914 // TaskState tracks the current state of a task and events that caused state 3915 // transitions. 3916 type TaskState struct { 3917 // The current state of the task. 
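// NOTE (editorial, illustrative only): this usage sketch is not part of the original
// source. It shows the main rules Template.Validate enforces: something to render
// (SourcePath or EmbeddedTmpl), a DestPath that stays inside the allocation
// directory, a ChangeSignal when ChangeMode is "signal", and octal Perms.
func exampleTemplateValidate() {
	tmpl := DefaultTemplate() // change_mode=restart, splay=5s, perms=0644
	tmpl.EmbeddedTmpl = "port={{ env \"NOMAD_PORT_http\" }}"
	tmpl.DestPath = "local/app.conf"
	if err := tmpl.Validate(); err != nil {
		fmt.Println("unexpected:", err)
	}

	// Switching to the signal change mode without a signal is rejected.
	tmpl.ChangeMode = TemplateChangeModeSignal
	fmt.Println(tmpl.Validate()) // Must specify signal value when change mode is signal
}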
3918 State string 3919 3920 // Failed marks a task as having failed 3921 Failed bool 3922 3923 // Restarts is the number of times the task has restarted 3924 Restarts uint64 3925 3926 // LastRestart is the time the task last restarted. It is updated each time the 3927 // task restarts 3928 LastRestart time.Time 3929 3930 // StartedAt is the time the task is started. It is updated each time the 3931 // task starts 3932 StartedAt time.Time 3933 3934 // FinishedAt is the time at which the task transistioned to dead and will 3935 // not be started again. 3936 FinishedAt time.Time 3937 3938 // Series of task events that transition the state of the task. 3939 Events []*TaskEvent 3940 } 3941 3942 func (ts *TaskState) Copy() *TaskState { 3943 if ts == nil { 3944 return nil 3945 } 3946 copy := new(TaskState) 3947 *copy = *ts 3948 3949 if ts.Events != nil { 3950 copy.Events = make([]*TaskEvent, len(ts.Events)) 3951 for i, e := range ts.Events { 3952 copy.Events[i] = e.Copy() 3953 } 3954 } 3955 return copy 3956 } 3957 3958 // Successful returns whether a task finished successfully. This doesn't really 3959 // have meaning on a non-batch allocation because a service and system 3960 // allocation should not finish. 3961 func (ts *TaskState) Successful() bool { 3962 l := len(ts.Events) 3963 if ts.State != TaskStateDead || l == 0 { 3964 return false 3965 } 3966 3967 e := ts.Events[l-1] 3968 if e.Type != TaskTerminated { 3969 return false 3970 } 3971 3972 return e.ExitCode == 0 3973 } 3974 3975 const ( 3976 // TaskSetupFailure indicates that the task could not be started due to a 3977 // a setup failure. 3978 TaskSetupFailure = "Setup Failure" 3979 3980 // TaskDriveFailure indicates that the task could not be started due to a 3981 // failure in the driver. 3982 TaskDriverFailure = "Driver Failure" 3983 3984 // TaskReceived signals that the task has been pulled by the client at the 3985 // given timestamp. 3986 TaskReceived = "Received" 3987 3988 // TaskFailedValidation indicates the task was invalid and as such was not 3989 // run. 3990 TaskFailedValidation = "Failed Validation" 3991 3992 // TaskStarted signals that the task was started and its timestamp can be 3993 // used to determine the running length of the task. 3994 TaskStarted = "Started" 3995 3996 // TaskTerminated indicates that the task was started and exited. 3997 TaskTerminated = "Terminated" 3998 3999 // TaskKilling indicates a kill signal has been sent to the task. 4000 TaskKilling = "Killing" 4001 4002 // TaskKilled indicates a user has killed the task. 4003 TaskKilled = "Killed" 4004 4005 // TaskRestarting indicates that task terminated and is being restarted. 4006 TaskRestarting = "Restarting" 4007 4008 // TaskNotRestarting indicates that the task has failed and is not being 4009 // restarted because it has exceeded its restart policy. 4010 TaskNotRestarting = "Not Restarting" 4011 4012 // TaskRestartSignal indicates that the task has been signalled to be 4013 // restarted 4014 TaskRestartSignal = "Restart Signaled" 4015 4016 // TaskSignaling indicates that the task is being signalled. 4017 TaskSignaling = "Signaling" 4018 4019 // TaskDownloadingArtifacts means the task is downloading the artifacts 4020 // specified in the task. 4021 TaskDownloadingArtifacts = "Downloading Artifacts" 4022 4023 // TaskArtifactDownloadFailed indicates that downloading the artifacts 4024 // failed. 4025 TaskArtifactDownloadFailed = "Failed Artifact Download" 4026 4027 // TaskBuildingTaskDir indicates that the task directory/chroot is being 4028 // built. 
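// NOTE (editorial, illustrative only): sketch, not part of the original source.
// Successful is only meaningful for batch workloads: the task must be dead and its
// most recent event must be a Terminated event with exit code 0.
func exampleTaskStateSuccessful() bool {
	ts := &TaskState{
		State: TaskStateDead,
		Events: []*TaskEvent{
			NewTaskEvent(TaskStarted),
			NewTaskEvent(TaskTerminated).SetExitCode(0),
		},
	}
	return ts.Successful() // true
}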
4029 TaskBuildingTaskDir = "Building Task Directory" 4030 4031 // TaskSetup indicates the task runner is setting up the task environment 4032 TaskSetup = "Task Setup" 4033 4034 // TaskDiskExceeded indicates that one of the tasks in a taskgroup has 4035 // exceeded the requested disk resources. 4036 TaskDiskExceeded = "Disk Resources Exceeded" 4037 4038 // TaskSiblingFailed indicates that a sibling task in the task group has 4039 // failed. 4040 TaskSiblingFailed = "Sibling Task Failed" 4041 4042 // TaskDriverMessage is an informational event message emitted by 4043 // drivers such as when they're performing a long running action like 4044 // downloading an image. 4045 TaskDriverMessage = "Driver" 4046 4047 // TaskLeaderDead indicates that the leader task within the has finished. 4048 TaskLeaderDead = "Leader Task Dead" 4049 ) 4050 4051 // TaskEvent is an event that effects the state of a task and contains meta-data 4052 // appropriate to the events type. 4053 type TaskEvent struct { 4054 Type string 4055 Time int64 // Unix Nanosecond timestamp 4056 4057 Message string // A possible message explaining the termination of the task. 4058 4059 // DisplayMessage is a human friendly message about the event 4060 DisplayMessage string 4061 4062 // Details is a map with annotated info about the event 4063 Details map[string]string 4064 4065 // DEPRECATION NOTICE: The following fields are deprecated and will be removed 4066 // in a future release. Field values are available in the Details map. 4067 4068 // FailsTask marks whether this event fails the task. 4069 // Deprecated, use Details["fails_task"] to access this. 4070 FailsTask bool 4071 4072 // Restart fields. 4073 // Deprecated, use Details["restart_reason"] to access this. 4074 RestartReason string 4075 4076 // Setup Failure fields. 4077 // Deprecated, use Details["setup_error"] to access this. 4078 SetupError string 4079 4080 // Driver Failure fields. 4081 // Deprecated, use Details["driver_error"] to access this. 4082 DriverError string // A driver error occurred while starting the task. 4083 4084 // Task Terminated Fields. 4085 4086 // Deprecated, use Details["exit_code"] to access this. 4087 ExitCode int // The exit code of the task. 4088 4089 // Deprecated, use Details["signal"] to access this. 4090 Signal int // The signal that terminated the task. 4091 4092 // Killing fields 4093 // Deprecated, use Details["kill_timeout"] to access this. 4094 KillTimeout time.Duration 4095 4096 // Task Killed Fields. 4097 // Deprecated, use Details["kill_error"] to access this. 4098 KillError string // Error killing the task. 4099 4100 // KillReason is the reason the task was killed 4101 // Deprecated, use Details["kill_reason"] to access this. 4102 KillReason string 4103 4104 // TaskRestarting fields. 4105 // Deprecated, use Details["start_delay"] to access this. 4106 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 4107 4108 // Artifact Download fields 4109 // Deprecated, use Details["download_error"] to access this. 4110 DownloadError string // Error downloading artifacts 4111 4112 // Validation fields 4113 // Deprecated, use Details["validation_error"] to access this. 4114 ValidationError string // Validation error 4115 4116 // The maximum allowed task disk size. 4117 // Deprecated, use Details["disk_limit"] to access this. 4118 DiskLimit int64 4119 4120 // Name of the sibling task that caused termination of the task that 4121 // the TaskEvent refers to. 
4122 // Deprecated, use Details["failed_sibling"] to access this. 4123 FailedSibling string 4124 4125 // VaultError is the error from token renewal 4126 // Deprecated, use Details["vault_renewal_error"] to access this. 4127 VaultError string 4128 4129 // TaskSignalReason indicates the reason the task is being signalled. 4130 // Deprecated, use Details["task_signal_reason"] to access this. 4131 TaskSignalReason string 4132 4133 // TaskSignal is the signal that was sent to the task 4134 // Deprecated, use Details["task_signal"] to access this. 4135 TaskSignal string 4136 4137 // DriverMessage indicates a driver action being taken. 4138 // Deprecated, use Details["driver_message"] to access this. 4139 DriverMessage string 4140 4141 // GenericSource is the source of a message. 4142 // Deprecated, is redundant with event type. 4143 GenericSource string 4144 } 4145 4146 func (event *TaskEvent) PopulateEventDisplayMessage() { 4147 // Build up the description based on the event type. 4148 if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why? 4149 return 4150 } 4151 4152 if event.DisplayMessage != "" { 4153 return 4154 } 4155 4156 var desc string 4157 switch event.Type { 4158 case TaskSetup: 4159 desc = event.Message 4160 case TaskStarted: 4161 desc = "Task started by client" 4162 case TaskReceived: 4163 desc = "Task received by client" 4164 case TaskFailedValidation: 4165 if event.ValidationError != "" { 4166 desc = event.ValidationError 4167 } else { 4168 desc = "Validation of task failed" 4169 } 4170 case TaskSetupFailure: 4171 if event.SetupError != "" { 4172 desc = event.SetupError 4173 } else { 4174 desc = "Task setup failed" 4175 } 4176 case TaskDriverFailure: 4177 if event.DriverError != "" { 4178 desc = event.DriverError 4179 } else { 4180 desc = "Failed to start task" 4181 } 4182 case TaskDownloadingArtifacts: 4183 desc = "Client is downloading artifacts" 4184 case TaskArtifactDownloadFailed: 4185 if event.DownloadError != "" { 4186 desc = event.DownloadError 4187 } else { 4188 desc = "Failed to download artifacts" 4189 } 4190 case TaskKilling: 4191 if event.KillReason != "" { 4192 desc = fmt.Sprintf("Killing task: %v", event.KillReason) 4193 } else if event.KillTimeout != 0 { 4194 desc = fmt.Sprintf("Sent interrupt. 
Waiting %v before force killing", event.KillTimeout) 4195 } else { 4196 desc = "Sent interrupt" 4197 } 4198 case TaskKilled: 4199 if event.KillError != "" { 4200 desc = event.KillError 4201 } else { 4202 desc = "Task successfully killed" 4203 } 4204 case TaskTerminated: 4205 var parts []string 4206 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 4207 4208 if event.Signal != 0 { 4209 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 4210 } 4211 4212 if event.Message != "" { 4213 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 4214 } 4215 desc = strings.Join(parts, ", ") 4216 case TaskRestarting: 4217 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 4218 if event.RestartReason != "" && event.RestartReason != ReasonWithinPolicy { 4219 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 4220 } else { 4221 desc = in 4222 } 4223 case TaskNotRestarting: 4224 if event.RestartReason != "" { 4225 desc = event.RestartReason 4226 } else { 4227 desc = "Task exceeded restart policy" 4228 } 4229 case TaskSiblingFailed: 4230 if event.FailedSibling != "" { 4231 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 4232 } else { 4233 desc = "Task's sibling failed" 4234 } 4235 case TaskSignaling: 4236 sig := event.TaskSignal 4237 reason := event.TaskSignalReason 4238 4239 if sig == "" && reason == "" { 4240 desc = "Task being sent a signal" 4241 } else if sig == "" { 4242 desc = reason 4243 } else if reason == "" { 4244 desc = fmt.Sprintf("Task being sent signal %v", sig) 4245 } else { 4246 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 4247 } 4248 case TaskRestartSignal: 4249 if event.RestartReason != "" { 4250 desc = event.RestartReason 4251 } else { 4252 desc = "Task signaled to restart" 4253 } 4254 case TaskDriverMessage: 4255 desc = event.DriverMessage 4256 case TaskLeaderDead: 4257 desc = "Leader Task in Group dead" 4258 default: 4259 desc = event.Message 4260 } 4261 4262 event.DisplayMessage = desc 4263 } 4264 4265 func (te *TaskEvent) GoString() string { 4266 return fmt.Sprintf("%v - %v", te.Time, te.Type) 4267 } 4268 4269 // SetMessage sets the message of TaskEvent 4270 func (te *TaskEvent) SetMessage(msg string) *TaskEvent { 4271 te.Message = msg 4272 te.Details["message"] = msg 4273 return te 4274 } 4275 4276 func (te *TaskEvent) Copy() *TaskEvent { 4277 if te == nil { 4278 return nil 4279 } 4280 copy := new(TaskEvent) 4281 *copy = *te 4282 return copy 4283 } 4284 4285 func NewTaskEvent(event string) *TaskEvent { 4286 return &TaskEvent{ 4287 Type: event, 4288 Time: time.Now().UnixNano(), 4289 Details: make(map[string]string), 4290 } 4291 } 4292 4293 // SetSetupError is used to store an error that occurred while setting up the 4294 // task 4295 func (e *TaskEvent) SetSetupError(err error) *TaskEvent { 4296 if err != nil { 4297 e.SetupError = err.Error() 4298 e.Details["setup_error"] = err.Error() 4299 } 4300 return e 4301 } 4302 4303 func (e *TaskEvent) SetFailsTask() *TaskEvent { 4304 e.FailsTask = true 4305 e.Details["fails_task"] = "true" 4306 return e 4307 } 4308 4309 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 4310 if err != nil { 4311 e.DriverError = err.Error() 4312 e.Details["driver_error"] = err.Error() 4313 } 4314 return e 4315 } 4316 4317 func (e *TaskEvent) SetExitCode(c int) *TaskEvent { 4318 e.ExitCode = c 4319 e.Details["exit_code"] = fmt.Sprintf("%d", c) 4320 return e 4321 } 4322 4323 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 4324 e.Signal = s 
4325 e.Details["signal"] = fmt.Sprintf("%d", s) 4326 return e 4327 } 4328 4329 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 4330 if err != nil { 4331 e.Message = err.Error() 4332 e.Details["exit_message"] = err.Error() 4333 } 4334 return e 4335 } 4336 4337 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 4338 if err != nil { 4339 e.KillError = err.Error() 4340 e.Details["kill_error"] = err.Error() 4341 } 4342 return e 4343 } 4344 4345 func (e *TaskEvent) SetKillReason(r string) *TaskEvent { 4346 e.KillReason = r 4347 e.Details["kill_reason"] = r 4348 return e 4349 } 4350 4351 func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent { 4352 e.StartDelay = int64(delay) 4353 e.Details["start_delay"] = fmt.Sprintf("%d", delay) 4354 return e 4355 } 4356 4357 func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent { 4358 e.RestartReason = reason 4359 e.Details["restart_reason"] = reason 4360 return e 4361 } 4362 4363 func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent { 4364 e.TaskSignalReason = r 4365 e.Details["task_signal_reason"] = r 4366 return e 4367 } 4368 4369 func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent { 4370 e.TaskSignal = s.String() 4371 e.Details["task_signal"] = s.String() 4372 return e 4373 } 4374 4375 func (e *TaskEvent) SetDownloadError(err error) *TaskEvent { 4376 if err != nil { 4377 e.DownloadError = err.Error() 4378 e.Details["download_error"] = err.Error() 4379 } 4380 return e 4381 } 4382 4383 func (e *TaskEvent) SetValidationError(err error) *TaskEvent { 4384 if err != nil { 4385 e.ValidationError = err.Error() 4386 e.Details["validation_error"] = err.Error() 4387 } 4388 return e 4389 } 4390 4391 func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { 4392 e.KillTimeout = timeout 4393 e.Details["kill_timeout"] = timeout.String() 4394 return e 4395 } 4396 4397 func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent { 4398 e.DiskLimit = limit 4399 e.Details["disk_limit"] = fmt.Sprintf("%d", limit) 4400 return e 4401 } 4402 4403 func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent { 4404 e.FailedSibling = sibling 4405 e.Details["failed_sibling"] = sibling 4406 return e 4407 } 4408 4409 func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent { 4410 if err != nil { 4411 e.VaultError = err.Error() 4412 e.Details["vault_renewal_error"] = err.Error() 4413 } 4414 return e 4415 } 4416 4417 func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent { 4418 e.DriverMessage = m 4419 e.Details["driver_message"] = m 4420 return e 4421 } 4422 4423 // TaskArtifact is an artifact to download before running the task. 4424 type TaskArtifact struct { 4425 // GetterSource is the source to download an artifact using go-getter 4426 GetterSource string 4427 4428 // GetterOptions are options to use when downloading the artifact using 4429 // go-getter. 4430 GetterOptions map[string]string 4431 4432 // GetterMode is the go-getter.ClientMode for fetching resources. 4433 // Defaults to "any" but can be set to "file" or "dir". 4434 GetterMode string 4435 4436 // RelativeDest is the download destination given relative to the task's 4437 // directory. 
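// NOTE (editorial, illustrative only): sketch, not part of the original source.
// TaskEvent values are built with NewTaskEvent plus the chainable setters above,
// which populate both the deprecated top-level fields and the Details map;
// PopulateEventDisplayMessage then derives a human friendly DisplayMessage from
// the event type.
func exampleTaskEventBuilder(driverErr error) *TaskEvent {
	ev := NewTaskEvent(TaskDriverFailure).
		SetDriverError(driverErr).
		SetFailsTask()
	ev.PopulateEventDisplayMessage() // DisplayMessage is driverErr.Error() when the error is set
	return ev
}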
4438 RelativeDest string 4439 } 4440 4441 func (ta *TaskArtifact) Copy() *TaskArtifact { 4442 if ta == nil { 4443 return nil 4444 } 4445 nta := new(TaskArtifact) 4446 *nta = *ta 4447 nta.GetterOptions = helper.CopyMapStringString(ta.GetterOptions) 4448 return nta 4449 } 4450 4451 func (ta *TaskArtifact) GoString() string { 4452 return fmt.Sprintf("%+v", ta) 4453 } 4454 4455 // PathEscapesAllocDir returns if the given path escapes the allocation 4456 // directory. The prefix allows adding a prefix if the path will be joined, for 4457 // example a "task/local" prefix may be provided if the path will be joined 4458 // against that prefix. 4459 func PathEscapesAllocDir(prefix, path string) (bool, error) { 4460 // Verify the destination doesn't escape the tasks directory 4461 alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/")) 4462 if err != nil { 4463 return false, err 4464 } 4465 abs, err := filepath.Abs(filepath.Join(alloc, prefix, path)) 4466 if err != nil { 4467 return false, err 4468 } 4469 rel, err := filepath.Rel(alloc, abs) 4470 if err != nil { 4471 return false, err 4472 } 4473 4474 return strings.HasPrefix(rel, ".."), nil 4475 } 4476 4477 func (ta *TaskArtifact) Validate() error { 4478 // Verify the source 4479 var mErr multierror.Error 4480 if ta.GetterSource == "" { 4481 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 4482 } 4483 4484 switch ta.GetterMode { 4485 case "": 4486 // Default to any 4487 ta.GetterMode = GetterModeAny 4488 case GetterModeAny, GetterModeFile, GetterModeDir: 4489 // Ok 4490 default: 4491 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s", 4492 ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir)) 4493 } 4494 4495 escaped, err := PathEscapesAllocDir("task", ta.RelativeDest) 4496 if err != nil { 4497 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 4498 } else if escaped { 4499 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 4500 } 4501 4502 // Verify the checksum 4503 if check, ok := ta.GetterOptions["checksum"]; ok { 4504 check = strings.TrimSpace(check) 4505 if check == "" { 4506 mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value cannot be empty")) 4507 return mErr.ErrorOrNil() 4508 } 4509 4510 parts := strings.Split(check, ":") 4511 if l := len(parts); l != 2 { 4512 mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check)) 4513 return mErr.ErrorOrNil() 4514 } 4515 4516 checksumVal := parts[1] 4517 checksumBytes, err := hex.DecodeString(checksumVal) 4518 if err != nil { 4519 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err)) 4520 return mErr.ErrorOrNil() 4521 } 4522 4523 checksumType := parts[0] 4524 expectedLength := 0 4525 switch checksumType { 4526 case "md5": 4527 expectedLength = md5.Size 4528 case "sha1": 4529 expectedLength = sha1.Size 4530 case "sha256": 4531 expectedLength = sha256.Size 4532 case "sha512": 4533 expectedLength = sha512.Size 4534 default: 4535 mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType)) 4536 return mErr.ErrorOrNil() 4537 } 4538 4539 if len(checksumBytes) != expectedLength { 4540 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal)) 4541 return mErr.ErrorOrNil() 4542 } 4543 } 4544 4545 return mErr.ErrorOrNil() 4546 } 4547 4548 const ( 4549 ConstraintDistinctProperty = 
"distinct_property" 4550 ConstraintDistinctHosts = "distinct_hosts" 4551 ConstraintRegex = "regexp" 4552 ConstraintVersion = "version" 4553 ConstraintSetContains = "set_contains" 4554 ) 4555 4556 // Constraints are used to restrict placement options. 4557 type Constraint struct { 4558 LTarget string // Left-hand target 4559 RTarget string // Right-hand target 4560 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 4561 str string // Memoized string 4562 } 4563 4564 // Equal checks if two constraints are equal 4565 func (c *Constraint) Equal(o *Constraint) bool { 4566 return c.LTarget == o.LTarget && 4567 c.RTarget == o.RTarget && 4568 c.Operand == o.Operand 4569 } 4570 4571 func (c *Constraint) Copy() *Constraint { 4572 if c == nil { 4573 return nil 4574 } 4575 nc := new(Constraint) 4576 *nc = *c 4577 return nc 4578 } 4579 4580 func (c *Constraint) String() string { 4581 if c.str != "" { 4582 return c.str 4583 } 4584 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 4585 return c.str 4586 } 4587 4588 func (c *Constraint) Validate() error { 4589 var mErr multierror.Error 4590 if c.Operand == "" { 4591 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 4592 } 4593 4594 // requireLtarget specifies whether the constraint requires an LTarget to be 4595 // provided. 4596 requireLtarget := true 4597 4598 // Perform additional validation based on operand 4599 switch c.Operand { 4600 case ConstraintDistinctHosts: 4601 requireLtarget = false 4602 case ConstraintSetContains: 4603 if c.RTarget == "" { 4604 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget")) 4605 } 4606 case ConstraintRegex: 4607 if _, err := regexp.Compile(c.RTarget); err != nil { 4608 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 4609 } 4610 case ConstraintVersion: 4611 if _, err := version.NewConstraint(c.RTarget); err != nil { 4612 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 4613 } 4614 case ConstraintDistinctProperty: 4615 // If a count is set, make sure it is convertible to a uint64 4616 if c.RTarget != "" { 4617 count, err := strconv.ParseUint(c.RTarget, 10, 64) 4618 if err != nil { 4619 mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err)) 4620 } else if count < 1 { 4621 mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count)) 4622 } 4623 } 4624 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 4625 if c.RTarget == "" { 4626 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand)) 4627 } 4628 default: 4629 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand)) 4630 } 4631 4632 // Ensure we have an LTarget for the constraints that need one 4633 if requireLtarget && c.LTarget == "" { 4634 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint")) 4635 } 4636 4637 return mErr.ErrorOrNil() 4638 } 4639 4640 // EphemeralDisk is an ephemeral disk object 4641 type EphemeralDisk struct { 4642 // Sticky indicates whether the allocation is sticky to a node 4643 Sticky bool 4644 4645 // SizeMB is the size of the local disk 4646 SizeMB int 4647 4648 // Migrate determines if Nomad client should migrate the allocation dir for 4649 // sticky allocations 4650 Migrate bool 4651 } 4652 4653 // DefaultEphemeralDisk 
returns a EphemeralDisk with default configurations 4654 func DefaultEphemeralDisk() *EphemeralDisk { 4655 return &EphemeralDisk{ 4656 SizeMB: 300, 4657 } 4658 } 4659 4660 // Validate validates EphemeralDisk 4661 func (d *EphemeralDisk) Validate() error { 4662 if d.SizeMB < 10 { 4663 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 4664 } 4665 return nil 4666 } 4667 4668 // Copy copies the EphemeralDisk struct and returns a new one 4669 func (d *EphemeralDisk) Copy() *EphemeralDisk { 4670 ld := new(EphemeralDisk) 4671 *ld = *d 4672 return ld 4673 } 4674 4675 const ( 4676 // VaultChangeModeNoop takes no action when a new token is retrieved. 4677 VaultChangeModeNoop = "noop" 4678 4679 // VaultChangeModeSignal signals the task when a new token is retrieved. 4680 VaultChangeModeSignal = "signal" 4681 4682 // VaultChangeModeRestart restarts the task when a new token is retrieved. 4683 VaultChangeModeRestart = "restart" 4684 ) 4685 4686 // Vault stores the set of permissions a task needs access to from Vault. 4687 type Vault struct { 4688 // Policies is the set of policies that the task needs access to 4689 Policies []string 4690 4691 // Env marks whether the Vault Token should be exposed as an environment 4692 // variable 4693 Env bool 4694 4695 // ChangeMode is used to configure the task's behavior when the Vault 4696 // token changes because the original token could not be renewed in time. 4697 ChangeMode string 4698 4699 // ChangeSignal is the signal sent to the task when a new token is 4700 // retrieved. This is only valid when using the signal change mode. 4701 ChangeSignal string 4702 } 4703 4704 func DefaultVaultBlock() *Vault { 4705 return &Vault{ 4706 Env: true, 4707 ChangeMode: VaultChangeModeRestart, 4708 } 4709 } 4710 4711 // Copy returns a copy of this Vault block. 4712 func (v *Vault) Copy() *Vault { 4713 if v == nil { 4714 return nil 4715 } 4716 4717 nv := new(Vault) 4718 *nv = *v 4719 return nv 4720 } 4721 4722 func (v *Vault) Canonicalize() { 4723 if v.ChangeSignal != "" { 4724 v.ChangeSignal = strings.ToUpper(v.ChangeSignal) 4725 } 4726 } 4727 4728 // Validate returns if the Vault block is valid. 4729 func (v *Vault) Validate() error { 4730 if v == nil { 4731 return nil 4732 } 4733 4734 var mErr multierror.Error 4735 if len(v.Policies) == 0 { 4736 multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty")) 4737 } 4738 4739 for _, p := range v.Policies { 4740 if p == "root" { 4741 multierror.Append(&mErr, fmt.Errorf("Can not specify \"root\" policy")) 4742 } 4743 } 4744 4745 switch v.ChangeMode { 4746 case VaultChangeModeSignal: 4747 if v.ChangeSignal == "" { 4748 multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal)) 4749 } 4750 case VaultChangeModeNoop, VaultChangeModeRestart: 4751 default: 4752 multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode)) 4753 } 4754 4755 return mErr.ErrorOrNil() 4756 } 4757 4758 const ( 4759 // DeploymentStatuses are the various states a deployment can be be in 4760 DeploymentStatusRunning = "running" 4761 DeploymentStatusPaused = "paused" 4762 DeploymentStatusFailed = "failed" 4763 DeploymentStatusSuccessful = "successful" 4764 DeploymentStatusCancelled = "cancelled" 4765 4766 // DeploymentStatusDescriptions are the various descriptions of the states a 4767 // deployment can be in. 
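// NOTE (editorial, illustrative only): sketch, not part of the original source;
// the policy name is hypothetical. A Vault block must list at least one non-root
// policy, and the signal change mode additionally requires ChangeSignal.
func exampleVaultValidate() {
	v := DefaultVaultBlock() // Env=true, ChangeMode=restart
	v.Policies = []string{"kv-readonly"}
	fmt.Println(v.Validate()) // nil

	v.ChangeMode = VaultChangeModeSignal
	fmt.Println(v.Validate()) // Signal must be specified when using change mode "signal"
}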
4768 DeploymentStatusDescriptionRunning = "Deployment is running" 4769 DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires promotion" 4770 DeploymentStatusDescriptionPaused = "Deployment is paused" 4771 DeploymentStatusDescriptionSuccessful = "Deployment completed successfully" 4772 DeploymentStatusDescriptionStoppedJob = "Cancelled because job is stopped" 4773 DeploymentStatusDescriptionNewerJob = "Cancelled due to newer version of job" 4774 DeploymentStatusDescriptionFailedAllocations = "Failed due to unhealthy allocations" 4775 DeploymentStatusDescriptionFailedByUser = "Deployment marked as failed" 4776 ) 4777 4778 // DeploymentStatusDescriptionRollback is used to get the status description of 4779 // a deployment when rolling back to an older job. 4780 func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string { 4781 return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion) 4782 } 4783 4784 // DeploymentStatusDescriptionRollbackNoop is used to get the status description of 4785 // a deployment when rolling back is not possible because it has the same specification 4786 func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string { 4787 return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion) 4788 } 4789 4790 // DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of 4791 // a deployment when there is no target to rollback to but autorevet is desired. 4792 func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string { 4793 return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription) 4794 } 4795 4796 // Deployment is the object that represents a job deployment which is used to 4797 // transition a job between versions. 4798 type Deployment struct { 4799 // ID is a generated UUID for the deployment 4800 ID string 4801 4802 // Namespace is the namespace the deployment is created in 4803 Namespace string 4804 4805 // JobID is the job the deployment is created for 4806 JobID string 4807 4808 // JobVersion is the version of the job at which the deployment is tracking 4809 JobVersion uint64 4810 4811 // JobModifyIndex is the modify index of the job at which the deployment is tracking 4812 JobModifyIndex uint64 4813 4814 // JobCreateIndex is the create index of the job which the deployment is 4815 // tracking. It is needed so that if the job gets stopped and reran we can 4816 // present the correct list of deployments for the job and not old ones. 4817 JobCreateIndex uint64 4818 4819 // TaskGroups is the set of task groups effected by the deployment and their 4820 // current deployment status. 4821 TaskGroups map[string]*DeploymentState 4822 4823 // The status of the deployment 4824 Status string 4825 4826 // StatusDescription allows a human readable description of the deployment 4827 // status. 4828 StatusDescription string 4829 4830 CreateIndex uint64 4831 ModifyIndex uint64 4832 } 4833 4834 // NewDeployment creates a new deployment given the job. 
4835 func NewDeployment(job *Job) *Deployment { 4836 return &Deployment{ 4837 ID: uuid.Generate(), 4838 Namespace: job.Namespace, 4839 JobID: job.ID, 4840 JobVersion: job.Version, 4841 JobModifyIndex: job.ModifyIndex, 4842 JobCreateIndex: job.CreateIndex, 4843 Status: DeploymentStatusRunning, 4844 StatusDescription: DeploymentStatusDescriptionRunning, 4845 TaskGroups: make(map[string]*DeploymentState, len(job.TaskGroups)), 4846 } 4847 } 4848 4849 func (d *Deployment) Copy() *Deployment { 4850 if d == nil { 4851 return nil 4852 } 4853 4854 c := &Deployment{} 4855 *c = *d 4856 4857 c.TaskGroups = nil 4858 if l := len(d.TaskGroups); d.TaskGroups != nil { 4859 c.TaskGroups = make(map[string]*DeploymentState, l) 4860 for tg, s := range d.TaskGroups { 4861 c.TaskGroups[tg] = s.Copy() 4862 } 4863 } 4864 4865 return c 4866 } 4867 4868 // Active returns whether the deployment is active or terminal. 4869 func (d *Deployment) Active() bool { 4870 switch d.Status { 4871 case DeploymentStatusRunning, DeploymentStatusPaused: 4872 return true 4873 default: 4874 return false 4875 } 4876 } 4877 4878 // GetID is a helper for getting the ID when the object may be nil 4879 func (d *Deployment) GetID() string { 4880 if d == nil { 4881 return "" 4882 } 4883 return d.ID 4884 } 4885 4886 // HasPlacedCanaries returns whether the deployment has placed canaries 4887 func (d *Deployment) HasPlacedCanaries() bool { 4888 if d == nil || len(d.TaskGroups) == 0 { 4889 return false 4890 } 4891 for _, group := range d.TaskGroups { 4892 if len(group.PlacedCanaries) != 0 { 4893 return true 4894 } 4895 } 4896 return false 4897 } 4898 4899 // RequiresPromotion returns whether the deployment requires promotion to 4900 // continue 4901 func (d *Deployment) RequiresPromotion() bool { 4902 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 4903 return false 4904 } 4905 for _, group := range d.TaskGroups { 4906 if group.DesiredCanaries > 0 && !group.Promoted { 4907 return true 4908 } 4909 } 4910 return false 4911 } 4912 4913 func (d *Deployment) GoString() string { 4914 base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription) 4915 for group, state := range d.TaskGroups { 4916 base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state) 4917 } 4918 return base 4919 } 4920 4921 // DeploymentState tracks the state of a deployment for a given task group. 4922 type DeploymentState struct { 4923 // AutoRevert marks whether the task group has indicated the job should be 4924 // reverted on failure 4925 AutoRevert bool 4926 4927 // Promoted marks whether the canaries have been promoted 4928 Promoted bool 4929 4930 // PlacedCanaries is the set of placed canary allocations 4931 PlacedCanaries []string 4932 4933 // DesiredCanaries is the number of canaries that should be created. 4934 DesiredCanaries int 4935 4936 // DesiredTotal is the total number of allocations that should be created as 4937 // part of the deployment. 4938 DesiredTotal int 4939 4940 // PlacedAllocs is the number of allocations that have been placed 4941 PlacedAllocs int 4942 4943 // HealthyAllocs is the number of allocations that have been marked healthy. 4944 HealthyAllocs int 4945 4946 // UnhealthyAllocs are allocations that have been marked as unhealthy. 
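// NOTE (editorial, illustrative only): sketch, not part of the original source;
// the Job value and the "web" group name are hypothetical. A deployment created by
// NewDeployment starts in the running state, and it requires promotion while any
// task group still has unpromoted canaries.
func exampleDeploymentPromotion(job *Job) bool {
	d := NewDeployment(job)
	d.TaskGroups["web"] = &DeploymentState{
		DesiredTotal:    3,
		DesiredCanaries: 1,
		PlacedCanaries:  []string{uuid.Generate()},
	}
	return d.Active() && d.HasPlacedCanaries() && d.RequiresPromotion() // true
}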
4947 UnhealthyAllocs int 4948 } 4949 4950 func (d *DeploymentState) GoString() string { 4951 base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal) 4952 base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries) 4953 base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries) 4954 base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted) 4955 base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs) 4956 base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs) 4957 base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs) 4958 base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert) 4959 return base 4960 } 4961 4962 func (d *DeploymentState) Copy() *DeploymentState { 4963 c := &DeploymentState{} 4964 *c = *d 4965 c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries) 4966 return c 4967 } 4968 4969 // DeploymentStatusUpdate is used to update the status of a given deployment 4970 type DeploymentStatusUpdate struct { 4971 // DeploymentID is the ID of the deployment to update 4972 DeploymentID string 4973 4974 // Status is the new status of the deployment. 4975 Status string 4976 4977 // StatusDescription is the new status description of the deployment. 4978 StatusDescription string 4979 } 4980 4981 // RescheduleTracker encapsulates previous reschedule events 4982 type RescheduleTracker struct { 4983 Events []*RescheduleEvent 4984 } 4985 4986 func (rt *RescheduleTracker) Copy() *RescheduleTracker { 4987 if rt == nil { 4988 return nil 4989 } 4990 nt := &RescheduleTracker{} 4991 *nt = *rt 4992 rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events)) 4993 for _, tracker := range rt.Events { 4994 rescheduleEvents = append(rescheduleEvents, tracker.Copy()) 4995 } 4996 nt.Events = rescheduleEvents 4997 return nt 4998 } 4999 5000 // RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation 5001 type RescheduleEvent struct { 5002 // RescheduleTime is the timestamp of a reschedule attempt 5003 RescheduleTime int64 5004 5005 // PrevAllocID is the ID of the previous allocation being restarted 5006 PrevAllocID string 5007 5008 // PrevNodeID is the node ID of the previous allocation 5009 PrevNodeID string 5010 } 5011 5012 func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string) *RescheduleEvent { 5013 return &RescheduleEvent{RescheduleTime: rescheduleTime, 5014 PrevAllocID: prevAllocID, 5015 PrevNodeID: prevNodeID} 5016 } 5017 5018 func (re *RescheduleEvent) Copy() *RescheduleEvent { 5019 if re == nil { 5020 return nil 5021 } 5022 copy := new(RescheduleEvent) 5023 *copy = *re 5024 return copy 5025 } 5026 5027 const ( 5028 AllocDesiredStatusRun = "run" // Allocation should run 5029 AllocDesiredStatusStop = "stop" // Allocation should stop 5030 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 5031 ) 5032 5033 const ( 5034 AllocClientStatusPending = "pending" 5035 AllocClientStatusRunning = "running" 5036 AllocClientStatusComplete = "complete" 5037 AllocClientStatusFailed = "failed" 5038 AllocClientStatusLost = "lost" 5039 ) 5040 5041 // Allocation is used to allocate the placement of a task group to a node. 5042 type Allocation struct { 5043 // ID of the allocation (UUID) 5044 ID string 5045 5046 // Namespace is the namespace the allocation is created in 5047 Namespace string 5048 5049 // ID of the evaluation that generated this allocation 5050 EvalID string 5051 5052 // Name is a logical name of the allocation. 
5053 Name string 5054 5055 // NodeID is the node this is being placed on 5056 NodeID string 5057 5058 // Job is the parent job of the task group being allocated. 5059 // This is copied at allocation time to avoid issues if the job 5060 // definition is updated. 5061 JobID string 5062 Job *Job 5063 5064 // TaskGroup is the name of the task group that should be run 5065 TaskGroup string 5066 5067 // Resources is the total set of resources allocated as part 5068 // of this allocation of the task group. 5069 Resources *Resources 5070 5071 // SharedResources are the resources that are shared by all the tasks in an 5072 // allocation 5073 SharedResources *Resources 5074 5075 // TaskResources is the set of resources allocated to each 5076 // task. These should sum to the total Resources. 5077 TaskResources map[string]*Resources 5078 5079 // Metrics associated with this allocation 5080 Metrics *AllocMetric 5081 5082 // Desired Status of the allocation on the client 5083 DesiredStatus string 5084 5085 // DesiredStatusDescription is meant to provide more human useful information 5086 DesiredDescription string 5087 5088 // Status of the allocation on the client 5089 ClientStatus string 5090 5091 // ClientStatusDescription is meant to provide more human useful information 5092 ClientDescription string 5093 5094 // TaskStates stores the state of each task, 5095 TaskStates map[string]*TaskState 5096 5097 // PreviousAllocation is the allocation that this allocation is replacing 5098 PreviousAllocation string 5099 5100 // NextAllocation is the allocation that this allocation is being replaced by 5101 NextAllocation string 5102 5103 // DeploymentID identifies an allocation as being created from a 5104 // particular deployment 5105 DeploymentID string 5106 5107 // DeploymentStatus captures the status of the allocation as part of the 5108 // given deployment 5109 DeploymentStatus *AllocDeploymentStatus 5110 5111 // Raft Indexes 5112 CreateIndex uint64 5113 ModifyIndex uint64 5114 5115 // AllocModifyIndex is not updated when the client updates allocations. This 5116 // lets the client pull only the allocs updated by the server. 5117 AllocModifyIndex uint64 5118 5119 // CreateTime is the time the allocation has finished scheduling and been 5120 // verified by the plan applier. 5121 CreateTime int64 5122 5123 // ModifyTime is the time the allocation was last updated. 5124 ModifyTime int64 5125 5126 // RescheduleTrackers captures details of previous reschedule attempts of the allocation 5127 RescheduleTracker *RescheduleTracker 5128 } 5129 5130 // Index returns the index of the allocation. If the allocation is from a task 5131 // group with count greater than 1, there will be multiple allocations for it. 
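// NOTE (editorial, illustrative only): the example below is not part of the
// original source. Allocation names follow the "<job>.<group>[<index>]"
// convention, and Index (defined just below) recovers the numeric suffix:
//
//	alloc := &Allocation{JobID: "example", TaskGroup: "cache", Name: "example.cache[3]"}
//	alloc.Index() // 3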
5132 func (a *Allocation) Index() uint { 5133 l := len(a.Name) 5134 prefix := len(a.JobID) + len(a.TaskGroup) + 2 5135 if l <= 3 || l <= prefix { 5136 return uint(0) 5137 } 5138 5139 strNum := a.Name[prefix : len(a.Name)-1] 5140 num, _ := strconv.Atoi(strNum) 5141 return uint(num) 5142 } 5143 5144 func (a *Allocation) Copy() *Allocation { 5145 return a.copyImpl(true) 5146 } 5147 5148 // Copy provides a copy of the allocation but doesn't deep copy the job 5149 func (a *Allocation) CopySkipJob() *Allocation { 5150 return a.copyImpl(false) 5151 } 5152 5153 func (a *Allocation) copyImpl(job bool) *Allocation { 5154 if a == nil { 5155 return nil 5156 } 5157 na := new(Allocation) 5158 *na = *a 5159 5160 if job { 5161 na.Job = na.Job.Copy() 5162 } 5163 5164 na.Resources = na.Resources.Copy() 5165 na.SharedResources = na.SharedResources.Copy() 5166 5167 if a.TaskResources != nil { 5168 tr := make(map[string]*Resources, len(na.TaskResources)) 5169 for task, resource := range na.TaskResources { 5170 tr[task] = resource.Copy() 5171 } 5172 na.TaskResources = tr 5173 } 5174 5175 na.Metrics = na.Metrics.Copy() 5176 na.DeploymentStatus = na.DeploymentStatus.Copy() 5177 5178 if a.TaskStates != nil { 5179 ts := make(map[string]*TaskState, len(na.TaskStates)) 5180 for task, state := range na.TaskStates { 5181 ts[task] = state.Copy() 5182 } 5183 na.TaskStates = ts 5184 } 5185 5186 na.RescheduleTracker = a.RescheduleTracker.Copy() 5187 return na 5188 } 5189 5190 // TerminalStatus returns if the desired or actual status is terminal and 5191 // will no longer transition. 5192 func (a *Allocation) TerminalStatus() bool { 5193 // First check the desired state and if that isn't terminal, check client 5194 // state. 5195 switch a.DesiredStatus { 5196 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 5197 return true 5198 default: 5199 } 5200 5201 switch a.ClientStatus { 5202 case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost: 5203 return true 5204 default: 5205 return false 5206 } 5207 } 5208 5209 // ShouldReschedule returns if the allocation is eligible to be rescheduled according 5210 // to its status and ReschedulePolicy given its failure time 5211 func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 5212 // First check the desired state 5213 switch a.DesiredStatus { 5214 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 5215 return false 5216 default: 5217 } 5218 switch a.ClientStatus { 5219 case AllocClientStatusFailed: 5220 return a.RescheduleEligible(reschedulePolicy, failTime) 5221 default: 5222 return false 5223 } 5224 } 5225 5226 // RescheduleEligible returns if the allocation is eligible to be rescheduled according 5227 // to its ReschedulePolicy and the current state of its reschedule trackers 5228 func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 5229 if reschedulePolicy == nil { 5230 return false 5231 } 5232 attempts := reschedulePolicy.Attempts 5233 interval := reschedulePolicy.Interval 5234 5235 if attempts == 0 { 5236 return false 5237 } 5238 if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 { 5239 return true 5240 } 5241 attempted := 0 5242 for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- { 5243 lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime 5244 timeDiff := failTime.UTC().UnixNano() - lastAttempt 5245 if timeDiff < interval.Nanoseconds() { 5246 attempted += 1 5247 } 5248 } 5249 return attempted < 
attempts 5250 } 5251 5252 // Terminated returns if the allocation is in a terminal state on a client. 5253 func (a *Allocation) Terminated() bool { 5254 if a.ClientStatus == AllocClientStatusFailed || 5255 a.ClientStatus == AllocClientStatusComplete || 5256 a.ClientStatus == AllocClientStatusLost { 5257 return true 5258 } 5259 return false 5260 } 5261 5262 // RanSuccessfully returns whether the client has ran the allocation and all 5263 // tasks finished successfully. Critically this function returns whether the 5264 // allocation has ran to completion and not just that the alloc has converged to 5265 // its desired state. That is to say that a batch allocation must have finished 5266 // with exit code 0 on all task groups. This doesn't really have meaning on a 5267 // non-batch allocation because a service and system allocation should not 5268 // finish. 5269 func (a *Allocation) RanSuccessfully() bool { 5270 // Handle the case the client hasn't started the allocation. 5271 if len(a.TaskStates) == 0 { 5272 return false 5273 } 5274 5275 // Check to see if all the tasks finished successfully in the allocation 5276 allSuccess := true 5277 for _, state := range a.TaskStates { 5278 allSuccess = allSuccess && state.Successful() 5279 } 5280 5281 return allSuccess 5282 } 5283 5284 // ShouldMigrate returns if the allocation needs data migration 5285 func (a *Allocation) ShouldMigrate() bool { 5286 if a.PreviousAllocation == "" { 5287 return false 5288 } 5289 5290 if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict { 5291 return false 5292 } 5293 5294 tg := a.Job.LookupTaskGroup(a.TaskGroup) 5295 5296 // if the task group is nil or the ephemeral disk block isn't present then 5297 // we won't migrate 5298 if tg == nil || tg.EphemeralDisk == nil { 5299 return false 5300 } 5301 5302 // We won't migrate any data is the user hasn't enabled migration or the 5303 // disk is not marked as sticky 5304 if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky { 5305 return false 5306 } 5307 5308 return true 5309 } 5310 5311 // SetEventDisplayMessage populates the display message if its not already set, 5312 // a temporary fix to handle old allocations that don't have it. 5313 // This method will be removed in a future release. 
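// NOTE (editorial, illustrative only): sketch, not part of the original source;
// the ReschedulePolicy fields used here are the ones RescheduleEligible reads.
// An allocation stays eligible until it has used up ReschedulePolicy.Attempts
// within the policy's Interval, counting only reschedule events that fall inside
// the interval ending at failTime.
func exampleRescheduleEligible(a *Allocation, failTime time.Time) bool {
	policy := &ReschedulePolicy{Attempts: 2, Interval: 15 * time.Minute}
	a.RescheduleTracker = &RescheduleTracker{
		Events: []*RescheduleEvent{
			NewRescheduleEvent(failTime.Add(-5*time.Minute).UTC().UnixNano(), uuid.Generate(), uuid.Generate()),
		},
	}
	return a.RescheduleEligible(policy, failTime) // true: one attempt used out of two
}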
5314 func (a *Allocation) SetEventDisplayMessages() { 5315 setDisplayMsg(a.TaskStates) 5316 } 5317 5318 // Stub returns a list stub for the allocation 5319 func (a *Allocation) Stub() *AllocListStub { 5320 return &AllocListStub{ 5321 ID: a.ID, 5322 EvalID: a.EvalID, 5323 Name: a.Name, 5324 NodeID: a.NodeID, 5325 JobID: a.JobID, 5326 JobVersion: a.Job.Version, 5327 TaskGroup: a.TaskGroup, 5328 DesiredStatus: a.DesiredStatus, 5329 DesiredDescription: a.DesiredDescription, 5330 ClientStatus: a.ClientStatus, 5331 ClientDescription: a.ClientDescription, 5332 TaskStates: a.TaskStates, 5333 DeploymentStatus: a.DeploymentStatus, 5334 CreateIndex: a.CreateIndex, 5335 ModifyIndex: a.ModifyIndex, 5336 CreateTime: a.CreateTime, 5337 ModifyTime: a.ModifyTime, 5338 } 5339 } 5340 5341 // AllocListStub is used to return a subset of alloc information 5342 type AllocListStub struct { 5343 ID string 5344 EvalID string 5345 Name string 5346 NodeID string 5347 JobID string 5348 JobVersion uint64 5349 TaskGroup string 5350 DesiredStatus string 5351 DesiredDescription string 5352 ClientStatus string 5353 ClientDescription string 5354 TaskStates map[string]*TaskState 5355 DeploymentStatus *AllocDeploymentStatus 5356 CreateIndex uint64 5357 ModifyIndex uint64 5358 CreateTime int64 5359 ModifyTime int64 5360 } 5361 5362 // SetEventDisplayMessage populates the display message if its not already set, 5363 // a temporary fix to handle old allocations that don't have it. 5364 // This method will be removed in a future release. 5365 func (a *AllocListStub) SetEventDisplayMessages() { 5366 setDisplayMsg(a.TaskStates) 5367 } 5368 5369 func setDisplayMsg(taskStates map[string]*TaskState) { 5370 if taskStates != nil { 5371 for _, taskState := range taskStates { 5372 for _, event := range taskState.Events { 5373 event.PopulateEventDisplayMessage() 5374 } 5375 } 5376 } 5377 } 5378 5379 // AllocMetric is used to track various metrics while attempting 5380 // to make an allocation. These are used to debug a job, or to better 5381 // understand the pressure within the system. 5382 type AllocMetric struct { 5383 // NodesEvaluated is the number of nodes that were evaluated 5384 NodesEvaluated int 5385 5386 // NodesFiltered is the number of nodes filtered due to a constraint 5387 NodesFiltered int 5388 5389 // NodesAvailable is the number of nodes available for evaluation per DC. 5390 NodesAvailable map[string]int 5391 5392 // ClassFiltered is the number of nodes filtered by class 5393 ClassFiltered map[string]int 5394 5395 // ConstraintFiltered is the number of failures caused by constraint 5396 ConstraintFiltered map[string]int 5397 5398 // NodesExhausted is the number of nodes skipped due to being 5399 // exhausted of at least one resource 5400 NodesExhausted int 5401 5402 // ClassExhausted is the number of nodes exhausted by class 5403 ClassExhausted map[string]int 5404 5405 // DimensionExhausted provides the count by dimension or reason 5406 DimensionExhausted map[string]int 5407 5408 // QuotaExhausted provides the exhausted dimensions 5409 QuotaExhausted []string 5410 5411 // Scores is the scores of the final few nodes remaining 5412 // for placement. The top score is typically selected. 5413 Scores map[string]float64 5414 5415 // AllocationTime is a measure of how long the allocation 5416 // attempt took. This can affect performance and SLAs. 5417 AllocationTime time.Duration 5418 5419 // CoalescedFailures indicates the number of other 5420 // allocations that were coalesced into this failed allocation. 
5421 // This is to prevent creating many failed allocations for a 5422 // single task group. 5423 CoalescedFailures int 5424 } 5425 5426 func (a *AllocMetric) Copy() *AllocMetric { 5427 if a == nil { 5428 return nil 5429 } 5430 na := new(AllocMetric) 5431 *na = *a 5432 na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable) 5433 na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered) 5434 na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered) 5435 na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted) 5436 na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted) 5437 na.QuotaExhausted = helper.CopySliceString(na.QuotaExhausted) 5438 na.Scores = helper.CopyMapStringFloat64(na.Scores) 5439 return na 5440 } 5441 5442 func (a *AllocMetric) EvaluateNode() { 5443 a.NodesEvaluated += 1 5444 } 5445 5446 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 5447 a.NodesFiltered += 1 5448 if node != nil && node.NodeClass != "" { 5449 if a.ClassFiltered == nil { 5450 a.ClassFiltered = make(map[string]int) 5451 } 5452 a.ClassFiltered[node.NodeClass] += 1 5453 } 5454 if constraint != "" { 5455 if a.ConstraintFiltered == nil { 5456 a.ConstraintFiltered = make(map[string]int) 5457 } 5458 a.ConstraintFiltered[constraint] += 1 5459 } 5460 } 5461 5462 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 5463 a.NodesExhausted += 1 5464 if node != nil && node.NodeClass != "" { 5465 if a.ClassExhausted == nil { 5466 a.ClassExhausted = make(map[string]int) 5467 } 5468 a.ClassExhausted[node.NodeClass] += 1 5469 } 5470 if dimension != "" { 5471 if a.DimensionExhausted == nil { 5472 a.DimensionExhausted = make(map[string]int) 5473 } 5474 a.DimensionExhausted[dimension] += 1 5475 } 5476 } 5477 5478 func (a *AllocMetric) ExhaustQuota(dimensions []string) { 5479 if a.QuotaExhausted == nil { 5480 a.QuotaExhausted = make([]string, 0, len(dimensions)) 5481 } 5482 5483 a.QuotaExhausted = append(a.QuotaExhausted, dimensions...) 5484 } 5485 5486 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 5487 if a.Scores == nil { 5488 a.Scores = make(map[string]float64) 5489 } 5490 key := fmt.Sprintf("%s.%s", node.ID, name) 5491 a.Scores[key] = score 5492 } 5493 5494 // AllocDeploymentStatus captures the status of the allocation as part of the 5495 // deployment. This can include things like if the allocation has been marked as 5496 // heatlhy. 5497 type AllocDeploymentStatus struct { 5498 // Healthy marks whether the allocation has been marked healthy or unhealthy 5499 // as part of a deployment. It can be unset if it has neither been marked 5500 // healthy or unhealthy. 5501 Healthy *bool 5502 5503 // ModifyIndex is the raft index in which the deployment status was last 5504 // changed. 
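// NOTE (editorial, illustrative only): sketch, not part of the original source;
// the candidate Node and the constraint/dimension strings are hypothetical, and
// the Node fields touched here (ID, NodeClass) are the ones the helpers above
// read. AllocMetric is filled in incrementally while the scheduler walks
// candidate nodes.
func exampleAllocMetric(candidate *Node) *AllocMetric {
	m := &AllocMetric{}
	m.EvaluateNode()
	m.FilterNode(candidate, "${attr.kernel.name} = linux") // constraint that filtered the node
	m.ExhaustedNode(candidate, "memory")                    // node ran out of a resource dimension
	m.ScoreNode(candidate, "binpack", 8.5)                  // candidate must be non-nil here
	return m
}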
5505 ModifyIndex uint64 5506 } 5507 5508 // IsHealthy returns if the allocation is marked as healthy as part of a 5509 // deployment 5510 func (a *AllocDeploymentStatus) IsHealthy() bool { 5511 if a == nil { 5512 return false 5513 } 5514 5515 return a.Healthy != nil && *a.Healthy 5516 } 5517 5518 // IsUnhealthy returns if the allocation is marked as unhealthy as part of a 5519 // deployment 5520 func (a *AllocDeploymentStatus) IsUnhealthy() bool { 5521 if a == nil { 5522 return false 5523 } 5524 5525 return a.Healthy != nil && !*a.Healthy 5526 } 5527 5528 func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus { 5529 if a == nil { 5530 return nil 5531 } 5532 5533 c := new(AllocDeploymentStatus) 5534 *c = *a 5535 5536 if a.Healthy != nil { 5537 c.Healthy = helper.BoolToPtr(*a.Healthy) 5538 } 5539 5540 return c 5541 } 5542 5543 const ( 5544 EvalStatusBlocked = "blocked" 5545 EvalStatusPending = "pending" 5546 EvalStatusComplete = "complete" 5547 EvalStatusFailed = "failed" 5548 EvalStatusCancelled = "canceled" 5549 ) 5550 5551 const ( 5552 EvalTriggerJobRegister = "job-register" 5553 EvalTriggerJobDeregister = "job-deregister" 5554 EvalTriggerPeriodicJob = "periodic-job" 5555 EvalTriggerNodeUpdate = "node-update" 5556 EvalTriggerScheduled = "scheduled" 5557 EvalTriggerRollingUpdate = "rolling-update" 5558 EvalTriggerDeploymentWatcher = "deployment-watcher" 5559 EvalTriggerFailedFollowUp = "failed-follow-up" 5560 EvalTriggerMaxPlans = "max-plan-attempts" 5561 EvalTriggerRetryFailedAlloc = "alloc-failure" 5562 ) 5563 5564 const ( 5565 // CoreJobEvalGC is used for the garbage collection of evaluations 5566 // and allocations. We periodically scan evaluations in a terminal state, 5567 // in which all the corresponding allocations are also terminal. We 5568 // delete these out of the system to bound the state. 5569 CoreJobEvalGC = "eval-gc" 5570 5571 // CoreJobNodeGC is used for the garbage collection of failed nodes. 5572 // We periodically scan nodes in a terminal state, and if they have no 5573 // corresponding allocations we delete these out of the system. 5574 CoreJobNodeGC = "node-gc" 5575 5576 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 5577 // periodically scan garbage collectible jobs and check if both their 5578 // evaluations and allocations are terminal. If so, we delete these out of 5579 // the system. 5580 CoreJobJobGC = "job-gc" 5581 5582 // CoreJobDeploymentGC is used for the garbage collection of eligible 5583 // deployments. We periodically scan garbage collectible deployments and 5584 // check if they are terminal. If so, we delete these out of the system. 5585 CoreJobDeploymentGC = "deployment-gc" 5586 5587 // CoreJobForceGC is used to force garbage collection of all GCable objects. 5588 CoreJobForceGC = "force-gc" 5589 ) 5590 5591 // Evaluation is used anytime we need to apply business logic as a result 5592 // of a change to our desired state (job specification) or the emergent state 5593 // (registered nodes). When the inputs change, we need to "evaluate" them, 5594 // potentially taking action (allocation of work) or doing nothing if the state 5595 // of the world does not require it. 5596 type Evaluation struct { 5597 // ID is a randomly generated UUID used for this evaluation. This 5598 // is assigned upon the creation of the evaluation. 
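// NOTE (editorial, illustrative only): sketch, not part of the original source.
// Healthy is a tri-state pointer: nil means "not yet decided", so IsHealthy and
// IsUnhealthy can both report false for the same status.
func exampleAllocDeploymentStatus() {
	pending := &AllocDeploymentStatus{} // Healthy == nil
	healthy := &AllocDeploymentStatus{Healthy: helper.BoolToPtr(true)}

	fmt.Println(pending.IsHealthy(), pending.IsUnhealthy()) // false false
	fmt.Println(healthy.IsHealthy(), healthy.IsUnhealthy()) // true false
}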
5599 ID string 5600 5601 // Namespace is the namespace the evaluation is created in 5602 Namespace string 5603 5604 // Priority is used to control scheduling importance and if this job 5605 // can preempt other jobs. 5606 Priority int 5607 5608 // Type is used to control which schedulers are available to handle 5609 // this evaluation. 5610 Type string 5611 5612 // TriggeredBy is used to give some insight into why this Eval 5613 // was created. (Job change, node failure, alloc failure, etc). 5614 TriggeredBy string 5615 5616 // JobID is the job this evaluation is scoped to. Evaluations cannot 5617 // be run in parallel for a given JobID, so we serialize on this. 5618 JobID string 5619 5620 // JobModifyIndex is the modify index of the job at the time 5621 // the evaluation was created 5622 JobModifyIndex uint64 5623 5624 // NodeID is the node that was affected triggering the evaluation. 5625 NodeID string 5626 5627 // NodeModifyIndex is the modify index of the node at the time 5628 // the evaluation was created 5629 NodeModifyIndex uint64 5630 5631 // DeploymentID is the ID of the deployment that triggered the evaluation. 5632 DeploymentID string 5633 5634 // Status of the evaluation 5635 Status string 5636 5637 // StatusDescription is meant to provide more human useful information 5638 StatusDescription string 5639 5640 // Wait is a minimum wait time for running the eval. This is used to 5641 // support a rolling upgrade. 5642 Wait time.Duration 5643 5644 // NextEval is the evaluation ID for the eval created to do a followup. 5645 // This is used to support rolling upgrades, where we need a chain of evaluations. 5646 NextEval string 5647 5648 // PreviousEval is the evaluation ID for the eval creating this one to do a followup. 5649 // This is used to support rolling upgrades, where we need a chain of evaluations. 5650 PreviousEval string 5651 5652 // BlockedEval is the evaluation ID for a created blocked eval. A 5653 // blocked eval will be created if all allocations could not be placed due 5654 // to constraints or lacking resources. 5655 BlockedEval string 5656 5657 // FailedTGAllocs are task groups which have allocations that could not be 5658 // made, but the metrics are persisted so that the user can use the feedback 5659 // to determine the cause. 5660 FailedTGAllocs map[string]*AllocMetric 5661 5662 // ClassEligibility tracks computed node classes that have been explicitly 5663 // marked as eligible or ineligible. 5664 ClassEligibility map[string]bool 5665 5666 // QuotaLimitReached marks whether a quota limit was reached for the 5667 // evaluation. 5668 QuotaLimitReached string 5669 5670 // EscapedComputedClass marks whether the job has constraints that are not 5671 // captured by computed node classes. 5672 EscapedComputedClass bool 5673 5674 // AnnotatePlan triggers the scheduler to provide additional annotations 5675 // during the evaluation. This should not be set during normal operations. 5676 AnnotatePlan bool 5677 5678 // QueuedAllocations is the number of unplaced allocations at the time the 5679 // evaluation was processed. The map is keyed by Task Group names. 5680 QueuedAllocations map[string]int 5681 5682 // LeaderACL provides the ACL token to when issuing RPCs back to the 5683 // leader. This will be a valid management token as long as the leader is 5684 // active. This should not ever be exposed via the API. 5685 LeaderACL string 5686 5687 // SnapshotIndex is the Raft index of the snapshot used to process the 5688 // evaluation. 
As such it will only be set once it has gone through the 5689 // scheduler. 5690 SnapshotIndex uint64 5691 5692 // Raft Indexes 5693 CreateIndex uint64 5694 ModifyIndex uint64 5695 } 5696 5697 // TerminalStatus returns if the current status is terminal and 5698 // will no longer transition. 5699 func (e *Evaluation) TerminalStatus() bool { 5700 switch e.Status { 5701 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 5702 return true 5703 default: 5704 return false 5705 } 5706 } 5707 5708 func (e *Evaluation) GoString() string { 5709 return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace) 5710 } 5711 5712 func (e *Evaluation) Copy() *Evaluation { 5713 if e == nil { 5714 return nil 5715 } 5716 ne := new(Evaluation) 5717 *ne = *e 5718 5719 // Copy ClassEligibility 5720 if e.ClassEligibility != nil { 5721 classes := make(map[string]bool, len(e.ClassEligibility)) 5722 for class, elig := range e.ClassEligibility { 5723 classes[class] = elig 5724 } 5725 ne.ClassEligibility = classes 5726 } 5727 5728 // Copy FailedTGAllocs 5729 if e.FailedTGAllocs != nil { 5730 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 5731 for tg, metric := range e.FailedTGAllocs { 5732 failedTGs[tg] = metric.Copy() 5733 } 5734 ne.FailedTGAllocs = failedTGs 5735 } 5736 5737 // Copy queued allocations 5738 if e.QueuedAllocations != nil { 5739 queuedAllocations := make(map[string]int, len(e.QueuedAllocations)) 5740 for tg, num := range e.QueuedAllocations { 5741 queuedAllocations[tg] = num 5742 } 5743 ne.QueuedAllocations = queuedAllocations 5744 } 5745 5746 return ne 5747 } 5748 5749 // ShouldEnqueue checks if a given evaluation should be enqueued into the 5750 // eval_broker 5751 func (e *Evaluation) ShouldEnqueue() bool { 5752 switch e.Status { 5753 case EvalStatusPending: 5754 return true 5755 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 5756 return false 5757 default: 5758 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 5759 } 5760 } 5761 5762 // ShouldBlock checks if a given evaluation should be entered into the blocked 5763 // eval tracker. 5764 func (e *Evaluation) ShouldBlock() bool { 5765 switch e.Status { 5766 case EvalStatusBlocked: 5767 return true 5768 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 5769 return false 5770 default: 5771 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 5772 } 5773 } 5774 5775 // MakePlan is used to make a plan from the given evaluation 5776 // for a given Job 5777 func (e *Evaluation) MakePlan(j *Job) *Plan { 5778 p := &Plan{ 5779 EvalID: e.ID, 5780 Priority: e.Priority, 5781 Job: j, 5782 NodeUpdate: make(map[string][]*Allocation), 5783 NodeAllocation: make(map[string][]*Allocation), 5784 } 5785 if j != nil { 5786 p.AllAtOnce = j.AllAtOnce 5787 } 5788 return p 5789 } 5790 5791 // NextRollingEval creates an evaluation to followup this eval for rolling updates 5792 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 5793 return &Evaluation{ 5794 ID: uuid.Generate(), 5795 Namespace: e.Namespace, 5796 Priority: e.Priority, 5797 Type: e.Type, 5798 TriggeredBy: EvalTriggerRollingUpdate, 5799 JobID: e.JobID, 5800 JobModifyIndex: e.JobModifyIndex, 5801 Status: EvalStatusPending, 5802 Wait: wait, 5803 PreviousEval: e.ID, 5804 } 5805 } 5806 5807 // CreateBlockedEval creates a blocked evaluation to followup this eval to place any 5808 // failed allocations. 
It takes the classes marked explicitly eligible or 5809 // ineligible, whether the job has escaped computed node classes and whether the 5810 // quota limit was reached. 5811 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, 5812 escaped bool, quotaReached string) *Evaluation { 5813 5814 return &Evaluation{ 5815 ID: uuid.Generate(), 5816 Namespace: e.Namespace, 5817 Priority: e.Priority, 5818 Type: e.Type, 5819 TriggeredBy: e.TriggeredBy, 5820 JobID: e.JobID, 5821 JobModifyIndex: e.JobModifyIndex, 5822 Status: EvalStatusBlocked, 5823 PreviousEval: e.ID, 5824 ClassEligibility: classEligibility, 5825 EscapedComputedClass: escaped, 5826 QuotaLimitReached: quotaReached, 5827 } 5828 } 5829 5830 // CreateFailedFollowUpEval creates a follow up evaluation when the current one 5831 // has been marked as failed because it has hit the delivery limit and will not 5832 // be retried by the eval_broker. 5833 func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation { 5834 return &Evaluation{ 5835 ID: uuid.Generate(), 5836 Namespace: e.Namespace, 5837 Priority: e.Priority, 5838 Type: e.Type, 5839 TriggeredBy: EvalTriggerFailedFollowUp, 5840 JobID: e.JobID, 5841 JobModifyIndex: e.JobModifyIndex, 5842 Status: EvalStatusPending, 5843 Wait: wait, 5844 PreviousEval: e.ID, 5845 } 5846 } 5847 5848 // Plan is used to submit a commit plan for task allocations. These 5849 // are submitted to the leader which verifies that resources have 5850 // not been overcommitted before admitting the plan. 5851 type Plan struct { 5852 // EvalID is the evaluation ID this plan is associated with 5853 EvalID string 5854 5855 // EvalToken is used to prevent a split-brain processing of 5856 // an evaluation. There should only be a single scheduler running 5857 // an Eval at a time, but this could be violated after a leadership 5858 // transition. This unique token is used to reject plans that are 5859 // being submitted from a different leader. 5860 EvalToken string 5861 5862 // Priority is the priority of the upstream job 5863 Priority int 5864 5865 // AllAtOnce is used to control if incremental scheduling of task groups 5866 // is allowed or if we must do a gang scheduling of the entire job. 5867 // If this is false, a plan may be partially applied. Otherwise, the 5868 // entire plan must be able to make progress. 5869 AllAtOnce bool 5870 5871 // Job is the parent job of all the allocations in the Plan. 5872 // Since a Plan only involves a single Job, we can reduce the size 5873 // of the plan by only including it once. 5874 Job *Job 5875 5876 // NodeUpdate contains all the allocations for each node. For each node, 5877 // this is a list of the allocations to update to either stop or evict. 5878 NodeUpdate map[string][]*Allocation 5879 5880 // NodeAllocation contains all the allocations for each node. 5881 // The evicts must be considered prior to the allocations. 5882 NodeAllocation map[string][]*Allocation 5883 5884 // Annotations contains annotations by the scheduler to be used by operators 5885 // to understand the decisions made by the scheduler. 5886 Annotations *PlanAnnotations 5887 5888 // Deployment is the deployment created or updated by the scheduler that 5889 // should be applied by the planner. 5890 Deployment *Deployment 5891 5892 // DeploymentUpdates is a set of status updates to apply to the given 5893 // deployments. This allows the scheduler to cancel any unneeded deployment 5894 // because the job is stopped or the update block is removed.
5895 DeploymentUpdates []*DeploymentStatusUpdate 5896 } 5897 5898 // AppendUpdate marks the allocation for eviction. The clientStatus of the 5899 // allocation may be optionally set by passing in a non-empty value. 5900 func (p *Plan) AppendUpdate(alloc *Allocation, desiredStatus, desiredDesc, clientStatus string) { 5901 newAlloc := new(Allocation) 5902 *newAlloc = *alloc 5903 5904 // If the job is not set in the plan we are deregistering a job so we 5905 // extract the job from the allocation. 5906 if p.Job == nil && newAlloc.Job != nil { 5907 p.Job = newAlloc.Job 5908 } 5909 5910 // Normalize the job 5911 newAlloc.Job = nil 5912 5913 // Strip the resources as it can be rebuilt. 5914 newAlloc.Resources = nil 5915 5916 newAlloc.DesiredStatus = desiredStatus 5917 newAlloc.DesiredDescription = desiredDesc 5918 5919 if clientStatus != "" { 5920 newAlloc.ClientStatus = clientStatus 5921 } 5922 5923 node := alloc.NodeID 5924 existing := p.NodeUpdate[node] 5925 p.NodeUpdate[node] = append(existing, newAlloc) 5926 } 5927 5928 func (p *Plan) PopUpdate(alloc *Allocation) { 5929 existing := p.NodeUpdate[alloc.NodeID] 5930 n := len(existing) 5931 if n > 0 && existing[n-1].ID == alloc.ID { 5932 existing = existing[:n-1] 5933 if len(existing) > 0 { 5934 p.NodeUpdate[alloc.NodeID] = existing 5935 } else { 5936 delete(p.NodeUpdate, alloc.NodeID) 5937 } 5938 } 5939 } 5940 5941 func (p *Plan) AppendAlloc(alloc *Allocation) { 5942 node := alloc.NodeID 5943 existing := p.NodeAllocation[node] 5944 p.NodeAllocation[node] = append(existing, alloc) 5945 } 5946 5947 // IsNoOp checks if this plan would do nothing 5948 func (p *Plan) IsNoOp() bool { 5949 return len(p.NodeUpdate) == 0 && 5950 len(p.NodeAllocation) == 0 && 5951 p.Deployment == nil && 5952 len(p.DeploymentUpdates) == 0 5953 } 5954 5955 // PlanResult is the result of a plan submitted to the leader. 5956 type PlanResult struct { 5957 // NodeUpdate contains all the updates that were committed. 5958 NodeUpdate map[string][]*Allocation 5959 5960 // NodeAllocation contains all the allocations that were committed. 5961 NodeAllocation map[string][]*Allocation 5962 5963 // Deployment is the deployment that was committed. 5964 Deployment *Deployment 5965 5966 // DeploymentUpdates is the set of deployment updates that were committed. 5967 DeploymentUpdates []*DeploymentStatusUpdate 5968 5969 // RefreshIndex is the index the worker should refresh state up to. 5970 // This allows all evictions and allocations to be materialized. 5971 // If any allocations were rejected due to stale data (node state, 5972 // over committed) this can be used to force a worker refresh. 5973 RefreshIndex uint64 5974 5975 // AllocIndex is the Raft index in which the evictions and 5976 // allocations took place. This is used for the write index. 5977 AllocIndex uint64 5978 } 5979 5980 // IsNoOp checks if this plan result would do nothing 5981 func (p *PlanResult) IsNoOp() bool { 5982 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && 5983 len(p.DeploymentUpdates) == 0 && p.Deployment == nil 5984 } 5985 5986 // FullCommit is used to check if all the allocations in a plan 5987 // were committed as part of the result. Returns if there was 5988 // a match, and the number of expected and actual allocations. 
5989 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) { 5990 expected := 0 5991 actual := 0 5992 for name, allocList := range plan.NodeAllocation { 5993 didAlloc, _ := p.NodeAllocation[name] 5994 expected += len(allocList) 5995 actual += len(didAlloc) 5996 } 5997 return actual == expected, expected, actual 5998 } 5999 6000 // PlanAnnotations holds annotations made by the scheduler to give further debug 6001 // information to operators. 6002 type PlanAnnotations struct { 6003 // DesiredTGUpdates is the set of desired updates per task group. 6004 DesiredTGUpdates map[string]*DesiredUpdates 6005 } 6006 6007 // DesiredUpdates is the set of changes the scheduler would like to make given 6008 // sufficient resources and cluster capacity. 6009 type DesiredUpdates struct { 6010 Ignore uint64 6011 Place uint64 6012 Migrate uint64 6013 Stop uint64 6014 InPlaceUpdate uint64 6015 DestructiveUpdate uint64 6016 Canary uint64 6017 } 6018 6019 func (d *DesiredUpdates) GoString() string { 6020 return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)", 6021 d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary) 6022 } 6023 6024 // MsgpackHandle is a shared handle for encoding/decoding of structs 6025 var MsgpackHandle = func() *codec.MsgpackHandle { 6026 h := &codec.MsgpackHandle{RawToString: true} 6027 6028 // Sets the default type for decoding a map into a nil interface{}. 6029 // This is necessary in particular because we store the driver configs as a 6030 // nil interface{}. 6031 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 6032 return h 6033 }() 6034 6035 var ( 6036 // JsonHandle and JsonHandlePretty are the codec handles to JSON encode 6037 // structs. The pretty handle will add indents for easier human consumption. 6038 JsonHandle = &codec.JsonHandle{ 6039 HTMLCharsAsIs: true, 6040 } 6041 JsonHandlePretty = &codec.JsonHandle{ 6042 HTMLCharsAsIs: true, 6043 Indent: 4, 6044 } 6045 ) 6046 6047 // TODO Figure out if we can remove this. This is our fork that is just way 6048 // behind. I feel like its original purpose was to pin at a stable version but 6049 // now we can accomplish this with vendoring. 6050 var HashiMsgpackHandle = func() *hcodec.MsgpackHandle { 6051 h := &hcodec.MsgpackHandle{RawToString: true} 6052 6053 // Sets the default type for decoding a map into a nil interface{}. 6054 // This is necessary in particular because we store the driver configs as a 6055 // nil interface{}. 6056 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 6057 return h 6058 }() 6059 6060 // Decode is used to decode a MsgPack encoded object 6061 func Decode(buf []byte, out interface{}) error { 6062 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out) 6063 } 6064 6065 // Encode is used to encode a MsgPack object with type prefix 6066 func Encode(t MessageType, msg interface{}) ([]byte, error) { 6067 var buf bytes.Buffer 6068 buf.WriteByte(uint8(t)) 6069 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg) 6070 return buf.Bytes(), err 6071 } 6072 6073 // KeyringResponse is a unified key response and can be used for install, 6074 // remove, use, as well as listing key queries. 6075 type KeyringResponse struct { 6076 Messages map[string]string 6077 Keys map[string]int 6078 NumNodes int 6079 } 6080 6081 // KeyringRequest is the request object for serf key operations.
6082 type KeyringRequest struct { 6083 Key string 6084 } 6085 6086 // RecoverableError wraps an error and marks whether it is recoverable and could 6087 // be retried or it is fatal. 6088 type RecoverableError struct { 6089 Err string 6090 Recoverable bool 6091 } 6092 6093 // NewRecoverableError is used to wrap an error and mark it as recoverable or 6094 // not. 6095 func NewRecoverableError(e error, recoverable bool) error { 6096 if e == nil { 6097 return nil 6098 } 6099 6100 return &RecoverableError{ 6101 Err: e.Error(), 6102 Recoverable: recoverable, 6103 } 6104 } 6105 6106 // WrapRecoverable wraps an existing error in a new RecoverableError with a new 6107 // message. If the error was recoverable before, the returned error is as well; 6108 // otherwise it is unrecoverable. 6109 func WrapRecoverable(msg string, err error) error { 6110 return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)} 6111 } 6112 6113 func (r *RecoverableError) Error() string { 6114 return r.Err 6115 } 6116 6117 func (r *RecoverableError) IsRecoverable() bool { 6118 return r.Recoverable 6119 } 6120 6121 // Recoverable is an interface for errors to implement to indicate whether or 6122 // not they are fatal or recoverable. 6123 type Recoverable interface { 6124 error 6125 IsRecoverable() bool 6126 } 6127 6128 // IsRecoverable returns true if the error is a RecoverableError with 6129 // Recoverable=true. Otherwise false is returned. 6130 func IsRecoverable(e error) bool { 6131 if re, ok := e.(Recoverable); ok { 6132 return re.IsRecoverable() 6133 } 6134 return false 6135 } 6136 6137 // ACLPolicy is used to represent an ACL policy 6138 type ACLPolicy struct { 6139 Name string // Unique name 6140 Description string // Human readable 6141 Rules string // HCL or JSON format 6142 Hash []byte 6143 CreateIndex uint64 6144 ModifyIndex uint64 6145 } 6146 6147 // SetHash is used to compute and set the hash of the ACL policy 6148 func (c *ACLPolicy) SetHash() []byte { 6149 // Initialize a 256bit Blake2 hash (32 bytes) 6150 hash, err := blake2b.New256(nil) 6151 if err != nil { 6152 panic(err) 6153 } 6154 6155 // Write all the user set fields 6156 hash.Write([]byte(c.Name)) 6157 hash.Write([]byte(c.Description)) 6158 hash.Write([]byte(c.Rules)) 6159 6160 // Finalize the hash 6161 hashVal := hash.Sum(nil) 6162 6163 // Set and return the hash 6164 c.Hash = hashVal 6165 return hashVal 6166 } 6167 6168 func (a *ACLPolicy) Stub() *ACLPolicyListStub { 6169 return &ACLPolicyListStub{ 6170 Name: a.Name, 6171 Description: a.Description, 6172 Hash: a.Hash, 6173 CreateIndex: a.CreateIndex, 6174 ModifyIndex: a.ModifyIndex, 6175 } 6176 } 6177 6178 func (a *ACLPolicy) Validate() error { 6179 var mErr multierror.Error 6180 if !validPolicyName.MatchString(a.Name) { 6181 err := fmt.Errorf("invalid name '%s'", a.Name) 6182 mErr.Errors = append(mErr.Errors, err) 6183 } 6184 if _, err := acl.Parse(a.Rules); err != nil { 6185 err = fmt.Errorf("failed to parse rules: %v", err) 6186 mErr.Errors = append(mErr.Errors, err) 6187 } 6188 if len(a.Description) > maxPolicyDescriptionLength { 6189 err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength) 6190 mErr.Errors = append(mErr.Errors, err) 6191 } 6192 return mErr.ErrorOrNil() 6193 } 6194 6195 // ACLPolicyListStub is used for listing ACL policies 6196 type ACLPolicyListStub struct { 6197 Name string 6198 Description string 6199 Hash []byte 6200 CreateIndex uint64 6201 ModifyIndex uint64 6202 } 6203 6204 // ACLPolicyListRequest is used to request a list of policies 6205 type
ACLPolicyListRequest struct { 6206 QueryOptions 6207 } 6208 6209 // ACLPolicySpecificRequest is used to query a specific policy 6210 type ACLPolicySpecificRequest struct { 6211 Name string 6212 QueryOptions 6213 } 6214 6215 // ACLPolicySetRequest is used to query a set of policies 6216 type ACLPolicySetRequest struct { 6217 Names []string 6218 QueryOptions 6219 } 6220 6221 // ACLPolicyListResponse is used for a list request 6222 type ACLPolicyListResponse struct { 6223 Policies []*ACLPolicyListStub 6224 QueryMeta 6225 } 6226 6227 // SingleACLPolicyResponse is used to return a single policy 6228 type SingleACLPolicyResponse struct { 6229 Policy *ACLPolicy 6230 QueryMeta 6231 } 6232 6233 // ACLPolicySetResponse is used to return a set of policies 6234 type ACLPolicySetResponse struct { 6235 Policies map[string]*ACLPolicy 6236 QueryMeta 6237 } 6238 6239 // ACLPolicyDeleteRequest is used to delete a set of policies 6240 type ACLPolicyDeleteRequest struct { 6241 Names []string 6242 WriteRequest 6243 } 6244 6245 // ACLPolicyUpsertRequest is used to upsert a set of policies 6246 type ACLPolicyUpsertRequest struct { 6247 Policies []*ACLPolicy 6248 WriteRequest 6249 } 6250 6251 // ACLToken represents a client token which is used to authenticate 6252 type ACLToken struct { 6253 AccessorID string // Public Accessor ID (UUID) 6254 SecretID string // Secret ID, private (UUID) 6255 Name string // Human friendly name 6256 Type string // Client or Management 6257 Policies []string // Policies this token ties to 6258 Global bool // Global or Region local 6259 Hash []byte 6260 CreateTime time.Time // Time of creation 6261 CreateIndex uint64 6262 ModifyIndex uint64 6263 } 6264 6265 var ( 6266 // AnonymousACLToken is used when no SecretID is provided, and the 6267 // request is made anonymously.
6268 AnonymousACLToken = &ACLToken{ 6269 AccessorID: "anonymous", 6270 Name: "Anonymous Token", 6271 Type: ACLClientToken, 6272 Policies: []string{"anonymous"}, 6273 Global: false, 6274 } 6275 ) 6276 6277 type ACLTokenListStub struct { 6278 AccessorID string 6279 Name string 6280 Type string 6281 Policies []string 6282 Global bool 6283 Hash []byte 6284 CreateTime time.Time 6285 CreateIndex uint64 6286 ModifyIndex uint64 6287 } 6288 6289 // SetHash is used to compute and set the hash of the ACL token 6290 func (a *ACLToken) SetHash() []byte { 6291 // Initialize a 256bit Blake2 hash (32 bytes) 6292 hash, err := blake2b.New256(nil) 6293 if err != nil { 6294 panic(err) 6295 } 6296 6297 // Write all the user set fields 6298 hash.Write([]byte(a.Name)) 6299 hash.Write([]byte(a.Type)) 6300 for _, policyName := range a.Policies { 6301 hash.Write([]byte(policyName)) 6302 } 6303 if a.Global { 6304 hash.Write([]byte("global")) 6305 } else { 6306 hash.Write([]byte("local")) 6307 } 6308 6309 // Finalize the hash 6310 hashVal := hash.Sum(nil) 6311 6312 // Set and return the hash 6313 a.Hash = hashVal 6314 return hashVal 6315 } 6316 6317 func (a *ACLToken) Stub() *ACLTokenListStub { 6318 return &ACLTokenListStub{ 6319 AccessorID: a.AccessorID, 6320 Name: a.Name, 6321 Type: a.Type, 6322 Policies: a.Policies, 6323 Global: a.Global, 6324 Hash: a.Hash, 6325 CreateTime: a.CreateTime, 6326 CreateIndex: a.CreateIndex, 6327 ModifyIndex: a.ModifyIndex, 6328 } 6329 } 6330 6331 // Validate is used to sanity check a token 6332 func (a *ACLToken) Validate() error { 6333 var mErr multierror.Error 6334 if len(a.Name) > maxTokenNameLength { 6335 mErr.Errors = append(mErr.Errors, fmt.Errorf("token name too long")) 6336 } 6337 switch a.Type { 6338 case ACLClientToken: 6339 if len(a.Policies) == 0 { 6340 mErr.Errors = append(mErr.Errors, fmt.Errorf("client token missing policies")) 6341 } 6342 case ACLManagementToken: 6343 if len(a.Policies) != 0 { 6344 mErr.Errors = append(mErr.Errors, fmt.Errorf("management token cannot be associated with policies")) 6345 } 6346 default: 6347 mErr.Errors = append(mErr.Errors, fmt.Errorf("token type must be client or management")) 6348 } 6349 return mErr.ErrorOrNil() 6350 } 6351 6352 // PolicySubset checks if a given set of policies is a subset of the token 6353 func (a *ACLToken) PolicySubset(policies []string) bool { 6354 // Hot-path the management tokens, superset of all policies. 
6355 if a.Type == ACLManagementToken { 6356 return true 6357 } 6358 associatedPolicies := make(map[string]struct{}, len(a.Policies)) 6359 for _, policy := range a.Policies { 6360 associatedPolicies[policy] = struct{}{} 6361 } 6362 for _, policy := range policies { 6363 if _, ok := associatedPolicies[policy]; !ok { 6364 return false 6365 } 6366 } 6367 return true 6368 } 6369 6370 // ACLTokenListRequest is used to request a list of tokens 6371 type ACLTokenListRequest struct { 6372 GlobalOnly bool 6373 QueryOptions 6374 } 6375 6376 // ACLTokenSpecificRequest is used to query a specific token 6377 type ACLTokenSpecificRequest struct { 6378 AccessorID string 6379 QueryOptions 6380 } 6381 6382 // ACLTokenSetRequest is used to query a set of tokens 6383 type ACLTokenSetRequest struct { 6384 AccessorIDS []string 6385 QueryOptions 6386 } 6387 6388 // ACLTokenListResponse is used for a list request 6389 type ACLTokenListResponse struct { 6390 Tokens []*ACLTokenListStub 6391 QueryMeta 6392 } 6393 6394 // SingleACLTokenResponse is used to return a single token 6395 type SingleACLTokenResponse struct { 6396 Token *ACLToken 6397 QueryMeta 6398 } 6399 6400 // ACLTokenSetResponse is used to return a set of tokens 6401 type ACLTokenSetResponse struct { 6402 Tokens map[string]*ACLToken // Keyed by Accessor ID 6403 QueryMeta 6404 } 6405 6406 // ResolveACLTokenRequest is used to resolve a specific token 6407 type ResolveACLTokenRequest struct { 6408 SecretID string 6409 QueryOptions 6410 } 6411 6412 // ResolveACLTokenResponse is used to resolve a single token 6413 type ResolveACLTokenResponse struct { 6414 Token *ACLToken 6415 QueryMeta 6416 } 6417 6418 // ACLTokenDeleteRequest is used to delete a set of tokens 6419 type ACLTokenDeleteRequest struct { 6420 AccessorIDs []string 6421 WriteRequest 6422 } 6423 6424 // ACLTokenBootstrapRequest is used to bootstrap ACLs 6425 type ACLTokenBootstrapRequest struct { 6426 Token *ACLToken // Not client specifiable 6427 ResetIndex uint64 // Reset index is used to clear the bootstrap token 6428 WriteRequest 6429 } 6430 6431 // ACLTokenUpsertRequest is used to upsert a set of tokens 6432 type ACLTokenUpsertRequest struct { 6433 Tokens []*ACLToken 6434 WriteRequest 6435 } 6436 6437 // ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest 6438 type ACLTokenUpsertResponse struct { 6439 Tokens []*ACLToken 6440 WriteMeta 6441 }
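Below is a minimal usage sketch of a few of the helpers defined above (RecoverableError wrapping, ACLToken validation, and PolicySubset). It is not part of the file; the import path, token name, and policy names are illustrative assumptions only.

package main

import (
	"errors"
	"fmt"

	"github.com/hashicorp/nomad/nomad/structs"
)

func main() {
	// Wrap a transient error and mark it retryable; IsRecoverable reports it.
	err := structs.NewRecoverableError(errors.New("connection reset"), true)
	fmt.Println(structs.IsRecoverable(err)) // true

	// WrapRecoverable keeps the recoverability of the wrapped error.
	wrapped := structs.WrapRecoverable("rpc failed", err)
	fmt.Println(structs.IsRecoverable(wrapped)) // true

	// A client token must carry at least one policy to pass Validate.
	token := &structs.ACLToken{
		Name:     "example-token",       // hypothetical name
		Type:     structs.ACLClientToken,
		Policies: []string{"readonly"},  // hypothetical policy name
	}
	// SetHash computes and stores the Blake2b hash over the user-set fields.
	token.SetHash()
	fmt.Println(token.Validate()) // <nil>

	// PolicySubset reports whether all requested policies are attached to the token.
	fmt.Println(token.PolicySubset([]string{"readonly"}))          // true
	fmt.Println(token.PolicySubset([]string{"readonly", "admin"})) // false
}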