github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/nomad/structs/structs.go (about) 1 package structs 2 3 import ( 4 "bytes" 5 "container/heap" 6 "crypto/md5" 7 "crypto/sha1" 8 "crypto/sha256" 9 "crypto/sha512" 10 "encoding/base32" 11 "encoding/base64" 12 "encoding/hex" 13 "errors" 14 "fmt" 15 "hash" 16 "hash/crc32" 17 "math" 18 "net" 19 "os" 20 "path/filepath" 21 "reflect" 22 "regexp" 23 "sort" 24 "strconv" 25 "strings" 26 "time" 27 28 "github.com/hashicorp/cronexpr" 29 "github.com/hashicorp/go-msgpack/codec" 30 "github.com/hashicorp/go-multierror" 31 "github.com/hashicorp/go-version" 32 "github.com/mitchellh/copystructure" 33 "golang.org/x/crypto/blake2b" 34 35 "github.com/hashicorp/nomad/acl" 36 "github.com/hashicorp/nomad/command/agent/host" 37 "github.com/hashicorp/nomad/command/agent/pprof" 38 "github.com/hashicorp/nomad/helper" 39 "github.com/hashicorp/nomad/helper/args" 40 "github.com/hashicorp/nomad/helper/constraints/semver" 41 "github.com/hashicorp/nomad/helper/uuid" 42 "github.com/hashicorp/nomad/lib/kheap" 43 psstructs "github.com/hashicorp/nomad/plugins/shared/structs" 44 ) 45 46 var ( 47 // validPolicyName is used to validate a policy name 48 validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$") 49 50 // b32 is a lowercase base32 encoding for use in URL friendly service hashes 51 b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567")) 52 ) 53 54 type MessageType uint8 55 56 // note: new raft message types need to be added to the end of this 57 // list of contents 58 const ( 59 NodeRegisterRequestType MessageType = 0 60 NodeDeregisterRequestType MessageType = 1 61 NodeUpdateStatusRequestType MessageType = 2 62 NodeUpdateDrainRequestType MessageType = 3 63 JobRegisterRequestType MessageType = 4 64 JobDeregisterRequestType MessageType = 5 65 EvalUpdateRequestType MessageType = 6 66 EvalDeleteRequestType MessageType = 7 67 AllocUpdateRequestType MessageType = 8 68 AllocClientUpdateRequestType MessageType = 9 69 ReconcileJobSummariesRequestType MessageType = 10 70 VaultAccessorRegisterRequestType MessageType = 11 71 VaultAccessorDeregisterRequestType MessageType = 12 72 ApplyPlanResultsRequestType MessageType = 13 73 DeploymentStatusUpdateRequestType MessageType = 14 74 DeploymentPromoteRequestType MessageType = 15 75 DeploymentAllocHealthRequestType MessageType = 16 76 DeploymentDeleteRequestType MessageType = 17 77 JobStabilityRequestType MessageType = 18 78 ACLPolicyUpsertRequestType MessageType = 19 79 ACLPolicyDeleteRequestType MessageType = 20 80 ACLTokenUpsertRequestType MessageType = 21 81 ACLTokenDeleteRequestType MessageType = 22 82 ACLTokenBootstrapRequestType MessageType = 23 83 AutopilotRequestType MessageType = 24 84 UpsertNodeEventsType MessageType = 25 85 JobBatchDeregisterRequestType MessageType = 26 86 AllocUpdateDesiredTransitionRequestType MessageType = 27 87 NodeUpdateEligibilityRequestType MessageType = 28 88 BatchNodeUpdateDrainRequestType MessageType = 29 89 SchedulerConfigRequestType MessageType = 30 90 NodeBatchDeregisterRequestType MessageType = 31 91 ClusterMetadataRequestType MessageType = 32 92 ServiceIdentityAccessorRegisterRequestType MessageType = 33 93 ServiceIdentityAccessorDeregisterRequestType MessageType = 34 94 CSIVolumeRegisterRequestType MessageType = 35 95 CSIVolumeDeregisterRequestType MessageType = 36 96 CSIVolumeClaimRequestType MessageType = 37 97 ScalingEventRegisterRequestType MessageType = 38 98 CSIVolumeClaimBatchRequestType MessageType = 39 99 CSIPluginDeleteRequestType MessageType = 40 100 EventSinkUpsertRequestType MessageType = 41 101 EventSinkDeleteRequestType MessageType = 42 102 BatchEventSinkUpdateProgressType MessageType = 43 103 104 // Namespace types were moved from enterprise and therefore start at 64 105 NamespaceUpsertRequestType MessageType = 64 106 NamespaceDeleteRequestType MessageType = 65 107 ) 108 109 const ( 110 // IgnoreUnknownTypeFlag is set along with a MessageType 111 // to indicate that the message type can be safely ignored 112 // if it is not recognized. This is for future proofing, so 113 // that new commands can be added in a way that won't cause 114 // old servers to crash when the FSM attempts to process them. 115 IgnoreUnknownTypeFlag MessageType = 128 116 117 // MsgTypeTestSetup is used during testing when calling state store 118 // methods directly that require an FSM MessageType 119 MsgTypeTestSetup MessageType = IgnoreUnknownTypeFlag 120 121 // ApiMajorVersion is returned as part of the Status.Version request. 122 // It should be incremented anytime the APIs are changed in a way 123 // that would break clients for sane client versioning. 124 ApiMajorVersion = 1 125 126 // ApiMinorVersion is returned as part of the Status.Version request. 127 // It should be incremented anytime the APIs are changed to allow 128 // for sane client versioning. Minor changes should be compatible 129 // within the major version. 130 ApiMinorVersion = 1 131 132 ProtocolVersion = "protocol" 133 APIMajorVersion = "api.major" 134 APIMinorVersion = "api.minor" 135 136 GetterModeAny = "any" 137 GetterModeFile = "file" 138 GetterModeDir = "dir" 139 140 // maxPolicyDescriptionLength limits a policy description length 141 maxPolicyDescriptionLength = 256 142 143 // maxTokenNameLength limits a ACL token name length 144 maxTokenNameLength = 256 145 146 // ACLClientToken and ACLManagementToken are the only types of tokens 147 ACLClientToken = "client" 148 ACLManagementToken = "management" 149 150 // DefaultNamespace is the default namespace. 151 DefaultNamespace = "default" 152 DefaultNamespaceDescription = "Default shared namespace" 153 154 // AllNamespacesSentinel is the value used as a namespace RPC value 155 // to indicate that endpoints must search in all namespaces 156 AllNamespacesSentinel = "*" 157 158 // maxNamespaceDescriptionLength limits a namespace description length 159 maxNamespaceDescriptionLength = 256 160 161 // JitterFraction is a the limit to the amount of jitter we apply 162 // to a user specified MaxQueryTime. We divide the specified time by 163 // the fraction. So 16 == 6.25% limit of jitter. This jitter is also 164 // applied to RPCHoldTimeout. 165 JitterFraction = 16 166 167 // MaxRetainedNodeEvents is the maximum number of node events that will be 168 // retained for a single node 169 MaxRetainedNodeEvents = 10 170 171 // MaxRetainedNodeScores is the number of top scoring nodes for which we 172 // retain scoring metadata 173 MaxRetainedNodeScores = 5 174 175 // Normalized scorer name 176 NormScorerName = "normalized-score" 177 178 // MaxBlockingRPCQueryTime is used to bound the limit of a blocking query 179 MaxBlockingRPCQueryTime = 300 * time.Second 180 181 // DefaultBlockingRPCQueryTime is the amount of time we block waiting for a change 182 // if no time is specified. Previously we would wait the MaxBlockingRPCQueryTime. 183 DefaultBlockingRPCQueryTime = 300 * time.Second 184 ) 185 186 var ( 187 // validNamespaceName is used to validate a namespace name 188 validNamespaceName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$") 189 ) 190 191 // Context defines the scope in which a search for Nomad object operates, and 192 // is also used to query the matching index value for this context 193 type Context string 194 195 const ( 196 Allocs Context = "allocs" 197 Deployments Context = "deployment" 198 Evals Context = "evals" 199 Jobs Context = "jobs" 200 Nodes Context = "nodes" 201 Namespaces Context = "namespaces" 202 Quotas Context = "quotas" 203 Recommendations Context = "recommendations" 204 ScalingPolicies Context = "scaling_policy" 205 All Context = "all" 206 Plugins Context = "plugins" 207 Volumes Context = "volumes" 208 ) 209 210 // NamespacedID is a tuple of an ID and a namespace 211 type NamespacedID struct { 212 ID string 213 Namespace string 214 } 215 216 // NewNamespacedID returns a new namespaced ID given the ID and namespace 217 func NewNamespacedID(id, ns string) NamespacedID { 218 return NamespacedID{ 219 ID: id, 220 Namespace: ns, 221 } 222 } 223 224 func (n NamespacedID) String() string { 225 return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID) 226 } 227 228 // RPCInfo is used to describe common information about query 229 type RPCInfo interface { 230 RequestRegion() string 231 IsRead() bool 232 AllowStaleRead() bool 233 IsForwarded() bool 234 SetForwarded() 235 TimeToBlock() time.Duration 236 // TimeToBlock sets how long this request can block. The requested time may not be possible, 237 // so Callers should readback TimeToBlock. E.g. you cannot set time to block at all on WriteRequests 238 // and it cannot exceed MaxBlockingRPCQueryTime 239 SetTimeToBlock(t time.Duration) 240 } 241 242 // InternalRpcInfo allows adding internal RPC metadata to an RPC. This struct 243 // should NOT be replicated in the API package as it is internal only. 244 type InternalRpcInfo struct { 245 // Forwarded marks whether the RPC has been forwarded. 246 Forwarded bool 247 } 248 249 // IsForwarded returns whether the RPC is forwarded from another server. 250 func (i *InternalRpcInfo) IsForwarded() bool { 251 return i.Forwarded 252 } 253 254 // SetForwarded marks that the RPC is being forwarded from another server. 255 func (i *InternalRpcInfo) SetForwarded() { 256 i.Forwarded = true 257 } 258 259 // QueryOptions is used to specify various flags for read queries 260 type QueryOptions struct { 261 // The target region for this query 262 Region string 263 264 // Namespace is the target namespace for the query. 265 // 266 // Since handlers do not have a default value set they should access 267 // the Namespace via the RequestNamespace method. 268 // 269 // Requests accessing specific namespaced objects must check ACLs 270 // against the namespace of the object, not the namespace in the 271 // request. 272 Namespace string 273 274 // If set, wait until query exceeds given index. Must be provided 275 // with MaxQueryTime. 276 MinQueryIndex uint64 277 278 // Provided with MinQueryIndex to wait for change. 279 MaxQueryTime time.Duration 280 281 // If set, any follower can service the request. Results 282 // may be arbitrarily stale. 283 AllowStale bool 284 285 // If set, used as prefix for resource list searches 286 Prefix string 287 288 // AuthToken is secret portion of the ACL token used for the request 289 AuthToken string 290 291 InternalRpcInfo 292 } 293 294 // TimeToBlock returns MaxQueryTime adjusted for maximums and defaults 295 // it will return 0 if this is not a blocking query 296 func (q QueryOptions) TimeToBlock() time.Duration { 297 if q.MinQueryIndex == 0 { 298 return 0 299 } 300 if q.MaxQueryTime > MaxBlockingRPCQueryTime { 301 return MaxBlockingRPCQueryTime 302 } else if q.MaxQueryTime <= 0 { 303 return DefaultBlockingRPCQueryTime 304 } 305 return q.MaxQueryTime 306 } 307 308 func (q QueryOptions) SetTimeToBlock(t time.Duration) { 309 q.MaxQueryTime = t 310 } 311 312 func (q QueryOptions) RequestRegion() string { 313 return q.Region 314 } 315 316 // RequestNamespace returns the request's namespace or the default namespace if 317 // no explicit namespace was sent. 318 // 319 // Requests accessing specific namespaced objects must check ACLs against the 320 // namespace of the object, not the namespace in the request. 321 func (q QueryOptions) RequestNamespace() string { 322 if q.Namespace == "" { 323 return DefaultNamespace 324 } 325 return q.Namespace 326 } 327 328 // QueryOption only applies to reads, so always true 329 func (q QueryOptions) IsRead() bool { 330 return true 331 } 332 333 func (q QueryOptions) AllowStaleRead() bool { 334 return q.AllowStale 335 } 336 337 // AgentPprofRequest is used to request a pprof report for a given node. 338 type AgentPprofRequest struct { 339 // ReqType specifies the profile to use 340 ReqType pprof.ReqType 341 342 // Profile specifies the runtime/pprof profile to lookup and generate. 343 Profile string 344 345 // Seconds is the number of seconds to capture a profile 346 Seconds int 347 348 // Debug specifies if pprof profile should inclue debug output 349 Debug int 350 351 // GC specifies if the profile should call runtime.GC() before 352 // running its profile. This is only used for "heap" profiles 353 GC int 354 355 // NodeID is the node we want to track the logs of 356 NodeID string 357 358 // ServerID is the server we want to track the logs of 359 ServerID string 360 361 QueryOptions 362 } 363 364 // AgentPprofResponse is used to return a generated pprof profile 365 type AgentPprofResponse struct { 366 // ID of the agent that fulfilled the request 367 AgentID string 368 369 // Payload is the generated pprof profile 370 Payload []byte 371 372 // HTTPHeaders are a set of key value pairs to be applied as 373 // HTTP headers for a specific runtime profile 374 HTTPHeaders map[string]string 375 } 376 377 type WriteRequest struct { 378 // The target region for this write 379 Region string 380 381 // Namespace is the target namespace for the write. 382 // 383 // Since RPC handlers do not have a default value set they should 384 // access the Namespace via the RequestNamespace method. 385 // 386 // Requests accessing specific namespaced objects must check ACLs 387 // against the namespace of the object, not the namespace in the 388 // request. 389 Namespace string 390 391 // AuthToken is secret portion of the ACL token used for the request 392 AuthToken string 393 394 InternalRpcInfo 395 } 396 397 func (w WriteRequest) TimeToBlock() time.Duration { 398 return 0 399 } 400 401 func (w WriteRequest) SetTimeToBlock(_ time.Duration) { 402 } 403 404 func (w WriteRequest) RequestRegion() string { 405 // The target region for this request 406 return w.Region 407 } 408 409 // RequestNamespace returns the request's namespace or the default namespace if 410 // no explicit namespace was sent. 411 // 412 // Requests accessing specific namespaced objects must check ACLs against the 413 // namespace of the object, not the namespace in the request. 414 func (w WriteRequest) RequestNamespace() string { 415 if w.Namespace == "" { 416 return DefaultNamespace 417 } 418 return w.Namespace 419 } 420 421 // WriteRequest only applies to writes, always false 422 func (w WriteRequest) IsRead() bool { 423 return false 424 } 425 426 func (w WriteRequest) AllowStaleRead() bool { 427 return false 428 } 429 430 // QueryMeta allows a query response to include potentially 431 // useful metadata about a query 432 type QueryMeta struct { 433 // This is the index associated with the read 434 Index uint64 435 436 // If AllowStale is used, this is time elapsed since 437 // last contact between the follower and leader. This 438 // can be used to gauge staleness. 439 LastContact time.Duration 440 441 // Used to indicate if there is a known leader node 442 KnownLeader bool 443 } 444 445 // WriteMeta allows a write response to include potentially 446 // useful metadata about the write 447 type WriteMeta struct { 448 // This is the index associated with the write 449 Index uint64 450 } 451 452 // NodeRegisterRequest is used for Node.Register endpoint 453 // to register a node as being a schedulable entity. 454 type NodeRegisterRequest struct { 455 Node *Node 456 NodeEvent *NodeEvent 457 WriteRequest 458 } 459 460 // NodeDeregisterRequest is used for Node.Deregister endpoint 461 // to deregister a node as being a schedulable entity. 462 type NodeDeregisterRequest struct { 463 NodeID string 464 WriteRequest 465 } 466 467 // NodeBatchDeregisterRequest is used for Node.BatchDeregister endpoint 468 // to deregister a batch of nodes from being schedulable entities. 469 type NodeBatchDeregisterRequest struct { 470 NodeIDs []string 471 WriteRequest 472 } 473 474 // NodeServerInfo is used to in NodeUpdateResponse to return Nomad server 475 // information used in RPC server lists. 476 type NodeServerInfo struct { 477 // RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to 478 // be contacted at for RPCs. 479 RPCAdvertiseAddr string 480 481 // RpcMajorVersion is the major version number the Nomad Server 482 // supports 483 RPCMajorVersion int32 484 485 // RpcMinorVersion is the minor version number the Nomad Server 486 // supports 487 RPCMinorVersion int32 488 489 // Datacenter is the datacenter that a Nomad server belongs to 490 Datacenter string 491 } 492 493 // NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint 494 // to update the status of a node. 495 type NodeUpdateStatusRequest struct { 496 NodeID string 497 Status string 498 NodeEvent *NodeEvent 499 UpdatedAt int64 500 WriteRequest 501 } 502 503 // NodeUpdateDrainRequest is used for updating the drain strategy 504 type NodeUpdateDrainRequest struct { 505 NodeID string 506 DrainStrategy *DrainStrategy 507 508 // COMPAT Remove in version 0.10 509 // As part of Nomad 0.8 we have deprecated the drain boolean in favor of a 510 // drain strategy but we need to handle the upgrade path where the Raft log 511 // contains drain updates with just the drain boolean being manipulated. 512 Drain bool 513 514 // MarkEligible marks the node as eligible if removing the drain strategy. 515 MarkEligible bool 516 517 // NodeEvent is the event added to the node 518 NodeEvent *NodeEvent 519 520 // UpdatedAt represents server time of receiving request 521 UpdatedAt int64 522 523 WriteRequest 524 } 525 526 // BatchNodeUpdateDrainRequest is used for updating the drain strategy for a 527 // batch of nodes 528 type BatchNodeUpdateDrainRequest struct { 529 // Updates is a mapping of nodes to their updated drain strategy 530 Updates map[string]*DrainUpdate 531 532 // NodeEvents is a mapping of the node to the event to add to the node 533 NodeEvents map[string]*NodeEvent 534 535 // UpdatedAt represents server time of receiving request 536 UpdatedAt int64 537 538 WriteRequest 539 } 540 541 // DrainUpdate is used to update the drain of a node 542 type DrainUpdate struct { 543 // DrainStrategy is the new strategy for the node 544 DrainStrategy *DrainStrategy 545 546 // MarkEligible marks the node as eligible if removing the drain strategy. 547 MarkEligible bool 548 } 549 550 // NodeUpdateEligibilityRequest is used for updating the scheduling eligibility 551 type NodeUpdateEligibilityRequest struct { 552 NodeID string 553 Eligibility string 554 555 // NodeEvent is the event added to the node 556 NodeEvent *NodeEvent 557 558 // UpdatedAt represents server time of receiving request 559 UpdatedAt int64 560 561 WriteRequest 562 } 563 564 // NodeEvaluateRequest is used to re-evaluate the node 565 type NodeEvaluateRequest struct { 566 NodeID string 567 WriteRequest 568 } 569 570 // NodeSpecificRequest is used when we just need to specify a target node 571 type NodeSpecificRequest struct { 572 NodeID string 573 SecretID string 574 QueryOptions 575 } 576 577 // SearchResponse is used to return matches and information about whether 578 // the match list is truncated specific to each type of context. 579 type SearchResponse struct { 580 // Map of context types to ids which match a specified prefix 581 Matches map[Context][]string 582 583 // Truncations indicates whether the matches for a particular context have 584 // been truncated 585 Truncations map[Context]bool 586 587 QueryMeta 588 } 589 590 // SearchRequest is used to parameterize a request, and returns a 591 // list of matches made up of jobs, allocations, evaluations, and/or nodes, 592 // along with whether or not the information returned is truncated. 593 type SearchRequest struct { 594 // Prefix is what ids are matched to. I.e, if the given prefix were 595 // "a", potential matches might be "abcd" or "aabb" 596 Prefix string 597 598 // Context is the type that can be matched against. A context can be a job, 599 // node, evaluation, allocation, or empty (indicated every context should be 600 // matched) 601 Context Context 602 603 QueryOptions 604 } 605 606 // JobRegisterRequest is used for Job.Register endpoint 607 // to register a job as being a schedulable entity. 608 type JobRegisterRequest struct { 609 Job *Job 610 611 // If EnforceIndex is set then the job will only be registered if the passed 612 // JobModifyIndex matches the current Jobs index. If the index is zero, the 613 // register only occurs if the job is new. 614 EnforceIndex bool 615 JobModifyIndex uint64 616 617 // PreserveCounts indicates that during job update, existing task group 618 // counts should be preserved, over those specified in the new job spec 619 // PreserveCounts is ignored for newly created jobs. 620 PreserveCounts bool 621 622 // PolicyOverride is set when the user is attempting to override any policies 623 PolicyOverride bool 624 625 // Eval is the evaluation that is associated with the job registration 626 Eval *Evaluation 627 628 WriteRequest 629 } 630 631 // JobDeregisterRequest is used for Job.Deregister endpoint 632 // to deregister a job as being a schedulable entity. 633 type JobDeregisterRequest struct { 634 JobID string 635 636 // Purge controls whether the deregister purges the job from the system or 637 // whether the job is just marked as stopped and will be removed by the 638 // garbage collector 639 Purge bool 640 641 // Global controls whether all regions of a multi-region job are 642 // deregistered. It is ignored for single-region jobs. 643 Global bool 644 645 // Eval is the evaluation to create that's associated with job deregister 646 Eval *Evaluation 647 648 WriteRequest 649 } 650 651 // JobBatchDeregisterRequest is used to batch deregister jobs and upsert 652 // evaluations. 653 type JobBatchDeregisterRequest struct { 654 // Jobs is the set of jobs to deregister 655 Jobs map[NamespacedID]*JobDeregisterOptions 656 657 // Evals is the set of evaluations to create. 658 Evals []*Evaluation 659 660 WriteRequest 661 } 662 663 // JobDeregisterOptions configures how a job is deregistered. 664 type JobDeregisterOptions struct { 665 // Purge controls whether the deregister purges the job from the system or 666 // whether the job is just marked as stopped and will be removed by the 667 // garbage collector 668 Purge bool 669 } 670 671 // JobEvaluateRequest is used when we just need to re-evaluate a target job 672 type JobEvaluateRequest struct { 673 JobID string 674 EvalOptions EvalOptions 675 WriteRequest 676 } 677 678 // EvalOptions is used to encapsulate options when forcing a job evaluation 679 type EvalOptions struct { 680 ForceReschedule bool 681 } 682 683 // JobSpecificRequest is used when we just need to specify a target job 684 type JobSpecificRequest struct { 685 JobID string 686 All bool 687 QueryOptions 688 } 689 690 // JobListRequest is used to parameterize a list request 691 type JobListRequest struct { 692 QueryOptions 693 } 694 695 // JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run 696 // evaluation of the Job. 697 type JobPlanRequest struct { 698 Job *Job 699 Diff bool // Toggles an annotated diff 700 // PolicyOverride is set when the user is attempting to override any policies 701 PolicyOverride bool 702 WriteRequest 703 } 704 705 // JobScaleRequest is used for the Job.Scale endpoint to scale one of the 706 // scaling targets in a job 707 type JobScaleRequest struct { 708 JobID string 709 Target map[string]string 710 Count *int64 711 Message string 712 Error bool 713 Meta map[string]interface{} 714 // PolicyOverride is set when the user is attempting to override any policies 715 PolicyOverride bool 716 WriteRequest 717 } 718 719 // Validate is used to validate the arguments in the request 720 func (r *JobScaleRequest) Validate() error { 721 namespace := r.Target[ScalingTargetNamespace] 722 if namespace != "" && namespace != r.RequestNamespace() { 723 return NewErrRPCCoded(400, "namespace in payload did not match header") 724 } 725 726 jobID := r.Target[ScalingTargetJob] 727 if jobID != "" && jobID != r.JobID { 728 return fmt.Errorf("job ID in payload did not match URL") 729 } 730 731 groupName := r.Target[ScalingTargetGroup] 732 if groupName == "" { 733 return NewErrRPCCoded(400, "missing task group name for scaling action") 734 } 735 736 if r.Count != nil { 737 if *r.Count < 0 { 738 return NewErrRPCCoded(400, "scaling action count can't be negative") 739 } 740 741 if r.Error { 742 return NewErrRPCCoded(400, "scaling action should not contain count if error is true") 743 } 744 745 truncCount := int(*r.Count) 746 if int64(truncCount) != *r.Count { 747 return NewErrRPCCoded(400, 748 fmt.Sprintf("new scaling count is too large for TaskGroup.Count (int): %v", r.Count)) 749 } 750 } 751 752 return nil 753 } 754 755 // JobSummaryRequest is used when we just need to get a specific job summary 756 type JobSummaryRequest struct { 757 JobID string 758 QueryOptions 759 } 760 761 // JobScaleStatusRequest is used to get the scale status for a job 762 type JobScaleStatusRequest struct { 763 JobID string 764 QueryOptions 765 } 766 767 // JobDispatchRequest is used to dispatch a job based on a parameterized job 768 type JobDispatchRequest struct { 769 JobID string 770 Payload []byte 771 Meta map[string]string 772 WriteRequest 773 } 774 775 // JobValidateRequest is used to validate a job 776 type JobValidateRequest struct { 777 Job *Job 778 WriteRequest 779 } 780 781 // JobRevertRequest is used to revert a job to a prior version. 782 type JobRevertRequest struct { 783 // JobID is the ID of the job being reverted 784 JobID string 785 786 // JobVersion the version to revert to. 787 JobVersion uint64 788 789 // EnforcePriorVersion if set will enforce that the job is at the given 790 // version before reverting. 791 EnforcePriorVersion *uint64 792 793 // ConsulToken is the Consul token that proves the submitter of the job revert 794 // has access to the Service Identity policies associated with the job's 795 // Consul Connect enabled services. This field is only used to transfer the 796 // token and is not stored after the Job revert. 797 ConsulToken string 798 799 // VaultToken is the Vault token that proves the submitter of the job revert 800 // has access to any Vault policies specified in the targeted job version. This 801 // field is only used to transfer the token and is not stored after the Job 802 // revert. 803 VaultToken string 804 805 WriteRequest 806 } 807 808 // JobStabilityRequest is used to marked a job as stable. 809 type JobStabilityRequest struct { 810 // Job to set the stability on 811 JobID string 812 JobVersion uint64 813 814 // Set the stability 815 Stable bool 816 WriteRequest 817 } 818 819 // JobStabilityResponse is the response when marking a job as stable. 820 type JobStabilityResponse struct { 821 WriteMeta 822 } 823 824 // NodeListRequest is used to parameterize a list request 825 type NodeListRequest struct { 826 QueryOptions 827 828 Fields *NodeStubFields 829 } 830 831 // EvalUpdateRequest is used for upserting evaluations. 832 type EvalUpdateRequest struct { 833 Evals []*Evaluation 834 EvalToken string 835 WriteRequest 836 } 837 838 // EvalDeleteRequest is used for deleting an evaluation. 839 type EvalDeleteRequest struct { 840 Evals []string 841 Allocs []string 842 WriteRequest 843 } 844 845 // EvalSpecificRequest is used when we just need to specify a target evaluation 846 type EvalSpecificRequest struct { 847 EvalID string 848 QueryOptions 849 } 850 851 // EvalAckRequest is used to Ack/Nack a specific evaluation 852 type EvalAckRequest struct { 853 EvalID string 854 Token string 855 WriteRequest 856 } 857 858 // EvalDequeueRequest is used when we want to dequeue an evaluation 859 type EvalDequeueRequest struct { 860 Schedulers []string 861 Timeout time.Duration 862 SchedulerVersion uint16 863 WriteRequest 864 } 865 866 // EvalListRequest is used to list the evaluations 867 type EvalListRequest struct { 868 QueryOptions 869 } 870 871 // PlanRequest is used to submit an allocation plan to the leader 872 type PlanRequest struct { 873 Plan *Plan 874 WriteRequest 875 } 876 877 // ApplyPlanResultsRequest is used by the planner to apply a Raft transaction 878 // committing the result of a plan. 879 type ApplyPlanResultsRequest struct { 880 // AllocUpdateRequest holds the allocation updates to be made by the 881 // scheduler. 882 AllocUpdateRequest 883 884 // Deployment is the deployment created or updated as a result of a 885 // scheduling event. 886 Deployment *Deployment 887 888 // DeploymentUpdates is a set of status updates to apply to the given 889 // deployments. This allows the scheduler to cancel any unneeded deployment 890 // because the job is stopped or the update block is removed. 891 DeploymentUpdates []*DeploymentStatusUpdate 892 893 // EvalID is the eval ID of the plan being applied. The modify index of the 894 // evaluation is updated as part of applying the plan to ensure that subsequent 895 // scheduling events for the same job will wait for the index that last produced 896 // state changes. This is necessary for blocked evaluations since they can be 897 // processed many times, potentially making state updates, without the state of 898 // the evaluation itself being updated. 899 EvalID string 900 901 // COMPAT 0.11 902 // NodePreemptions is a slice of allocations from other lower priority jobs 903 // that are preempted. Preempted allocations are marked as evicted. 904 // Deprecated: Replaced with AllocsPreempted which contains only the diff 905 NodePreemptions []*Allocation 906 907 // AllocsPreempted is a slice of allocation diffs from other lower priority jobs 908 // that are preempted. Preempted allocations are marked as evicted. 909 AllocsPreempted []*AllocationDiff 910 911 // PreemptionEvals is a slice of follow up evals for jobs whose allocations 912 // have been preempted to place allocs in this plan 913 PreemptionEvals []*Evaluation 914 } 915 916 // AllocUpdateRequest is used to submit changes to allocations, either 917 // to cause evictions or to assign new allocations. Both can be done 918 // within a single transaction 919 type AllocUpdateRequest struct { 920 // COMPAT 0.11 921 // Alloc is the list of new allocations to assign 922 // Deprecated: Replaced with two separate slices, one containing stopped allocations 923 // and another containing updated allocations 924 Alloc []*Allocation 925 926 // Allocations to stop. Contains only the diff, not the entire allocation 927 AllocsStopped []*AllocationDiff 928 929 // New or updated allocations 930 AllocsUpdated []*Allocation 931 932 // Evals is the list of new evaluations to create 933 // Evals are valid only when used in the Raft RPC 934 Evals []*Evaluation 935 936 // Job is the shared parent job of the allocations. 937 // It is pulled out since it is common to reduce payload size. 938 Job *Job 939 940 WriteRequest 941 } 942 943 // AllocUpdateDesiredTransitionRequest is used to submit changes to allocations 944 // desired transition state. 945 type AllocUpdateDesiredTransitionRequest struct { 946 // Allocs is the mapping of allocation ids to their desired state 947 // transition 948 Allocs map[string]*DesiredTransition 949 950 // Evals is the set of evaluations to create 951 Evals []*Evaluation 952 953 WriteRequest 954 } 955 956 // AllocStopRequest is used to stop and reschedule a running Allocation. 957 type AllocStopRequest struct { 958 AllocID string 959 960 WriteRequest 961 } 962 963 // AllocStopResponse is the response to an `AllocStopRequest` 964 type AllocStopResponse struct { 965 // EvalID is the id of the follow up evalution for the rescheduled alloc. 966 EvalID string 967 968 WriteMeta 969 } 970 971 // AllocListRequest is used to request a list of allocations 972 type AllocListRequest struct { 973 QueryOptions 974 975 Fields *AllocStubFields 976 } 977 978 // AllocSpecificRequest is used to query a specific allocation 979 type AllocSpecificRequest struct { 980 AllocID string 981 QueryOptions 982 } 983 984 // AllocSignalRequest is used to signal a specific allocation 985 type AllocSignalRequest struct { 986 AllocID string 987 Task string 988 Signal string 989 QueryOptions 990 } 991 992 // AllocsGetRequest is used to query a set of allocations 993 type AllocsGetRequest struct { 994 AllocIDs []string 995 QueryOptions 996 } 997 998 // AllocRestartRequest is used to restart a specific allocations tasks. 999 type AllocRestartRequest struct { 1000 AllocID string 1001 TaskName string 1002 1003 QueryOptions 1004 } 1005 1006 // PeriodicForceRequest is used to force a specific periodic job. 1007 type PeriodicForceRequest struct { 1008 JobID string 1009 WriteRequest 1010 } 1011 1012 // ServerMembersResponse has the list of servers in a cluster 1013 type ServerMembersResponse struct { 1014 ServerName string 1015 ServerRegion string 1016 ServerDC string 1017 Members []*ServerMember 1018 } 1019 1020 // ServerMember holds information about a Nomad server agent in a cluster 1021 type ServerMember struct { 1022 Name string 1023 Addr net.IP 1024 Port uint16 1025 Tags map[string]string 1026 Status string 1027 ProtocolMin uint8 1028 ProtocolMax uint8 1029 ProtocolCur uint8 1030 DelegateMin uint8 1031 DelegateMax uint8 1032 DelegateCur uint8 1033 } 1034 1035 // ClusterMetadata is used to store per-cluster metadata. 1036 type ClusterMetadata struct { 1037 ClusterID string 1038 CreateTime int64 1039 } 1040 1041 // DeriveVaultTokenRequest is used to request wrapped Vault tokens for the 1042 // following tasks in the given allocation 1043 type DeriveVaultTokenRequest struct { 1044 NodeID string 1045 SecretID string 1046 AllocID string 1047 Tasks []string 1048 QueryOptions 1049 } 1050 1051 // VaultAccessorsRequest is used to operate on a set of Vault accessors 1052 type VaultAccessorsRequest struct { 1053 Accessors []*VaultAccessor 1054 } 1055 1056 // VaultAccessor is a reference to a created Vault token on behalf of 1057 // an allocation's task. 1058 type VaultAccessor struct { 1059 AllocID string 1060 Task string 1061 NodeID string 1062 Accessor string 1063 CreationTTL int 1064 1065 // Raft Indexes 1066 CreateIndex uint64 1067 } 1068 1069 // DeriveVaultTokenResponse returns the wrapped tokens for each requested task 1070 type DeriveVaultTokenResponse struct { 1071 // Tasks is a mapping between the task name and the wrapped token 1072 Tasks map[string]string 1073 1074 // Error stores any error that occurred. Errors are stored here so we can 1075 // communicate whether it is retryable 1076 Error *RecoverableError 1077 1078 QueryMeta 1079 } 1080 1081 // GenericRequest is used to request where no 1082 // specific information is needed. 1083 type GenericRequest struct { 1084 QueryOptions 1085 } 1086 1087 // DeploymentListRequest is used to list the deployments 1088 type DeploymentListRequest struct { 1089 QueryOptions 1090 } 1091 1092 // DeploymentDeleteRequest is used for deleting deployments. 1093 type DeploymentDeleteRequest struct { 1094 Deployments []string 1095 WriteRequest 1096 } 1097 1098 // DeploymentStatusUpdateRequest is used to update the status of a deployment as 1099 // well as optionally creating an evaluation atomically. 1100 type DeploymentStatusUpdateRequest struct { 1101 // Eval, if set, is used to create an evaluation at the same time as 1102 // updating the status of a deployment. 1103 Eval *Evaluation 1104 1105 // DeploymentUpdate is a status update to apply to the given 1106 // deployment. 1107 DeploymentUpdate *DeploymentStatusUpdate 1108 1109 // Job is used to optionally upsert a job. This is used when setting the 1110 // allocation health results in a deployment failure and the deployment 1111 // auto-reverts to the latest stable job. 1112 Job *Job 1113 } 1114 1115 // DeploymentAllocHealthRequest is used to set the health of a set of 1116 // allocations as part of a deployment. 1117 type DeploymentAllocHealthRequest struct { 1118 DeploymentID string 1119 1120 // Marks these allocations as healthy, allow further allocations 1121 // to be rolled. 1122 HealthyAllocationIDs []string 1123 1124 // Any unhealthy allocations fail the deployment 1125 UnhealthyAllocationIDs []string 1126 1127 WriteRequest 1128 } 1129 1130 // ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft 1131 type ApplyDeploymentAllocHealthRequest struct { 1132 DeploymentAllocHealthRequest 1133 1134 // Timestamp is the timestamp to use when setting the allocations health. 1135 Timestamp time.Time 1136 1137 // An optional field to update the status of a deployment 1138 DeploymentUpdate *DeploymentStatusUpdate 1139 1140 // Job is used to optionally upsert a job. This is used when setting the 1141 // allocation health results in a deployment failure and the deployment 1142 // auto-reverts to the latest stable job. 1143 Job *Job 1144 1145 // An optional evaluation to create after promoting the canaries 1146 Eval *Evaluation 1147 } 1148 1149 // DeploymentPromoteRequest is used to promote task groups in a deployment 1150 type DeploymentPromoteRequest struct { 1151 DeploymentID string 1152 1153 // All is to promote all task groups 1154 All bool 1155 1156 // Groups is used to set the promotion status per task group 1157 Groups []string 1158 1159 WriteRequest 1160 } 1161 1162 // ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft 1163 type ApplyDeploymentPromoteRequest struct { 1164 DeploymentPromoteRequest 1165 1166 // An optional evaluation to create after promoting the canaries 1167 Eval *Evaluation 1168 } 1169 1170 // DeploymentPauseRequest is used to pause a deployment 1171 type DeploymentPauseRequest struct { 1172 DeploymentID string 1173 1174 // Pause sets the pause status 1175 Pause bool 1176 1177 WriteRequest 1178 } 1179 1180 // DeploymentRunRequest is used to remotely start a pending deployment. 1181 // Used only for multiregion deployments. 1182 type DeploymentRunRequest struct { 1183 DeploymentID string 1184 1185 WriteRequest 1186 } 1187 1188 // DeploymentUnblockRequest is used to remotely unblock a deployment. 1189 // Used only for multiregion deployments. 1190 type DeploymentUnblockRequest struct { 1191 DeploymentID string 1192 1193 WriteRequest 1194 } 1195 1196 // DeploymentCancelRequest is used to remotely cancel a deployment. 1197 // Used only for multiregion deployments. 1198 type DeploymentCancelRequest struct { 1199 DeploymentID string 1200 1201 WriteRequest 1202 } 1203 1204 // DeploymentSpecificRequest is used to make a request specific to a particular 1205 // deployment 1206 type DeploymentSpecificRequest struct { 1207 DeploymentID string 1208 QueryOptions 1209 } 1210 1211 // DeploymentFailRequest is used to fail a particular deployment 1212 type DeploymentFailRequest struct { 1213 DeploymentID string 1214 WriteRequest 1215 } 1216 1217 // ScalingPolicySpecificRequest is used when we just need to specify a target scaling policy 1218 type ScalingPolicySpecificRequest struct { 1219 ID string 1220 QueryOptions 1221 } 1222 1223 // SingleScalingPolicyResponse is used to return a single job 1224 type SingleScalingPolicyResponse struct { 1225 Policy *ScalingPolicy 1226 QueryMeta 1227 } 1228 1229 // ScalingPolicyListRequest is used to parameterize a scaling policy list request 1230 type ScalingPolicyListRequest struct { 1231 Job string 1232 Type string 1233 QueryOptions 1234 } 1235 1236 // ScalingPolicyListResponse is used for a list request 1237 type ScalingPolicyListResponse struct { 1238 Policies []*ScalingPolicyListStub 1239 QueryMeta 1240 } 1241 1242 // SingleDeploymentResponse is used to respond with a single deployment 1243 type SingleDeploymentResponse struct { 1244 Deployment *Deployment 1245 QueryMeta 1246 } 1247 1248 // GenericResponse is used to respond to a request where no 1249 // specific response information is needed. 1250 type GenericResponse struct { 1251 WriteMeta 1252 } 1253 1254 // VersionResponse is used for the Status.Version response 1255 type VersionResponse struct { 1256 Build string 1257 Versions map[string]int 1258 QueryMeta 1259 } 1260 1261 // JobRegisterResponse is used to respond to a job registration 1262 type JobRegisterResponse struct { 1263 EvalID string 1264 EvalCreateIndex uint64 1265 JobModifyIndex uint64 1266 1267 // Warnings contains any warnings about the given job. These may include 1268 // deprecation warnings. 1269 Warnings string 1270 1271 QueryMeta 1272 } 1273 1274 // JobDeregisterResponse is used to respond to a job deregistration 1275 type JobDeregisterResponse struct { 1276 EvalID string 1277 EvalCreateIndex uint64 1278 JobModifyIndex uint64 1279 VolumeEvalID string 1280 VolumeEvalIndex uint64 1281 QueryMeta 1282 } 1283 1284 // JobBatchDeregisterResponse is used to respond to a batch job deregistration 1285 type JobBatchDeregisterResponse struct { 1286 // JobEvals maps the job to its created evaluation 1287 JobEvals map[NamespacedID]string 1288 QueryMeta 1289 } 1290 1291 // JobValidateResponse is the response from validate request 1292 type JobValidateResponse struct { 1293 // DriverConfigValidated indicates whether the agent validated the driver 1294 // config 1295 DriverConfigValidated bool 1296 1297 // ValidationErrors is a list of validation errors 1298 ValidationErrors []string 1299 1300 // Error is a string version of any error that may have occurred 1301 Error string 1302 1303 // Warnings contains any warnings about the given job. These may include 1304 // deprecation warnings. 1305 Warnings string 1306 } 1307 1308 // NodeUpdateResponse is used to respond to a node update 1309 type NodeUpdateResponse struct { 1310 HeartbeatTTL time.Duration 1311 EvalIDs []string 1312 EvalCreateIndex uint64 1313 NodeModifyIndex uint64 1314 1315 // Features informs clients what enterprise features are allowed 1316 Features uint64 1317 1318 // LeaderRPCAddr is the RPC address of the current Raft Leader. If 1319 // empty, the current Nomad Server is in the minority of a partition. 1320 LeaderRPCAddr string 1321 1322 // NumNodes is the number of Nomad nodes attached to this quorum of 1323 // Nomad Servers at the time of the response. This value can 1324 // fluctuate based on the health of the cluster between heartbeats. 1325 NumNodes int32 1326 1327 // Servers is the full list of known Nomad servers in the local 1328 // region. 1329 Servers []*NodeServerInfo 1330 1331 QueryMeta 1332 } 1333 1334 // NodeDrainUpdateResponse is used to respond to a node drain update 1335 type NodeDrainUpdateResponse struct { 1336 NodeModifyIndex uint64 1337 EvalIDs []string 1338 EvalCreateIndex uint64 1339 WriteMeta 1340 } 1341 1342 // NodeEligibilityUpdateResponse is used to respond to a node eligibility update 1343 type NodeEligibilityUpdateResponse struct { 1344 NodeModifyIndex uint64 1345 EvalIDs []string 1346 EvalCreateIndex uint64 1347 WriteMeta 1348 } 1349 1350 // NodeAllocsResponse is used to return allocs for a single node 1351 type NodeAllocsResponse struct { 1352 Allocs []*Allocation 1353 QueryMeta 1354 } 1355 1356 // NodeClientAllocsResponse is used to return allocs meta data for a single node 1357 type NodeClientAllocsResponse struct { 1358 Allocs map[string]uint64 1359 1360 // MigrateTokens are used when ACLs are enabled to allow cross node, 1361 // authenticated access to sticky volumes 1362 MigrateTokens map[string]string 1363 1364 QueryMeta 1365 } 1366 1367 // SingleNodeResponse is used to return a single node 1368 type SingleNodeResponse struct { 1369 Node *Node 1370 QueryMeta 1371 } 1372 1373 // NodeListResponse is used for a list request 1374 type NodeListResponse struct { 1375 Nodes []*NodeListStub 1376 QueryMeta 1377 } 1378 1379 // SingleJobResponse is used to return a single job 1380 type SingleJobResponse struct { 1381 Job *Job 1382 QueryMeta 1383 } 1384 1385 // JobSummaryResponse is used to return a single job summary 1386 type JobSummaryResponse struct { 1387 JobSummary *JobSummary 1388 QueryMeta 1389 } 1390 1391 // JobScaleStatusResponse is used to return the scale status for a job 1392 type JobScaleStatusResponse struct { 1393 JobScaleStatus *JobScaleStatus 1394 QueryMeta 1395 } 1396 1397 type JobScaleStatus struct { 1398 JobID string 1399 Namespace string 1400 JobCreateIndex uint64 1401 JobModifyIndex uint64 1402 JobStopped bool 1403 TaskGroups map[string]*TaskGroupScaleStatus 1404 } 1405 1406 // TaskGroupScaleStatus is used to return the scale status for a given task group 1407 type TaskGroupScaleStatus struct { 1408 Desired int 1409 Placed int 1410 Running int 1411 Healthy int 1412 Unhealthy int 1413 Events []*ScalingEvent 1414 } 1415 1416 type JobDispatchResponse struct { 1417 DispatchedJobID string 1418 EvalID string 1419 EvalCreateIndex uint64 1420 JobCreateIndex uint64 1421 WriteMeta 1422 } 1423 1424 // JobListResponse is used for a list request 1425 type JobListResponse struct { 1426 Jobs []*JobListStub 1427 QueryMeta 1428 } 1429 1430 // JobVersionsRequest is used to get a jobs versions 1431 type JobVersionsRequest struct { 1432 JobID string 1433 Diffs bool 1434 QueryOptions 1435 } 1436 1437 // JobVersionsResponse is used for a job get versions request 1438 type JobVersionsResponse struct { 1439 Versions []*Job 1440 Diffs []*JobDiff 1441 QueryMeta 1442 } 1443 1444 // JobPlanResponse is used to respond to a job plan request 1445 type JobPlanResponse struct { 1446 // Annotations stores annotations explaining decisions the scheduler made. 1447 Annotations *PlanAnnotations 1448 1449 // FailedTGAllocs is the placement failures per task group. 1450 FailedTGAllocs map[string]*AllocMetric 1451 1452 // JobModifyIndex is the modification index of the job. The value can be 1453 // used when running `nomad run` to ensure that the Job wasn’t modified 1454 // since the last plan. If the job is being created, the value is zero. 1455 JobModifyIndex uint64 1456 1457 // CreatedEvals is the set of evaluations created by the scheduler. The 1458 // reasons for this can be rolling-updates or blocked evals. 1459 CreatedEvals []*Evaluation 1460 1461 // Diff contains the diff of the job and annotations on whether the change 1462 // causes an in-place update or create/destroy 1463 Diff *JobDiff 1464 1465 // NextPeriodicLaunch is the time duration till the job would be launched if 1466 // submitted. 1467 NextPeriodicLaunch time.Time 1468 1469 // Warnings contains any warnings about the given job. These may include 1470 // deprecation warnings. 1471 Warnings string 1472 1473 WriteMeta 1474 } 1475 1476 // SingleAllocResponse is used to return a single allocation 1477 type SingleAllocResponse struct { 1478 Alloc *Allocation 1479 QueryMeta 1480 } 1481 1482 // AllocsGetResponse is used to return a set of allocations 1483 type AllocsGetResponse struct { 1484 Allocs []*Allocation 1485 QueryMeta 1486 } 1487 1488 // JobAllocationsResponse is used to return the allocations for a job 1489 type JobAllocationsResponse struct { 1490 Allocations []*AllocListStub 1491 QueryMeta 1492 } 1493 1494 // JobEvaluationsResponse is used to return the evaluations for a job 1495 type JobEvaluationsResponse struct { 1496 Evaluations []*Evaluation 1497 QueryMeta 1498 } 1499 1500 // SingleEvalResponse is used to return a single evaluation 1501 type SingleEvalResponse struct { 1502 Eval *Evaluation 1503 QueryMeta 1504 } 1505 1506 // EvalDequeueResponse is used to return from a dequeue 1507 type EvalDequeueResponse struct { 1508 Eval *Evaluation 1509 Token string 1510 1511 // WaitIndex is the Raft index the worker should wait until invoking the 1512 // scheduler. 1513 WaitIndex uint64 1514 1515 QueryMeta 1516 } 1517 1518 // GetWaitIndex is used to retrieve the Raft index in which state should be at 1519 // or beyond before invoking the scheduler. 1520 func (e *EvalDequeueResponse) GetWaitIndex() uint64 { 1521 // Prefer the wait index sent. This will be populated on all responses from 1522 // 0.7.0 and above 1523 if e.WaitIndex != 0 { 1524 return e.WaitIndex 1525 } else if e.Eval != nil { 1526 return e.Eval.ModifyIndex 1527 } 1528 1529 // This should never happen 1530 return 1 1531 } 1532 1533 // PlanResponse is used to return from a PlanRequest 1534 type PlanResponse struct { 1535 Result *PlanResult 1536 WriteMeta 1537 } 1538 1539 // AllocListResponse is used for a list request 1540 type AllocListResponse struct { 1541 Allocations []*AllocListStub 1542 QueryMeta 1543 } 1544 1545 // DeploymentListResponse is used for a list request 1546 type DeploymentListResponse struct { 1547 Deployments []*Deployment 1548 QueryMeta 1549 } 1550 1551 // EvalListResponse is used for a list request 1552 type EvalListResponse struct { 1553 Evaluations []*Evaluation 1554 QueryMeta 1555 } 1556 1557 // EvalAllocationsResponse is used to return the allocations for an evaluation 1558 type EvalAllocationsResponse struct { 1559 Allocations []*AllocListStub 1560 QueryMeta 1561 } 1562 1563 // PeriodicForceResponse is used to respond to a periodic job force launch 1564 type PeriodicForceResponse struct { 1565 EvalID string 1566 EvalCreateIndex uint64 1567 WriteMeta 1568 } 1569 1570 // DeploymentUpdateResponse is used to respond to a deployment change. The 1571 // response will include the modify index of the deployment as well as details 1572 // of any triggered evaluation. 1573 type DeploymentUpdateResponse struct { 1574 EvalID string 1575 EvalCreateIndex uint64 1576 DeploymentModifyIndex uint64 1577 1578 // RevertedJobVersion is the version the job was reverted to. If unset, the 1579 // job wasn't reverted 1580 RevertedJobVersion *uint64 1581 1582 WriteMeta 1583 } 1584 1585 // NodeConnQueryResponse is used to respond to a query of whether a server has 1586 // a connection to a specific Node 1587 type NodeConnQueryResponse struct { 1588 // Connected indicates whether a connection to the Client exists 1589 Connected bool 1590 1591 // Established marks the time at which the connection was established 1592 Established time.Time 1593 1594 QueryMeta 1595 } 1596 1597 // HostDataRequest is used by /agent/host to retrieve data about the agent's host system. If 1598 // ServerID or NodeID is specified, the request is forwarded to the remote agent 1599 type HostDataRequest struct { 1600 ServerID string 1601 NodeID string 1602 QueryOptions 1603 } 1604 1605 // HostDataResponse contains the HostData content 1606 type HostDataResponse struct { 1607 AgentID string 1608 HostData *host.HostData 1609 } 1610 1611 // EmitNodeEventsRequest is a request to update the node events source 1612 // with a new client-side event 1613 type EmitNodeEventsRequest struct { 1614 // NodeEvents are a map where the key is a node id, and value is a list of 1615 // events for that node 1616 NodeEvents map[string][]*NodeEvent 1617 1618 WriteRequest 1619 } 1620 1621 // EmitNodeEventsResponse is a response to the client about the status of 1622 // the node event source update. 1623 type EmitNodeEventsResponse struct { 1624 WriteMeta 1625 } 1626 1627 const ( 1628 NodeEventSubsystemDrain = "Drain" 1629 NodeEventSubsystemDriver = "Driver" 1630 NodeEventSubsystemHeartbeat = "Heartbeat" 1631 NodeEventSubsystemCluster = "Cluster" 1632 NodeEventSubsystemStorage = "Storage" 1633 ) 1634 1635 // NodeEvent is a single unit representing a node’s state change 1636 type NodeEvent struct { 1637 Message string 1638 Subsystem string 1639 Details map[string]string 1640 Timestamp time.Time 1641 CreateIndex uint64 1642 } 1643 1644 func (ne *NodeEvent) String() string { 1645 var details []string 1646 for k, v := range ne.Details { 1647 details = append(details, fmt.Sprintf("%s: %s", k, v)) 1648 } 1649 1650 return fmt.Sprintf("Message: %s, Subsystem: %s, Details: %s, Timestamp: %s", ne.Message, ne.Subsystem, strings.Join(details, ","), ne.Timestamp.String()) 1651 } 1652 1653 func (ne *NodeEvent) Copy() *NodeEvent { 1654 c := new(NodeEvent) 1655 *c = *ne 1656 c.Details = helper.CopyMapStringString(ne.Details) 1657 return c 1658 } 1659 1660 // NewNodeEvent generates a new node event storing the current time as the 1661 // timestamp 1662 func NewNodeEvent() *NodeEvent { 1663 return &NodeEvent{Timestamp: time.Now()} 1664 } 1665 1666 // SetMessage is used to set the message on the node event 1667 func (ne *NodeEvent) SetMessage(msg string) *NodeEvent { 1668 ne.Message = msg 1669 return ne 1670 } 1671 1672 // SetSubsystem is used to set the subsystem on the node event 1673 func (ne *NodeEvent) SetSubsystem(sys string) *NodeEvent { 1674 ne.Subsystem = sys 1675 return ne 1676 } 1677 1678 // SetTimestamp is used to set the timestamp on the node event 1679 func (ne *NodeEvent) SetTimestamp(ts time.Time) *NodeEvent { 1680 ne.Timestamp = ts 1681 return ne 1682 } 1683 1684 // AddDetail is used to add a detail to the node event 1685 func (ne *NodeEvent) AddDetail(k, v string) *NodeEvent { 1686 if ne.Details == nil { 1687 ne.Details = make(map[string]string, 1) 1688 } 1689 ne.Details[k] = v 1690 return ne 1691 } 1692 1693 const ( 1694 NodeStatusInit = "initializing" 1695 NodeStatusReady = "ready" 1696 NodeStatusDown = "down" 1697 ) 1698 1699 // ShouldDrainNode checks if a given node status should trigger an 1700 // evaluation. Some states don't require any further action. 1701 func ShouldDrainNode(status string) bool { 1702 switch status { 1703 case NodeStatusInit, NodeStatusReady: 1704 return false 1705 case NodeStatusDown: 1706 return true 1707 default: 1708 panic(fmt.Sprintf("unhandled node status %s", status)) 1709 } 1710 } 1711 1712 // ValidNodeStatus is used to check if a node status is valid 1713 func ValidNodeStatus(status string) bool { 1714 switch status { 1715 case NodeStatusInit, NodeStatusReady, NodeStatusDown: 1716 return true 1717 default: 1718 return false 1719 } 1720 } 1721 1722 const ( 1723 // NodeSchedulingEligible and Ineligible marks the node as eligible or not, 1724 // respectively, for receiving allocations. This is orthoginal to the node 1725 // status being ready. 1726 NodeSchedulingEligible = "eligible" 1727 NodeSchedulingIneligible = "ineligible" 1728 ) 1729 1730 // DrainSpec describes a Node's desired drain behavior. 1731 type DrainSpec struct { 1732 // Deadline is the duration after StartTime when the remaining 1733 // allocations on a draining Node should be told to stop. 1734 Deadline time.Duration 1735 1736 // IgnoreSystemJobs allows systems jobs to remain on the node even though it 1737 // has been marked for draining. 1738 IgnoreSystemJobs bool 1739 } 1740 1741 // DrainStrategy describes a Node's drain behavior. 1742 type DrainStrategy struct { 1743 // DrainSpec is the user declared drain specification 1744 DrainSpec 1745 1746 // ForceDeadline is the deadline time for the drain after which drains will 1747 // be forced 1748 ForceDeadline time.Time 1749 1750 // StartedAt is the time the drain process started 1751 StartedAt time.Time 1752 } 1753 1754 func (d *DrainStrategy) Copy() *DrainStrategy { 1755 if d == nil { 1756 return nil 1757 } 1758 1759 nd := new(DrainStrategy) 1760 *nd = *d 1761 return nd 1762 } 1763 1764 // DeadlineTime returns a boolean whether the drain strategy allows an infinite 1765 // duration or otherwise the deadline time. The force drain is captured by the 1766 // deadline time being in the past. 1767 func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) { 1768 // Treat the nil case as a force drain so during an upgrade where a node may 1769 // not have a drain strategy but has Drain set to true, it is treated as a 1770 // force to mimick old behavior. 1771 if d == nil { 1772 return false, time.Time{} 1773 } 1774 1775 ns := d.Deadline.Nanoseconds() 1776 switch { 1777 case ns < 0: // Force 1778 return false, time.Time{} 1779 case ns == 0: // Infinite 1780 return true, time.Time{} 1781 default: 1782 return false, d.ForceDeadline 1783 } 1784 } 1785 1786 func (d *DrainStrategy) Equal(o *DrainStrategy) bool { 1787 if d == nil && o == nil { 1788 return true 1789 } else if o != nil && d == nil { 1790 return false 1791 } else if d != nil && o == nil { 1792 return false 1793 } 1794 1795 // Compare values 1796 if d.ForceDeadline != o.ForceDeadline { 1797 return false 1798 } else if d.Deadline != o.Deadline { 1799 return false 1800 } else if d.IgnoreSystemJobs != o.IgnoreSystemJobs { 1801 return false 1802 } 1803 1804 return true 1805 } 1806 1807 // Node is a representation of a schedulable client node 1808 type Node struct { 1809 // ID is a unique identifier for the node. It can be constructed 1810 // by doing a concatenation of the Name and Datacenter as a simple 1811 // approach. Alternatively a UUID may be used. 1812 ID string 1813 1814 // SecretID is an ID that is only known by the Node and the set of Servers. 1815 // It is not accessible via the API and is used to authenticate nodes 1816 // conducting privileged activities. 1817 SecretID string 1818 1819 // Datacenter for this node 1820 Datacenter string 1821 1822 // Node name 1823 Name string 1824 1825 // HTTPAddr is the address on which the Nomad client is listening for http 1826 // requests 1827 HTTPAddr string 1828 1829 // TLSEnabled indicates if the Agent has TLS enabled for the HTTP API 1830 TLSEnabled bool 1831 1832 // Attributes is an arbitrary set of key/value 1833 // data that can be used for constraints. Examples 1834 // include "kernel.name=linux", "arch=386", "driver.docker=1", 1835 // "docker.runtime=1.8.3" 1836 Attributes map[string]string 1837 1838 // NodeResources captures the available resources on the client. 1839 NodeResources *NodeResources 1840 1841 // ReservedResources captures the set resources on the client that are 1842 // reserved from scheduling. 1843 ReservedResources *NodeReservedResources 1844 1845 // Resources is the available resources on the client. 1846 // For example 'cpu=2' 'memory=2048' 1847 // COMPAT(0.10): Remove after 0.10 1848 Resources *Resources 1849 1850 // Reserved is the set of resources that are reserved, 1851 // and should be subtracted from the total resources for 1852 // the purposes of scheduling. This may be provide certain 1853 // high-watermark tolerances or because of external schedulers 1854 // consuming resources. 1855 // COMPAT(0.10): Remove after 0.10 1856 Reserved *Resources 1857 1858 // Links are used to 'link' this client to external 1859 // systems. For example 'consul=foo.dc1' 'aws=i-83212' 1860 // 'ami=ami-123' 1861 Links map[string]string 1862 1863 // Meta is used to associate arbitrary metadata with this 1864 // client. This is opaque to Nomad. 1865 Meta map[string]string 1866 1867 // NodeClass is an opaque identifier used to group nodes 1868 // together for the purpose of determining scheduling pressure. 1869 NodeClass string 1870 1871 // ComputedClass is a unique id that identifies nodes with a common set of 1872 // attributes and capabilities. 1873 ComputedClass string 1874 1875 // COMPAT: Remove in Nomad 0.9 1876 // Drain is controlled by the servers, and not the client. 1877 // If true, no jobs will be scheduled to this node, and existing 1878 // allocations will be drained. Superseded by DrainStrategy in Nomad 1879 // 0.8 but kept for backward compat. 1880 Drain bool 1881 1882 // DrainStrategy determines the node's draining behavior. Will be nil 1883 // when Drain=false. 1884 DrainStrategy *DrainStrategy 1885 1886 // SchedulingEligibility determines whether this node will receive new 1887 // placements. 1888 SchedulingEligibility string 1889 1890 // Status of this node 1891 Status string 1892 1893 // StatusDescription is meant to provide more human useful information 1894 StatusDescription string 1895 1896 // StatusUpdatedAt is the time stamp at which the state of the node was 1897 // updated 1898 StatusUpdatedAt int64 1899 1900 // Events is the most recent set of events generated for the node, 1901 // retaining only MaxRetainedNodeEvents number at a time 1902 Events []*NodeEvent 1903 1904 // Drivers is a map of driver names to current driver information 1905 Drivers map[string]*DriverInfo 1906 1907 // CSIControllerPlugins is a map of plugin names to current CSI Plugin info 1908 CSIControllerPlugins map[string]*CSIInfo 1909 // CSINodePlugins is a map of plugin names to current CSI Plugin info 1910 CSINodePlugins map[string]*CSIInfo 1911 1912 // HostVolumes is a map of host volume names to their configuration 1913 HostVolumes map[string]*ClientHostVolumeConfig 1914 1915 // Raft Indexes 1916 CreateIndex uint64 1917 ModifyIndex uint64 1918 } 1919 1920 // Ready returns true if the node is ready for running allocations 1921 func (n *Node) Ready() bool { 1922 // Drain is checked directly to support pre-0.8 Node data 1923 return n.Status == NodeStatusReady && !n.Drain && n.SchedulingEligibility == NodeSchedulingEligible 1924 } 1925 1926 func (n *Node) Canonicalize() { 1927 if n == nil { 1928 return 1929 } 1930 1931 // COMPAT Remove in 0.10 1932 // In v0.8.0 we introduced scheduling eligibility, so we need to set it for 1933 // upgrading nodes 1934 if n.SchedulingEligibility == "" { 1935 if n.Drain { 1936 n.SchedulingEligibility = NodeSchedulingIneligible 1937 } else { 1938 n.SchedulingEligibility = NodeSchedulingEligible 1939 } 1940 } 1941 1942 // COMPAT remove in 1.0 1943 // In v0.12.0 we introduced a separate node specific network resource struct 1944 // so we need to covert any pre 0.12 clients to the correct struct 1945 if n.NodeResources != nil && n.NodeResources.NodeNetworks == nil { 1946 if n.NodeResources.Networks != nil { 1947 for _, nr := range n.NodeResources.Networks { 1948 nnr := &NodeNetworkResource{ 1949 Mode: nr.Mode, 1950 Speed: nr.MBits, 1951 Device: nr.Device, 1952 } 1953 if nr.IP != "" { 1954 nnr.Addresses = []NodeNetworkAddress{ 1955 { 1956 Alias: "default", 1957 Address: nr.IP, 1958 }, 1959 } 1960 } 1961 n.NodeResources.NodeNetworks = append(n.NodeResources.NodeNetworks, nnr) 1962 } 1963 } 1964 } 1965 } 1966 1967 func (n *Node) Copy() *Node { 1968 if n == nil { 1969 return nil 1970 } 1971 nn := new(Node) 1972 *nn = *n 1973 nn.Attributes = helper.CopyMapStringString(nn.Attributes) 1974 nn.Resources = nn.Resources.Copy() 1975 nn.Reserved = nn.Reserved.Copy() 1976 nn.NodeResources = nn.NodeResources.Copy() 1977 nn.ReservedResources = nn.ReservedResources.Copy() 1978 nn.Links = helper.CopyMapStringString(nn.Links) 1979 nn.Meta = helper.CopyMapStringString(nn.Meta) 1980 nn.Events = copyNodeEvents(n.Events) 1981 nn.DrainStrategy = nn.DrainStrategy.Copy() 1982 nn.CSIControllerPlugins = copyNodeCSI(nn.CSIControllerPlugins) 1983 nn.CSINodePlugins = copyNodeCSI(nn.CSINodePlugins) 1984 nn.Drivers = copyNodeDrivers(n.Drivers) 1985 nn.HostVolumes = copyNodeHostVolumes(n.HostVolumes) 1986 return nn 1987 } 1988 1989 // copyNodeEvents is a helper to copy a list of NodeEvent's 1990 func copyNodeEvents(events []*NodeEvent) []*NodeEvent { 1991 l := len(events) 1992 if l == 0 { 1993 return nil 1994 } 1995 1996 c := make([]*NodeEvent, l) 1997 for i, event := range events { 1998 c[i] = event.Copy() 1999 } 2000 return c 2001 } 2002 2003 // copyNodeCSI is a helper to copy a map of CSIInfo 2004 func copyNodeCSI(plugins map[string]*CSIInfo) map[string]*CSIInfo { 2005 l := len(plugins) 2006 if l == 0 { 2007 return nil 2008 } 2009 2010 c := make(map[string]*CSIInfo, l) 2011 for plugin, info := range plugins { 2012 c[plugin] = info.Copy() 2013 } 2014 2015 return c 2016 } 2017 2018 // copyNodeDrivers is a helper to copy a map of DriverInfo 2019 func copyNodeDrivers(drivers map[string]*DriverInfo) map[string]*DriverInfo { 2020 l := len(drivers) 2021 if l == 0 { 2022 return nil 2023 } 2024 2025 c := make(map[string]*DriverInfo, l) 2026 for driver, info := range drivers { 2027 c[driver] = info.Copy() 2028 } 2029 return c 2030 } 2031 2032 // copyNodeHostVolumes is a helper to copy a map of string to Volume 2033 func copyNodeHostVolumes(volumes map[string]*ClientHostVolumeConfig) map[string]*ClientHostVolumeConfig { 2034 l := len(volumes) 2035 if l == 0 { 2036 return nil 2037 } 2038 2039 c := make(map[string]*ClientHostVolumeConfig, l) 2040 for volume, v := range volumes { 2041 c[volume] = v.Copy() 2042 } 2043 2044 return c 2045 } 2046 2047 // TerminalStatus returns if the current status is terminal and 2048 // will no longer transition. 2049 func (n *Node) TerminalStatus() bool { 2050 switch n.Status { 2051 case NodeStatusDown: 2052 return true 2053 default: 2054 return false 2055 } 2056 } 2057 2058 // COMPAT(0.11): Remove in 0.11 2059 // ComparableReservedResources returns the reserved resouces on the node 2060 // handling upgrade paths. Reserved networks must be handled separately. After 2061 // 0.11 calls to this should be replaced with: 2062 // node.ReservedResources.Comparable() 2063 func (n *Node) ComparableReservedResources() *ComparableResources { 2064 // See if we can no-op 2065 if n.Reserved == nil && n.ReservedResources == nil { 2066 return nil 2067 } 2068 2069 // Node already has 0.9+ behavior 2070 if n.ReservedResources != nil { 2071 return n.ReservedResources.Comparable() 2072 } 2073 2074 // Upgrade path 2075 return &ComparableResources{ 2076 Flattened: AllocatedTaskResources{ 2077 Cpu: AllocatedCpuResources{ 2078 CpuShares: int64(n.Reserved.CPU), 2079 }, 2080 Memory: AllocatedMemoryResources{ 2081 MemoryMB: int64(n.Reserved.MemoryMB), 2082 }, 2083 }, 2084 Shared: AllocatedSharedResources{ 2085 DiskMB: int64(n.Reserved.DiskMB), 2086 }, 2087 } 2088 } 2089 2090 // COMPAT(0.11): Remove in 0.11 2091 // ComparableResources returns the resouces on the node 2092 // handling upgrade paths. Networking must be handled separately. After 0.11 2093 // calls to this should be replaced with: node.NodeResources.Comparable() 2094 func (n *Node) ComparableResources() *ComparableResources { 2095 // Node already has 0.9+ behavior 2096 if n.NodeResources != nil { 2097 return n.NodeResources.Comparable() 2098 } 2099 2100 // Upgrade path 2101 return &ComparableResources{ 2102 Flattened: AllocatedTaskResources{ 2103 Cpu: AllocatedCpuResources{ 2104 CpuShares: int64(n.Resources.CPU), 2105 }, 2106 Memory: AllocatedMemoryResources{ 2107 MemoryMB: int64(n.Resources.MemoryMB), 2108 }, 2109 }, 2110 Shared: AllocatedSharedResources{ 2111 DiskMB: int64(n.Resources.DiskMB), 2112 }, 2113 } 2114 } 2115 2116 // Stub returns a summarized version of the node 2117 func (n *Node) Stub(fields *NodeStubFields) *NodeListStub { 2118 2119 addr, _, _ := net.SplitHostPort(n.HTTPAddr) 2120 2121 s := &NodeListStub{ 2122 Address: addr, 2123 ID: n.ID, 2124 Datacenter: n.Datacenter, 2125 Name: n.Name, 2126 NodeClass: n.NodeClass, 2127 Version: n.Attributes["nomad.version"], 2128 Drain: n.Drain, 2129 SchedulingEligibility: n.SchedulingEligibility, 2130 Status: n.Status, 2131 StatusDescription: n.StatusDescription, 2132 Drivers: n.Drivers, 2133 HostVolumes: n.HostVolumes, 2134 CreateIndex: n.CreateIndex, 2135 ModifyIndex: n.ModifyIndex, 2136 } 2137 2138 if fields != nil { 2139 if fields.Resources { 2140 s.NodeResources = n.NodeResources 2141 s.ReservedResources = n.ReservedResources 2142 } 2143 } 2144 2145 return s 2146 } 2147 2148 // NodeListStub is used to return a subset of job information 2149 // for the job list 2150 type NodeListStub struct { 2151 Address string 2152 ID string 2153 Datacenter string 2154 Name string 2155 NodeClass string 2156 Version string 2157 Drain bool 2158 SchedulingEligibility string 2159 Status string 2160 StatusDescription string 2161 Drivers map[string]*DriverInfo 2162 HostVolumes map[string]*ClientHostVolumeConfig 2163 NodeResources *NodeResources `json:",omitempty"` 2164 ReservedResources *NodeReservedResources `json:",omitempty"` 2165 CreateIndex uint64 2166 ModifyIndex uint64 2167 } 2168 2169 // NodeStubFields defines which fields are included in the NodeListStub. 2170 type NodeStubFields struct { 2171 Resources bool 2172 } 2173 2174 // Resources is used to define the resources available 2175 // on a client 2176 type Resources struct { 2177 CPU int 2178 MemoryMB int 2179 DiskMB int 2180 IOPS int // COMPAT(0.10): Only being used to issue warnings 2181 Networks Networks 2182 Devices ResourceDevices 2183 } 2184 2185 const ( 2186 BytesInMegabyte = 1024 * 1024 2187 ) 2188 2189 // DefaultResources is a small resources object that contains the 2190 // default resources requests that we will provide to an object. 2191 // --- THIS FUNCTION IS REPLICATED IN api/resources.go and should 2192 // be kept in sync. 2193 func DefaultResources() *Resources { 2194 return &Resources{ 2195 CPU: 100, 2196 MemoryMB: 300, 2197 } 2198 } 2199 2200 // MinResources is a small resources object that contains the 2201 // absolute minimum resources that we will provide to an object. 2202 // This should not be confused with the defaults which are 2203 // provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN 2204 // api/resources.go and should be kept in sync. 2205 func MinResources() *Resources { 2206 return &Resources{ 2207 CPU: 1, 2208 MemoryMB: 10, 2209 } 2210 } 2211 2212 // DiskInBytes returns the amount of disk resources in bytes. 2213 func (r *Resources) DiskInBytes() int64 { 2214 return int64(r.DiskMB * BytesInMegabyte) 2215 } 2216 2217 func (r *Resources) Validate() error { 2218 var mErr multierror.Error 2219 if err := r.MeetsMinResources(); err != nil { 2220 mErr.Errors = append(mErr.Errors, err) 2221 } 2222 2223 // Ensure the task isn't asking for disk resources 2224 if r.DiskMB > 0 { 2225 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 2226 } 2227 2228 for i, d := range r.Devices { 2229 if err := d.Validate(); err != nil { 2230 mErr.Errors = append(mErr.Errors, fmt.Errorf("device %d failed validation: %v", i+1, err)) 2231 } 2232 } 2233 2234 return mErr.ErrorOrNil() 2235 } 2236 2237 // Merge merges this resource with another resource. 2238 // COMPAT(0.10): Remove in 0.10 2239 func (r *Resources) Merge(other *Resources) { 2240 if other.CPU != 0 { 2241 r.CPU = other.CPU 2242 } 2243 if other.MemoryMB != 0 { 2244 r.MemoryMB = other.MemoryMB 2245 } 2246 if other.DiskMB != 0 { 2247 r.DiskMB = other.DiskMB 2248 } 2249 if len(other.Networks) != 0 { 2250 r.Networks = other.Networks 2251 } 2252 if len(other.Devices) != 0 { 2253 r.Devices = other.Devices 2254 } 2255 } 2256 2257 // COMPAT(0.10): Remove in 0.10 2258 func (r *Resources) Equals(o *Resources) bool { 2259 if r == o { 2260 return true 2261 } 2262 if r == nil || o == nil { 2263 return false 2264 } 2265 return r.CPU == o.CPU && 2266 r.MemoryMB == o.MemoryMB && 2267 r.DiskMB == o.DiskMB && 2268 r.IOPS == o.IOPS && 2269 r.Networks.Equals(&o.Networks) && 2270 r.Devices.Equals(&o.Devices) 2271 } 2272 2273 // COMPAT(0.10): Remove in 0.10 2274 // ResourceDevices are part of Resources 2275 type ResourceDevices []*RequestedDevice 2276 2277 // COMPAT(0.10): Remove in 0.10 2278 // Equals ResourceDevices as set keyed by Name 2279 func (d *ResourceDevices) Equals(o *ResourceDevices) bool { 2280 if d == o { 2281 return true 2282 } 2283 if d == nil || o == nil { 2284 return false 2285 } 2286 if len(*d) != len(*o) { 2287 return false 2288 } 2289 m := make(map[string]*RequestedDevice, len(*d)) 2290 for _, e := range *d { 2291 m[e.Name] = e 2292 } 2293 for _, oe := range *o { 2294 de, ok := m[oe.Name] 2295 if !ok || !de.Equals(oe) { 2296 return false 2297 } 2298 } 2299 return true 2300 } 2301 2302 // COMPAT(0.10): Remove in 0.10 2303 func (r *Resources) Canonicalize() { 2304 // Ensure that an empty and nil slices are treated the same to avoid scheduling 2305 // problems since we use reflect DeepEquals. 2306 if len(r.Networks) == 0 { 2307 r.Networks = nil 2308 } 2309 if len(r.Devices) == 0 { 2310 r.Devices = nil 2311 } 2312 2313 for _, n := range r.Networks { 2314 n.Canonicalize() 2315 } 2316 } 2317 2318 // MeetsMinResources returns an error if the resources specified are less than 2319 // the minimum allowed. 2320 // This is based on the minimums defined in the Resources type 2321 // COMPAT(0.10): Remove in 0.10 2322 func (r *Resources) MeetsMinResources() error { 2323 var mErr multierror.Error 2324 minResources := MinResources() 2325 if r.CPU < minResources.CPU { 2326 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU)) 2327 } 2328 if r.MemoryMB < minResources.MemoryMB { 2329 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB)) 2330 } 2331 return mErr.ErrorOrNil() 2332 } 2333 2334 // Copy returns a deep copy of the resources 2335 func (r *Resources) Copy() *Resources { 2336 if r == nil { 2337 return nil 2338 } 2339 newR := new(Resources) 2340 *newR = *r 2341 2342 // Copy the network objects 2343 newR.Networks = r.Networks.Copy() 2344 2345 // Copy the devices 2346 if r.Devices != nil { 2347 n := len(r.Devices) 2348 newR.Devices = make([]*RequestedDevice, n) 2349 for i := 0; i < n; i++ { 2350 newR.Devices[i] = r.Devices[i].Copy() 2351 } 2352 } 2353 2354 return newR 2355 } 2356 2357 // NetIndex finds the matching net index using device name 2358 // COMPAT(0.10): Remove in 0.10 2359 func (r *Resources) NetIndex(n *NetworkResource) int { 2360 return r.Networks.NetIndex(n) 2361 } 2362 2363 // Superset checks if one set of resources is a superset 2364 // of another. This ignores network resources, and the NetworkIndex 2365 // should be used for that. 2366 // COMPAT(0.10): Remove in 0.10 2367 func (r *Resources) Superset(other *Resources) (bool, string) { 2368 if r.CPU < other.CPU { 2369 return false, "cpu" 2370 } 2371 if r.MemoryMB < other.MemoryMB { 2372 return false, "memory" 2373 } 2374 if r.DiskMB < other.DiskMB { 2375 return false, "disk" 2376 } 2377 return true, "" 2378 } 2379 2380 // Add adds the resources of the delta to this, potentially 2381 // returning an error if not possible. 2382 // COMPAT(0.10): Remove in 0.10 2383 func (r *Resources) Add(delta *Resources) { 2384 if delta == nil { 2385 return 2386 } 2387 2388 r.CPU += delta.CPU 2389 r.MemoryMB += delta.MemoryMB 2390 r.DiskMB += delta.DiskMB 2391 2392 for _, n := range delta.Networks { 2393 // Find the matching interface by IP or CIDR 2394 idx := r.NetIndex(n) 2395 if idx == -1 { 2396 r.Networks = append(r.Networks, n.Copy()) 2397 } else { 2398 r.Networks[idx].Add(n) 2399 } 2400 } 2401 } 2402 2403 // COMPAT(0.10): Remove in 0.10 2404 func (r *Resources) GoString() string { 2405 return fmt.Sprintf("*%#v", *r) 2406 } 2407 2408 // NodeNetworkResource is used to describe a fingerprinted network of a node 2409 type NodeNetworkResource struct { 2410 Mode string // host for physical networks, cni/<name> for cni networks 2411 2412 // The following apply only to host networks 2413 Device string // interface name 2414 MacAddress string 2415 Speed int 2416 2417 Addresses []NodeNetworkAddress // not valid for cni, for bridge there will only be 1 ip 2418 } 2419 2420 func (n *NodeNetworkResource) Equals(o *NodeNetworkResource) bool { 2421 return reflect.DeepEqual(n, o) 2422 } 2423 2424 func (n *NodeNetworkResource) HasAlias(alias string) bool { 2425 for _, addr := range n.Addresses { 2426 if addr.Alias == alias { 2427 return true 2428 } 2429 } 2430 return false 2431 } 2432 2433 type NodeNetworkAF string 2434 2435 const ( 2436 NodeNetworkAF_IPv4 NodeNetworkAF = "ipv4" 2437 NodeNetworkAF_IPv6 NodeNetworkAF = "ipv6" 2438 ) 2439 2440 type NodeNetworkAddress struct { 2441 Family NodeNetworkAF 2442 Alias string 2443 Address string 2444 ReservedPorts string 2445 Gateway string // default route for this address 2446 } 2447 2448 type AllocatedPortMapping struct { 2449 Label string 2450 Value int 2451 To int 2452 HostIP string 2453 } 2454 2455 type AllocatedPorts []AllocatedPortMapping 2456 2457 func (p AllocatedPorts) Get(label string) (AllocatedPortMapping, bool) { 2458 for _, port := range p { 2459 if port.Label == label { 2460 return port, true 2461 } 2462 } 2463 2464 return AllocatedPortMapping{}, false 2465 } 2466 2467 type Port struct { 2468 // Label is the key for HCL port stanzas: port "foo" {} 2469 Label string 2470 2471 // Value is the static or dynamic port value. For dynamic ports this 2472 // will be 0 in the jobspec and set by the scheduler. 2473 Value int 2474 2475 // To is the port inside a network namespace where this port is 2476 // forwarded. -1 is an internal sentinel value used by Consul Connect 2477 // to mean "same as the host port." 2478 To int 2479 2480 // HostNetwork is the name of the network this port should be assigned 2481 // to. Jobs with a HostNetwork set can only be placed on nodes with 2482 // that host network available. 2483 HostNetwork string 2484 } 2485 2486 type DNSConfig struct { 2487 Servers []string 2488 Searches []string 2489 Options []string 2490 } 2491 2492 func (d *DNSConfig) Copy() *DNSConfig { 2493 if d == nil { 2494 return nil 2495 } 2496 newD := new(DNSConfig) 2497 newD.Servers = make([]string, len(d.Servers)) 2498 copy(newD.Servers, d.Servers) 2499 newD.Searches = make([]string, len(d.Searches)) 2500 copy(newD.Searches, d.Searches) 2501 newD.Options = make([]string, len(d.Options)) 2502 copy(newD.Options, d.Options) 2503 return newD 2504 } 2505 2506 // NetworkResource is used to represent available network 2507 // resources 2508 type NetworkResource struct { 2509 Mode string // Mode of the network 2510 Device string // Name of the device 2511 CIDR string // CIDR block of addresses 2512 IP string // Host IP address 2513 MBits int // Throughput 2514 DNS *DNSConfig // DNS Configuration 2515 ReservedPorts []Port // Host Reserved ports 2516 DynamicPorts []Port // Host Dynamically assigned ports 2517 } 2518 2519 func (nr *NetworkResource) Hash() uint32 { 2520 var data []byte 2521 data = append(data, []byte(fmt.Sprintf("%s%s%s%s%d", nr.Mode, nr.Device, nr.CIDR, nr.IP, nr.MBits))...) 2522 2523 for i, port := range nr.ReservedPorts { 2524 data = append(data, []byte(fmt.Sprintf("r%d%s%d%d", i, port.Label, port.Value, port.To))...) 2525 } 2526 2527 for i, port := range nr.DynamicPorts { 2528 data = append(data, []byte(fmt.Sprintf("d%d%s%d%d", i, port.Label, port.Value, port.To))...) 2529 } 2530 2531 return crc32.ChecksumIEEE(data) 2532 } 2533 2534 func (nr *NetworkResource) Equals(other *NetworkResource) bool { 2535 return nr.Hash() == other.Hash() 2536 } 2537 2538 func (n *NetworkResource) Canonicalize() { 2539 // Ensure that an empty and nil slices are treated the same to avoid scheduling 2540 // problems since we use reflect DeepEquals. 2541 if len(n.ReservedPorts) == 0 { 2542 n.ReservedPorts = nil 2543 } 2544 if len(n.DynamicPorts) == 0 { 2545 n.DynamicPorts = nil 2546 } 2547 2548 for i, p := range n.DynamicPorts { 2549 if p.HostNetwork == "" { 2550 n.DynamicPorts[i].HostNetwork = "default" 2551 } 2552 } 2553 for i, p := range n.ReservedPorts { 2554 if p.HostNetwork == "" { 2555 n.ReservedPorts[i].HostNetwork = "default" 2556 } 2557 } 2558 } 2559 2560 // Copy returns a deep copy of the network resource 2561 func (n *NetworkResource) Copy() *NetworkResource { 2562 if n == nil { 2563 return nil 2564 } 2565 newR := new(NetworkResource) 2566 *newR = *n 2567 if n.ReservedPorts != nil { 2568 newR.ReservedPorts = make([]Port, len(n.ReservedPorts)) 2569 copy(newR.ReservedPorts, n.ReservedPorts) 2570 } 2571 if n.DynamicPorts != nil { 2572 newR.DynamicPorts = make([]Port, len(n.DynamicPorts)) 2573 copy(newR.DynamicPorts, n.DynamicPorts) 2574 } 2575 return newR 2576 } 2577 2578 // Add adds the resources of the delta to this, potentially 2579 // returning an error if not possible. 2580 func (n *NetworkResource) Add(delta *NetworkResource) { 2581 if len(delta.ReservedPorts) > 0 { 2582 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...) 2583 } 2584 n.MBits += delta.MBits 2585 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...) 2586 } 2587 2588 func (n *NetworkResource) GoString() string { 2589 return fmt.Sprintf("*%#v", *n) 2590 } 2591 2592 // PortLabels returns a map of port labels to their assigned host ports. 2593 func (n *NetworkResource) PortLabels() map[string]int { 2594 num := len(n.ReservedPorts) + len(n.DynamicPorts) 2595 labelValues := make(map[string]int, num) 2596 for _, port := range n.ReservedPorts { 2597 labelValues[port.Label] = port.Value 2598 } 2599 for _, port := range n.DynamicPorts { 2600 labelValues[port.Label] = port.Value 2601 } 2602 return labelValues 2603 } 2604 2605 // Networks defined for a task on the Resources struct. 2606 type Networks []*NetworkResource 2607 2608 func (ns Networks) Copy() Networks { 2609 if len(ns) == 0 { 2610 return nil 2611 } 2612 2613 out := make([]*NetworkResource, len(ns)) 2614 for i := range ns { 2615 out[i] = ns[i].Copy() 2616 } 2617 return out 2618 } 2619 2620 // Port assignment and IP for the given label or empty values. 2621 func (ns Networks) Port(label string) AllocatedPortMapping { 2622 for _, n := range ns { 2623 for _, p := range n.ReservedPorts { 2624 if p.Label == label { 2625 return AllocatedPortMapping{ 2626 Label: label, 2627 Value: p.Value, 2628 To: p.To, 2629 HostIP: n.IP, 2630 } 2631 } 2632 } 2633 for _, p := range n.DynamicPorts { 2634 if p.Label == label { 2635 return AllocatedPortMapping{ 2636 Label: label, 2637 Value: p.Value, 2638 To: p.To, 2639 HostIP: n.IP, 2640 } 2641 } 2642 } 2643 } 2644 return AllocatedPortMapping{} 2645 } 2646 2647 func (ns Networks) NetIndex(n *NetworkResource) int { 2648 for idx, net := range ns { 2649 if net.Device == n.Device { 2650 return idx 2651 } 2652 } 2653 return -1 2654 } 2655 2656 // RequestedDevice is used to request a device for a task. 2657 type RequestedDevice struct { 2658 // Name is the request name. The possible values are as follows: 2659 // * <type>: A single value only specifies the type of request. 2660 // * <vendor>/<type>: A single slash delimiter assumes the vendor and type of device is specified. 2661 // * <vendor>/<type>/<name>: Two slash delimiters assume vendor, type and specific model are specified. 2662 // 2663 // Examples are as follows: 2664 // * "gpu" 2665 // * "nvidia/gpu" 2666 // * "nvidia/gpu/GTX2080Ti" 2667 Name string 2668 2669 // Count is the number of requested devices 2670 Count uint64 2671 2672 // Constraints are a set of constraints to apply when selecting the device 2673 // to use. 2674 Constraints Constraints 2675 2676 // Affinities are a set of affinities to apply when selecting the device 2677 // to use. 2678 Affinities Affinities 2679 } 2680 2681 func (r *RequestedDevice) Equals(o *RequestedDevice) bool { 2682 if r == o { 2683 return true 2684 } 2685 if r == nil || o == nil { 2686 return false 2687 } 2688 return r.Name == o.Name && 2689 r.Count == o.Count && 2690 r.Constraints.Equals(&o.Constraints) && 2691 r.Affinities.Equals(&o.Affinities) 2692 } 2693 2694 func (r *RequestedDevice) Copy() *RequestedDevice { 2695 if r == nil { 2696 return nil 2697 } 2698 2699 nr := *r 2700 nr.Constraints = CopySliceConstraints(nr.Constraints) 2701 nr.Affinities = CopySliceAffinities(nr.Affinities) 2702 2703 return &nr 2704 } 2705 2706 func (r *RequestedDevice) ID() *DeviceIdTuple { 2707 if r == nil || r.Name == "" { 2708 return nil 2709 } 2710 2711 parts := strings.SplitN(r.Name, "/", 3) 2712 switch len(parts) { 2713 case 1: 2714 return &DeviceIdTuple{ 2715 Type: parts[0], 2716 } 2717 case 2: 2718 return &DeviceIdTuple{ 2719 Vendor: parts[0], 2720 Type: parts[1], 2721 } 2722 default: 2723 return &DeviceIdTuple{ 2724 Vendor: parts[0], 2725 Type: parts[1], 2726 Name: parts[2], 2727 } 2728 } 2729 } 2730 2731 func (r *RequestedDevice) Validate() error { 2732 if r == nil { 2733 return nil 2734 } 2735 2736 var mErr multierror.Error 2737 if r.Name == "" { 2738 _ = multierror.Append(&mErr, errors.New("device name must be given as one of the following: type, vendor/type, or vendor/type/name")) 2739 } 2740 2741 for idx, constr := range r.Constraints { 2742 // Ensure that the constraint doesn't use an operand we do not allow 2743 switch constr.Operand { 2744 case ConstraintDistinctHosts, ConstraintDistinctProperty: 2745 outer := fmt.Errorf("Constraint %d validation failed: using unsupported operand %q", idx+1, constr.Operand) 2746 _ = multierror.Append(&mErr, outer) 2747 default: 2748 if err := constr.Validate(); err != nil { 2749 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 2750 _ = multierror.Append(&mErr, outer) 2751 } 2752 } 2753 } 2754 for idx, affinity := range r.Affinities { 2755 if err := affinity.Validate(); err != nil { 2756 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 2757 _ = multierror.Append(&mErr, outer) 2758 } 2759 } 2760 2761 return mErr.ErrorOrNil() 2762 } 2763 2764 // NodeResources is used to define the resources available on a client node. 2765 type NodeResources struct { 2766 Cpu NodeCpuResources 2767 Memory NodeMemoryResources 2768 Disk NodeDiskResources 2769 Networks Networks 2770 NodeNetworks []*NodeNetworkResource 2771 Devices []*NodeDeviceResource 2772 } 2773 2774 func (n *NodeResources) Copy() *NodeResources { 2775 if n == nil { 2776 return nil 2777 } 2778 2779 newN := new(NodeResources) 2780 *newN = *n 2781 2782 // Copy the networks 2783 newN.Networks = n.Networks.Copy() 2784 2785 // Copy the devices 2786 if n.Devices != nil { 2787 devices := len(n.Devices) 2788 newN.Devices = make([]*NodeDeviceResource, devices) 2789 for i := 0; i < devices; i++ { 2790 newN.Devices[i] = n.Devices[i].Copy() 2791 } 2792 } 2793 2794 return newN 2795 } 2796 2797 // Comparable returns a comparable version of the nodes resources. This 2798 // conversion can be lossy so care must be taken when using it. 2799 func (n *NodeResources) Comparable() *ComparableResources { 2800 if n == nil { 2801 return nil 2802 } 2803 2804 c := &ComparableResources{ 2805 Flattened: AllocatedTaskResources{ 2806 Cpu: AllocatedCpuResources{ 2807 CpuShares: n.Cpu.CpuShares, 2808 }, 2809 Memory: AllocatedMemoryResources{ 2810 MemoryMB: n.Memory.MemoryMB, 2811 }, 2812 Networks: n.Networks, 2813 }, 2814 Shared: AllocatedSharedResources{ 2815 DiskMB: n.Disk.DiskMB, 2816 }, 2817 } 2818 return c 2819 } 2820 2821 func (n *NodeResources) Merge(o *NodeResources) { 2822 if o == nil { 2823 return 2824 } 2825 2826 n.Cpu.Merge(&o.Cpu) 2827 n.Memory.Merge(&o.Memory) 2828 n.Disk.Merge(&o.Disk) 2829 2830 if len(o.Networks) != 0 { 2831 n.Networks = append(n.Networks, o.Networks...) 2832 } 2833 2834 if len(o.Devices) != 0 { 2835 n.Devices = o.Devices 2836 } 2837 2838 if len(o.NodeNetworks) != 0 { 2839 lookupNetwork := func(nets []*NodeNetworkResource, name string) (int, *NodeNetworkResource) { 2840 for i, nw := range nets { 2841 if nw.Device == name { 2842 return i, nw 2843 } 2844 } 2845 return 0, nil 2846 } 2847 2848 for _, nw := range o.NodeNetworks { 2849 if i, nnw := lookupNetwork(n.NodeNetworks, nw.Device); nnw != nil { 2850 n.NodeNetworks[i] = nw 2851 } else { 2852 n.NodeNetworks = append(n.NodeNetworks, nw) 2853 } 2854 } 2855 } 2856 } 2857 2858 func (n *NodeResources) Equals(o *NodeResources) bool { 2859 if o == nil && n == nil { 2860 return true 2861 } else if o == nil { 2862 return false 2863 } else if n == nil { 2864 return false 2865 } 2866 2867 if !n.Cpu.Equals(&o.Cpu) { 2868 return false 2869 } 2870 if !n.Memory.Equals(&o.Memory) { 2871 return false 2872 } 2873 if !n.Disk.Equals(&o.Disk) { 2874 return false 2875 } 2876 if !n.Networks.Equals(&o.Networks) { 2877 return false 2878 } 2879 2880 // Check the devices 2881 if !DevicesEquals(n.Devices, o.Devices) { 2882 return false 2883 } 2884 2885 if !NodeNetworksEquals(n.NodeNetworks, o.NodeNetworks) { 2886 return false 2887 } 2888 2889 return true 2890 } 2891 2892 // Equals equates Networks as a set 2893 func (ns *Networks) Equals(o *Networks) bool { 2894 if ns == o { 2895 return true 2896 } 2897 if ns == nil || o == nil { 2898 return false 2899 } 2900 if len(*ns) != len(*o) { 2901 return false 2902 } 2903 SETEQUALS: 2904 for _, ne := range *ns { 2905 for _, oe := range *o { 2906 if ne.Equals(oe) { 2907 continue SETEQUALS 2908 } 2909 } 2910 return false 2911 } 2912 return true 2913 } 2914 2915 // DevicesEquals returns true if the two device arrays are set equal 2916 func DevicesEquals(d1, d2 []*NodeDeviceResource) bool { 2917 if len(d1) != len(d2) { 2918 return false 2919 } 2920 idMap := make(map[DeviceIdTuple]*NodeDeviceResource, len(d1)) 2921 for _, d := range d1 { 2922 idMap[*d.ID()] = d 2923 } 2924 for _, otherD := range d2 { 2925 if d, ok := idMap[*otherD.ID()]; !ok || !d.Equals(otherD) { 2926 return false 2927 } 2928 } 2929 2930 return true 2931 } 2932 2933 func NodeNetworksEquals(n1, n2 []*NodeNetworkResource) bool { 2934 if len(n1) != len(n2) { 2935 return false 2936 } 2937 2938 netMap := make(map[string]*NodeNetworkResource, len(n1)) 2939 for _, n := range n1 { 2940 netMap[n.Device] = n 2941 } 2942 for _, otherN := range n2 { 2943 if n, ok := netMap[otherN.Device]; !ok || !n.Equals(otherN) { 2944 return false 2945 } 2946 } 2947 2948 return true 2949 2950 } 2951 2952 // NodeCpuResources captures the CPU resources of the node. 2953 type NodeCpuResources struct { 2954 // CpuShares is the CPU shares available. This is calculated by number of 2955 // cores multiplied by the core frequency. 2956 CpuShares int64 2957 } 2958 2959 func (n *NodeCpuResources) Merge(o *NodeCpuResources) { 2960 if o == nil { 2961 return 2962 } 2963 2964 if o.CpuShares != 0 { 2965 n.CpuShares = o.CpuShares 2966 } 2967 } 2968 2969 func (n *NodeCpuResources) Equals(o *NodeCpuResources) bool { 2970 if o == nil && n == nil { 2971 return true 2972 } else if o == nil { 2973 return false 2974 } else if n == nil { 2975 return false 2976 } 2977 2978 if n.CpuShares != o.CpuShares { 2979 return false 2980 } 2981 2982 return true 2983 } 2984 2985 // NodeMemoryResources captures the memory resources of the node 2986 type NodeMemoryResources struct { 2987 // MemoryMB is the total available memory on the node 2988 MemoryMB int64 2989 } 2990 2991 func (n *NodeMemoryResources) Merge(o *NodeMemoryResources) { 2992 if o == nil { 2993 return 2994 } 2995 2996 if o.MemoryMB != 0 { 2997 n.MemoryMB = o.MemoryMB 2998 } 2999 } 3000 3001 func (n *NodeMemoryResources) Equals(o *NodeMemoryResources) bool { 3002 if o == nil && n == nil { 3003 return true 3004 } else if o == nil { 3005 return false 3006 } else if n == nil { 3007 return false 3008 } 3009 3010 if n.MemoryMB != o.MemoryMB { 3011 return false 3012 } 3013 3014 return true 3015 } 3016 3017 // NodeDiskResources captures the disk resources of the node 3018 type NodeDiskResources struct { 3019 // DiskMB is the total available disk space on the node 3020 DiskMB int64 3021 } 3022 3023 func (n *NodeDiskResources) Merge(o *NodeDiskResources) { 3024 if o == nil { 3025 return 3026 } 3027 if o.DiskMB != 0 { 3028 n.DiskMB = o.DiskMB 3029 } 3030 } 3031 3032 func (n *NodeDiskResources) Equals(o *NodeDiskResources) bool { 3033 if o == nil && n == nil { 3034 return true 3035 } else if o == nil { 3036 return false 3037 } else if n == nil { 3038 return false 3039 } 3040 3041 if n.DiskMB != o.DiskMB { 3042 return false 3043 } 3044 3045 return true 3046 } 3047 3048 // DeviceIdTuple is the tuple that identifies a device 3049 type DeviceIdTuple struct { 3050 Vendor string 3051 Type string 3052 Name string 3053 } 3054 3055 func (d *DeviceIdTuple) String() string { 3056 if d == nil { 3057 return "" 3058 } 3059 3060 return fmt.Sprintf("%s/%s/%s", d.Vendor, d.Type, d.Name) 3061 } 3062 3063 // Matches returns if this Device ID is a superset of the passed ID. 3064 func (id *DeviceIdTuple) Matches(other *DeviceIdTuple) bool { 3065 if other == nil { 3066 return false 3067 } 3068 3069 if other.Name != "" && other.Name != id.Name { 3070 return false 3071 } 3072 3073 if other.Vendor != "" && other.Vendor != id.Vendor { 3074 return false 3075 } 3076 3077 if other.Type != "" && other.Type != id.Type { 3078 return false 3079 } 3080 3081 return true 3082 } 3083 3084 // Equals returns if this Device ID is the same as the passed ID. 3085 func (id *DeviceIdTuple) Equals(o *DeviceIdTuple) bool { 3086 if id == nil && o == nil { 3087 return true 3088 } else if id == nil || o == nil { 3089 return false 3090 } 3091 3092 return o.Vendor == id.Vendor && o.Type == id.Type && o.Name == id.Name 3093 } 3094 3095 // NodeDeviceResource captures a set of devices sharing a common 3096 // vendor/type/device_name tuple. 3097 type NodeDeviceResource struct { 3098 Vendor string 3099 Type string 3100 Name string 3101 Instances []*NodeDevice 3102 Attributes map[string]*psstructs.Attribute 3103 } 3104 3105 func (n *NodeDeviceResource) ID() *DeviceIdTuple { 3106 if n == nil { 3107 return nil 3108 } 3109 3110 return &DeviceIdTuple{ 3111 Vendor: n.Vendor, 3112 Type: n.Type, 3113 Name: n.Name, 3114 } 3115 } 3116 3117 func (n *NodeDeviceResource) Copy() *NodeDeviceResource { 3118 if n == nil { 3119 return nil 3120 } 3121 3122 // Copy the primitives 3123 nn := *n 3124 3125 // Copy the device instances 3126 if l := len(nn.Instances); l != 0 { 3127 nn.Instances = make([]*NodeDevice, 0, l) 3128 for _, d := range n.Instances { 3129 nn.Instances = append(nn.Instances, d.Copy()) 3130 } 3131 } 3132 3133 // Copy the Attributes 3134 nn.Attributes = psstructs.CopyMapStringAttribute(nn.Attributes) 3135 3136 return &nn 3137 } 3138 3139 func (n *NodeDeviceResource) Equals(o *NodeDeviceResource) bool { 3140 if o == nil && n == nil { 3141 return true 3142 } else if o == nil { 3143 return false 3144 } else if n == nil { 3145 return false 3146 } 3147 3148 if n.Vendor != o.Vendor { 3149 return false 3150 } else if n.Type != o.Type { 3151 return false 3152 } else if n.Name != o.Name { 3153 return false 3154 } 3155 3156 // Check the attributes 3157 if len(n.Attributes) != len(o.Attributes) { 3158 return false 3159 } 3160 for k, v := range n.Attributes { 3161 if otherV, ok := o.Attributes[k]; !ok || v != otherV { 3162 return false 3163 } 3164 } 3165 3166 // Check the instances 3167 if len(n.Instances) != len(o.Instances) { 3168 return false 3169 } 3170 idMap := make(map[string]*NodeDevice, len(n.Instances)) 3171 for _, d := range n.Instances { 3172 idMap[d.ID] = d 3173 } 3174 for _, otherD := range o.Instances { 3175 if d, ok := idMap[otherD.ID]; !ok || !d.Equals(otherD) { 3176 return false 3177 } 3178 } 3179 3180 return true 3181 } 3182 3183 // NodeDevice is an instance of a particular device. 3184 type NodeDevice struct { 3185 // ID is the ID of the device. 3186 ID string 3187 3188 // Healthy captures whether the device is healthy. 3189 Healthy bool 3190 3191 // HealthDescription is used to provide a human readable description of why 3192 // the device may be unhealthy. 3193 HealthDescription string 3194 3195 // Locality stores HW locality information for the node to optionally be 3196 // used when making placement decisions. 3197 Locality *NodeDeviceLocality 3198 } 3199 3200 func (n *NodeDevice) Equals(o *NodeDevice) bool { 3201 if o == nil && n == nil { 3202 return true 3203 } else if o == nil { 3204 return false 3205 } else if n == nil { 3206 return false 3207 } 3208 3209 if n.ID != o.ID { 3210 return false 3211 } else if n.Healthy != o.Healthy { 3212 return false 3213 } else if n.HealthDescription != o.HealthDescription { 3214 return false 3215 } else if !n.Locality.Equals(o.Locality) { 3216 return false 3217 } 3218 3219 return false 3220 } 3221 3222 func (n *NodeDevice) Copy() *NodeDevice { 3223 if n == nil { 3224 return nil 3225 } 3226 3227 // Copy the primitives 3228 nn := *n 3229 3230 // Copy the locality 3231 nn.Locality = nn.Locality.Copy() 3232 3233 return &nn 3234 } 3235 3236 // NodeDeviceLocality stores information about the devices hardware locality on 3237 // the node. 3238 type NodeDeviceLocality struct { 3239 // PciBusID is the PCI Bus ID for the device. 3240 PciBusID string 3241 } 3242 3243 func (n *NodeDeviceLocality) Equals(o *NodeDeviceLocality) bool { 3244 if o == nil && n == nil { 3245 return true 3246 } else if o == nil { 3247 return false 3248 } else if n == nil { 3249 return false 3250 } 3251 3252 if n.PciBusID != o.PciBusID { 3253 return false 3254 } 3255 3256 return true 3257 } 3258 3259 func (n *NodeDeviceLocality) Copy() *NodeDeviceLocality { 3260 if n == nil { 3261 return nil 3262 } 3263 3264 // Copy the primitives 3265 nn := *n 3266 return &nn 3267 } 3268 3269 // NodeReservedResources is used to capture the resources on a client node that 3270 // should be reserved and not made available to jobs. 3271 type NodeReservedResources struct { 3272 Cpu NodeReservedCpuResources 3273 Memory NodeReservedMemoryResources 3274 Disk NodeReservedDiskResources 3275 Networks NodeReservedNetworkResources 3276 } 3277 3278 func (n *NodeReservedResources) Copy() *NodeReservedResources { 3279 if n == nil { 3280 return nil 3281 } 3282 newN := new(NodeReservedResources) 3283 *newN = *n 3284 return newN 3285 } 3286 3287 // Comparable returns a comparable version of the node's reserved resources. The 3288 // returned resources doesn't contain any network information. This conversion 3289 // can be lossy so care must be taken when using it. 3290 func (n *NodeReservedResources) Comparable() *ComparableResources { 3291 if n == nil { 3292 return nil 3293 } 3294 3295 c := &ComparableResources{ 3296 Flattened: AllocatedTaskResources{ 3297 Cpu: AllocatedCpuResources{ 3298 CpuShares: n.Cpu.CpuShares, 3299 }, 3300 Memory: AllocatedMemoryResources{ 3301 MemoryMB: n.Memory.MemoryMB, 3302 }, 3303 }, 3304 Shared: AllocatedSharedResources{ 3305 DiskMB: n.Disk.DiskMB, 3306 }, 3307 } 3308 return c 3309 } 3310 3311 // NodeReservedCpuResources captures the reserved CPU resources of the node. 3312 type NodeReservedCpuResources struct { 3313 CpuShares int64 3314 } 3315 3316 // NodeReservedMemoryResources captures the reserved memory resources of the node. 3317 type NodeReservedMemoryResources struct { 3318 MemoryMB int64 3319 } 3320 3321 // NodeReservedDiskResources captures the reserved disk resources of the node. 3322 type NodeReservedDiskResources struct { 3323 DiskMB int64 3324 } 3325 3326 // NodeReservedNetworkResources captures the reserved network resources of the node. 3327 type NodeReservedNetworkResources struct { 3328 // ReservedHostPorts is the set of ports reserved on all host network 3329 // interfaces. Its format is a comma separate list of integers or integer 3330 // ranges. (80,443,1000-2000,2005) 3331 ReservedHostPorts string 3332 } 3333 3334 // ParsePortHostPorts returns the reserved host ports. 3335 func (n *NodeReservedNetworkResources) ParseReservedHostPorts() ([]uint64, error) { 3336 return ParsePortRanges(n.ReservedHostPorts) 3337 } 3338 3339 // AllocatedResources is the set of resources to be used by an allocation. 3340 type AllocatedResources struct { 3341 // Tasks is a mapping of task name to the resources for the task. 3342 Tasks map[string]*AllocatedTaskResources 3343 TaskLifecycles map[string]*TaskLifecycleConfig 3344 3345 // Shared is the set of resource that are shared by all tasks in the group. 3346 Shared AllocatedSharedResources 3347 } 3348 3349 func (a *AllocatedResources) Copy() *AllocatedResources { 3350 if a == nil { 3351 return nil 3352 } 3353 3354 out := AllocatedResources{ 3355 Shared: a.Shared.Copy(), 3356 } 3357 3358 if a.Tasks != nil { 3359 out.Tasks = make(map[string]*AllocatedTaskResources, len(out.Tasks)) 3360 for task, resource := range a.Tasks { 3361 out.Tasks[task] = resource.Copy() 3362 } 3363 } 3364 if a.TaskLifecycles != nil { 3365 out.TaskLifecycles = make(map[string]*TaskLifecycleConfig, len(out.TaskLifecycles)) 3366 for task, lifecycle := range a.TaskLifecycles { 3367 out.TaskLifecycles[task] = lifecycle.Copy() 3368 } 3369 3370 } 3371 3372 return &out 3373 } 3374 3375 // Comparable returns a comparable version of the allocations allocated 3376 // resources. This conversion can be lossy so care must be taken when using it. 3377 func (a *AllocatedResources) Comparable() *ComparableResources { 3378 if a == nil { 3379 return nil 3380 } 3381 3382 c := &ComparableResources{ 3383 Shared: a.Shared, 3384 } 3385 3386 prestartSidecarTasks := &AllocatedTaskResources{} 3387 prestartEphemeralTasks := &AllocatedTaskResources{} 3388 main := &AllocatedTaskResources{} 3389 poststopTasks := &AllocatedTaskResources{} 3390 3391 for taskName, r := range a.Tasks { 3392 lc := a.TaskLifecycles[taskName] 3393 if lc == nil { 3394 main.Add(r) 3395 } else if lc.Hook == TaskLifecycleHookPrestart { 3396 if lc.Sidecar { 3397 prestartSidecarTasks.Add(r) 3398 } else { 3399 prestartEphemeralTasks.Add(r) 3400 } 3401 } else if lc.Hook == TaskLifecycleHookPoststop { 3402 poststopTasks.Add(r) 3403 } 3404 } 3405 3406 // update this loop to account for lifecycle hook 3407 prestartEphemeralTasks.Max(main) 3408 prestartEphemeralTasks.Max(poststopTasks) 3409 prestartSidecarTasks.Add(prestartEphemeralTasks) 3410 c.Flattened.Add(prestartSidecarTasks) 3411 3412 // Add network resources that are at the task group level 3413 for _, network := range a.Shared.Networks { 3414 c.Flattened.Add(&AllocatedTaskResources{ 3415 Networks: []*NetworkResource{network}, 3416 }) 3417 } 3418 3419 return c 3420 } 3421 3422 // OldTaskResources returns the pre-0.9.0 map of task resources 3423 func (a *AllocatedResources) OldTaskResources() map[string]*Resources { 3424 m := make(map[string]*Resources, len(a.Tasks)) 3425 for name, res := range a.Tasks { 3426 m[name] = &Resources{ 3427 CPU: int(res.Cpu.CpuShares), 3428 MemoryMB: int(res.Memory.MemoryMB), 3429 Networks: res.Networks, 3430 } 3431 } 3432 3433 return m 3434 } 3435 3436 func (a *AllocatedResources) Canonicalize() { 3437 a.Shared.Canonicalize() 3438 3439 for _, r := range a.Tasks { 3440 for _, nw := range r.Networks { 3441 for _, port := range append(nw.DynamicPorts, nw.ReservedPorts...) { 3442 a.Shared.Ports = append(a.Shared.Ports, AllocatedPortMapping{ 3443 Label: port.Label, 3444 Value: port.Value, 3445 To: port.To, 3446 HostIP: nw.IP, 3447 }) 3448 } 3449 } 3450 } 3451 } 3452 3453 // AllocatedTaskResources are the set of resources allocated to a task. 3454 type AllocatedTaskResources struct { 3455 Cpu AllocatedCpuResources 3456 Memory AllocatedMemoryResources 3457 Networks Networks 3458 Devices []*AllocatedDeviceResource 3459 } 3460 3461 func (a *AllocatedTaskResources) Copy() *AllocatedTaskResources { 3462 if a == nil { 3463 return nil 3464 } 3465 newA := new(AllocatedTaskResources) 3466 *newA = *a 3467 3468 // Copy the networks 3469 newA.Networks = a.Networks.Copy() 3470 3471 // Copy the devices 3472 if newA.Devices != nil { 3473 n := len(a.Devices) 3474 newA.Devices = make([]*AllocatedDeviceResource, n) 3475 for i := 0; i < n; i++ { 3476 newA.Devices[i] = a.Devices[i].Copy() 3477 } 3478 } 3479 3480 return newA 3481 } 3482 3483 // NetIndex finds the matching net index using device name 3484 func (a *AllocatedTaskResources) NetIndex(n *NetworkResource) int { 3485 return a.Networks.NetIndex(n) 3486 } 3487 3488 func (a *AllocatedTaskResources) Add(delta *AllocatedTaskResources) { 3489 if delta == nil { 3490 return 3491 } 3492 3493 a.Cpu.Add(&delta.Cpu) 3494 a.Memory.Add(&delta.Memory) 3495 3496 for _, n := range delta.Networks { 3497 // Find the matching interface by IP or CIDR 3498 idx := a.NetIndex(n) 3499 if idx == -1 { 3500 a.Networks = append(a.Networks, n.Copy()) 3501 } else { 3502 a.Networks[idx].Add(n) 3503 } 3504 } 3505 3506 for _, d := range delta.Devices { 3507 // Find the matching device 3508 idx := AllocatedDevices(a.Devices).Index(d) 3509 if idx == -1 { 3510 a.Devices = append(a.Devices, d.Copy()) 3511 } else { 3512 a.Devices[idx].Add(d) 3513 } 3514 } 3515 } 3516 3517 func (a *AllocatedTaskResources) Max(other *AllocatedTaskResources) { 3518 if other == nil { 3519 return 3520 } 3521 3522 a.Cpu.Max(&other.Cpu) 3523 a.Memory.Max(&other.Memory) 3524 3525 for _, n := range other.Networks { 3526 // Find the matching interface by IP or CIDR 3527 idx := a.NetIndex(n) 3528 if idx == -1 { 3529 a.Networks = append(a.Networks, n.Copy()) 3530 } else { 3531 a.Networks[idx].Add(n) 3532 } 3533 } 3534 3535 for _, d := range other.Devices { 3536 // Find the matching device 3537 idx := AllocatedDevices(a.Devices).Index(d) 3538 if idx == -1 { 3539 a.Devices = append(a.Devices, d.Copy()) 3540 } else { 3541 a.Devices[idx].Add(d) 3542 } 3543 } 3544 } 3545 3546 // Comparable turns AllocatedTaskResources into ComparableResources 3547 // as a helper step in preemption 3548 func (a *AllocatedTaskResources) Comparable() *ComparableResources { 3549 ret := &ComparableResources{ 3550 Flattened: AllocatedTaskResources{ 3551 Cpu: AllocatedCpuResources{ 3552 CpuShares: a.Cpu.CpuShares, 3553 }, 3554 Memory: AllocatedMemoryResources{ 3555 MemoryMB: a.Memory.MemoryMB, 3556 }, 3557 }, 3558 } 3559 ret.Flattened.Networks = append(ret.Flattened.Networks, a.Networks...) 3560 return ret 3561 } 3562 3563 // Subtract only subtracts CPU and Memory resources. Network utilization 3564 // is managed separately in NetworkIndex 3565 func (a *AllocatedTaskResources) Subtract(delta *AllocatedTaskResources) { 3566 if delta == nil { 3567 return 3568 } 3569 3570 a.Cpu.Subtract(&delta.Cpu) 3571 a.Memory.Subtract(&delta.Memory) 3572 } 3573 3574 // AllocatedSharedResources are the set of resources allocated to a task group. 3575 type AllocatedSharedResources struct { 3576 Networks Networks 3577 DiskMB int64 3578 Ports AllocatedPorts 3579 } 3580 3581 func (a AllocatedSharedResources) Copy() AllocatedSharedResources { 3582 return AllocatedSharedResources{ 3583 Networks: a.Networks.Copy(), 3584 DiskMB: a.DiskMB, 3585 Ports: a.Ports, 3586 } 3587 } 3588 3589 func (a *AllocatedSharedResources) Add(delta *AllocatedSharedResources) { 3590 if delta == nil { 3591 return 3592 } 3593 a.Networks = append(a.Networks, delta.Networks...) 3594 a.DiskMB += delta.DiskMB 3595 3596 } 3597 3598 func (a *AllocatedSharedResources) Subtract(delta *AllocatedSharedResources) { 3599 if delta == nil { 3600 return 3601 } 3602 3603 diff := map[*NetworkResource]bool{} 3604 for _, n := range delta.Networks { 3605 diff[n] = true 3606 } 3607 var nets Networks 3608 for _, n := range a.Networks { 3609 if _, ok := diff[n]; !ok { 3610 nets = append(nets, n) 3611 } 3612 } 3613 a.Networks = nets 3614 a.DiskMB -= delta.DiskMB 3615 } 3616 3617 func (a *AllocatedSharedResources) Canonicalize() { 3618 if len(a.Networks) > 0 { 3619 if len(a.Networks[0].DynamicPorts)+len(a.Networks[0].ReservedPorts) > 0 && len(a.Ports) == 0 { 3620 for _, ports := range [][]Port{a.Networks[0].DynamicPorts, a.Networks[0].ReservedPorts} { 3621 for _, p := range ports { 3622 a.Ports = append(a.Ports, AllocatedPortMapping{ 3623 Label: p.Label, 3624 Value: p.Value, 3625 To: p.To, 3626 HostIP: a.Networks[0].IP, 3627 }) 3628 } 3629 } 3630 } 3631 } 3632 } 3633 3634 // AllocatedCpuResources captures the allocated CPU resources. 3635 type AllocatedCpuResources struct { 3636 CpuShares int64 3637 } 3638 3639 func (a *AllocatedCpuResources) Add(delta *AllocatedCpuResources) { 3640 if delta == nil { 3641 return 3642 } 3643 3644 a.CpuShares += delta.CpuShares 3645 } 3646 3647 func (a *AllocatedCpuResources) Subtract(delta *AllocatedCpuResources) { 3648 if delta == nil { 3649 return 3650 } 3651 3652 a.CpuShares -= delta.CpuShares 3653 } 3654 3655 func (a *AllocatedCpuResources) Max(other *AllocatedCpuResources) { 3656 if other == nil { 3657 return 3658 } 3659 3660 if other.CpuShares > a.CpuShares { 3661 a.CpuShares = other.CpuShares 3662 } 3663 } 3664 3665 // AllocatedMemoryResources captures the allocated memory resources. 3666 type AllocatedMemoryResources struct { 3667 MemoryMB int64 3668 } 3669 3670 func (a *AllocatedMemoryResources) Add(delta *AllocatedMemoryResources) { 3671 if delta == nil { 3672 return 3673 } 3674 3675 a.MemoryMB += delta.MemoryMB 3676 } 3677 3678 func (a *AllocatedMemoryResources) Subtract(delta *AllocatedMemoryResources) { 3679 if delta == nil { 3680 return 3681 } 3682 3683 a.MemoryMB -= delta.MemoryMB 3684 } 3685 3686 func (a *AllocatedMemoryResources) Max(other *AllocatedMemoryResources) { 3687 if other == nil { 3688 return 3689 } 3690 3691 if other.MemoryMB > a.MemoryMB { 3692 a.MemoryMB = other.MemoryMB 3693 } 3694 } 3695 3696 type AllocatedDevices []*AllocatedDeviceResource 3697 3698 // Index finds the matching index using the passed device. If not found, -1 is 3699 // returned. 3700 func (a AllocatedDevices) Index(d *AllocatedDeviceResource) int { 3701 if d == nil { 3702 return -1 3703 } 3704 3705 for i, o := range a { 3706 if o.ID().Equals(d.ID()) { 3707 return i 3708 } 3709 } 3710 3711 return -1 3712 } 3713 3714 // AllocatedDeviceResource captures a set of allocated devices. 3715 type AllocatedDeviceResource struct { 3716 // Vendor, Type, and Name are used to select the plugin to request the 3717 // device IDs from. 3718 Vendor string 3719 Type string 3720 Name string 3721 3722 // DeviceIDs is the set of allocated devices 3723 DeviceIDs []string 3724 } 3725 3726 func (a *AllocatedDeviceResource) ID() *DeviceIdTuple { 3727 if a == nil { 3728 return nil 3729 } 3730 3731 return &DeviceIdTuple{ 3732 Vendor: a.Vendor, 3733 Type: a.Type, 3734 Name: a.Name, 3735 } 3736 } 3737 3738 func (a *AllocatedDeviceResource) Add(delta *AllocatedDeviceResource) { 3739 if delta == nil { 3740 return 3741 } 3742 3743 a.DeviceIDs = append(a.DeviceIDs, delta.DeviceIDs...) 3744 } 3745 3746 func (a *AllocatedDeviceResource) Copy() *AllocatedDeviceResource { 3747 if a == nil { 3748 return a 3749 } 3750 3751 na := *a 3752 3753 // Copy the devices 3754 na.DeviceIDs = make([]string, len(a.DeviceIDs)) 3755 for i, id := range a.DeviceIDs { 3756 na.DeviceIDs[i] = id 3757 } 3758 3759 return &na 3760 } 3761 3762 // ComparableResources is the set of resources allocated to a task group but 3763 // not keyed by Task, making it easier to compare. 3764 type ComparableResources struct { 3765 Flattened AllocatedTaskResources 3766 Shared AllocatedSharedResources 3767 } 3768 3769 func (c *ComparableResources) Add(delta *ComparableResources) { 3770 if delta == nil { 3771 return 3772 } 3773 3774 c.Flattened.Add(&delta.Flattened) 3775 c.Shared.Add(&delta.Shared) 3776 } 3777 3778 func (c *ComparableResources) Subtract(delta *ComparableResources) { 3779 if delta == nil { 3780 return 3781 } 3782 3783 c.Flattened.Subtract(&delta.Flattened) 3784 c.Shared.Subtract(&delta.Shared) 3785 } 3786 3787 func (c *ComparableResources) Copy() *ComparableResources { 3788 if c == nil { 3789 return nil 3790 } 3791 newR := new(ComparableResources) 3792 *newR = *c 3793 return newR 3794 } 3795 3796 // Superset checks if one set of resources is a superset of another. This 3797 // ignores network resources, and the NetworkIndex should be used for that. 3798 func (c *ComparableResources) Superset(other *ComparableResources) (bool, string) { 3799 if c.Flattened.Cpu.CpuShares < other.Flattened.Cpu.CpuShares { 3800 return false, "cpu" 3801 } 3802 if c.Flattened.Memory.MemoryMB < other.Flattened.Memory.MemoryMB { 3803 return false, "memory" 3804 } 3805 if c.Shared.DiskMB < other.Shared.DiskMB { 3806 return false, "disk" 3807 } 3808 return true, "" 3809 } 3810 3811 // allocated finds the matching net index using device name 3812 func (c *ComparableResources) NetIndex(n *NetworkResource) int { 3813 return c.Flattened.Networks.NetIndex(n) 3814 } 3815 3816 const ( 3817 // JobTypeNomad is reserved for internal system tasks and is 3818 // always handled by the CoreScheduler. 3819 JobTypeCore = "_core" 3820 JobTypeService = "service" 3821 JobTypeBatch = "batch" 3822 JobTypeSystem = "system" 3823 ) 3824 3825 const ( 3826 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 3827 JobStatusRunning = "running" // Running means the job has non-terminal allocations 3828 JobStatusDead = "dead" // Dead means all evaluation's and allocations are terminal 3829 ) 3830 3831 const ( 3832 // JobMinPriority is the minimum allowed priority 3833 JobMinPriority = 1 3834 3835 // JobDefaultPriority is the default priority if not 3836 // not specified. 3837 JobDefaultPriority = 50 3838 3839 // JobMaxPriority is the maximum allowed priority 3840 JobMaxPriority = 100 3841 3842 // Ensure CoreJobPriority is higher than any user 3843 // specified job so that it gets priority. This is important 3844 // for the system to remain healthy. 3845 CoreJobPriority = JobMaxPriority * 2 3846 3847 // JobTrackedVersions is the number of historic job versions that are 3848 // kept. 3849 JobTrackedVersions = 6 3850 3851 // JobTrackedScalingEvents is the number of scaling events that are 3852 // kept for a single task group. 3853 JobTrackedScalingEvents = 20 3854 ) 3855 3856 // Job is the scope of a scheduling request to Nomad. It is the largest 3857 // scoped object, and is a named collection of task groups. Each task group 3858 // is further composed of tasks. A task group (TG) is the unit of scheduling 3859 // however. 3860 type Job struct { 3861 // Stop marks whether the user has stopped the job. A stopped job will 3862 // have all created allocations stopped and acts as a way to stop a job 3863 // without purging it from the system. This allows existing allocs to be 3864 // queried and the job to be inspected as it is being killed. 3865 Stop bool 3866 3867 // Region is the Nomad region that handles scheduling this job 3868 Region string 3869 3870 // Namespace is the namespace the job is submitted into. 3871 Namespace string 3872 3873 // ID is a unique identifier for the job per region. It can be 3874 // specified hierarchically like LineOfBiz/OrgName/Team/Project 3875 ID string 3876 3877 // ParentID is the unique identifier of the job that spawned this job. 3878 ParentID string 3879 3880 // Name is the logical name of the job used to refer to it. This is unique 3881 // per region, but not unique globally. 3882 Name string 3883 3884 // Type is used to control various behaviors about the job. Most jobs 3885 // are service jobs, meaning they are expected to be long lived. 3886 // Some jobs are batch oriented meaning they run and then terminate. 3887 // This can be extended in the future to support custom schedulers. 3888 Type string 3889 3890 // Priority is used to control scheduling importance and if this job 3891 // can preempt other jobs. 3892 Priority int 3893 3894 // AllAtOnce is used to control if incremental scheduling of task groups 3895 // is allowed or if we must do a gang scheduling of the entire job. This 3896 // can slow down larger jobs if resources are not available. 3897 AllAtOnce bool 3898 3899 // Datacenters contains all the datacenters this job is allowed to span 3900 Datacenters []string 3901 3902 // Constraints can be specified at a job level and apply to 3903 // all the task groups and tasks. 3904 Constraints []*Constraint 3905 3906 // Affinities can be specified at the job level to express 3907 // scheduling preferences that apply to all groups and tasks 3908 Affinities []*Affinity 3909 3910 // Spread can be specified at the job level to express spreading 3911 // allocations across a desired attribute, such as datacenter 3912 Spreads []*Spread 3913 3914 // TaskGroups are the collections of task groups that this job needs 3915 // to run. Each task group is an atomic unit of scheduling and placement. 3916 TaskGroups []*TaskGroup 3917 3918 // See agent.ApiJobToStructJob 3919 // Update provides defaults for the TaskGroup Update stanzas 3920 Update UpdateStrategy 3921 3922 Multiregion *Multiregion 3923 3924 // Periodic is used to define the interval the job is run at. 3925 Periodic *PeriodicConfig 3926 3927 // ParameterizedJob is used to specify the job as a parameterized job 3928 // for dispatching. 3929 ParameterizedJob *ParameterizedJobConfig 3930 3931 // Dispatched is used to identify if the Job has been dispatched from a 3932 // parameterized job. 3933 Dispatched bool 3934 3935 // Payload is the payload supplied when the job was dispatched. 3936 Payload []byte 3937 3938 // Meta is used to associate arbitrary metadata with this 3939 // job. This is opaque to Nomad. 3940 Meta map[string]string 3941 3942 // ConsulToken is the Consul token that proves the submitter of the job has 3943 // access to the Service Identity policies associated with the job's 3944 // Consul Connect enabled services. This field is only used to transfer the 3945 // token and is not stored after Job submission. 3946 ConsulToken string 3947 3948 // VaultToken is the Vault token that proves the submitter of the job has 3949 // access to the specified Vault policies. This field is only used to 3950 // transfer the token and is not stored after Job submission. 3951 VaultToken string 3952 3953 // VaultNamespace is the Vault namepace 3954 VaultNamespace string 3955 3956 // NomadTokenID is the Accessor ID of the ACL token (if any) 3957 // used to register this version of the job. Used by deploymentwatcher. 3958 NomadTokenID string 3959 3960 // Job status 3961 Status string 3962 3963 // StatusDescription is meant to provide more human useful information 3964 StatusDescription string 3965 3966 // Stable marks a job as stable. Stability is only defined on "service" and 3967 // "system" jobs. The stability of a job will be set automatically as part 3968 // of a deployment and can be manually set via APIs. This field is updated 3969 // when the status of a corresponding deployment transitions to Failed 3970 // or Successful. This field is not meaningful for jobs that don't have an 3971 // update stanza. 3972 Stable bool 3973 3974 // Version is a monotonically increasing version number that is incremented 3975 // on each job register. 3976 Version uint64 3977 3978 // SubmitTime is the time at which the job was submitted as a UnixNano in 3979 // UTC 3980 SubmitTime int64 3981 3982 // Raft Indexes 3983 CreateIndex uint64 3984 ModifyIndex uint64 3985 JobModifyIndex uint64 3986 } 3987 3988 // NamespacedID returns the namespaced id useful for logging 3989 func (j *Job) NamespacedID() *NamespacedID { 3990 return &NamespacedID{ 3991 ID: j.ID, 3992 Namespace: j.Namespace, 3993 } 3994 } 3995 3996 // Canonicalize is used to canonicalize fields in the Job. This should be 3997 // called when registering a Job. 3998 func (j *Job) Canonicalize() { 3999 if j == nil { 4000 return 4001 } 4002 4003 // Ensure that an empty and nil map are treated the same to avoid scheduling 4004 // problems since we use reflect DeepEquals. 4005 if len(j.Meta) == 0 { 4006 j.Meta = nil 4007 } 4008 4009 // Ensure the job is in a namespace. 4010 if j.Namespace == "" { 4011 j.Namespace = DefaultNamespace 4012 } 4013 4014 for _, tg := range j.TaskGroups { 4015 tg.Canonicalize(j) 4016 } 4017 4018 if j.ParameterizedJob != nil { 4019 j.ParameterizedJob.Canonicalize() 4020 } 4021 4022 if j.Multiregion != nil { 4023 j.Multiregion.Canonicalize() 4024 } 4025 4026 if j.Periodic != nil { 4027 j.Periodic.Canonicalize() 4028 } 4029 } 4030 4031 // Copy returns a deep copy of the Job. It is expected that callers use recover. 4032 // This job can panic if the deep copy failed as it uses reflection. 4033 func (j *Job) Copy() *Job { 4034 if j == nil { 4035 return nil 4036 } 4037 nj := new(Job) 4038 *nj = *j 4039 nj.Datacenters = helper.CopySliceString(nj.Datacenters) 4040 nj.Constraints = CopySliceConstraints(nj.Constraints) 4041 nj.Affinities = CopySliceAffinities(nj.Affinities) 4042 nj.Multiregion = nj.Multiregion.Copy() 4043 4044 if j.TaskGroups != nil { 4045 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 4046 for i, tg := range nj.TaskGroups { 4047 tgs[i] = tg.Copy() 4048 } 4049 nj.TaskGroups = tgs 4050 } 4051 4052 nj.Periodic = nj.Periodic.Copy() 4053 nj.Meta = helper.CopyMapStringString(nj.Meta) 4054 nj.ParameterizedJob = nj.ParameterizedJob.Copy() 4055 return nj 4056 } 4057 4058 // Validate is used to sanity check a job input 4059 func (j *Job) Validate() error { 4060 var mErr multierror.Error 4061 4062 if j.Region == "" && j.Multiregion == nil { 4063 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 4064 } 4065 if j.ID == "" { 4066 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 4067 } else if strings.Contains(j.ID, " ") { 4068 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 4069 } else if strings.Contains(j.ID, "\000") { 4070 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a null character")) 4071 } 4072 if j.Name == "" { 4073 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 4074 } else if strings.Contains(j.Name, "\000") { 4075 mErr.Errors = append(mErr.Errors, errors.New("Job Name contains a null character")) 4076 } 4077 if j.Namespace == "" { 4078 mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace")) 4079 } 4080 switch j.Type { 4081 case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem: 4082 case "": 4083 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 4084 default: 4085 mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type)) 4086 } 4087 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 4088 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 4089 } 4090 if len(j.Datacenters) == 0 && !j.IsMultiregion() { 4091 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 4092 } else { 4093 for _, v := range j.Datacenters { 4094 if v == "" { 4095 mErr.Errors = append(mErr.Errors, errors.New("Job datacenter must be non-empty string")) 4096 } 4097 } 4098 } 4099 if len(j.TaskGroups) == 0 { 4100 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 4101 } 4102 for idx, constr := range j.Constraints { 4103 if err := constr.Validate(); err != nil { 4104 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 4105 mErr.Errors = append(mErr.Errors, outer) 4106 } 4107 } 4108 if j.Type == JobTypeSystem { 4109 if j.Affinities != nil { 4110 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 4111 } 4112 } else { 4113 for idx, affinity := range j.Affinities { 4114 if err := affinity.Validate(); err != nil { 4115 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 4116 mErr.Errors = append(mErr.Errors, outer) 4117 } 4118 } 4119 } 4120 4121 if j.Type == JobTypeSystem { 4122 if j.Spreads != nil { 4123 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza")) 4124 } 4125 } else { 4126 for idx, spread := range j.Spreads { 4127 if err := spread.Validate(); err != nil { 4128 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err) 4129 mErr.Errors = append(mErr.Errors, outer) 4130 } 4131 } 4132 } 4133 4134 // Check for duplicate task groups 4135 taskGroups := make(map[string]int) 4136 for idx, tg := range j.TaskGroups { 4137 if tg.Name == "" { 4138 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1)) 4139 } else if existing, ok := taskGroups[tg.Name]; ok { 4140 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 4141 } else { 4142 taskGroups[tg.Name] = idx 4143 } 4144 4145 if tg.ShutdownDelay != nil && *tg.ShutdownDelay < 0 { 4146 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 4147 } 4148 4149 if tg.StopAfterClientDisconnect != nil && *tg.StopAfterClientDisconnect != 0 { 4150 if *tg.StopAfterClientDisconnect > 0 && 4151 !(j.Type == JobTypeBatch || j.Type == JobTypeService) { 4152 mErr.Errors = append(mErr.Errors, errors.New("stop_after_client_disconnect can only be set in batch and service jobs")) 4153 } else if *tg.StopAfterClientDisconnect < 0 { 4154 mErr.Errors = append(mErr.Errors, errors.New("stop_after_client_disconnect must be a positive value")) 4155 } 4156 } 4157 4158 if j.Type == "system" && tg.Count > 1 { 4159 mErr.Errors = append(mErr.Errors, 4160 fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler", 4161 tg.Name, tg.Count)) 4162 } 4163 } 4164 4165 // Validate the task group 4166 for _, tg := range j.TaskGroups { 4167 if err := tg.Validate(j); err != nil { 4168 outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err) 4169 mErr.Errors = append(mErr.Errors, outer) 4170 } 4171 } 4172 4173 // Validate periodic is only used with batch jobs. 4174 if j.IsPeriodic() && j.Periodic.Enabled { 4175 if j.Type != JobTypeBatch { 4176 mErr.Errors = append(mErr.Errors, 4177 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 4178 } 4179 4180 if err := j.Periodic.Validate(); err != nil { 4181 mErr.Errors = append(mErr.Errors, err) 4182 } 4183 } 4184 4185 if j.IsParameterized() { 4186 if j.Type != JobTypeBatch { 4187 mErr.Errors = append(mErr.Errors, 4188 fmt.Errorf("Parameterized job can only be used with %q scheduler", JobTypeBatch)) 4189 } 4190 4191 if err := j.ParameterizedJob.Validate(); err != nil { 4192 mErr.Errors = append(mErr.Errors, err) 4193 } 4194 } 4195 4196 if j.IsMultiregion() { 4197 if err := j.Multiregion.Validate(j.Type, j.Datacenters); err != nil { 4198 mErr.Errors = append(mErr.Errors, err) 4199 } 4200 } 4201 4202 return mErr.ErrorOrNil() 4203 } 4204 4205 // Warnings returns a list of warnings that may be from dubious settings or 4206 // deprecation warnings. 4207 func (j *Job) Warnings() error { 4208 var mErr multierror.Error 4209 4210 // Check the groups 4211 ap := 0 4212 for _, tg := range j.TaskGroups { 4213 if err := tg.Warnings(j); err != nil { 4214 outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err) 4215 mErr.Errors = append(mErr.Errors, outer) 4216 } 4217 if tg.Update != nil && tg.Update.AutoPromote { 4218 ap += 1 4219 } 4220 } 4221 4222 // Check AutoPromote, should be all or none 4223 if ap > 0 && ap < len(j.TaskGroups) { 4224 err := fmt.Errorf("auto_promote must be true for all groups to enable automatic promotion") 4225 mErr.Errors = append(mErr.Errors, err) 4226 } 4227 4228 return mErr.ErrorOrNil() 4229 } 4230 4231 // LookupTaskGroup finds a task group by name 4232 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 4233 for _, tg := range j.TaskGroups { 4234 if tg.Name == name { 4235 return tg 4236 } 4237 } 4238 return nil 4239 } 4240 4241 // CombinedTaskMeta takes a TaskGroup and Task name and returns the combined 4242 // meta data for the task. When joining Job, Group and Task Meta, the precedence 4243 // is by deepest scope (Task > Group > Job). 4244 func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string { 4245 group := j.LookupTaskGroup(groupName) 4246 if group == nil { 4247 return j.Meta 4248 } 4249 4250 var meta map[string]string 4251 4252 task := group.LookupTask(taskName) 4253 if task != nil { 4254 meta = helper.CopyMapStringString(task.Meta) 4255 } 4256 4257 if meta == nil { 4258 meta = make(map[string]string, len(group.Meta)+len(j.Meta)) 4259 } 4260 4261 // Add the group specific meta 4262 for k, v := range group.Meta { 4263 if _, ok := meta[k]; !ok { 4264 meta[k] = v 4265 } 4266 } 4267 4268 // Add the job specific meta 4269 for k, v := range j.Meta { 4270 if _, ok := meta[k]; !ok { 4271 meta[k] = v 4272 } 4273 } 4274 4275 return meta 4276 } 4277 4278 // Stopped returns if a job is stopped. 4279 func (j *Job) Stopped() bool { 4280 return j == nil || j.Stop 4281 } 4282 4283 // HasUpdateStrategy returns if any task group in the job has an update strategy 4284 func (j *Job) HasUpdateStrategy() bool { 4285 for _, tg := range j.TaskGroups { 4286 if !tg.Update.IsEmpty() { 4287 return true 4288 } 4289 } 4290 4291 return false 4292 } 4293 4294 // Stub is used to return a summary of the job 4295 func (j *Job) Stub(summary *JobSummary) *JobListStub { 4296 return &JobListStub{ 4297 ID: j.ID, 4298 ParentID: j.ParentID, 4299 Name: j.Name, 4300 Datacenters: j.Datacenters, 4301 Multiregion: j.Multiregion, 4302 Type: j.Type, 4303 Priority: j.Priority, 4304 Periodic: j.IsPeriodic(), 4305 ParameterizedJob: j.IsParameterized(), 4306 Stop: j.Stop, 4307 Status: j.Status, 4308 StatusDescription: j.StatusDescription, 4309 CreateIndex: j.CreateIndex, 4310 ModifyIndex: j.ModifyIndex, 4311 JobModifyIndex: j.JobModifyIndex, 4312 SubmitTime: j.SubmitTime, 4313 JobSummary: summary, 4314 } 4315 } 4316 4317 // IsPeriodic returns whether a job is periodic. 4318 func (j *Job) IsPeriodic() bool { 4319 return j.Periodic != nil 4320 } 4321 4322 // IsPeriodicActive returns whether the job is an active periodic job that will 4323 // create child jobs 4324 func (j *Job) IsPeriodicActive() bool { 4325 return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized() 4326 } 4327 4328 // IsParameterized returns whether a job is parameterized job. 4329 func (j *Job) IsParameterized() bool { 4330 return j.ParameterizedJob != nil && !j.Dispatched 4331 } 4332 4333 // IsMultiregion returns whether a job is multiregion 4334 func (j *Job) IsMultiregion() bool { 4335 return j.Multiregion != nil && j.Multiregion.Regions != nil && len(j.Multiregion.Regions) > 0 4336 } 4337 4338 // VaultPolicies returns the set of Vault policies per task group, per task 4339 func (j *Job) VaultPolicies() map[string]map[string]*Vault { 4340 policies := make(map[string]map[string]*Vault, len(j.TaskGroups)) 4341 4342 for _, tg := range j.TaskGroups { 4343 tgPolicies := make(map[string]*Vault, len(tg.Tasks)) 4344 4345 for _, task := range tg.Tasks { 4346 if task.Vault == nil { 4347 continue 4348 } 4349 4350 tgPolicies[task.Name] = task.Vault 4351 } 4352 4353 if len(tgPolicies) != 0 { 4354 policies[tg.Name] = tgPolicies 4355 } 4356 } 4357 4358 return policies 4359 } 4360 4361 // ConnectTasks returns the set of Consul Connect enabled tasks defined on the 4362 // job that will require a Service Identity token in the case that Consul ACLs 4363 // are enabled. The TaskKind.Value is the name of the Consul service. 4364 // 4365 // This method is meaningful only after the Job has passed through the job 4366 // submission Mutator functions. 4367 func (j *Job) ConnectTasks() []TaskKind { 4368 var kinds []TaskKind 4369 for _, tg := range j.TaskGroups { 4370 for _, task := range tg.Tasks { 4371 if task.Kind.IsConnectProxy() || 4372 task.Kind.IsConnectNative() || 4373 task.Kind.IsAnyConnectGateway() { 4374 kinds = append(kinds, task.Kind) 4375 } 4376 } 4377 } 4378 return kinds 4379 } 4380 4381 // RequiredSignals returns a mapping of task groups to tasks to their required 4382 // set of signals 4383 func (j *Job) RequiredSignals() map[string]map[string][]string { 4384 signals := make(map[string]map[string][]string) 4385 4386 for _, tg := range j.TaskGroups { 4387 for _, task := range tg.Tasks { 4388 // Use this local one as a set 4389 taskSignals := make(map[string]struct{}) 4390 4391 // Check if the Vault change mode uses signals 4392 if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal { 4393 taskSignals[task.Vault.ChangeSignal] = struct{}{} 4394 } 4395 4396 // If a user has specified a KillSignal, add it to required signals 4397 if task.KillSignal != "" { 4398 taskSignals[task.KillSignal] = struct{}{} 4399 } 4400 4401 // Check if any template change mode uses signals 4402 for _, t := range task.Templates { 4403 if t.ChangeMode != TemplateChangeModeSignal { 4404 continue 4405 } 4406 4407 taskSignals[t.ChangeSignal] = struct{}{} 4408 } 4409 4410 // Flatten and sort the signals 4411 l := len(taskSignals) 4412 if l == 0 { 4413 continue 4414 } 4415 4416 flat := make([]string, 0, l) 4417 for sig := range taskSignals { 4418 flat = append(flat, sig) 4419 } 4420 4421 sort.Strings(flat) 4422 tgSignals, ok := signals[tg.Name] 4423 if !ok { 4424 tgSignals = make(map[string][]string) 4425 signals[tg.Name] = tgSignals 4426 } 4427 tgSignals[task.Name] = flat 4428 } 4429 4430 } 4431 4432 return signals 4433 } 4434 4435 // SpecChanged determines if the functional specification has changed between 4436 // two job versions. 4437 func (j *Job) SpecChanged(new *Job) bool { 4438 if j == nil { 4439 return new != nil 4440 } 4441 4442 // Create a copy of the new job 4443 c := new.Copy() 4444 4445 // Update the new job so we can do a reflect 4446 c.Status = j.Status 4447 c.StatusDescription = j.StatusDescription 4448 c.Stable = j.Stable 4449 c.Version = j.Version 4450 c.CreateIndex = j.CreateIndex 4451 c.ModifyIndex = j.ModifyIndex 4452 c.JobModifyIndex = j.JobModifyIndex 4453 c.SubmitTime = j.SubmitTime 4454 4455 // cgbaker: FINISH: probably need some consideration of scaling policy ID here 4456 4457 // Deep equals the jobs 4458 return !reflect.DeepEqual(j, c) 4459 } 4460 4461 func (j *Job) SetSubmitTime() { 4462 j.SubmitTime = time.Now().UTC().UnixNano() 4463 } 4464 4465 // JobListStub is used to return a subset of job information 4466 // for the job list 4467 type JobListStub struct { 4468 ID string 4469 ParentID string 4470 Name string 4471 Namespace string `json:",omitempty"` 4472 Datacenters []string 4473 Multiregion *Multiregion 4474 Type string 4475 Priority int 4476 Periodic bool 4477 ParameterizedJob bool 4478 Stop bool 4479 Status string 4480 StatusDescription string 4481 JobSummary *JobSummary 4482 CreateIndex uint64 4483 ModifyIndex uint64 4484 JobModifyIndex uint64 4485 SubmitTime int64 4486 } 4487 4488 // JobSummary summarizes the state of the allocations of a job 4489 type JobSummary struct { 4490 // JobID is the ID of the job the summary is for 4491 JobID string 4492 4493 // Namespace is the namespace of the job and its summary 4494 Namespace string 4495 4496 // Summary contains the summary per task group for the Job 4497 Summary map[string]TaskGroupSummary 4498 4499 // Children contains a summary for the children of this job. 4500 Children *JobChildrenSummary 4501 4502 // Raft Indexes 4503 CreateIndex uint64 4504 ModifyIndex uint64 4505 } 4506 4507 // Copy returns a new copy of JobSummary 4508 func (js *JobSummary) Copy() *JobSummary { 4509 newJobSummary := new(JobSummary) 4510 *newJobSummary = *js 4511 newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary)) 4512 for k, v := range js.Summary { 4513 newTGSummary[k] = v 4514 } 4515 newJobSummary.Summary = newTGSummary 4516 newJobSummary.Children = newJobSummary.Children.Copy() 4517 return newJobSummary 4518 } 4519 4520 // JobChildrenSummary contains the summary of children job statuses 4521 type JobChildrenSummary struct { 4522 Pending int64 4523 Running int64 4524 Dead int64 4525 } 4526 4527 // Copy returns a new copy of a JobChildrenSummary 4528 func (jc *JobChildrenSummary) Copy() *JobChildrenSummary { 4529 if jc == nil { 4530 return nil 4531 } 4532 4533 njc := new(JobChildrenSummary) 4534 *njc = *jc 4535 return njc 4536 } 4537 4538 // TaskGroup summarizes the state of all the allocations of a particular 4539 // TaskGroup 4540 type TaskGroupSummary struct { 4541 Queued int 4542 Complete int 4543 Failed int 4544 Running int 4545 Starting int 4546 Lost int 4547 } 4548 4549 const ( 4550 // Checks uses any registered health check state in combination with task 4551 // states to determine if a allocation is healthy. 4552 UpdateStrategyHealthCheck_Checks = "checks" 4553 4554 // TaskStates uses the task states of an allocation to determine if the 4555 // allocation is healthy. 4556 UpdateStrategyHealthCheck_TaskStates = "task_states" 4557 4558 // Manual allows the operator to manually signal to Nomad when an 4559 // allocations is healthy. This allows more advanced health checking that is 4560 // outside of the scope of Nomad. 4561 UpdateStrategyHealthCheck_Manual = "manual" 4562 ) 4563 4564 var ( 4565 // DefaultUpdateStrategy provides a baseline that can be used to upgrade 4566 // jobs with the old policy or for populating field defaults. 4567 DefaultUpdateStrategy = &UpdateStrategy{ 4568 Stagger: 30 * time.Second, 4569 MaxParallel: 1, 4570 HealthCheck: UpdateStrategyHealthCheck_Checks, 4571 MinHealthyTime: 10 * time.Second, 4572 HealthyDeadline: 5 * time.Minute, 4573 ProgressDeadline: 10 * time.Minute, 4574 AutoRevert: false, 4575 AutoPromote: false, 4576 Canary: 0, 4577 } 4578 ) 4579 4580 // UpdateStrategy is used to modify how updates are done 4581 type UpdateStrategy struct { 4582 // Stagger is used to determine the rate at which allocations are migrated 4583 // due to down or draining nodes. 4584 Stagger time.Duration 4585 4586 // MaxParallel is how many updates can be done in parallel 4587 MaxParallel int 4588 4589 // HealthCheck specifies the mechanism in which allocations are marked 4590 // healthy or unhealthy as part of a deployment. 4591 HealthCheck string 4592 4593 // MinHealthyTime is the minimum time an allocation must be in the healthy 4594 // state before it is marked as healthy, unblocking more allocations to be 4595 // rolled. 4596 MinHealthyTime time.Duration 4597 4598 // HealthyDeadline is the time in which an allocation must be marked as 4599 // healthy before it is automatically transitioned to unhealthy. This time 4600 // period doesn't count against the MinHealthyTime. 4601 HealthyDeadline time.Duration 4602 4603 // ProgressDeadline is the time in which an allocation as part of the 4604 // deployment must transition to healthy. If no allocation becomes healthy 4605 // after the deadline, the deployment is marked as failed. If the deadline 4606 // is zero, the first failure causes the deployment to fail. 4607 ProgressDeadline time.Duration 4608 4609 // AutoRevert declares that if a deployment fails because of unhealthy 4610 // allocations, there should be an attempt to auto-revert the job to a 4611 // stable version. 4612 AutoRevert bool 4613 4614 // AutoPromote declares that the deployment should be promoted when all canaries are 4615 // healthy 4616 AutoPromote bool 4617 4618 // Canary is the number of canaries to deploy when a change to the task 4619 // group is detected. 4620 Canary int 4621 } 4622 4623 func (u *UpdateStrategy) Copy() *UpdateStrategy { 4624 if u == nil { 4625 return nil 4626 } 4627 4628 copy := new(UpdateStrategy) 4629 *copy = *u 4630 return copy 4631 } 4632 4633 func (u *UpdateStrategy) Validate() error { 4634 if u == nil { 4635 return nil 4636 } 4637 4638 var mErr multierror.Error 4639 switch u.HealthCheck { 4640 case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual: 4641 default: 4642 _ = multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck)) 4643 } 4644 4645 if u.MaxParallel < 0 { 4646 _ = multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than zero: %d < 0", u.MaxParallel)) 4647 } 4648 if u.Canary < 0 { 4649 _ = multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary)) 4650 } 4651 if u.Canary == 0 && u.AutoPromote { 4652 _ = multierror.Append(&mErr, fmt.Errorf("Auto Promote requires a Canary count greater than zero")) 4653 } 4654 if u.MinHealthyTime < 0 { 4655 _ = multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime)) 4656 } 4657 if u.HealthyDeadline <= 0 { 4658 _ = multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline)) 4659 } 4660 if u.ProgressDeadline < 0 { 4661 _ = multierror.Append(&mErr, fmt.Errorf("Progress deadline must be zero or greater: %v", u.ProgressDeadline)) 4662 } 4663 if u.MinHealthyTime >= u.HealthyDeadline { 4664 _ = multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v > %v", u.MinHealthyTime, u.HealthyDeadline)) 4665 } 4666 if u.ProgressDeadline != 0 && u.HealthyDeadline >= u.ProgressDeadline { 4667 _ = multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be less than progress deadline: %v > %v", u.HealthyDeadline, u.ProgressDeadline)) 4668 } 4669 if u.Stagger <= 0 { 4670 _ = multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger)) 4671 } 4672 4673 return mErr.ErrorOrNil() 4674 } 4675 4676 func (u *UpdateStrategy) IsEmpty() bool { 4677 if u == nil { 4678 return true 4679 } 4680 4681 return u.MaxParallel == 0 4682 } 4683 4684 // TODO(alexdadgar): Remove once no longer used by the scheduler. 4685 // Rolling returns if a rolling strategy should be used 4686 func (u *UpdateStrategy) Rolling() bool { 4687 return u.Stagger > 0 && u.MaxParallel > 0 4688 } 4689 4690 type Multiregion struct { 4691 Strategy *MultiregionStrategy 4692 Regions []*MultiregionRegion 4693 } 4694 4695 func (m *Multiregion) Canonicalize() { 4696 if m.Strategy == nil { 4697 m.Strategy = &MultiregionStrategy{} 4698 } 4699 if m.Regions == nil { 4700 m.Regions = []*MultiregionRegion{} 4701 } 4702 } 4703 4704 // Diff indicates whether the multiregion config has changed 4705 func (m *Multiregion) Diff(m2 *Multiregion) bool { 4706 return !reflect.DeepEqual(m, m2) 4707 } 4708 4709 func (m *Multiregion) Copy() *Multiregion { 4710 if m == nil { 4711 return nil 4712 } 4713 copy := new(Multiregion) 4714 if m.Strategy != nil { 4715 copy.Strategy = &MultiregionStrategy{ 4716 MaxParallel: m.Strategy.MaxParallel, 4717 OnFailure: m.Strategy.OnFailure, 4718 } 4719 } 4720 for _, region := range m.Regions { 4721 copyRegion := &MultiregionRegion{ 4722 Name: region.Name, 4723 Count: region.Count, 4724 Datacenters: []string{}, 4725 Meta: map[string]string{}, 4726 } 4727 copyRegion.Datacenters = append(copyRegion.Datacenters, region.Datacenters...) 4728 for k, v := range region.Meta { 4729 copyRegion.Meta[k] = v 4730 } 4731 copy.Regions = append(copy.Regions, copyRegion) 4732 } 4733 return copy 4734 } 4735 4736 type MultiregionStrategy struct { 4737 MaxParallel int 4738 OnFailure string 4739 } 4740 4741 type MultiregionRegion struct { 4742 Name string 4743 Count int 4744 Datacenters []string 4745 Meta map[string]string 4746 } 4747 4748 // Namespace allows logically grouping jobs and their associated objects. 4749 type Namespace struct { 4750 // Name is the name of the namespace 4751 Name string 4752 4753 // Description is a human readable description of the namespace 4754 Description string 4755 4756 // Quota is the quota specification that the namespace should account 4757 // against. 4758 Quota string 4759 4760 // Hash is the hash of the namespace which is used to efficiently replicate 4761 // cross-regions. 4762 Hash []byte 4763 4764 // Raft Indexes 4765 CreateIndex uint64 4766 ModifyIndex uint64 4767 } 4768 4769 func (n *Namespace) Validate() error { 4770 var mErr multierror.Error 4771 4772 // Validate the name and description 4773 if !validNamespaceName.MatchString(n.Name) { 4774 err := fmt.Errorf("invalid name %q. Must match regex %s", n.Name, validNamespaceName) 4775 mErr.Errors = append(mErr.Errors, err) 4776 } 4777 if len(n.Description) > maxNamespaceDescriptionLength { 4778 err := fmt.Errorf("description longer than %d", maxNamespaceDescriptionLength) 4779 mErr.Errors = append(mErr.Errors, err) 4780 } 4781 4782 return mErr.ErrorOrNil() 4783 } 4784 4785 // SetHash is used to compute and set the hash of the namespace 4786 func (n *Namespace) SetHash() []byte { 4787 // Initialize a 256bit Blake2 hash (32 bytes) 4788 hash, err := blake2b.New256(nil) 4789 if err != nil { 4790 panic(err) 4791 } 4792 4793 // Write all the user set fields 4794 _, _ = hash.Write([]byte(n.Name)) 4795 _, _ = hash.Write([]byte(n.Description)) 4796 _, _ = hash.Write([]byte(n.Quota)) 4797 4798 // Finalize the hash 4799 hashVal := hash.Sum(nil) 4800 4801 // Set and return the hash 4802 n.Hash = hashVal 4803 return hashVal 4804 } 4805 4806 func (n *Namespace) Copy() *Namespace { 4807 nc := new(Namespace) 4808 *nc = *n 4809 nc.Hash = make([]byte, len(n.Hash)) 4810 copy(nc.Hash, n.Hash) 4811 return nc 4812 } 4813 4814 // NamespaceListRequest is used to request a list of namespaces 4815 type NamespaceListRequest struct { 4816 QueryOptions 4817 } 4818 4819 // NamespaceListResponse is used for a list request 4820 type NamespaceListResponse struct { 4821 Namespaces []*Namespace 4822 QueryMeta 4823 } 4824 4825 // NamespaceSpecificRequest is used to query a specific namespace 4826 type NamespaceSpecificRequest struct { 4827 Name string 4828 QueryOptions 4829 } 4830 4831 // SingleNamespaceResponse is used to return a single namespace 4832 type SingleNamespaceResponse struct { 4833 Namespace *Namespace 4834 QueryMeta 4835 } 4836 4837 // NamespaceSetRequest is used to query a set of namespaces 4838 type NamespaceSetRequest struct { 4839 Namespaces []string 4840 QueryOptions 4841 } 4842 4843 // NamespaceSetResponse is used to return a set of namespaces 4844 type NamespaceSetResponse struct { 4845 Namespaces map[string]*Namespace // Keyed by namespace Name 4846 QueryMeta 4847 } 4848 4849 // NamespaceDeleteRequest is used to delete a set of namespaces 4850 type NamespaceDeleteRequest struct { 4851 Namespaces []string 4852 WriteRequest 4853 } 4854 4855 // NamespaceUpsertRequest is used to upsert a set of namespaces 4856 type NamespaceUpsertRequest struct { 4857 Namespaces []*Namespace 4858 WriteRequest 4859 } 4860 4861 const ( 4862 // PeriodicSpecCron is used for a cron spec. 4863 PeriodicSpecCron = "cron" 4864 4865 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 4866 // separated list of unix timestamps at which to launch. 4867 PeriodicSpecTest = "_internal_test" 4868 ) 4869 4870 // Periodic defines the interval a job should be run at. 4871 type PeriodicConfig struct { 4872 // Enabled determines if the job should be run periodically. 4873 Enabled bool 4874 4875 // Spec specifies the interval the job should be run as. It is parsed based 4876 // on the SpecType. 4877 Spec string 4878 4879 // SpecType defines the format of the spec. 4880 SpecType string 4881 4882 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 4883 ProhibitOverlap bool 4884 4885 // TimeZone is the user specified string that determines the time zone to 4886 // launch against. The time zones must be specified from IANA Time Zone 4887 // database, such as "America/New_York". 4888 // Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones 4889 // Reference: https://www.iana.org/time-zones 4890 TimeZone string 4891 4892 // location is the time zone to evaluate the launch time against 4893 location *time.Location 4894 } 4895 4896 func (p *PeriodicConfig) Copy() *PeriodicConfig { 4897 if p == nil { 4898 return nil 4899 } 4900 np := new(PeriodicConfig) 4901 *np = *p 4902 return np 4903 } 4904 4905 func (p *PeriodicConfig) Validate() error { 4906 if !p.Enabled { 4907 return nil 4908 } 4909 4910 var mErr multierror.Error 4911 if p.Spec == "" { 4912 _ = multierror.Append(&mErr, fmt.Errorf("Must specify a spec")) 4913 } 4914 4915 // Check if we got a valid time zone 4916 if p.TimeZone != "" { 4917 if _, err := time.LoadLocation(p.TimeZone); err != nil { 4918 _ = multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err)) 4919 } 4920 } 4921 4922 switch p.SpecType { 4923 case PeriodicSpecCron: 4924 // Validate the cron spec 4925 if _, err := cronexpr.Parse(p.Spec); err != nil { 4926 _ = multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err)) 4927 } 4928 case PeriodicSpecTest: 4929 // No-op 4930 default: 4931 _ = multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType)) 4932 } 4933 4934 return mErr.ErrorOrNil() 4935 } 4936 4937 func (p *PeriodicConfig) Canonicalize() { 4938 // Load the location 4939 l, err := time.LoadLocation(p.TimeZone) 4940 if err != nil { 4941 p.location = time.UTC 4942 } 4943 4944 p.location = l 4945 } 4946 4947 // CronParseNext is a helper that parses the next time for the given expression 4948 // but captures any panic that may occur in the underlying library. 4949 func CronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) { 4950 defer func() { 4951 if recover() != nil { 4952 t = time.Time{} 4953 err = fmt.Errorf("failed parsing cron expression: %q", spec) 4954 } 4955 }() 4956 4957 return e.Next(fromTime), nil 4958 } 4959 4960 // Next returns the closest time instant matching the spec that is after the 4961 // passed time. If no matching instance exists, the zero value of time.Time is 4962 // returned. The `time.Location` of the returned value matches that of the 4963 // passed time. 4964 func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) { 4965 switch p.SpecType { 4966 case PeriodicSpecCron: 4967 e, err := cronexpr.Parse(p.Spec) 4968 if err != nil { 4969 return time.Time{}, fmt.Errorf("failed parsing cron expression: %q: %v", p.Spec, err) 4970 } 4971 return CronParseNext(e, fromTime, p.Spec) 4972 case PeriodicSpecTest: 4973 split := strings.Split(p.Spec, ",") 4974 if len(split) == 1 && split[0] == "" { 4975 return time.Time{}, nil 4976 } 4977 4978 // Parse the times 4979 times := make([]time.Time, len(split)) 4980 for i, s := range split { 4981 unix, err := strconv.Atoi(s) 4982 if err != nil { 4983 return time.Time{}, nil 4984 } 4985 4986 times[i] = time.Unix(int64(unix), 0) 4987 } 4988 4989 // Find the next match 4990 for _, next := range times { 4991 if fromTime.Before(next) { 4992 return next, nil 4993 } 4994 } 4995 } 4996 4997 return time.Time{}, nil 4998 } 4999 5000 // GetLocation returns the location to use for determining the time zone to run 5001 // the periodic job against. 5002 func (p *PeriodicConfig) GetLocation() *time.Location { 5003 // Jobs pre 0.5.5 will not have this 5004 if p.location != nil { 5005 return p.location 5006 } 5007 5008 return time.UTC 5009 } 5010 5011 const ( 5012 // PeriodicLaunchSuffix is the string appended to the periodic jobs ID 5013 // when launching derived instances of it. 5014 PeriodicLaunchSuffix = "/periodic-" 5015 ) 5016 5017 // PeriodicLaunch tracks the last launch time of a periodic job. 5018 type PeriodicLaunch struct { 5019 ID string // ID of the periodic job. 5020 Namespace string // Namespace of the periodic job 5021 Launch time.Time // The last launch time. 5022 5023 // Raft Indexes 5024 CreateIndex uint64 5025 ModifyIndex uint64 5026 } 5027 5028 const ( 5029 DispatchPayloadForbidden = "forbidden" 5030 DispatchPayloadOptional = "optional" 5031 DispatchPayloadRequired = "required" 5032 5033 // DispatchLaunchSuffix is the string appended to the parameterized job's ID 5034 // when dispatching instances of it. 5035 DispatchLaunchSuffix = "/dispatch-" 5036 ) 5037 5038 // ParameterizedJobConfig is used to configure the parameterized job 5039 type ParameterizedJobConfig struct { 5040 // Payload configure the payload requirements 5041 Payload string 5042 5043 // MetaRequired is metadata keys that must be specified by the dispatcher 5044 MetaRequired []string 5045 5046 // MetaOptional is metadata keys that may be specified by the dispatcher 5047 MetaOptional []string 5048 } 5049 5050 func (d *ParameterizedJobConfig) Validate() error { 5051 var mErr multierror.Error 5052 switch d.Payload { 5053 case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden: 5054 default: 5055 _ = multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload)) 5056 } 5057 5058 // Check that the meta configurations are disjoint sets 5059 disjoint, offending := helper.SliceSetDisjoint(d.MetaRequired, d.MetaOptional) 5060 if !disjoint { 5061 _ = multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending)) 5062 } 5063 5064 return mErr.ErrorOrNil() 5065 } 5066 5067 func (d *ParameterizedJobConfig) Canonicalize() { 5068 if d.Payload == "" { 5069 d.Payload = DispatchPayloadOptional 5070 } 5071 } 5072 5073 func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig { 5074 if d == nil { 5075 return nil 5076 } 5077 nd := new(ParameterizedJobConfig) 5078 *nd = *d 5079 nd.MetaOptional = helper.CopySliceString(nd.MetaOptional) 5080 nd.MetaRequired = helper.CopySliceString(nd.MetaRequired) 5081 return nd 5082 } 5083 5084 // DispatchedID returns an ID appropriate for a job dispatched against a 5085 // particular parameterized job 5086 func DispatchedID(templateID string, t time.Time) string { 5087 u := uuid.Generate()[:8] 5088 return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u) 5089 } 5090 5091 // DispatchPayloadConfig configures how a task gets its input from a job dispatch 5092 type DispatchPayloadConfig struct { 5093 // File specifies a relative path to where the input data should be written 5094 File string 5095 } 5096 5097 func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig { 5098 if d == nil { 5099 return nil 5100 } 5101 nd := new(DispatchPayloadConfig) 5102 *nd = *d 5103 return nd 5104 } 5105 5106 func (d *DispatchPayloadConfig) Validate() error { 5107 // Verify the destination doesn't escape 5108 escaped, err := PathEscapesAllocDir("task/local/", d.File) 5109 if err != nil { 5110 return fmt.Errorf("invalid destination path: %v", err) 5111 } else if escaped { 5112 return fmt.Errorf("destination escapes allocation directory") 5113 } 5114 5115 return nil 5116 } 5117 5118 const ( 5119 TaskLifecycleHookPrestart = "prestart" 5120 TaskLifecycleHookPoststart = "poststart" 5121 TaskLifecycleHookPoststop = "poststop" 5122 ) 5123 5124 type TaskLifecycleConfig struct { 5125 Hook string 5126 Sidecar bool 5127 } 5128 5129 func (d *TaskLifecycleConfig) Copy() *TaskLifecycleConfig { 5130 if d == nil { 5131 return nil 5132 } 5133 nd := new(TaskLifecycleConfig) 5134 *nd = *d 5135 return nd 5136 } 5137 5138 func (d *TaskLifecycleConfig) Validate() error { 5139 if d == nil { 5140 return nil 5141 } 5142 5143 switch d.Hook { 5144 case TaskLifecycleHookPrestart: 5145 case TaskLifecycleHookPoststart: 5146 case TaskLifecycleHookPoststop: 5147 case "": 5148 return fmt.Errorf("no lifecycle hook provided") 5149 default: 5150 return fmt.Errorf("invalid hook: %v", d.Hook) 5151 } 5152 5153 return nil 5154 } 5155 5156 var ( 5157 // These default restart policies needs to be in sync with 5158 // Canonicalize in api/tasks.go 5159 5160 DefaultServiceJobRestartPolicy = RestartPolicy{ 5161 Delay: 15 * time.Second, 5162 Attempts: 2, 5163 Interval: 30 * time.Minute, 5164 Mode: RestartPolicyModeFail, 5165 } 5166 DefaultBatchJobRestartPolicy = RestartPolicy{ 5167 Delay: 15 * time.Second, 5168 Attempts: 3, 5169 Interval: 24 * time.Hour, 5170 Mode: RestartPolicyModeFail, 5171 } 5172 ) 5173 5174 var ( 5175 // These default reschedule policies needs to be in sync with 5176 // NewDefaultReschedulePolicy in api/tasks.go 5177 5178 DefaultServiceJobReschedulePolicy = ReschedulePolicy{ 5179 Delay: 30 * time.Second, 5180 DelayFunction: "exponential", 5181 MaxDelay: 1 * time.Hour, 5182 Unlimited: true, 5183 } 5184 DefaultBatchJobReschedulePolicy = ReschedulePolicy{ 5185 Attempts: 1, 5186 Interval: 24 * time.Hour, 5187 Delay: 5 * time.Second, 5188 DelayFunction: "constant", 5189 } 5190 ) 5191 5192 const ( 5193 // RestartPolicyModeDelay causes an artificial delay till the next interval is 5194 // reached when the specified attempts have been reached in the interval. 5195 RestartPolicyModeDelay = "delay" 5196 5197 // RestartPolicyModeFail causes a job to fail if the specified number of 5198 // attempts are reached within an interval. 5199 RestartPolicyModeFail = "fail" 5200 5201 // RestartPolicyMinInterval is the minimum interval that is accepted for a 5202 // restart policy. 5203 RestartPolicyMinInterval = 5 * time.Second 5204 5205 // ReasonWithinPolicy describes restart events that are within policy 5206 ReasonWithinPolicy = "Restart within policy" 5207 ) 5208 5209 // JobScalingEvents contains the scaling events for a given job 5210 type JobScalingEvents struct { 5211 Namespace string 5212 JobID string 5213 5214 // This map is indexed by target; currently, this is just task group 5215 // the indexed array is sorted from newest to oldest event 5216 // the array should have less than JobTrackedScalingEvents entries 5217 ScalingEvents map[string][]*ScalingEvent 5218 5219 // Raft index 5220 ModifyIndex uint64 5221 } 5222 5223 // Factory method for ScalingEvent objects 5224 func NewScalingEvent(message string) *ScalingEvent { 5225 return &ScalingEvent{ 5226 Time: time.Now().Unix(), 5227 Message: message, 5228 } 5229 } 5230 5231 // ScalingEvent describes a scaling event against a Job 5232 type ScalingEvent struct { 5233 // Unix Nanosecond timestamp for the scaling event 5234 Time int64 5235 5236 // Count is the new scaling count, if provided 5237 Count *int64 5238 5239 // PreviousCount is the count at the time of the scaling event 5240 PreviousCount int64 5241 5242 // Message is the message describing a scaling event 5243 Message string 5244 5245 // Error indicates an error state for this scaling event 5246 Error bool 5247 5248 // Meta is a map of metadata returned during a scaling event 5249 Meta map[string]interface{} 5250 5251 // EvalID is the ID for an evaluation if one was created as part of a scaling event 5252 EvalID *string 5253 5254 // Raft index 5255 CreateIndex uint64 5256 } 5257 5258 func (e *ScalingEvent) SetError(error bool) *ScalingEvent { 5259 e.Error = error 5260 return e 5261 } 5262 5263 func (e *ScalingEvent) SetMeta(meta map[string]interface{}) *ScalingEvent { 5264 e.Meta = meta 5265 return e 5266 } 5267 5268 func (e *ScalingEvent) SetEvalID(evalID string) *ScalingEvent { 5269 e.EvalID = &evalID 5270 return e 5271 } 5272 5273 // ScalingEventRequest is by for Job.Scale endpoint 5274 // to register scaling events 5275 type ScalingEventRequest struct { 5276 Namespace string 5277 JobID string 5278 TaskGroup string 5279 5280 ScalingEvent *ScalingEvent 5281 } 5282 5283 // ScalingPolicy specifies the scaling policy for a scaling target 5284 type ScalingPolicy struct { 5285 // ID is a generated UUID used for looking up the scaling policy 5286 ID string 5287 5288 // Type is the type of scaling performed by the policy 5289 Type string 5290 5291 // Target contains information about the target of the scaling policy, like job and group 5292 Target map[string]string 5293 5294 // Policy is an opaque description of the scaling policy, passed to the autoscaler 5295 Policy map[string]interface{} 5296 5297 // Min is the minimum allowable scaling count for this target 5298 Min int64 5299 5300 // Max is the maximum allowable scaling count for this target 5301 Max int64 5302 5303 // Enabled indicates whether this policy has been enabled/disabled 5304 Enabled bool 5305 5306 CreateIndex uint64 5307 ModifyIndex uint64 5308 } 5309 5310 // JobKey returns a key that is unique to a job-scoped target, useful as a map 5311 // key. This uses the policy type, plus target (group and task). 5312 func (p *ScalingPolicy) JobKey() string { 5313 return p.Type + "\000" + 5314 p.Target[ScalingTargetGroup] + "\000" + 5315 p.Target[ScalingTargetTask] 5316 } 5317 5318 const ( 5319 ScalingTargetNamespace = "Namespace" 5320 ScalingTargetJob = "Job" 5321 ScalingTargetGroup = "Group" 5322 ScalingTargetTask = "Task" 5323 5324 ScalingPolicyTypeHorizontal = "horizontal" 5325 ) 5326 5327 func (p *ScalingPolicy) Canonicalize() { 5328 if p.Type == "" { 5329 p.Type = ScalingPolicyTypeHorizontal 5330 } 5331 } 5332 5333 func (p *ScalingPolicy) Copy() *ScalingPolicy { 5334 if p == nil { 5335 return nil 5336 } 5337 5338 opaquePolicyConfig, err := copystructure.Copy(p.Policy) 5339 if err != nil { 5340 panic(err.Error()) 5341 } 5342 5343 c := ScalingPolicy{ 5344 ID: p.ID, 5345 Policy: opaquePolicyConfig.(map[string]interface{}), 5346 Enabled: p.Enabled, 5347 Type: p.Type, 5348 Min: p.Min, 5349 Max: p.Max, 5350 CreateIndex: p.CreateIndex, 5351 ModifyIndex: p.ModifyIndex, 5352 } 5353 c.Target = make(map[string]string, len(p.Target)) 5354 for k, v := range p.Target { 5355 c.Target[k] = v 5356 } 5357 return &c 5358 } 5359 5360 func (p *ScalingPolicy) Validate() error { 5361 if p == nil { 5362 return nil 5363 } 5364 5365 var mErr multierror.Error 5366 5367 // Check policy type and target 5368 if p.Type == "" { 5369 mErr.Errors = append(mErr.Errors, fmt.Errorf("missing scaling policy type")) 5370 } else { 5371 mErr.Errors = append(mErr.Errors, p.validateType().Errors...) 5372 } 5373 5374 // Check Min and Max 5375 if p.Max < 0 { 5376 mErr.Errors = append(mErr.Errors, 5377 fmt.Errorf("maximum count must be specified and non-negative")) 5378 } else if p.Max < p.Min { 5379 mErr.Errors = append(mErr.Errors, 5380 fmt.Errorf("maximum count must not be less than minimum count")) 5381 } 5382 5383 if p.Min < 0 { 5384 mErr.Errors = append(mErr.Errors, 5385 fmt.Errorf("minimum count must be specified and non-negative")) 5386 } 5387 5388 return mErr.ErrorOrNil() 5389 } 5390 5391 func (p *ScalingPolicy) validateTargetHorizontal() (mErr multierror.Error) { 5392 if len(p.Target) == 0 { 5393 // This is probably not a Nomad horizontal policy 5394 return 5395 } 5396 5397 // Nomad horizontal policies should have Namespace, Job and TaskGroup 5398 if p.Target[ScalingTargetNamespace] == "" { 5399 mErr.Errors = append(mErr.Errors, fmt.Errorf("missing target namespace")) 5400 } 5401 if p.Target[ScalingTargetJob] == "" { 5402 mErr.Errors = append(mErr.Errors, fmt.Errorf("missing target job")) 5403 } 5404 if p.Target[ScalingTargetGroup] == "" { 5405 mErr.Errors = append(mErr.Errors, fmt.Errorf("missing target group")) 5406 } 5407 return 5408 } 5409 5410 // Diff indicates whether the specification for a given scaling policy has changed 5411 func (p *ScalingPolicy) Diff(p2 *ScalingPolicy) bool { 5412 copy := *p2 5413 copy.ID = p.ID 5414 copy.CreateIndex = p.CreateIndex 5415 copy.ModifyIndex = p.ModifyIndex 5416 return !reflect.DeepEqual(*p, copy) 5417 } 5418 5419 // TarketTaskGroup updates a ScalingPolicy target to specify a given task group 5420 func (p *ScalingPolicy) TargetTaskGroup(job *Job, tg *TaskGroup) *ScalingPolicy { 5421 p.Target = map[string]string{ 5422 ScalingTargetNamespace: job.Namespace, 5423 ScalingTargetJob: job.ID, 5424 ScalingTargetGroup: tg.Name, 5425 } 5426 return p 5427 } 5428 5429 // TargetTask updates a ScalingPolicy target to specify a given task 5430 func (p *ScalingPolicy) TargetTask(job *Job, tg *TaskGroup, task *Task) *ScalingPolicy { 5431 p.TargetTaskGroup(job, tg) 5432 p.Target[ScalingTargetTask] = task.Name 5433 return p 5434 } 5435 5436 func (p *ScalingPolicy) Stub() *ScalingPolicyListStub { 5437 stub := &ScalingPolicyListStub{ 5438 ID: p.ID, 5439 Type: p.Type, 5440 Target: make(map[string]string), 5441 Enabled: p.Enabled, 5442 CreateIndex: p.CreateIndex, 5443 ModifyIndex: p.ModifyIndex, 5444 } 5445 for k, v := range p.Target { 5446 stub.Target[k] = v 5447 } 5448 return stub 5449 } 5450 5451 // GetScalingPolicies returns a slice of all scaling scaling policies for this job 5452 func (j *Job) GetScalingPolicies() []*ScalingPolicy { 5453 ret := make([]*ScalingPolicy, 0) 5454 5455 for _, tg := range j.TaskGroups { 5456 if tg.Scaling != nil { 5457 ret = append(ret, tg.Scaling) 5458 } 5459 } 5460 5461 ret = append(ret, j.GetEntScalingPolicies()...) 5462 5463 return ret 5464 } 5465 5466 // ScalingPolicyListStub is used to return a subset of scaling policy information 5467 // for the scaling policy list 5468 type ScalingPolicyListStub struct { 5469 ID string 5470 Enabled bool 5471 Type string 5472 Target map[string]string 5473 CreateIndex uint64 5474 ModifyIndex uint64 5475 } 5476 5477 // RestartPolicy configures how Tasks are restarted when they crash or fail. 5478 type RestartPolicy struct { 5479 // Attempts is the number of restart that will occur in an interval. 5480 Attempts int 5481 5482 // Interval is a duration in which we can limit the number of restarts 5483 // within. 5484 Interval time.Duration 5485 5486 // Delay is the time between a failure and a restart. 5487 Delay time.Duration 5488 5489 // Mode controls what happens when the task restarts more than attempt times 5490 // in an interval. 5491 Mode string 5492 } 5493 5494 func (r *RestartPolicy) Copy() *RestartPolicy { 5495 if r == nil { 5496 return nil 5497 } 5498 nrp := new(RestartPolicy) 5499 *nrp = *r 5500 return nrp 5501 } 5502 5503 func (r *RestartPolicy) Validate() error { 5504 var mErr multierror.Error 5505 switch r.Mode { 5506 case RestartPolicyModeDelay, RestartPolicyModeFail: 5507 default: 5508 _ = multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode)) 5509 } 5510 5511 // Check for ambiguous/confusing settings 5512 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 5513 _ = multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts)) 5514 } 5515 5516 if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() { 5517 _ = multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval)) 5518 } 5519 if time.Duration(r.Attempts)*r.Delay > r.Interval { 5520 _ = multierror.Append(&mErr, 5521 fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay)) 5522 } 5523 return mErr.ErrorOrNil() 5524 } 5525 5526 func NewRestartPolicy(jobType string) *RestartPolicy { 5527 switch jobType { 5528 case JobTypeService, JobTypeSystem: 5529 rp := DefaultServiceJobRestartPolicy 5530 return &rp 5531 case JobTypeBatch: 5532 rp := DefaultBatchJobRestartPolicy 5533 return &rp 5534 } 5535 return nil 5536 } 5537 5538 const ReschedulePolicyMinInterval = 15 * time.Second 5539 const ReschedulePolicyMinDelay = 5 * time.Second 5540 5541 var RescheduleDelayFunctions = [...]string{"constant", "exponential", "fibonacci"} 5542 5543 // ReschedulePolicy configures how Tasks are rescheduled when they crash or fail. 5544 type ReschedulePolicy struct { 5545 // Attempts limits the number of rescheduling attempts that can occur in an interval. 5546 Attempts int 5547 5548 // Interval is a duration in which we can limit the number of reschedule attempts. 5549 Interval time.Duration 5550 5551 // Delay is a minimum duration to wait between reschedule attempts. 5552 // The delay function determines how much subsequent reschedule attempts are delayed by. 5553 Delay time.Duration 5554 5555 // DelayFunction determines how the delay progressively changes on subsequent reschedule 5556 // attempts. Valid values are "exponential", "constant", and "fibonacci". 5557 DelayFunction string 5558 5559 // MaxDelay is an upper bound on the delay. 5560 MaxDelay time.Duration 5561 5562 // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set 5563 // between reschedule attempts. 5564 Unlimited bool 5565 } 5566 5567 func (r *ReschedulePolicy) Copy() *ReschedulePolicy { 5568 if r == nil { 5569 return nil 5570 } 5571 nrp := new(ReschedulePolicy) 5572 *nrp = *r 5573 return nrp 5574 } 5575 5576 func (r *ReschedulePolicy) Enabled() bool { 5577 enabled := r != nil && (r.Attempts > 0 || r.Unlimited) 5578 return enabled 5579 } 5580 5581 // Validate uses different criteria to validate the reschedule policy 5582 // Delay must be a minimum of 5 seconds 5583 // Delay Ceiling is ignored if Delay Function is "constant" 5584 // Number of possible attempts is validated, given the interval, delay and delay function 5585 func (r *ReschedulePolicy) Validate() error { 5586 if !r.Enabled() { 5587 return nil 5588 } 5589 var mErr multierror.Error 5590 // Check for ambiguous/confusing settings 5591 if r.Attempts > 0 { 5592 if r.Interval <= 0 { 5593 _ = multierror.Append(&mErr, fmt.Errorf("Interval must be a non zero value if Attempts > 0")) 5594 } 5595 if r.Unlimited { 5596 _ = multierror.Append(&mErr, fmt.Errorf("Reschedule Policy with Attempts = %v, Interval = %v, "+ 5597 "and Unlimited = %v is ambiguous", r.Attempts, r.Interval, r.Unlimited)) 5598 _ = multierror.Append(&mErr, errors.New("If Attempts >0, Unlimited cannot also be set to true")) 5599 } 5600 } 5601 5602 delayPreCheck := true 5603 // Delay should be bigger than the default 5604 if r.Delay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 5605 _ = multierror.Append(&mErr, fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 5606 delayPreCheck = false 5607 } 5608 5609 // Must use a valid delay function 5610 if !isValidDelayFunction(r.DelayFunction) { 5611 _ = multierror.Append(&mErr, fmt.Errorf("Invalid delay function %q, must be one of %q", r.DelayFunction, RescheduleDelayFunctions)) 5612 delayPreCheck = false 5613 } 5614 5615 // Validate MaxDelay if not using linear delay progression 5616 if r.DelayFunction != "constant" { 5617 if r.MaxDelay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 5618 _ = multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 5619 delayPreCheck = false 5620 } 5621 if r.MaxDelay < r.Delay { 5622 _ = multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than Delay %v (got %v)", r.Delay, r.MaxDelay)) 5623 delayPreCheck = false 5624 } 5625 5626 } 5627 5628 // Validate Interval and other delay parameters if attempts are limited 5629 if !r.Unlimited { 5630 if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { 5631 _ = multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) 5632 } 5633 if !delayPreCheck { 5634 // We can't cross validate the rest of the delay params if delayPreCheck fails, so return early 5635 return mErr.ErrorOrNil() 5636 } 5637 crossValidationErr := r.validateDelayParams() 5638 if crossValidationErr != nil { 5639 _ = multierror.Append(&mErr, crossValidationErr) 5640 } 5641 } 5642 return mErr.ErrorOrNil() 5643 } 5644 5645 func isValidDelayFunction(delayFunc string) bool { 5646 for _, value := range RescheduleDelayFunctions { 5647 if value == delayFunc { 5648 return true 5649 } 5650 } 5651 return false 5652 } 5653 5654 func (r *ReschedulePolicy) validateDelayParams() error { 5655 ok, possibleAttempts, recommendedInterval := r.viableAttempts() 5656 if ok { 5657 return nil 5658 } 5659 var mErr multierror.Error 5660 if r.DelayFunction == "constant" { 5661 _ = multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and "+ 5662 "delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction)) 5663 } else { 5664 _ = multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ 5665 "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay)) 5666 } 5667 _ = multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts)) 5668 return mErr.ErrorOrNil() 5669 } 5670 5671 func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) { 5672 var possibleAttempts int 5673 var recommendedInterval time.Duration 5674 valid := true 5675 switch r.DelayFunction { 5676 case "constant": 5677 recommendedInterval = time.Duration(r.Attempts) * r.Delay 5678 if r.Interval < recommendedInterval { 5679 possibleAttempts = int(r.Interval / r.Delay) 5680 valid = false 5681 } 5682 case "exponential": 5683 for i := 0; i < r.Attempts; i++ { 5684 nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay 5685 if nextDelay > r.MaxDelay { 5686 nextDelay = r.MaxDelay 5687 recommendedInterval += nextDelay 5688 } else { 5689 recommendedInterval = nextDelay 5690 } 5691 if recommendedInterval < r.Interval { 5692 possibleAttempts++ 5693 } 5694 } 5695 if possibleAttempts < r.Attempts { 5696 valid = false 5697 } 5698 case "fibonacci": 5699 var slots []time.Duration 5700 slots = append(slots, r.Delay) 5701 slots = append(slots, r.Delay) 5702 reachedCeiling := false 5703 for i := 2; i < r.Attempts; i++ { 5704 var nextDelay time.Duration 5705 if reachedCeiling { 5706 //switch to linear 5707 nextDelay = slots[i-1] + r.MaxDelay 5708 } else { 5709 nextDelay = slots[i-1] + slots[i-2] 5710 if nextDelay > r.MaxDelay { 5711 nextDelay = r.MaxDelay 5712 reachedCeiling = true 5713 } 5714 } 5715 slots = append(slots, nextDelay) 5716 } 5717 recommendedInterval = slots[len(slots)-1] 5718 if r.Interval < recommendedInterval { 5719 valid = false 5720 // calculate possible attempts 5721 for i := 0; i < len(slots); i++ { 5722 if slots[i] > r.Interval { 5723 possibleAttempts = i 5724 break 5725 } 5726 } 5727 } 5728 default: 5729 return false, 0, 0 5730 } 5731 if possibleAttempts < 0 { // can happen if delay is bigger than interval 5732 possibleAttempts = 0 5733 } 5734 return valid, possibleAttempts, recommendedInterval 5735 } 5736 5737 func NewReschedulePolicy(jobType string) *ReschedulePolicy { 5738 switch jobType { 5739 case JobTypeService: 5740 rp := DefaultServiceJobReschedulePolicy 5741 return &rp 5742 case JobTypeBatch: 5743 rp := DefaultBatchJobReschedulePolicy 5744 return &rp 5745 } 5746 return nil 5747 } 5748 5749 const ( 5750 MigrateStrategyHealthChecks = "checks" 5751 MigrateStrategyHealthStates = "task_states" 5752 ) 5753 5754 type MigrateStrategy struct { 5755 MaxParallel int 5756 HealthCheck string 5757 MinHealthyTime time.Duration 5758 HealthyDeadline time.Duration 5759 } 5760 5761 // DefaultMigrateStrategy is used for backwards compat with pre-0.8 Allocations 5762 // that lack an update strategy. 5763 // 5764 // This function should match its counterpart in api/tasks.go 5765 func DefaultMigrateStrategy() *MigrateStrategy { 5766 return &MigrateStrategy{ 5767 MaxParallel: 1, 5768 HealthCheck: MigrateStrategyHealthChecks, 5769 MinHealthyTime: 10 * time.Second, 5770 HealthyDeadline: 5 * time.Minute, 5771 } 5772 } 5773 5774 func (m *MigrateStrategy) Validate() error { 5775 var mErr multierror.Error 5776 5777 if m.MaxParallel < 0 { 5778 _ = multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel)) 5779 } 5780 5781 switch m.HealthCheck { 5782 case MigrateStrategyHealthChecks, MigrateStrategyHealthStates: 5783 // ok 5784 case "": 5785 if m.MaxParallel > 0 { 5786 _ = multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck")) 5787 } 5788 default: 5789 _ = multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck)) 5790 } 5791 5792 if m.MinHealthyTime < 0 { 5793 _ = multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime)) 5794 } 5795 5796 if m.HealthyDeadline < 0 { 5797 _ = multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline)) 5798 } 5799 5800 if m.MinHealthyTime > m.HealthyDeadline { 5801 _ = multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline")) 5802 } 5803 5804 return mErr.ErrorOrNil() 5805 } 5806 5807 // TaskGroup is an atomic unit of placement. Each task group belongs to 5808 // a job and may contain any number of tasks. A task group support running 5809 // in many replicas using the same configuration.. 5810 type TaskGroup struct { 5811 // Name of the task group 5812 Name string 5813 5814 // Count is the number of replicas of this task group that should 5815 // be scheduled. 5816 Count int 5817 5818 // Update is used to control the update strategy for this task group 5819 Update *UpdateStrategy 5820 5821 // Migrate is used to control the migration strategy for this task group 5822 Migrate *MigrateStrategy 5823 5824 // Constraints can be specified at a task group level and apply to 5825 // all the tasks contained. 5826 Constraints []*Constraint 5827 5828 // Scaling is the list of autoscaling policies for the TaskGroup 5829 Scaling *ScalingPolicy 5830 5831 // RestartPolicy of a TaskGroup 5832 RestartPolicy *RestartPolicy 5833 5834 // Tasks are the collection of tasks that this task group needs to run 5835 Tasks []*Task 5836 5837 // EphemeralDisk is the disk resources that the task group requests 5838 EphemeralDisk *EphemeralDisk 5839 5840 // Meta is used to associate arbitrary metadata with this 5841 // task group. This is opaque to Nomad. 5842 Meta map[string]string 5843 5844 // ReschedulePolicy is used to configure how the scheduler should 5845 // retry failed allocations. 5846 ReschedulePolicy *ReschedulePolicy 5847 5848 // Affinities can be specified at the task group level to express 5849 // scheduling preferences. 5850 Affinities []*Affinity 5851 5852 // Spread can be specified at the task group level to express spreading 5853 // allocations across a desired attribute, such as datacenter 5854 Spreads []*Spread 5855 5856 // Networks are the network configuration for the task group. This can be 5857 // overridden in the task. 5858 Networks Networks 5859 5860 // Services this group provides 5861 Services []*Service 5862 5863 // Volumes is a map of volumes that have been requested by the task group. 5864 Volumes map[string]*VolumeRequest 5865 5866 // ShutdownDelay is the amount of time to wait between deregistering 5867 // group services in consul and stopping tasks. 5868 ShutdownDelay *time.Duration 5869 5870 // StopAfterClientDisconnect, if set, configures the client to stop the task group 5871 // after this duration since the last known good heartbeat 5872 StopAfterClientDisconnect *time.Duration 5873 } 5874 5875 func (tg *TaskGroup) Copy() *TaskGroup { 5876 if tg == nil { 5877 return nil 5878 } 5879 ntg := new(TaskGroup) 5880 *ntg = *tg 5881 ntg.Update = ntg.Update.Copy() 5882 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 5883 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 5884 ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy() 5885 ntg.Affinities = CopySliceAffinities(ntg.Affinities) 5886 ntg.Spreads = CopySliceSpreads(ntg.Spreads) 5887 ntg.Volumes = CopyMapVolumeRequest(ntg.Volumes) 5888 ntg.Scaling = ntg.Scaling.Copy() 5889 5890 // Copy the network objects 5891 if tg.Networks != nil { 5892 n := len(tg.Networks) 5893 ntg.Networks = make([]*NetworkResource, n) 5894 for i := 0; i < n; i++ { 5895 ntg.Networks[i] = tg.Networks[i].Copy() 5896 } 5897 } 5898 5899 if tg.Tasks != nil { 5900 tasks := make([]*Task, len(ntg.Tasks)) 5901 for i, t := range ntg.Tasks { 5902 tasks[i] = t.Copy() 5903 } 5904 ntg.Tasks = tasks 5905 } 5906 5907 ntg.Meta = helper.CopyMapStringString(ntg.Meta) 5908 5909 if tg.EphemeralDisk != nil { 5910 ntg.EphemeralDisk = tg.EphemeralDisk.Copy() 5911 } 5912 5913 if tg.Services != nil { 5914 ntg.Services = make([]*Service, len(tg.Services)) 5915 for i, s := range tg.Services { 5916 ntg.Services[i] = s.Copy() 5917 } 5918 } 5919 5920 if tg.ShutdownDelay != nil { 5921 ntg.ShutdownDelay = tg.ShutdownDelay 5922 } 5923 5924 if tg.StopAfterClientDisconnect != nil { 5925 ntg.StopAfterClientDisconnect = tg.StopAfterClientDisconnect 5926 } 5927 5928 return ntg 5929 } 5930 5931 // Canonicalize is used to canonicalize fields in the TaskGroup. 5932 func (tg *TaskGroup) Canonicalize(job *Job) { 5933 // Ensure that an empty and nil map are treated the same to avoid scheduling 5934 // problems since we use reflect DeepEquals. 5935 if len(tg.Meta) == 0 { 5936 tg.Meta = nil 5937 } 5938 5939 // Set the default restart policy. 5940 if tg.RestartPolicy == nil { 5941 tg.RestartPolicy = NewRestartPolicy(job.Type) 5942 } 5943 5944 if tg.ReschedulePolicy == nil { 5945 tg.ReschedulePolicy = NewReschedulePolicy(job.Type) 5946 } 5947 5948 // Canonicalize Migrate for service jobs 5949 if job.Type == JobTypeService && tg.Migrate == nil { 5950 tg.Migrate = DefaultMigrateStrategy() 5951 } 5952 5953 // Set a default ephemeral disk object if the user has not requested for one 5954 if tg.EphemeralDisk == nil { 5955 tg.EphemeralDisk = DefaultEphemeralDisk() 5956 } 5957 5958 if tg.Scaling != nil { 5959 tg.Scaling.Canonicalize() 5960 } 5961 5962 for _, service := range tg.Services { 5963 service.Canonicalize(job.Name, tg.Name, "group") 5964 } 5965 5966 for _, network := range tg.Networks { 5967 network.Canonicalize() 5968 } 5969 5970 for _, task := range tg.Tasks { 5971 task.Canonicalize(job, tg) 5972 } 5973 } 5974 5975 // Validate is used to sanity check a task group 5976 func (tg *TaskGroup) Validate(j *Job) error { 5977 var mErr multierror.Error 5978 if tg.Name == "" { 5979 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 5980 } else if strings.Contains(tg.Name, "\000") { 5981 mErr.Errors = append(mErr.Errors, errors.New("Task group name contains null character")) 5982 } 5983 if tg.Count < 0 { 5984 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative")) 5985 } 5986 if len(tg.Tasks) == 0 { 5987 // could be a lone consul gateway inserted by the connect mutator 5988 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 5989 } 5990 5991 for idx, constr := range tg.Constraints { 5992 if err := constr.Validate(); err != nil { 5993 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 5994 mErr.Errors = append(mErr.Errors, outer) 5995 } 5996 } 5997 if j.Type == JobTypeSystem { 5998 if tg.Affinities != nil { 5999 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 6000 } 6001 } else { 6002 for idx, affinity := range tg.Affinities { 6003 if err := affinity.Validate(); err != nil { 6004 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 6005 mErr.Errors = append(mErr.Errors, outer) 6006 } 6007 } 6008 } 6009 6010 if tg.RestartPolicy != nil { 6011 if err := tg.RestartPolicy.Validate(); err != nil { 6012 mErr.Errors = append(mErr.Errors, err) 6013 } 6014 } else { 6015 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 6016 } 6017 6018 if j.Type == JobTypeSystem { 6019 if tg.Spreads != nil { 6020 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza")) 6021 } 6022 } else { 6023 for idx, spread := range tg.Spreads { 6024 if err := spread.Validate(); err != nil { 6025 outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err) 6026 mErr.Errors = append(mErr.Errors, outer) 6027 } 6028 } 6029 } 6030 6031 if j.Type == JobTypeSystem { 6032 if tg.ReschedulePolicy != nil { 6033 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy")) 6034 } 6035 } else { 6036 if tg.ReschedulePolicy != nil { 6037 if err := tg.ReschedulePolicy.Validate(); err != nil { 6038 mErr.Errors = append(mErr.Errors, err) 6039 } 6040 } else { 6041 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name)) 6042 } 6043 } 6044 6045 if tg.EphemeralDisk != nil { 6046 if err := tg.EphemeralDisk.Validate(); err != nil { 6047 mErr.Errors = append(mErr.Errors, err) 6048 } 6049 } else { 6050 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name)) 6051 } 6052 6053 // Validate the update strategy 6054 if u := tg.Update; u != nil { 6055 switch j.Type { 6056 case JobTypeService, JobTypeSystem: 6057 default: 6058 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type)) 6059 } 6060 if err := u.Validate(); err != nil { 6061 mErr.Errors = append(mErr.Errors, err) 6062 } 6063 } 6064 6065 // Validate the migration strategy 6066 switch j.Type { 6067 case JobTypeService: 6068 if tg.Migrate != nil { 6069 if err := tg.Migrate.Validate(); err != nil { 6070 mErr.Errors = append(mErr.Errors, err) 6071 } 6072 } 6073 default: 6074 if tg.Migrate != nil { 6075 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow migrate block", j.Type)) 6076 } 6077 } 6078 6079 // Check that there is only one leader task if any 6080 tasks := make(map[string]int) 6081 leaderTasks := 0 6082 for idx, task := range tg.Tasks { 6083 if task.Name == "" { 6084 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 6085 } else if existing, ok := tasks[task.Name]; ok { 6086 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 6087 } else { 6088 tasks[task.Name] = idx 6089 } 6090 6091 if task.Leader { 6092 leaderTasks++ 6093 } 6094 } 6095 6096 if leaderTasks > 1 { 6097 mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader")) 6098 } 6099 6100 // Validate the Host Volumes 6101 for name, decl := range tg.Volumes { 6102 if !(decl.Type == VolumeTypeHost || 6103 decl.Type == VolumeTypeCSI) { 6104 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has unrecognised type %s", name, decl.Type)) 6105 continue 6106 } 6107 6108 if decl.Source == "" { 6109 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume %s has an empty source", name)) 6110 } 6111 } 6112 6113 // Validate task group and task network resources 6114 if err := tg.validateNetworks(); err != nil { 6115 outer := fmt.Errorf("Task group network validation failed: %v", err) 6116 mErr.Errors = append(mErr.Errors, outer) 6117 } 6118 6119 // Validate task group and task services 6120 if err := tg.validateServices(); err != nil { 6121 outer := fmt.Errorf("Task group service validation failed: %v", err) 6122 mErr.Errors = append(mErr.Errors, outer) 6123 } 6124 6125 // Validate group service script-checks 6126 if err := tg.validateScriptChecksInGroupServices(); err != nil { 6127 outer := fmt.Errorf("Task group service check validation failed: %v", err) 6128 mErr.Errors = append(mErr.Errors, outer) 6129 } 6130 6131 // Validate the scaling policy 6132 if err := tg.validateScalingPolicy(j); err != nil { 6133 outer := fmt.Errorf("Task group scaling policy validation failed: %v", err) 6134 mErr.Errors = append(mErr.Errors, outer) 6135 } 6136 6137 // Validate the tasks 6138 for _, task := range tg.Tasks { 6139 // Validate the task does not reference undefined volume mounts 6140 for i, mnt := range task.VolumeMounts { 6141 if mnt.Volume == "" { 6142 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing an empty volume", task.Name, i)) 6143 continue 6144 } 6145 6146 if _, ok := tg.Volumes[mnt.Volume]; !ok { 6147 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing undefined volume %s", task.Name, i, mnt.Volume)) 6148 continue 6149 } 6150 } 6151 6152 if err := task.Validate(tg.EphemeralDisk, j.Type, tg.Services, tg.Networks); err != nil { 6153 outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err) 6154 mErr.Errors = append(mErr.Errors, outer) 6155 } 6156 } 6157 return mErr.ErrorOrNil() 6158 } 6159 6160 func (tg *TaskGroup) validateNetworks() error { 6161 var mErr multierror.Error 6162 portLabels := make(map[string]string) 6163 // host_network -> static port tracking 6164 staticPortsIndex := make(map[string]map[int]string) 6165 6166 for _, net := range tg.Networks { 6167 for _, port := range append(net.ReservedPorts, net.DynamicPorts...) { 6168 if other, ok := portLabels[port.Label]; ok { 6169 mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other)) 6170 } else { 6171 portLabels[port.Label] = "taskgroup network" 6172 } 6173 6174 if port.Value != 0 { 6175 hostNetwork := port.HostNetwork 6176 if hostNetwork == "" { 6177 hostNetwork = "default" 6178 } 6179 staticPorts, ok := staticPortsIndex[hostNetwork] 6180 if !ok { 6181 staticPorts = make(map[int]string) 6182 } 6183 // static port 6184 if other, ok := staticPorts[port.Value]; ok { 6185 err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other) 6186 mErr.Errors = append(mErr.Errors, err) 6187 } else if port.Value > math.MaxUint16 { 6188 err := fmt.Errorf("Port %s (%d) cannot be greater than %d", port.Label, port.Value, math.MaxUint16) 6189 mErr.Errors = append(mErr.Errors, err) 6190 } else { 6191 staticPorts[port.Value] = fmt.Sprintf("taskgroup network:%s", port.Label) 6192 staticPortsIndex[hostNetwork] = staticPorts 6193 } 6194 } 6195 6196 if port.To < -1 { 6197 err := fmt.Errorf("Port %q cannot be mapped to negative value %d", port.Label, port.To) 6198 mErr.Errors = append(mErr.Errors, err) 6199 } else if port.To > math.MaxUint16 { 6200 err := fmt.Errorf("Port %q cannot be mapped to a port (%d) greater than %d", port.Label, port.To, math.MaxUint16) 6201 mErr.Errors = append(mErr.Errors, err) 6202 } 6203 } 6204 } 6205 // Check for duplicate tasks or port labels, and no duplicated static ports 6206 for _, task := range tg.Tasks { 6207 if task.Resources == nil { 6208 continue 6209 } 6210 6211 for _, net := range task.Resources.Networks { 6212 for _, port := range append(net.ReservedPorts, net.DynamicPorts...) { 6213 if other, ok := portLabels[port.Label]; ok { 6214 mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other)) 6215 } 6216 6217 if port.Value != 0 { 6218 hostNetwork := port.HostNetwork 6219 if hostNetwork == "" { 6220 hostNetwork = "default" 6221 } 6222 staticPorts, ok := staticPortsIndex[hostNetwork] 6223 if !ok { 6224 staticPorts = make(map[int]string) 6225 } 6226 if other, ok := staticPorts[port.Value]; ok { 6227 err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other) 6228 mErr.Errors = append(mErr.Errors, err) 6229 } else if port.Value > math.MaxUint16 { 6230 err := fmt.Errorf("Port %s (%d) cannot be greater than %d", port.Label, port.Value, math.MaxUint16) 6231 mErr.Errors = append(mErr.Errors, err) 6232 } else { 6233 staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label) 6234 staticPortsIndex[hostNetwork] = staticPorts 6235 } 6236 } 6237 } 6238 } 6239 } 6240 return mErr.ErrorOrNil() 6241 } 6242 6243 // validateServices runs Service.Validate() on group-level services, 6244 // checks that group services do not conflict with task services and that 6245 // group service checks that refer to tasks only refer to tasks that exist. 6246 func (tg *TaskGroup) validateServices() error { 6247 var mErr multierror.Error 6248 knownTasks := make(map[string]struct{}) 6249 knownServices := make(map[string]struct{}) 6250 6251 // Create a map of known tasks and their services so we can compare 6252 // vs the group-level services and checks 6253 for _, task := range tg.Tasks { 6254 knownTasks[task.Name] = struct{}{} 6255 if task.Services == nil { 6256 continue 6257 } 6258 for _, service := range task.Services { 6259 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 6260 mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name)) 6261 } 6262 for _, check := range service.Checks { 6263 if check.TaskName != "" { 6264 mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %s is invalid: only task group service checks can be assigned tasks", check.Name)) 6265 } 6266 } 6267 knownServices[service.Name+service.PortLabel] = struct{}{} 6268 } 6269 } 6270 for i, service := range tg.Services { 6271 if err := service.Validate(); err != nil { 6272 outer := fmt.Errorf("Service[%d] %s validation failed: %s", i, service.Name, err) 6273 mErr.Errors = append(mErr.Errors, outer) 6274 // we break here to avoid the risk of crashing on null-pointer 6275 // access in a later step, accepting that we might miss out on 6276 // error messages to provide the user. 6277 continue 6278 } 6279 if service.AddressMode == AddressModeDriver { 6280 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot use address_mode=\"driver\", only services defined in a \"task\" block can use this mode", service.Name)) 6281 } 6282 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 6283 mErr.Errors = append(mErr.Errors, fmt.Errorf("Service %s is duplicate", service.Name)) 6284 } 6285 knownServices[service.Name+service.PortLabel] = struct{}{} 6286 for _, check := range service.Checks { 6287 if check.TaskName != "" { 6288 if check.Type != ServiceCheckScript && check.Type != ServiceCheckGRPC { 6289 mErr.Errors = append(mErr.Errors, 6290 fmt.Errorf("Check %s invalid: only script and gRPC checks should have tasks", check.Name)) 6291 } 6292 if check.AddressMode == AddressModeDriver { 6293 mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %q invalid: cannot use address_mode=\"driver\", only checks defined in a \"task\" service block can use this mode", service.Name)) 6294 } 6295 if _, ok := knownTasks[check.TaskName]; !ok { 6296 mErr.Errors = append(mErr.Errors, 6297 fmt.Errorf("Check %s invalid: refers to non-existent task %s", check.Name, check.TaskName)) 6298 } 6299 } 6300 } 6301 } 6302 return mErr.ErrorOrNil() 6303 } 6304 6305 // validateScriptChecksInGroupServices ensures group-level services with script 6306 // checks know what task driver to use. Either the service.task or service.check.task 6307 // parameter must be configured. 6308 func (tg *TaskGroup) validateScriptChecksInGroupServices() error { 6309 var mErr multierror.Error 6310 for _, service := range tg.Services { 6311 if service.TaskName == "" { 6312 for _, check := range service.Checks { 6313 if check.Type == "script" && check.TaskName == "" { 6314 mErr.Errors = append(mErr.Errors, 6315 fmt.Errorf("Service [%s]->%s or Check %s must specify task parameter", 6316 tg.Name, service.Name, check.Name, 6317 )) 6318 } 6319 } 6320 } 6321 } 6322 return mErr.ErrorOrNil() 6323 } 6324 6325 // validateScalingPolicy ensures that the scaling policy has consistent 6326 // min and max, not in conflict with the task group count 6327 func (tg *TaskGroup) validateScalingPolicy(j *Job) error { 6328 if tg.Scaling == nil { 6329 return nil 6330 } 6331 6332 var mErr multierror.Error 6333 6334 err := tg.Scaling.Validate() 6335 if err != nil { 6336 // prefix scaling policy errors 6337 if me, ok := err.(*multierror.Error); ok { 6338 for _, e := range me.Errors { 6339 mErr.Errors = append(mErr.Errors, fmt.Errorf("Scaling policy invalid: %s", e)) 6340 } 6341 } 6342 } 6343 6344 if tg.Scaling.Max < int64(tg.Count) { 6345 mErr.Errors = append(mErr.Errors, 6346 fmt.Errorf("Scaling policy invalid: task group count must not be greater than maximum count in scaling policy")) 6347 } 6348 6349 if int64(tg.Count) < tg.Scaling.Min && !(j.IsMultiregion() && tg.Count == 0 && j.Region == "global") { 6350 mErr.Errors = append(mErr.Errors, 6351 fmt.Errorf("Scaling policy invalid: task group count must not be less than minimum count in scaling policy")) 6352 } 6353 6354 return mErr.ErrorOrNil() 6355 } 6356 6357 // Warnings returns a list of warnings that may be from dubious settings or 6358 // deprecation warnings. 6359 func (tg *TaskGroup) Warnings(j *Job) error { 6360 var mErr multierror.Error 6361 6362 // Validate the update strategy 6363 if u := tg.Update; u != nil { 6364 // Check the counts are appropriate 6365 if u.MaxParallel > tg.Count && !(j.IsMultiregion() && tg.Count == 0) { 6366 mErr.Errors = append(mErr.Errors, 6367 fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+ 6368 "A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count)) 6369 } 6370 } 6371 6372 // Check for mbits network field 6373 if len(tg.Networks) > 0 && tg.Networks[0].MBits > 0 { 6374 mErr.Errors = append(mErr.Errors, fmt.Errorf("mbits has been deprecated as of Nomad 0.12.0. Please remove mbits from the network block")) 6375 } 6376 6377 for _, t := range tg.Tasks { 6378 if err := t.Warnings(); err != nil { 6379 err = multierror.Prefix(err, fmt.Sprintf("Task %q:", t.Name)) 6380 mErr.Errors = append(mErr.Errors, err) 6381 } 6382 } 6383 6384 return mErr.ErrorOrNil() 6385 } 6386 6387 // LookupTask finds a task by name 6388 func (tg *TaskGroup) LookupTask(name string) *Task { 6389 for _, t := range tg.Tasks { 6390 if t.Name == name { 6391 return t 6392 } 6393 } 6394 return nil 6395 } 6396 6397 // UsesConnect for convenience returns true if the TaskGroup contains at least 6398 // one service that makes use of Consul Connect features. 6399 // 6400 // Currently used for validating that the task group contains one or more connect 6401 // aware services before generating a service identity token. 6402 func (tg *TaskGroup) UsesConnect() bool { 6403 for _, service := range tg.Services { 6404 if service.Connect != nil { 6405 if service.Connect.IsNative() || service.Connect.HasSidecar() || service.Connect.IsGateway() { 6406 return true 6407 } 6408 } 6409 } 6410 return false 6411 } 6412 6413 // UsesConnectGateway for convenience returns true if the TaskGroup contains at 6414 // least one service that makes use of Consul Connect Gateway features. 6415 func (tg *TaskGroup) UsesConnectGateway() bool { 6416 for _, service := range tg.Services { 6417 if service.Connect != nil { 6418 if service.Connect.IsGateway() { 6419 return true 6420 } 6421 } 6422 } 6423 return false 6424 } 6425 6426 func (tg *TaskGroup) GoString() string { 6427 return fmt.Sprintf("*%#v", *tg) 6428 } 6429 6430 // CheckRestart describes if and when a task should be restarted based on 6431 // failing health checks. 6432 type CheckRestart struct { 6433 Limit int // Restart task after this many unhealthy intervals 6434 Grace time.Duration // Grace time to give tasks after starting to get healthy 6435 IgnoreWarnings bool // If true treat checks in `warning` as passing 6436 } 6437 6438 func (c *CheckRestart) Copy() *CheckRestart { 6439 if c == nil { 6440 return nil 6441 } 6442 6443 nc := new(CheckRestart) 6444 *nc = *c 6445 return nc 6446 } 6447 6448 func (c *CheckRestart) Equals(o *CheckRestart) bool { 6449 if c == nil || o == nil { 6450 return c == o 6451 } 6452 6453 if c.Limit != o.Limit { 6454 return false 6455 } 6456 6457 if c.Grace != o.Grace { 6458 return false 6459 } 6460 6461 if c.IgnoreWarnings != o.IgnoreWarnings { 6462 return false 6463 } 6464 6465 return true 6466 } 6467 6468 func (c *CheckRestart) Validate() error { 6469 if c == nil { 6470 return nil 6471 } 6472 6473 var mErr multierror.Error 6474 if c.Limit < 0 { 6475 mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit)) 6476 } 6477 6478 if c.Grace < 0 { 6479 mErr.Errors = append(mErr.Errors, fmt.Errorf("grace period must be greater than or equal to 0 but found %d", c.Grace)) 6480 } 6481 6482 return mErr.ErrorOrNil() 6483 } 6484 6485 const ( 6486 // DefaultKillTimeout is the default timeout between signaling a task it 6487 // will be killed and killing it. 6488 DefaultKillTimeout = 5 * time.Second 6489 ) 6490 6491 // LogConfig provides configuration for log rotation 6492 type LogConfig struct { 6493 MaxFiles int 6494 MaxFileSizeMB int 6495 } 6496 6497 func (l *LogConfig) Equals(o *LogConfig) bool { 6498 if l == nil || o == nil { 6499 return l == o 6500 } 6501 6502 if l.MaxFiles != o.MaxFiles { 6503 return false 6504 } 6505 6506 if l.MaxFileSizeMB != o.MaxFileSizeMB { 6507 return false 6508 } 6509 6510 return true 6511 } 6512 6513 func (l *LogConfig) Copy() *LogConfig { 6514 if l == nil { 6515 return nil 6516 } 6517 return &LogConfig{ 6518 MaxFiles: l.MaxFiles, 6519 MaxFileSizeMB: l.MaxFileSizeMB, 6520 } 6521 } 6522 6523 // DefaultLogConfig returns the default LogConfig values. 6524 func DefaultLogConfig() *LogConfig { 6525 return &LogConfig{ 6526 MaxFiles: 10, 6527 MaxFileSizeMB: 10, 6528 } 6529 } 6530 6531 // Validate returns an error if the log config specified are less than 6532 // the minimum allowed. 6533 func (l *LogConfig) Validate() error { 6534 var mErr multierror.Error 6535 if l.MaxFiles < 1 { 6536 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles)) 6537 } 6538 if l.MaxFileSizeMB < 1 { 6539 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB)) 6540 } 6541 return mErr.ErrorOrNil() 6542 } 6543 6544 // Task is a single process typically that is executed as part of a task group. 6545 type Task struct { 6546 // Name of the task 6547 Name string 6548 6549 // Driver is used to control which driver is used 6550 Driver string 6551 6552 // User is used to determine which user will run the task. It defaults to 6553 // the same user the Nomad client is being run as. 6554 User string 6555 6556 // Config is provided to the driver to initialize 6557 Config map[string]interface{} 6558 6559 // Map of environment variables to be used by the driver 6560 Env map[string]string 6561 6562 // List of service definitions exposed by the Task 6563 Services []*Service 6564 6565 // Vault is used to define the set of Vault policies that this task should 6566 // have access to. 6567 Vault *Vault 6568 6569 // Templates are the set of templates to be rendered for the task. 6570 Templates []*Template 6571 6572 // Constraints can be specified at a task level and apply only to 6573 // the particular task. 6574 Constraints []*Constraint 6575 6576 // Affinities can be specified at the task level to express 6577 // scheduling preferences 6578 Affinities []*Affinity 6579 6580 // Resources is the resources needed by this task 6581 Resources *Resources 6582 6583 // RestartPolicy of a TaskGroup 6584 RestartPolicy *RestartPolicy 6585 6586 // DispatchPayload configures how the task retrieves its input from a dispatch 6587 DispatchPayload *DispatchPayloadConfig 6588 6589 Lifecycle *TaskLifecycleConfig 6590 6591 // Meta is used to associate arbitrary metadata with this 6592 // task. This is opaque to Nomad. 6593 Meta map[string]string 6594 6595 // KillTimeout is the time between signaling a task that it will be 6596 // killed and killing it. 6597 KillTimeout time.Duration 6598 6599 // LogConfig provides configuration for log rotation 6600 LogConfig *LogConfig 6601 6602 // Artifacts is a list of artifacts to download and extract before running 6603 // the task. 6604 Artifacts []*TaskArtifact 6605 6606 // Leader marks the task as the leader within the group. When the leader 6607 // task exits, other tasks will be gracefully terminated. 6608 Leader bool 6609 6610 // ShutdownDelay is the duration of the delay between deregistering a 6611 // task from Consul and sending it a signal to shutdown. See #2441 6612 ShutdownDelay time.Duration 6613 6614 // VolumeMounts is a list of Volume name <-> mount configurations that will be 6615 // attached to this task. 6616 VolumeMounts []*VolumeMount 6617 6618 // ScalingPolicies is a list of scaling policies scoped to this task 6619 ScalingPolicies []*ScalingPolicy 6620 6621 // KillSignal is the kill signal to use for the task. This is an optional 6622 // specification and defaults to SIGINT 6623 KillSignal string 6624 6625 // Used internally to manage tasks according to their TaskKind. Initial use case 6626 // is for Consul Connect 6627 Kind TaskKind 6628 6629 // CSIPluginConfig is used to configure the plugin supervisor for the task. 6630 CSIPluginConfig *TaskCSIPluginConfig 6631 } 6632 6633 // UsesConnect is for conveniently detecting if the Task is able to make use 6634 // of Consul Connect features. This will be indicated in the TaskKind of the 6635 // Task, which exports known types of Tasks. UsesConnect will be true if the 6636 // task is a connect proxy, connect native, or is a connect gateway. 6637 func (t *Task) UsesConnect() bool { 6638 return t.Kind.IsConnectNative() || t.UsesConnectSidecar() 6639 } 6640 6641 func (t *Task) UsesConnectSidecar() bool { 6642 return t.Kind.IsConnectProxy() || t.Kind.IsAnyConnectGateway() 6643 } 6644 6645 func (t *Task) Copy() *Task { 6646 if t == nil { 6647 return nil 6648 } 6649 nt := new(Task) 6650 *nt = *t 6651 nt.Env = helper.CopyMapStringString(nt.Env) 6652 6653 if t.Services != nil { 6654 services := make([]*Service, len(nt.Services)) 6655 for i, s := range nt.Services { 6656 services[i] = s.Copy() 6657 } 6658 nt.Services = services 6659 } 6660 6661 nt.Constraints = CopySliceConstraints(nt.Constraints) 6662 nt.Affinities = CopySliceAffinities(nt.Affinities) 6663 nt.VolumeMounts = CopySliceVolumeMount(nt.VolumeMounts) 6664 nt.CSIPluginConfig = nt.CSIPluginConfig.Copy() 6665 6666 nt.Vault = nt.Vault.Copy() 6667 nt.Resources = nt.Resources.Copy() 6668 nt.LogConfig = nt.LogConfig.Copy() 6669 nt.Meta = helper.CopyMapStringString(nt.Meta) 6670 nt.DispatchPayload = nt.DispatchPayload.Copy() 6671 nt.Lifecycle = nt.Lifecycle.Copy() 6672 6673 if t.Artifacts != nil { 6674 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 6675 for _, a := range nt.Artifacts { 6676 artifacts = append(artifacts, a.Copy()) 6677 } 6678 nt.Artifacts = artifacts 6679 } 6680 6681 if i, err := copystructure.Copy(nt.Config); err != nil { 6682 panic(err.Error()) 6683 } else { 6684 nt.Config = i.(map[string]interface{}) 6685 } 6686 6687 if t.Templates != nil { 6688 templates := make([]*Template, len(t.Templates)) 6689 for i, tmpl := range nt.Templates { 6690 templates[i] = tmpl.Copy() 6691 } 6692 nt.Templates = templates 6693 } 6694 6695 return nt 6696 } 6697 6698 // Canonicalize canonicalizes fields in the task. 6699 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 6700 // Ensure that an empty and nil map are treated the same to avoid scheduling 6701 // problems since we use reflect DeepEquals. 6702 if len(t.Meta) == 0 { 6703 t.Meta = nil 6704 } 6705 if len(t.Config) == 0 { 6706 t.Config = nil 6707 } 6708 if len(t.Env) == 0 { 6709 t.Env = nil 6710 } 6711 6712 for _, service := range t.Services { 6713 service.Canonicalize(job.Name, tg.Name, t.Name) 6714 } 6715 6716 // If Resources are nil initialize them to defaults, otherwise canonicalize 6717 if t.Resources == nil { 6718 t.Resources = DefaultResources() 6719 } else { 6720 t.Resources.Canonicalize() 6721 } 6722 6723 if t.RestartPolicy == nil { 6724 t.RestartPolicy = tg.RestartPolicy 6725 } 6726 6727 // Set the default timeout if it is not specified. 6728 if t.KillTimeout == 0 { 6729 t.KillTimeout = DefaultKillTimeout 6730 } 6731 6732 if t.Vault != nil { 6733 t.Vault.Canonicalize() 6734 } 6735 6736 for _, template := range t.Templates { 6737 template.Canonicalize() 6738 } 6739 } 6740 6741 func (t *Task) GoString() string { 6742 return fmt.Sprintf("*%#v", *t) 6743 } 6744 6745 // Validate is used to sanity check a task 6746 func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string, tgServices []*Service, tgNetworks Networks) error { 6747 var mErr multierror.Error 6748 if t.Name == "" { 6749 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 6750 } 6751 if strings.ContainsAny(t.Name, `/\`) { 6752 // We enforce this so that when creating the directory on disk it will 6753 // not have any slashes. 6754 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 6755 } else if strings.Contains(t.Name, "\000") { 6756 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include null characters")) 6757 } 6758 if t.Driver == "" { 6759 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 6760 } 6761 if t.KillTimeout < 0 { 6762 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 6763 } 6764 if t.ShutdownDelay < 0 { 6765 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 6766 } 6767 6768 // Validate the resources. 6769 if t.Resources == nil { 6770 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 6771 } else if err := t.Resources.Validate(); err != nil { 6772 mErr.Errors = append(mErr.Errors, err) 6773 } 6774 6775 // Validate the log config 6776 if t.LogConfig == nil { 6777 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 6778 } else if err := t.LogConfig.Validate(); err != nil { 6779 mErr.Errors = append(mErr.Errors, err) 6780 } 6781 6782 for idx, constr := range t.Constraints { 6783 if err := constr.Validate(); err != nil { 6784 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 6785 mErr.Errors = append(mErr.Errors, outer) 6786 } 6787 6788 switch constr.Operand { 6789 case ConstraintDistinctHosts, ConstraintDistinctProperty: 6790 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand) 6791 mErr.Errors = append(mErr.Errors, outer) 6792 } 6793 } 6794 6795 if jobType == JobTypeSystem { 6796 if t.Affinities != nil { 6797 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 6798 } 6799 } else { 6800 for idx, affinity := range t.Affinities { 6801 if err := affinity.Validate(); err != nil { 6802 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 6803 mErr.Errors = append(mErr.Errors, outer) 6804 } 6805 } 6806 } 6807 6808 // Validate Services 6809 if err := validateServices(t, tgNetworks); err != nil { 6810 mErr.Errors = append(mErr.Errors, err) 6811 } 6812 6813 if t.LogConfig != nil && ephemeralDisk != nil { 6814 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 6815 if ephemeralDisk.SizeMB <= logUsage { 6816 mErr.Errors = append(mErr.Errors, 6817 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 6818 logUsage, ephemeralDisk.SizeMB)) 6819 } 6820 } 6821 6822 for idx, artifact := range t.Artifacts { 6823 if err := artifact.Validate(); err != nil { 6824 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 6825 mErr.Errors = append(mErr.Errors, outer) 6826 } 6827 } 6828 6829 if t.Vault != nil { 6830 if err := t.Vault.Validate(); err != nil { 6831 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 6832 } 6833 } 6834 6835 destinations := make(map[string]int, len(t.Templates)) 6836 for idx, tmpl := range t.Templates { 6837 if err := tmpl.Validate(); err != nil { 6838 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 6839 mErr.Errors = append(mErr.Errors, outer) 6840 } 6841 6842 if other, ok := destinations[tmpl.DestPath]; ok { 6843 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 6844 mErr.Errors = append(mErr.Errors, outer) 6845 } else { 6846 destinations[tmpl.DestPath] = idx + 1 6847 } 6848 } 6849 6850 // Validate the dispatch payload block if there 6851 if t.DispatchPayload != nil { 6852 if err := t.DispatchPayload.Validate(); err != nil { 6853 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err)) 6854 } 6855 } 6856 6857 // Validate the Lifecycle block if there 6858 if t.Lifecycle != nil { 6859 if err := t.Lifecycle.Validate(); err != nil { 6860 mErr.Errors = append(mErr.Errors, fmt.Errorf("Lifecycle validation failed: %v", err)) 6861 } 6862 6863 } 6864 6865 // Validation for TaskKind field which is used for Consul Connect integration 6866 if t.Kind.IsConnectProxy() { 6867 // This task is a Connect proxy so it should not have service stanzas 6868 if len(t.Services) > 0 { 6869 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have a service stanza")) 6870 } 6871 if t.Leader { 6872 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have leader set")) 6873 } 6874 6875 // Ensure the proxy task has a corresponding service entry 6876 serviceErr := ValidateConnectProxyService(t.Kind.Value(), tgServices) 6877 if serviceErr != nil { 6878 mErr.Errors = append(mErr.Errors, serviceErr) 6879 } 6880 } 6881 6882 // Validation for volumes 6883 for idx, vm := range t.VolumeMounts { 6884 if !MountPropagationModeIsValid(vm.PropagationMode) { 6885 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume Mount (%d) has an invalid propagation mode: \"%s\"", idx, vm.PropagationMode)) 6886 } 6887 } 6888 6889 // Validate CSI Plugin Config 6890 if t.CSIPluginConfig != nil { 6891 if t.CSIPluginConfig.ID == "" { 6892 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig must have a non-empty PluginID")) 6893 } 6894 6895 if !CSIPluginTypeIsValid(t.CSIPluginConfig.Type) { 6896 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig PluginType must be one of 'node', 'controller', or 'monolith', got: \"%s\"", t.CSIPluginConfig.Type)) 6897 } 6898 6899 // TODO: Investigate validation of the PluginMountDir. Not much we can do apart from check IsAbs until after we understand its execution environment though :( 6900 } 6901 6902 return mErr.ErrorOrNil() 6903 } 6904 6905 // validateServices takes a task and validates the services within it are valid 6906 // and reference ports that exist. 6907 func validateServices(t *Task, tgNetworks Networks) error { 6908 var mErr multierror.Error 6909 6910 // Ensure that services don't ask for nonexistent ports and their names are 6911 // unique. 6912 servicePorts := make(map[string]map[string]struct{}) 6913 addServicePort := func(label, service string) { 6914 if _, ok := servicePorts[label]; !ok { 6915 servicePorts[label] = map[string]struct{}{} 6916 } 6917 servicePorts[label][service] = struct{}{} 6918 } 6919 knownServices := make(map[string]struct{}) 6920 for i, service := range t.Services { 6921 if err := service.Validate(); err != nil { 6922 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 6923 mErr.Errors = append(mErr.Errors, outer) 6924 } 6925 6926 if service.AddressMode == AddressModeAlloc { 6927 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot use address_mode=\"alloc\", only services defined in a \"group\" block can use this mode", service.Name)) 6928 } 6929 6930 // Ensure that services with the same name are not being registered for 6931 // the same port 6932 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 6933 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 6934 } 6935 knownServices[service.Name+service.PortLabel] = struct{}{} 6936 6937 if service.PortLabel != "" { 6938 if service.AddressMode == "driver" { 6939 // Numeric port labels are valid for address_mode=driver 6940 _, err := strconv.Atoi(service.PortLabel) 6941 if err != nil { 6942 // Not a numeric port label, add it to list to check 6943 addServicePort(service.PortLabel, service.Name) 6944 } 6945 } else { 6946 addServicePort(service.PortLabel, service.Name) 6947 } 6948 } 6949 6950 // connect block is only allowed on group level 6951 if service.Connect != nil { 6952 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot have \"connect\" block, only services defined in a \"group\" block can", service.Name)) 6953 } 6954 6955 // Ensure that check names are unique and have valid ports 6956 knownChecks := make(map[string]struct{}) 6957 for _, check := range service.Checks { 6958 if _, ok := knownChecks[check.Name]; ok { 6959 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 6960 } 6961 knownChecks[check.Name] = struct{}{} 6962 6963 if check.AddressMode == AddressModeAlloc { 6964 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q cannot use address_mode=\"alloc\", only checks defined in a \"group\" service block can use this mode", service.Name)) 6965 } 6966 6967 if !check.RequiresPort() { 6968 // No need to continue validating check if it doesn't need a port 6969 continue 6970 } 6971 6972 effectivePort := check.PortLabel 6973 if effectivePort == "" { 6974 // Inherits from service 6975 effectivePort = service.PortLabel 6976 } 6977 6978 if effectivePort == "" { 6979 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is missing a port", check.Name)) 6980 continue 6981 } 6982 6983 isNumeric := false 6984 portNumber, err := strconv.Atoi(effectivePort) 6985 if err == nil { 6986 isNumeric = true 6987 } 6988 6989 // Numeric ports are fine for address_mode = "driver" 6990 if check.AddressMode == "driver" && isNumeric { 6991 if portNumber <= 0 { 6992 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber)) 6993 } 6994 continue 6995 } 6996 6997 if isNumeric { 6998 mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, check.Name, portNumber)) 6999 continue 7000 } 7001 7002 // PortLabel must exist, report errors by its parent service 7003 addServicePort(effectivePort, service.Name) 7004 } 7005 } 7006 7007 // Get the set of group port labels. 7008 portLabels := make(map[string]struct{}) 7009 if len(tgNetworks) > 0 { 7010 ports := tgNetworks[0].PortLabels() 7011 for portLabel := range ports { 7012 portLabels[portLabel] = struct{}{} 7013 } 7014 } 7015 7016 // COMPAT(0.13) 7017 // Append the set of task port labels. (Note that network resources on the 7018 // task resources are deprecated, but we must let them continue working; a 7019 // warning will be emitted on job submission). 7020 if t.Resources != nil { 7021 for _, network := range t.Resources.Networks { 7022 for portLabel := range network.PortLabels() { 7023 portLabels[portLabel] = struct{}{} 7024 } 7025 } 7026 } 7027 7028 // Iterate over a sorted list of keys to make error listings stable 7029 keys := make([]string, 0, len(servicePorts)) 7030 for p := range servicePorts { 7031 keys = append(keys, p) 7032 } 7033 sort.Strings(keys) 7034 7035 // Ensure all ports referenced in services exist. 7036 for _, servicePort := range keys { 7037 services := servicePorts[servicePort] 7038 _, ok := portLabels[servicePort] 7039 if !ok { 7040 names := make([]string, 0, len(services)) 7041 for name := range services { 7042 names = append(names, name) 7043 } 7044 7045 // Keep order deterministic 7046 sort.Strings(names) 7047 joined := strings.Join(names, ", ") 7048 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 7049 mErr.Errors = append(mErr.Errors, err) 7050 } 7051 } 7052 7053 // Ensure address mode is valid 7054 return mErr.ErrorOrNil() 7055 } 7056 7057 func (t *Task) Warnings() error { 7058 var mErr multierror.Error 7059 7060 // Validate the resources 7061 if t.Resources != nil && t.Resources.IOPS != 0 { 7062 mErr.Errors = append(mErr.Errors, fmt.Errorf("IOPS has been deprecated as of Nomad 0.9.0. Please remove IOPS from resource stanza.")) 7063 } 7064 7065 if t.Resources != nil && len(t.Resources.Networks) != 0 { 7066 mErr.Errors = append(mErr.Errors, fmt.Errorf("task network resources have been deprecated as of Nomad 0.12.0. Please configure networking via group network block.")) 7067 } 7068 7069 for idx, tmpl := range t.Templates { 7070 if err := tmpl.Warnings(); err != nil { 7071 err = multierror.Prefix(err, fmt.Sprintf("Template[%d]", idx)) 7072 mErr.Errors = append(mErr.Errors, err) 7073 } 7074 } 7075 7076 return mErr.ErrorOrNil() 7077 } 7078 7079 // TaskKind identifies the special kinds of tasks using the following format: 7080 // '<kind_name>(:<identifier>)`. The TaskKind can optionally include an identifier that 7081 // is opaque to the Task. This identifier can be used to relate the task to some 7082 // other entity based on the kind. 7083 // 7084 // For example, a task may have the TaskKind of `connect-proxy:service` where 7085 // 'connect-proxy' is the kind name and 'service' is the identifier that relates the 7086 // task to the service name of which it is a connect proxy for. 7087 type TaskKind string 7088 7089 func NewTaskKind(name, identifier string) TaskKind { 7090 return TaskKind(fmt.Sprintf("%s:%s", name, identifier)) 7091 } 7092 7093 // Name returns the kind name portion of the TaskKind 7094 func (k TaskKind) Name() string { 7095 return strings.Split(string(k), ":")[0] 7096 } 7097 7098 // Value returns the identifier of the TaskKind or an empty string if it doesn't 7099 // include one. 7100 func (k TaskKind) Value() string { 7101 if s := strings.SplitN(string(k), ":", 2); len(s) > 1 { 7102 return s[1] 7103 } 7104 return "" 7105 } 7106 7107 func (k TaskKind) hasPrefix(prefix string) bool { 7108 return strings.HasPrefix(string(k), prefix+":") && len(k) > len(prefix)+1 7109 } 7110 7111 // IsConnectProxy returns true if the TaskKind is connect-proxy. 7112 func (k TaskKind) IsConnectProxy() bool { 7113 return k.hasPrefix(ConnectProxyPrefix) 7114 } 7115 7116 // IsConnectNative returns true if the TaskKind is connect-native. 7117 func (k TaskKind) IsConnectNative() bool { 7118 return k.hasPrefix(ConnectNativePrefix) 7119 } 7120 7121 func (k TaskKind) IsConnectIngress() bool { 7122 return k.hasPrefix(ConnectIngressPrefix) 7123 } 7124 7125 func (k TaskKind) IsConnectTerminating() bool { 7126 return k.hasPrefix(ConnectTerminatingPrefix) 7127 } 7128 7129 func (k TaskKind) IsAnyConnectGateway() bool { 7130 switch { 7131 case k.IsConnectIngress(): 7132 return true 7133 case k.IsConnectTerminating(): 7134 return true 7135 default: 7136 return false 7137 } 7138 } 7139 7140 const ( 7141 // ConnectProxyPrefix is the prefix used for fields referencing a Consul Connect 7142 // Proxy 7143 ConnectProxyPrefix = "connect-proxy" 7144 7145 // ConnectNativePrefix is the prefix used for fields referencing a Connect 7146 // Native Task 7147 ConnectNativePrefix = "connect-native" 7148 7149 // ConnectIngressPrefix is the prefix used for fields referencing a Consul 7150 // Connect Ingress Gateway Proxy. 7151 ConnectIngressPrefix = "connect-ingress" 7152 7153 // ConnectTerminatingPrefix is the prefix used for fields referencing a Consul 7154 // Connect Terminating Gateway Proxy. 7155 // 7156 ConnectTerminatingPrefix = "connect-terminating" 7157 7158 // ConnectMeshPrefix is the prefix used for fields referencing a Consul Connect 7159 // Mesh Gateway Proxy. 7160 // 7161 // Not yet supported. 7162 // ConnectMeshPrefix = "connect-mesh" 7163 ) 7164 7165 // ValidateConnectProxyService checks that the service that is being 7166 // proxied by this task exists in the task group and contains 7167 // valid Connect config. 7168 func ValidateConnectProxyService(serviceName string, tgServices []*Service) error { 7169 found := false 7170 names := make([]string, 0, len(tgServices)) 7171 for _, svc := range tgServices { 7172 if svc.Connect == nil || svc.Connect.SidecarService == nil { 7173 continue 7174 } 7175 7176 if svc.Name == serviceName { 7177 found = true 7178 break 7179 } 7180 7181 // Build up list of mismatched Connect service names for error 7182 // reporting. 7183 names = append(names, svc.Name) 7184 } 7185 7186 if !found { 7187 if len(names) == 0 { 7188 return fmt.Errorf("No Connect services in task group with Connect proxy (%q)", serviceName) 7189 } else { 7190 return fmt.Errorf("Connect proxy service name (%q) not found in Connect services from task group: %s", serviceName, names) 7191 } 7192 } 7193 7194 return nil 7195 } 7196 7197 const ( 7198 // TemplateChangeModeNoop marks that no action should be taken if the 7199 // template is re-rendered 7200 TemplateChangeModeNoop = "noop" 7201 7202 // TemplateChangeModeSignal marks that the task should be signaled if the 7203 // template is re-rendered 7204 TemplateChangeModeSignal = "signal" 7205 7206 // TemplateChangeModeRestart marks that the task should be restarted if the 7207 // template is re-rendered 7208 TemplateChangeModeRestart = "restart" 7209 ) 7210 7211 var ( 7212 // TemplateChangeModeInvalidError is the error for when an invalid change 7213 // mode is given 7214 TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, restart") 7215 ) 7216 7217 // Template represents a template configuration to be rendered for a given task 7218 type Template struct { 7219 // SourcePath is the path to the template to be rendered 7220 SourcePath string 7221 7222 // DestPath is the path to where the template should be rendered 7223 DestPath string 7224 7225 // EmbeddedTmpl store the raw template. This is useful for smaller templates 7226 // where they are embedded in the job file rather than sent as an artifact 7227 EmbeddedTmpl string 7228 7229 // ChangeMode indicates what should be done if the template is re-rendered 7230 ChangeMode string 7231 7232 // ChangeSignal is the signal that should be sent if the change mode 7233 // requires it. 7234 ChangeSignal string 7235 7236 // Splay is used to avoid coordinated restarts of processes by applying a 7237 // random wait between 0 and the given splay value before signalling the 7238 // application of a change 7239 Splay time.Duration 7240 7241 // Perms is the permission the file should be written out with. 7242 Perms string 7243 7244 // LeftDelim and RightDelim are optional configurations to control what 7245 // delimiter is utilized when parsing the template. 7246 LeftDelim string 7247 RightDelim string 7248 7249 // Envvars enables exposing the template as environment variables 7250 // instead of as a file. The template must be of the form: 7251 // 7252 // VAR_NAME_1={{ key service/my-key }} 7253 // VAR_NAME_2=raw string and {{ env "attr.kernel.name" }} 7254 // 7255 // Lines will be split on the initial "=" with the first part being the 7256 // key name and the second part the value. 7257 // Empty lines and lines starting with # will be ignored, but to avoid 7258 // escaping issues #s within lines will not be treated as comments. 7259 Envvars bool 7260 7261 // VaultGrace is the grace duration between lease renewal and reacquiring a 7262 // secret. If the lease of a secret is less than the grace, a new secret is 7263 // acquired. 7264 // COMPAT(0.12) VaultGrace has been ignored by Vault since Vault v0.5. 7265 VaultGrace time.Duration 7266 } 7267 7268 // DefaultTemplate returns a default template. 7269 func DefaultTemplate() *Template { 7270 return &Template{ 7271 ChangeMode: TemplateChangeModeRestart, 7272 Splay: 5 * time.Second, 7273 Perms: "0644", 7274 } 7275 } 7276 7277 func (t *Template) Copy() *Template { 7278 if t == nil { 7279 return nil 7280 } 7281 copy := new(Template) 7282 *copy = *t 7283 return copy 7284 } 7285 7286 func (t *Template) Canonicalize() { 7287 if t.ChangeSignal != "" { 7288 t.ChangeSignal = strings.ToUpper(t.ChangeSignal) 7289 } 7290 } 7291 7292 func (t *Template) Validate() error { 7293 var mErr multierror.Error 7294 7295 // Verify we have something to render 7296 if t.SourcePath == "" && t.EmbeddedTmpl == "" { 7297 _ = multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template")) 7298 } 7299 7300 // Verify we can render somewhere 7301 if t.DestPath == "" { 7302 _ = multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template")) 7303 } 7304 7305 // Verify the destination doesn't escape 7306 escaped, err := PathEscapesAllocDir("task", t.DestPath) 7307 if err != nil { 7308 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 7309 } else if escaped { 7310 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 7311 } 7312 7313 // Verify a proper change mode 7314 switch t.ChangeMode { 7315 case TemplateChangeModeNoop, TemplateChangeModeRestart: 7316 case TemplateChangeModeSignal: 7317 if t.ChangeSignal == "" { 7318 _ = multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal")) 7319 } 7320 if t.Envvars { 7321 _ = multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates")) 7322 } 7323 default: 7324 _ = multierror.Append(&mErr, TemplateChangeModeInvalidError) 7325 } 7326 7327 // Verify the splay is positive 7328 if t.Splay < 0 { 7329 _ = multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value")) 7330 } 7331 7332 // Verify the permissions 7333 if t.Perms != "" { 7334 if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil { 7335 _ = multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err)) 7336 } 7337 } 7338 7339 return mErr.ErrorOrNil() 7340 } 7341 7342 func (t *Template) Warnings() error { 7343 var mErr multierror.Error 7344 7345 // Deprecation notice for vault_grace 7346 if t.VaultGrace != 0 { 7347 mErr.Errors = append(mErr.Errors, fmt.Errorf("VaultGrace has been deprecated as of Nomad 0.11 and ignored since Vault 0.5. Please remove VaultGrace / vault_grace from template stanza.")) 7348 } 7349 7350 return mErr.ErrorOrNil() 7351 } 7352 7353 // AllocState records a single event that changes the state of the whole allocation 7354 type AllocStateField uint8 7355 7356 const ( 7357 AllocStateFieldClientStatus AllocStateField = iota 7358 ) 7359 7360 type AllocState struct { 7361 Field AllocStateField 7362 Value string 7363 Time time.Time 7364 } 7365 7366 // Set of possible states for a task. 7367 const ( 7368 TaskStatePending = "pending" // The task is waiting to be run. 7369 TaskStateRunning = "running" // The task is currently running. 7370 TaskStateDead = "dead" // Terminal state of task. 7371 ) 7372 7373 // TaskState tracks the current state of a task and events that caused state 7374 // transitions. 7375 type TaskState struct { 7376 // The current state of the task. 7377 State string 7378 7379 // Failed marks a task as having failed 7380 Failed bool 7381 7382 // Restarts is the number of times the task has restarted 7383 Restarts uint64 7384 7385 // LastRestart is the time the task last restarted. It is updated each time the 7386 // task restarts 7387 LastRestart time.Time 7388 7389 // StartedAt is the time the task is started. It is updated each time the 7390 // task starts 7391 StartedAt time.Time 7392 7393 // FinishedAt is the time at which the task transitioned to dead and will 7394 // not be started again. 7395 FinishedAt time.Time 7396 7397 // Series of task events that transition the state of the task. 7398 Events []*TaskEvent 7399 } 7400 7401 // NewTaskState returns a TaskState initialized in the Pending state. 7402 func NewTaskState() *TaskState { 7403 return &TaskState{ 7404 State: TaskStatePending, 7405 } 7406 } 7407 7408 // Canonicalize ensures the TaskState has a State set. It should default to 7409 // Pending. 7410 func (ts *TaskState) Canonicalize() { 7411 if ts.State == "" { 7412 ts.State = TaskStatePending 7413 } 7414 } 7415 7416 func (ts *TaskState) Copy() *TaskState { 7417 if ts == nil { 7418 return nil 7419 } 7420 copy := new(TaskState) 7421 *copy = *ts 7422 7423 if ts.Events != nil { 7424 copy.Events = make([]*TaskEvent, len(ts.Events)) 7425 for i, e := range ts.Events { 7426 copy.Events[i] = e.Copy() 7427 } 7428 } 7429 return copy 7430 } 7431 7432 // Successful returns whether a task finished successfully. This doesn't really 7433 // have meaning on a non-batch allocation because a service and system 7434 // allocation should not finish. 7435 func (ts *TaskState) Successful() bool { 7436 return ts.State == TaskStateDead && !ts.Failed 7437 } 7438 7439 const ( 7440 // TaskSetupFailure indicates that the task could not be started due to a 7441 // a setup failure. 7442 TaskSetupFailure = "Setup Failure" 7443 7444 // TaskDriveFailure indicates that the task could not be started due to a 7445 // failure in the driver. TaskDriverFailure is considered Recoverable. 7446 TaskDriverFailure = "Driver Failure" 7447 7448 // TaskReceived signals that the task has been pulled by the client at the 7449 // given timestamp. 7450 TaskReceived = "Received" 7451 7452 // TaskFailedValidation indicates the task was invalid and as such was not run. 7453 // TaskFailedValidation is not considered Recoverable. 7454 TaskFailedValidation = "Failed Validation" 7455 7456 // TaskStarted signals that the task was started and its timestamp can be 7457 // used to determine the running length of the task. 7458 TaskStarted = "Started" 7459 7460 // TaskTerminated indicates that the task was started and exited. 7461 TaskTerminated = "Terminated" 7462 7463 // TaskKilling indicates a kill signal has been sent to the task. 7464 TaskKilling = "Killing" 7465 7466 // TaskKilled indicates a user has killed the task. 7467 TaskKilled = "Killed" 7468 7469 // TaskRestarting indicates that task terminated and is being restarted. 7470 TaskRestarting = "Restarting" 7471 7472 // TaskNotRestarting indicates that the task has failed and is not being 7473 // restarted because it has exceeded its restart policy. 7474 TaskNotRestarting = "Not Restarting" 7475 7476 // TaskRestartSignal indicates that the task has been signalled to be 7477 // restarted 7478 TaskRestartSignal = "Restart Signaled" 7479 7480 // TaskSignaling indicates that the task is being signalled. 7481 TaskSignaling = "Signaling" 7482 7483 // TaskDownloadingArtifacts means the task is downloading the artifacts 7484 // specified in the task. 7485 TaskDownloadingArtifacts = "Downloading Artifacts" 7486 7487 // TaskArtifactDownloadFailed indicates that downloading the artifacts 7488 // failed. 7489 TaskArtifactDownloadFailed = "Failed Artifact Download" 7490 7491 // TaskBuildingTaskDir indicates that the task directory/chroot is being 7492 // built. 7493 TaskBuildingTaskDir = "Building Task Directory" 7494 7495 // TaskSetup indicates the task runner is setting up the task environment 7496 TaskSetup = "Task Setup" 7497 7498 // TaskDiskExceeded indicates that one of the tasks in a taskgroup has 7499 // exceeded the requested disk resources. 7500 TaskDiskExceeded = "Disk Resources Exceeded" 7501 7502 // TaskSiblingFailed indicates that a sibling task in the task group has 7503 // failed. 7504 TaskSiblingFailed = "Sibling Task Failed" 7505 7506 // TaskDriverMessage is an informational event message emitted by 7507 // drivers such as when they're performing a long running action like 7508 // downloading an image. 7509 TaskDriverMessage = "Driver" 7510 7511 // TaskLeaderDead indicates that the leader task within the has finished. 7512 TaskLeaderDead = "Leader Task Dead" 7513 7514 // TaskMainDead indicates that the main tasks have dead 7515 TaskMainDead = "Main Tasks Dead" 7516 7517 // TaskHookFailed indicates that one of the hooks for a task failed. 7518 TaskHookFailed = "Task hook failed" 7519 7520 // TaskRestoreFailed indicates Nomad was unable to reattach to a 7521 // restored task. 7522 TaskRestoreFailed = "Failed Restoring Task" 7523 7524 // TaskPluginUnhealthy indicates that a plugin managed by Nomad became unhealthy 7525 TaskPluginUnhealthy = "Plugin became unhealthy" 7526 7527 // TaskPluginHealthy indicates that a plugin managed by Nomad became healthy 7528 TaskPluginHealthy = "Plugin became healthy" 7529 ) 7530 7531 // TaskEvent is an event that effects the state of a task and contains meta-data 7532 // appropriate to the events type. 7533 type TaskEvent struct { 7534 Type string 7535 Time int64 // Unix Nanosecond timestamp 7536 7537 Message string // A possible message explaining the termination of the task. 7538 7539 // DisplayMessage is a human friendly message about the event 7540 DisplayMessage string 7541 7542 // Details is a map with annotated info about the event 7543 Details map[string]string 7544 7545 // DEPRECATION NOTICE: The following fields are deprecated and will be removed 7546 // in a future release. Field values are available in the Details map. 7547 7548 // FailsTask marks whether this event fails the task. 7549 // Deprecated, use Details["fails_task"] to access this. 7550 FailsTask bool 7551 7552 // Restart fields. 7553 // Deprecated, use Details["restart_reason"] to access this. 7554 RestartReason string 7555 7556 // Setup Failure fields. 7557 // Deprecated, use Details["setup_error"] to access this. 7558 SetupError string 7559 7560 // Driver Failure fields. 7561 // Deprecated, use Details["driver_error"] to access this. 7562 DriverError string // A driver error occurred while starting the task. 7563 7564 // Task Terminated Fields. 7565 7566 // Deprecated, use Details["exit_code"] to access this. 7567 ExitCode int // The exit code of the task. 7568 7569 // Deprecated, use Details["signal"] to access this. 7570 Signal int // The signal that terminated the task. 7571 7572 // Killing fields 7573 // Deprecated, use Details["kill_timeout"] to access this. 7574 KillTimeout time.Duration 7575 7576 // Task Killed Fields. 7577 // Deprecated, use Details["kill_error"] to access this. 7578 KillError string // Error killing the task. 7579 7580 // KillReason is the reason the task was killed 7581 // Deprecated, use Details["kill_reason"] to access this. 7582 KillReason string 7583 7584 // TaskRestarting fields. 7585 // Deprecated, use Details["start_delay"] to access this. 7586 StartDelay int64 // The sleep period before restarting the task in unix nanoseconds. 7587 7588 // Artifact Download fields 7589 // Deprecated, use Details["download_error"] to access this. 7590 DownloadError string // Error downloading artifacts 7591 7592 // Validation fields 7593 // Deprecated, use Details["validation_error"] to access this. 7594 ValidationError string // Validation error 7595 7596 // The maximum allowed task disk size. 7597 // Deprecated, use Details["disk_limit"] to access this. 7598 DiskLimit int64 7599 7600 // Name of the sibling task that caused termination of the task that 7601 // the TaskEvent refers to. 7602 // Deprecated, use Details["failed_sibling"] to access this. 7603 FailedSibling string 7604 7605 // VaultError is the error from token renewal 7606 // Deprecated, use Details["vault_renewal_error"] to access this. 7607 VaultError string 7608 7609 // TaskSignalReason indicates the reason the task is being signalled. 7610 // Deprecated, use Details["task_signal_reason"] to access this. 7611 TaskSignalReason string 7612 7613 // TaskSignal is the signal that was sent to the task 7614 // Deprecated, use Details["task_signal"] to access this. 7615 TaskSignal string 7616 7617 // DriverMessage indicates a driver action being taken. 7618 // Deprecated, use Details["driver_message"] to access this. 7619 DriverMessage string 7620 7621 // GenericSource is the source of a message. 7622 // Deprecated, is redundant with event type. 7623 GenericSource string 7624 } 7625 7626 func (event *TaskEvent) PopulateEventDisplayMessage() { 7627 // Build up the description based on the event type. 7628 if event == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why? 7629 return 7630 } 7631 7632 if event.DisplayMessage != "" { 7633 return 7634 } 7635 7636 var desc string 7637 switch event.Type { 7638 case TaskSetup: 7639 desc = event.Message 7640 case TaskStarted: 7641 desc = "Task started by client" 7642 case TaskReceived: 7643 desc = "Task received by client" 7644 case TaskFailedValidation: 7645 if event.ValidationError != "" { 7646 desc = event.ValidationError 7647 } else { 7648 desc = "Validation of task failed" 7649 } 7650 case TaskSetupFailure: 7651 if event.SetupError != "" { 7652 desc = event.SetupError 7653 } else { 7654 desc = "Task setup failed" 7655 } 7656 case TaskDriverFailure: 7657 if event.DriverError != "" { 7658 desc = event.DriverError 7659 } else { 7660 desc = "Failed to start task" 7661 } 7662 case TaskDownloadingArtifacts: 7663 desc = "Client is downloading artifacts" 7664 case TaskArtifactDownloadFailed: 7665 if event.DownloadError != "" { 7666 desc = event.DownloadError 7667 } else { 7668 desc = "Failed to download artifacts" 7669 } 7670 case TaskKilling: 7671 if event.KillReason != "" { 7672 desc = event.KillReason 7673 } else if event.KillTimeout != 0 { 7674 desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout) 7675 } else { 7676 desc = "Sent interrupt" 7677 } 7678 case TaskKilled: 7679 if event.KillError != "" { 7680 desc = event.KillError 7681 } else { 7682 desc = "Task successfully killed" 7683 } 7684 case TaskTerminated: 7685 var parts []string 7686 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 7687 7688 if event.Signal != 0 { 7689 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 7690 } 7691 7692 if event.Message != "" { 7693 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 7694 } 7695 desc = strings.Join(parts, ", ") 7696 case TaskRestarting: 7697 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 7698 if event.RestartReason != "" && event.RestartReason != ReasonWithinPolicy { 7699 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 7700 } else { 7701 desc = in 7702 } 7703 case TaskNotRestarting: 7704 if event.RestartReason != "" { 7705 desc = event.RestartReason 7706 } else { 7707 desc = "Task exceeded restart policy" 7708 } 7709 case TaskSiblingFailed: 7710 if event.FailedSibling != "" { 7711 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 7712 } else { 7713 desc = "Task's sibling failed" 7714 } 7715 case TaskSignaling: 7716 sig := event.TaskSignal 7717 reason := event.TaskSignalReason 7718 7719 if sig == "" && reason == "" { 7720 desc = "Task being sent a signal" 7721 } else if sig == "" { 7722 desc = reason 7723 } else if reason == "" { 7724 desc = fmt.Sprintf("Task being sent signal %v", sig) 7725 } else { 7726 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 7727 } 7728 case TaskRestartSignal: 7729 if event.RestartReason != "" { 7730 desc = event.RestartReason 7731 } else { 7732 desc = "Task signaled to restart" 7733 } 7734 case TaskDriverMessage: 7735 desc = event.DriverMessage 7736 case TaskLeaderDead: 7737 desc = "Leader Task in Group dead" 7738 case TaskMainDead: 7739 desc = "Main tasks in the group died" 7740 default: 7741 desc = event.Message 7742 } 7743 7744 event.DisplayMessage = desc 7745 } 7746 7747 func (te *TaskEvent) GoString() string { 7748 return fmt.Sprintf("%v - %v", te.Time, te.Type) 7749 } 7750 7751 // SetDisplayMessage sets the display message of TaskEvent 7752 func (te *TaskEvent) SetDisplayMessage(msg string) *TaskEvent { 7753 te.DisplayMessage = msg 7754 return te 7755 } 7756 7757 // SetMessage sets the message of TaskEvent 7758 func (te *TaskEvent) SetMessage(msg string) *TaskEvent { 7759 te.Message = msg 7760 te.Details["message"] = msg 7761 return te 7762 } 7763 7764 func (te *TaskEvent) Copy() *TaskEvent { 7765 if te == nil { 7766 return nil 7767 } 7768 copy := new(TaskEvent) 7769 *copy = *te 7770 return copy 7771 } 7772 7773 func NewTaskEvent(event string) *TaskEvent { 7774 return &TaskEvent{ 7775 Type: event, 7776 Time: time.Now().UnixNano(), 7777 Details: make(map[string]string), 7778 } 7779 } 7780 7781 // SetSetupError is used to store an error that occurred while setting up the 7782 // task 7783 func (e *TaskEvent) SetSetupError(err error) *TaskEvent { 7784 if err != nil { 7785 e.SetupError = err.Error() 7786 e.Details["setup_error"] = err.Error() 7787 } 7788 return e 7789 } 7790 7791 func (e *TaskEvent) SetFailsTask() *TaskEvent { 7792 e.FailsTask = true 7793 e.Details["fails_task"] = "true" 7794 return e 7795 } 7796 7797 func (e *TaskEvent) SetDriverError(err error) *TaskEvent { 7798 if err != nil { 7799 e.DriverError = err.Error() 7800 e.Details["driver_error"] = err.Error() 7801 } 7802 return e 7803 } 7804 7805 func (e *TaskEvent) SetExitCode(c int) *TaskEvent { 7806 e.ExitCode = c 7807 e.Details["exit_code"] = fmt.Sprintf("%d", c) 7808 return e 7809 } 7810 7811 func (e *TaskEvent) SetSignal(s int) *TaskEvent { 7812 e.Signal = s 7813 e.Details["signal"] = fmt.Sprintf("%d", s) 7814 return e 7815 } 7816 7817 func (e *TaskEvent) SetSignalText(s string) *TaskEvent { 7818 e.Details["signal"] = s 7819 return e 7820 } 7821 7822 func (e *TaskEvent) SetExitMessage(err error) *TaskEvent { 7823 if err != nil { 7824 e.Message = err.Error() 7825 e.Details["exit_message"] = err.Error() 7826 } 7827 return e 7828 } 7829 7830 func (e *TaskEvent) SetKillError(err error) *TaskEvent { 7831 if err != nil { 7832 e.KillError = err.Error() 7833 e.Details["kill_error"] = err.Error() 7834 } 7835 return e 7836 } 7837 7838 func (e *TaskEvent) SetKillReason(r string) *TaskEvent { 7839 e.KillReason = r 7840 e.Details["kill_reason"] = r 7841 return e 7842 } 7843 7844 func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent { 7845 e.StartDelay = int64(delay) 7846 e.Details["start_delay"] = fmt.Sprintf("%d", delay) 7847 return e 7848 } 7849 7850 func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent { 7851 e.RestartReason = reason 7852 e.Details["restart_reason"] = reason 7853 return e 7854 } 7855 7856 func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent { 7857 e.TaskSignalReason = r 7858 e.Details["task_signal_reason"] = r 7859 return e 7860 } 7861 7862 func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent { 7863 e.TaskSignal = s.String() 7864 e.Details["task_signal"] = s.String() 7865 return e 7866 } 7867 7868 func (e *TaskEvent) SetDownloadError(err error) *TaskEvent { 7869 if err != nil { 7870 e.DownloadError = err.Error() 7871 e.Details["download_error"] = err.Error() 7872 } 7873 return e 7874 } 7875 7876 func (e *TaskEvent) SetValidationError(err error) *TaskEvent { 7877 if err != nil { 7878 e.ValidationError = err.Error() 7879 e.Details["validation_error"] = err.Error() 7880 } 7881 return e 7882 } 7883 7884 func (e *TaskEvent) SetKillTimeout(timeout time.Duration) *TaskEvent { 7885 e.KillTimeout = timeout 7886 e.Details["kill_timeout"] = timeout.String() 7887 return e 7888 } 7889 7890 func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent { 7891 e.DiskLimit = limit 7892 e.Details["disk_limit"] = fmt.Sprintf("%d", limit) 7893 return e 7894 } 7895 7896 func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent { 7897 e.FailedSibling = sibling 7898 e.Details["failed_sibling"] = sibling 7899 return e 7900 } 7901 7902 func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent { 7903 if err != nil { 7904 e.VaultError = err.Error() 7905 e.Details["vault_renewal_error"] = err.Error() 7906 } 7907 return e 7908 } 7909 7910 func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent { 7911 e.DriverMessage = m 7912 e.Details["driver_message"] = m 7913 return e 7914 } 7915 7916 func (e *TaskEvent) SetOOMKilled(oom bool) *TaskEvent { 7917 e.Details["oom_killed"] = strconv.FormatBool(oom) 7918 return e 7919 } 7920 7921 // TaskArtifact is an artifact to download before running the task. 7922 type TaskArtifact struct { 7923 // GetterSource is the source to download an artifact using go-getter 7924 GetterSource string 7925 7926 // GetterOptions are options to use when downloading the artifact using 7927 // go-getter. 7928 GetterOptions map[string]string 7929 7930 // GetterHeaders are headers to use when downloading the artifact using 7931 // go-getter. 7932 GetterHeaders map[string]string 7933 7934 // GetterMode is the go-getter.ClientMode for fetching resources. 7935 // Defaults to "any" but can be set to "file" or "dir". 7936 GetterMode string 7937 7938 // RelativeDest is the download destination given relative to the task's 7939 // directory. 7940 RelativeDest string 7941 } 7942 7943 func (ta *TaskArtifact) Copy() *TaskArtifact { 7944 if ta == nil { 7945 return nil 7946 } 7947 return &TaskArtifact{ 7948 GetterSource: ta.GetterSource, 7949 GetterOptions: helper.CopyMapStringString(ta.GetterOptions), 7950 GetterHeaders: helper.CopyMapStringString(ta.GetterHeaders), 7951 GetterMode: ta.GetterMode, 7952 RelativeDest: ta.RelativeDest, 7953 } 7954 } 7955 7956 func (ta *TaskArtifact) GoString() string { 7957 return fmt.Sprintf("%+v", ta) 7958 } 7959 7960 // hashStringMap appends a deterministic hash of m onto h. 7961 func hashStringMap(h hash.Hash, m map[string]string) { 7962 keys := make([]string, 0, len(m)) 7963 for k := range m { 7964 keys = append(keys, k) 7965 } 7966 sort.Strings(keys) 7967 for _, k := range keys { 7968 _, _ = h.Write([]byte(k)) 7969 _, _ = h.Write([]byte(m[k])) 7970 } 7971 } 7972 7973 // Hash creates a unique identifier for a TaskArtifact as the same GetterSource 7974 // may be specified multiple times with different destinations. 7975 func (ta *TaskArtifact) Hash() string { 7976 h, err := blake2b.New256(nil) 7977 if err != nil { 7978 panic(err) 7979 } 7980 7981 _, _ = h.Write([]byte(ta.GetterSource)) 7982 7983 hashStringMap(h, ta.GetterOptions) 7984 hashStringMap(h, ta.GetterHeaders) 7985 7986 _, _ = h.Write([]byte(ta.GetterMode)) 7987 _, _ = h.Write([]byte(ta.RelativeDest)) 7988 return base64.RawStdEncoding.EncodeToString(h.Sum(nil)) 7989 } 7990 7991 // PathEscapesAllocDir returns if the given path escapes the allocation 7992 // directory. 7993 // 7994 // The prefix is to joined to the path (e.g. "task/local"), and this function 7995 // checks if path escapes the alloc dir, NOT the prefix directory within the alloc dir. 7996 // With prefix="task/local", it will return false for "../secret", but 7997 // true for "../../../../../../root" path; only the latter escapes the alloc dir 7998 func PathEscapesAllocDir(prefix, path string) (bool, error) { 7999 // Verify the destination doesn't escape the tasks directory 8000 alloc, err := filepath.Abs(filepath.Join("/", "alloc-dir/", "alloc-id/")) 8001 if err != nil { 8002 return false, err 8003 } 8004 abs, err := filepath.Abs(filepath.Join(alloc, prefix, path)) 8005 if err != nil { 8006 return false, err 8007 } 8008 rel, err := filepath.Rel(alloc, abs) 8009 if err != nil { 8010 return false, err 8011 } 8012 8013 return strings.HasPrefix(rel, ".."), nil 8014 } 8015 8016 func (ta *TaskArtifact) Validate() error { 8017 // Verify the source 8018 var mErr multierror.Error 8019 if ta.GetterSource == "" { 8020 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 8021 } 8022 8023 switch ta.GetterMode { 8024 case "": 8025 // Default to any 8026 ta.GetterMode = GetterModeAny 8027 case GetterModeAny, GetterModeFile, GetterModeDir: 8028 // Ok 8029 default: 8030 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s", 8031 ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir)) 8032 } 8033 8034 escaped, err := PathEscapesAllocDir("task", ta.RelativeDest) 8035 if err != nil { 8036 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 8037 } else if escaped { 8038 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 8039 } 8040 8041 if err := ta.validateChecksum(); err != nil { 8042 mErr.Errors = append(mErr.Errors, err) 8043 } 8044 8045 return mErr.ErrorOrNil() 8046 } 8047 8048 func (ta *TaskArtifact) validateChecksum() error { 8049 check, ok := ta.GetterOptions["checksum"] 8050 if !ok { 8051 return nil 8052 } 8053 8054 // Job struct validation occurs before interpolation resolution can be effective. 8055 // Skip checking if checksum contain variable reference, and artifacts fetching will 8056 // eventually fail, if checksum is indeed invalid. 8057 if args.ContainsEnv(check) { 8058 return nil 8059 } 8060 8061 check = strings.TrimSpace(check) 8062 if check == "" { 8063 return fmt.Errorf("checksum value cannot be empty") 8064 } 8065 8066 parts := strings.Split(check, ":") 8067 if l := len(parts); l != 2 { 8068 return fmt.Errorf(`checksum must be given as "type:value"; got %q`, check) 8069 } 8070 8071 checksumVal := parts[1] 8072 checksumBytes, err := hex.DecodeString(checksumVal) 8073 if err != nil { 8074 return fmt.Errorf("invalid checksum: %v", err) 8075 } 8076 8077 checksumType := parts[0] 8078 expectedLength := 0 8079 switch checksumType { 8080 case "md5": 8081 expectedLength = md5.Size 8082 case "sha1": 8083 expectedLength = sha1.Size 8084 case "sha256": 8085 expectedLength = sha256.Size 8086 case "sha512": 8087 expectedLength = sha512.Size 8088 default: 8089 return fmt.Errorf("unsupported checksum type: %s", checksumType) 8090 } 8091 8092 if len(checksumBytes) != expectedLength { 8093 return fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal) 8094 } 8095 8096 return nil 8097 } 8098 8099 const ( 8100 ConstraintDistinctProperty = "distinct_property" 8101 ConstraintDistinctHosts = "distinct_hosts" 8102 ConstraintRegex = "regexp" 8103 ConstraintVersion = "version" 8104 ConstraintSemver = "semver" 8105 ConstraintSetContains = "set_contains" 8106 ConstraintSetContainsAll = "set_contains_all" 8107 ConstraintSetContainsAny = "set_contains_any" 8108 ConstraintAttributeIsSet = "is_set" 8109 ConstraintAttributeIsNotSet = "is_not_set" 8110 ) 8111 8112 // Constraints are used to restrict placement options. 8113 type Constraint struct { 8114 LTarget string // Left-hand target 8115 RTarget string // Right-hand target 8116 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 8117 str string // Memoized string 8118 } 8119 8120 // Equal checks if two constraints are equal 8121 func (c *Constraint) Equals(o *Constraint) bool { 8122 return c == o || 8123 c.LTarget == o.LTarget && 8124 c.RTarget == o.RTarget && 8125 c.Operand == o.Operand 8126 } 8127 8128 func (c *Constraint) Equal(o *Constraint) bool { 8129 return c.Equals(o) 8130 } 8131 8132 func (c *Constraint) Copy() *Constraint { 8133 if c == nil { 8134 return nil 8135 } 8136 nc := new(Constraint) 8137 *nc = *c 8138 return nc 8139 } 8140 8141 func (c *Constraint) String() string { 8142 if c.str != "" { 8143 return c.str 8144 } 8145 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 8146 return c.str 8147 } 8148 8149 func (c *Constraint) Validate() error { 8150 var mErr multierror.Error 8151 if c.Operand == "" { 8152 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 8153 } 8154 8155 // requireLtarget specifies whether the constraint requires an LTarget to be 8156 // provided. 8157 requireLtarget := true 8158 8159 // Perform additional validation based on operand 8160 switch c.Operand { 8161 case ConstraintDistinctHosts: 8162 requireLtarget = false 8163 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 8164 if c.RTarget == "" { 8165 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget")) 8166 } 8167 case ConstraintRegex: 8168 if _, err := regexp.Compile(c.RTarget); err != nil { 8169 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 8170 } 8171 case ConstraintVersion: 8172 if _, err := version.NewConstraint(c.RTarget); err != nil { 8173 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 8174 } 8175 case ConstraintSemver: 8176 if _, err := semver.NewConstraint(c.RTarget); err != nil { 8177 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver constraint is invalid: %v", err)) 8178 } 8179 case ConstraintDistinctProperty: 8180 // If a count is set, make sure it is convertible to a uint64 8181 if c.RTarget != "" { 8182 count, err := strconv.ParseUint(c.RTarget, 10, 64) 8183 if err != nil { 8184 mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err)) 8185 } else if count < 1 { 8186 mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count)) 8187 } 8188 } 8189 case ConstraintAttributeIsSet, ConstraintAttributeIsNotSet: 8190 if c.RTarget != "" { 8191 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q does not support an RTarget", c.Operand)) 8192 } 8193 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 8194 if c.RTarget == "" { 8195 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand)) 8196 } 8197 default: 8198 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand)) 8199 } 8200 8201 // Ensure we have an LTarget for the constraints that need one 8202 if requireLtarget && c.LTarget == "" { 8203 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint")) 8204 } 8205 8206 return mErr.ErrorOrNil() 8207 } 8208 8209 type Constraints []*Constraint 8210 8211 // Equals compares Constraints as a set 8212 func (xs *Constraints) Equals(ys *Constraints) bool { 8213 if xs == ys { 8214 return true 8215 } 8216 if xs == nil || ys == nil { 8217 return false 8218 } 8219 if len(*xs) != len(*ys) { 8220 return false 8221 } 8222 SETEQUALS: 8223 for _, x := range *xs { 8224 for _, y := range *ys { 8225 if x.Equals(y) { 8226 continue SETEQUALS 8227 } 8228 } 8229 return false 8230 } 8231 return true 8232 } 8233 8234 // Affinity is used to score placement options based on a weight 8235 type Affinity struct { 8236 LTarget string // Left-hand target 8237 RTarget string // Right-hand target 8238 Operand string // Affinity operand (<=, <, =, !=, >, >=), set_contains_all, set_contains_any 8239 Weight int8 // Weight applied to nodes that match the affinity. Can be negative 8240 str string // Memoized string 8241 } 8242 8243 // Equal checks if two affinities are equal 8244 func (a *Affinity) Equals(o *Affinity) bool { 8245 return a == o || 8246 a.LTarget == o.LTarget && 8247 a.RTarget == o.RTarget && 8248 a.Operand == o.Operand && 8249 a.Weight == o.Weight 8250 } 8251 8252 func (a *Affinity) Equal(o *Affinity) bool { 8253 return a.Equals(o) 8254 } 8255 8256 func (a *Affinity) Copy() *Affinity { 8257 if a == nil { 8258 return nil 8259 } 8260 na := new(Affinity) 8261 *na = *a 8262 return na 8263 } 8264 8265 func (a *Affinity) String() string { 8266 if a.str != "" { 8267 return a.str 8268 } 8269 a.str = fmt.Sprintf("%s %s %s %v", a.LTarget, a.Operand, a.RTarget, a.Weight) 8270 return a.str 8271 } 8272 8273 func (a *Affinity) Validate() error { 8274 var mErr multierror.Error 8275 if a.Operand == "" { 8276 mErr.Errors = append(mErr.Errors, errors.New("Missing affinity operand")) 8277 } 8278 8279 // Perform additional validation based on operand 8280 switch a.Operand { 8281 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 8282 if a.RTarget == "" { 8283 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains operators require an RTarget")) 8284 } 8285 case ConstraintRegex: 8286 if _, err := regexp.Compile(a.RTarget); err != nil { 8287 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 8288 } 8289 case ConstraintVersion: 8290 if _, err := version.NewConstraint(a.RTarget); err != nil { 8291 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version affinity is invalid: %v", err)) 8292 } 8293 case ConstraintSemver: 8294 if _, err := semver.NewConstraint(a.RTarget); err != nil { 8295 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver affinity is invalid: %v", err)) 8296 } 8297 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 8298 if a.RTarget == "" { 8299 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", a.Operand)) 8300 } 8301 default: 8302 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown affinity operator %q", a.Operand)) 8303 } 8304 8305 // Ensure we have an LTarget 8306 if a.LTarget == "" { 8307 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required")) 8308 } 8309 8310 // Ensure that weight is between -100 and 100, and not zero 8311 if a.Weight == 0 { 8312 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight cannot be zero")) 8313 } 8314 8315 if a.Weight > 100 || a.Weight < -100 { 8316 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight must be within the range [-100,100]")) 8317 } 8318 8319 return mErr.ErrorOrNil() 8320 } 8321 8322 // Spread is used to specify desired distribution of allocations according to weight 8323 type Spread struct { 8324 // Attribute is the node attribute used as the spread criteria 8325 Attribute string 8326 8327 // Weight is the relative weight of this spread, useful when there are multiple 8328 // spread and affinities 8329 Weight int8 8330 8331 // SpreadTarget is used to describe desired percentages for each attribute value 8332 SpreadTarget []*SpreadTarget 8333 8334 // Memoized string representation 8335 str string 8336 } 8337 8338 type Affinities []*Affinity 8339 8340 // Equals compares Affinities as a set 8341 func (xs *Affinities) Equals(ys *Affinities) bool { 8342 if xs == ys { 8343 return true 8344 } 8345 if xs == nil || ys == nil { 8346 return false 8347 } 8348 if len(*xs) != len(*ys) { 8349 return false 8350 } 8351 SETEQUALS: 8352 for _, x := range *xs { 8353 for _, y := range *ys { 8354 if x.Equals(y) { 8355 continue SETEQUALS 8356 } 8357 } 8358 return false 8359 } 8360 return true 8361 } 8362 8363 func (s *Spread) Copy() *Spread { 8364 if s == nil { 8365 return nil 8366 } 8367 ns := new(Spread) 8368 *ns = *s 8369 8370 ns.SpreadTarget = CopySliceSpreadTarget(s.SpreadTarget) 8371 return ns 8372 } 8373 8374 func (s *Spread) String() string { 8375 if s.str != "" { 8376 return s.str 8377 } 8378 s.str = fmt.Sprintf("%s %s %v", s.Attribute, s.SpreadTarget, s.Weight) 8379 return s.str 8380 } 8381 8382 func (s *Spread) Validate() error { 8383 var mErr multierror.Error 8384 if s.Attribute == "" { 8385 mErr.Errors = append(mErr.Errors, errors.New("Missing spread attribute")) 8386 } 8387 if s.Weight <= 0 || s.Weight > 100 { 8388 mErr.Errors = append(mErr.Errors, errors.New("Spread stanza must have a positive weight from 0 to 100")) 8389 } 8390 seen := make(map[string]struct{}) 8391 sumPercent := uint32(0) 8392 8393 for _, target := range s.SpreadTarget { 8394 // Make sure there are no duplicates 8395 _, ok := seen[target.Value] 8396 if !ok { 8397 seen[target.Value] = struct{}{} 8398 } else { 8399 mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target value %q already defined", target.Value)) 8400 } 8401 if target.Percent > 100 { 8402 mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target percentage for value %q must be between 0 and 100", target.Value)) 8403 } 8404 sumPercent += uint32(target.Percent) 8405 } 8406 if sumPercent > 100 { 8407 mErr.Errors = append(mErr.Errors, fmt.Errorf("Sum of spread target percentages must not be greater than 100%%; got %d%%", sumPercent)) 8408 } 8409 return mErr.ErrorOrNil() 8410 } 8411 8412 // SpreadTarget is used to specify desired percentages for each attribute value 8413 type SpreadTarget struct { 8414 // Value is a single attribute value, like "dc1" 8415 Value string 8416 8417 // Percent is the desired percentage of allocs 8418 Percent uint8 8419 8420 // Memoized string representation 8421 str string 8422 } 8423 8424 func (s *SpreadTarget) Copy() *SpreadTarget { 8425 if s == nil { 8426 return nil 8427 } 8428 8429 ns := new(SpreadTarget) 8430 *ns = *s 8431 return ns 8432 } 8433 8434 func (s *SpreadTarget) String() string { 8435 if s.str != "" { 8436 return s.str 8437 } 8438 s.str = fmt.Sprintf("%q %v%%", s.Value, s.Percent) 8439 return s.str 8440 } 8441 8442 // EphemeralDisk is an ephemeral disk object 8443 type EphemeralDisk struct { 8444 // Sticky indicates whether the allocation is sticky to a node 8445 Sticky bool 8446 8447 // SizeMB is the size of the local disk 8448 SizeMB int 8449 8450 // Migrate determines if Nomad client should migrate the allocation dir for 8451 // sticky allocations 8452 Migrate bool 8453 } 8454 8455 // DefaultEphemeralDisk returns a EphemeralDisk with default configurations 8456 func DefaultEphemeralDisk() *EphemeralDisk { 8457 return &EphemeralDisk{ 8458 SizeMB: 300, 8459 } 8460 } 8461 8462 // Validate validates EphemeralDisk 8463 func (d *EphemeralDisk) Validate() error { 8464 if d.SizeMB < 10 { 8465 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 8466 } 8467 return nil 8468 } 8469 8470 // Copy copies the EphemeralDisk struct and returns a new one 8471 func (d *EphemeralDisk) Copy() *EphemeralDisk { 8472 ld := new(EphemeralDisk) 8473 *ld = *d 8474 return ld 8475 } 8476 8477 var ( 8478 // VaultUnrecoverableError matches unrecoverable errors returned by a Vault 8479 // server 8480 VaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`) 8481 ) 8482 8483 const ( 8484 // VaultChangeModeNoop takes no action when a new token is retrieved. 8485 VaultChangeModeNoop = "noop" 8486 8487 // VaultChangeModeSignal signals the task when a new token is retrieved. 8488 VaultChangeModeSignal = "signal" 8489 8490 // VaultChangeModeRestart restarts the task when a new token is retrieved. 8491 VaultChangeModeRestart = "restart" 8492 ) 8493 8494 // Vault stores the set of permissions a task needs access to from Vault. 8495 type Vault struct { 8496 // Policies is the set of policies that the task needs access to 8497 Policies []string 8498 8499 // Namespace is the vault namespace that should be used. 8500 Namespace string 8501 8502 // Env marks whether the Vault Token should be exposed as an environment 8503 // variable 8504 Env bool 8505 8506 // ChangeMode is used to configure the task's behavior when the Vault 8507 // token changes because the original token could not be renewed in time. 8508 ChangeMode string 8509 8510 // ChangeSignal is the signal sent to the task when a new token is 8511 // retrieved. This is only valid when using the signal change mode. 8512 ChangeSignal string 8513 } 8514 8515 func DefaultVaultBlock() *Vault { 8516 return &Vault{ 8517 Env: true, 8518 ChangeMode: VaultChangeModeRestart, 8519 } 8520 } 8521 8522 // Copy returns a copy of this Vault block. 8523 func (v *Vault) Copy() *Vault { 8524 if v == nil { 8525 return nil 8526 } 8527 8528 nv := new(Vault) 8529 *nv = *v 8530 return nv 8531 } 8532 8533 func (v *Vault) Canonicalize() { 8534 if v.ChangeSignal != "" { 8535 v.ChangeSignal = strings.ToUpper(v.ChangeSignal) 8536 } 8537 } 8538 8539 // Validate returns if the Vault block is valid. 8540 func (v *Vault) Validate() error { 8541 if v == nil { 8542 return nil 8543 } 8544 8545 var mErr multierror.Error 8546 if len(v.Policies) == 0 { 8547 _ = multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty")) 8548 } 8549 8550 for _, p := range v.Policies { 8551 if p == "root" { 8552 _ = multierror.Append(&mErr, fmt.Errorf("Can not specify \"root\" policy")) 8553 } 8554 } 8555 8556 switch v.ChangeMode { 8557 case VaultChangeModeSignal: 8558 if v.ChangeSignal == "" { 8559 _ = multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal)) 8560 } 8561 case VaultChangeModeNoop, VaultChangeModeRestart: 8562 default: 8563 _ = multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode)) 8564 } 8565 8566 return mErr.ErrorOrNil() 8567 } 8568 8569 const ( 8570 // DeploymentStatuses are the various states a deployment can be be in 8571 DeploymentStatusRunning = "running" 8572 DeploymentStatusPaused = "paused" 8573 DeploymentStatusFailed = "failed" 8574 DeploymentStatusSuccessful = "successful" 8575 DeploymentStatusCancelled = "cancelled" 8576 DeploymentStatusPending = "pending" 8577 DeploymentStatusBlocked = "blocked" 8578 DeploymentStatusUnblocking = "unblocking" 8579 8580 // TODO Statuses and Descriptions do not match 1:1 and we sometimes use the Description as a status flag 8581 8582 // DeploymentStatusDescriptions are the various descriptions of the states a 8583 // deployment can be in. 8584 DeploymentStatusDescriptionRunning = "Deployment is running" 8585 DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires manual promotion" 8586 DeploymentStatusDescriptionRunningAutoPromotion = "Deployment is running pending automatic promotion" 8587 DeploymentStatusDescriptionPaused = "Deployment is paused" 8588 DeploymentStatusDescriptionSuccessful = "Deployment completed successfully" 8589 DeploymentStatusDescriptionStoppedJob = "Cancelled because job is stopped" 8590 DeploymentStatusDescriptionNewerJob = "Cancelled due to newer version of job" 8591 DeploymentStatusDescriptionFailedAllocations = "Failed due to unhealthy allocations" 8592 DeploymentStatusDescriptionProgressDeadline = "Failed due to progress deadline" 8593 DeploymentStatusDescriptionFailedByUser = "Deployment marked as failed" 8594 8595 // used only in multiregion deployments 8596 DeploymentStatusDescriptionFailedByPeer = "Failed because of an error in peer region" 8597 DeploymentStatusDescriptionBlocked = "Deployment is complete but waiting for peer region" 8598 DeploymentStatusDescriptionUnblocking = "Deployment is unblocking remaining regions" 8599 DeploymentStatusDescriptionPendingForPeer = "Deployment is pending, waiting for peer region" 8600 ) 8601 8602 // DeploymentStatusDescriptionRollback is used to get the status description of 8603 // a deployment when rolling back to an older job. 8604 func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string { 8605 return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion) 8606 } 8607 8608 // DeploymentStatusDescriptionRollbackNoop is used to get the status description of 8609 // a deployment when rolling back is not possible because it has the same specification 8610 func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string { 8611 return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion) 8612 } 8613 8614 // DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of 8615 // a deployment when there is no target to rollback to but autorevert is desired. 8616 func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string { 8617 return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription) 8618 } 8619 8620 // Deployment is the object that represents a job deployment which is used to 8621 // transition a job between versions. 8622 type Deployment struct { 8623 // ID is a generated UUID for the deployment 8624 ID string 8625 8626 // Namespace is the namespace the deployment is created in 8627 Namespace string 8628 8629 // JobID is the job the deployment is created for 8630 JobID string 8631 8632 // JobVersion is the version of the job at which the deployment is tracking 8633 JobVersion uint64 8634 8635 // JobModifyIndex is the ModifyIndex of the job which the deployment is 8636 // tracking. 8637 JobModifyIndex uint64 8638 8639 // JobSpecModifyIndex is the JobModifyIndex of the job which the 8640 // deployment is tracking. 8641 JobSpecModifyIndex uint64 8642 8643 // JobCreateIndex is the create index of the job which the deployment is 8644 // tracking. It is needed so that if the job gets stopped and reran we can 8645 // present the correct list of deployments for the job and not old ones. 8646 JobCreateIndex uint64 8647 8648 // Multiregion specifies if deployment is part of multiregion deployment 8649 IsMultiregion bool 8650 8651 // TaskGroups is the set of task groups effected by the deployment and their 8652 // current deployment status. 8653 TaskGroups map[string]*DeploymentState 8654 8655 // The status of the deployment 8656 Status string 8657 8658 // StatusDescription allows a human readable description of the deployment 8659 // status. 8660 StatusDescription string 8661 8662 CreateIndex uint64 8663 ModifyIndex uint64 8664 } 8665 8666 // NewDeployment creates a new deployment given the job. 8667 func NewDeployment(job *Job) *Deployment { 8668 return &Deployment{ 8669 ID: uuid.Generate(), 8670 Namespace: job.Namespace, 8671 JobID: job.ID, 8672 JobVersion: job.Version, 8673 JobModifyIndex: job.ModifyIndex, 8674 JobSpecModifyIndex: job.JobModifyIndex, 8675 JobCreateIndex: job.CreateIndex, 8676 IsMultiregion: job.IsMultiregion(), 8677 Status: DeploymentStatusRunning, 8678 StatusDescription: DeploymentStatusDescriptionRunning, 8679 TaskGroups: make(map[string]*DeploymentState, len(job.TaskGroups)), 8680 } 8681 } 8682 8683 func (d *Deployment) Copy() *Deployment { 8684 if d == nil { 8685 return nil 8686 } 8687 8688 c := &Deployment{} 8689 *c = *d 8690 8691 c.TaskGroups = nil 8692 if l := len(d.TaskGroups); d.TaskGroups != nil { 8693 c.TaskGroups = make(map[string]*DeploymentState, l) 8694 for tg, s := range d.TaskGroups { 8695 c.TaskGroups[tg] = s.Copy() 8696 } 8697 } 8698 8699 return c 8700 } 8701 8702 // Active returns whether the deployment is active or terminal. 8703 func (d *Deployment) Active() bool { 8704 switch d.Status { 8705 case DeploymentStatusRunning, DeploymentStatusPaused, DeploymentStatusBlocked, DeploymentStatusUnblocking, DeploymentStatusPending: 8706 return true 8707 default: 8708 return false 8709 } 8710 } 8711 8712 // GetID is a helper for getting the ID when the object may be nil 8713 func (d *Deployment) GetID() string { 8714 if d == nil { 8715 return "" 8716 } 8717 return d.ID 8718 } 8719 8720 // HasPlacedCanaries returns whether the deployment has placed canaries 8721 func (d *Deployment) HasPlacedCanaries() bool { 8722 if d == nil || len(d.TaskGroups) == 0 { 8723 return false 8724 } 8725 for _, group := range d.TaskGroups { 8726 if len(group.PlacedCanaries) != 0 { 8727 return true 8728 } 8729 } 8730 return false 8731 } 8732 8733 // RequiresPromotion returns whether the deployment requires promotion to 8734 // continue 8735 func (d *Deployment) RequiresPromotion() bool { 8736 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 8737 return false 8738 } 8739 for _, group := range d.TaskGroups { 8740 if group.DesiredCanaries > 0 && !group.Promoted { 8741 return true 8742 } 8743 } 8744 return false 8745 } 8746 8747 // HasAutoPromote determines if all taskgroups are marked auto_promote 8748 func (d *Deployment) HasAutoPromote() bool { 8749 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 8750 return false 8751 } 8752 for _, group := range d.TaskGroups { 8753 if !group.AutoPromote { 8754 return false 8755 } 8756 } 8757 return true 8758 } 8759 8760 func (d *Deployment) GoString() string { 8761 base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription) 8762 for group, state := range d.TaskGroups { 8763 base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state) 8764 } 8765 return base 8766 } 8767 8768 // DeploymentState tracks the state of a deployment for a given task group. 8769 type DeploymentState struct { 8770 // AutoRevert marks whether the task group has indicated the job should be 8771 // reverted on failure 8772 AutoRevert bool 8773 8774 // AutoPromote marks promotion triggered automatically by healthy canaries 8775 // copied from TaskGroup UpdateStrategy in scheduler.reconcile 8776 AutoPromote bool 8777 8778 // ProgressDeadline is the deadline by which an allocation must transition 8779 // to healthy before the deployment is considered failed. This value is set 8780 // by the jobspec `update.progress_deadline` field. 8781 ProgressDeadline time.Duration 8782 8783 // RequireProgressBy is the time by which an allocation must transition to 8784 // healthy before the deployment is considered failed. This value is reset 8785 // to "now" + ProgressDeadline when an allocation updates the deployment. 8786 RequireProgressBy time.Time 8787 8788 // Promoted marks whether the canaries have been promoted 8789 Promoted bool 8790 8791 // PlacedCanaries is the set of placed canary allocations 8792 PlacedCanaries []string 8793 8794 // DesiredCanaries is the number of canaries that should be created. 8795 DesiredCanaries int 8796 8797 // DesiredTotal is the total number of allocations that should be created as 8798 // part of the deployment. 8799 DesiredTotal int 8800 8801 // PlacedAllocs is the number of allocations that have been placed 8802 PlacedAllocs int 8803 8804 // HealthyAllocs is the number of allocations that have been marked healthy. 8805 HealthyAllocs int 8806 8807 // UnhealthyAllocs are allocations that have been marked as unhealthy. 8808 UnhealthyAllocs int 8809 } 8810 8811 func (d *DeploymentState) GoString() string { 8812 base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal) 8813 base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries) 8814 base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries) 8815 base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted) 8816 base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs) 8817 base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs) 8818 base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs) 8819 base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert) 8820 base += fmt.Sprintf("\n\tAutoPromote: %v", d.AutoPromote) 8821 return base 8822 } 8823 8824 func (d *DeploymentState) Copy() *DeploymentState { 8825 c := &DeploymentState{} 8826 *c = *d 8827 c.PlacedCanaries = helper.CopySliceString(d.PlacedCanaries) 8828 return c 8829 } 8830 8831 // DeploymentStatusUpdate is used to update the status of a given deployment 8832 type DeploymentStatusUpdate struct { 8833 // DeploymentID is the ID of the deployment to update 8834 DeploymentID string 8835 8836 // Status is the new status of the deployment. 8837 Status string 8838 8839 // StatusDescription is the new status description of the deployment. 8840 StatusDescription string 8841 } 8842 8843 // RescheduleTracker encapsulates previous reschedule events 8844 type RescheduleTracker struct { 8845 Events []*RescheduleEvent 8846 } 8847 8848 func (rt *RescheduleTracker) Copy() *RescheduleTracker { 8849 if rt == nil { 8850 return nil 8851 } 8852 nt := &RescheduleTracker{} 8853 *nt = *rt 8854 rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events)) 8855 for _, tracker := range rt.Events { 8856 rescheduleEvents = append(rescheduleEvents, tracker.Copy()) 8857 } 8858 nt.Events = rescheduleEvents 8859 return nt 8860 } 8861 8862 // RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation 8863 type RescheduleEvent struct { 8864 // RescheduleTime is the timestamp of a reschedule attempt 8865 RescheduleTime int64 8866 8867 // PrevAllocID is the ID of the previous allocation being restarted 8868 PrevAllocID string 8869 8870 // PrevNodeID is the node ID of the previous allocation 8871 PrevNodeID string 8872 8873 // Delay is the reschedule delay associated with the attempt 8874 Delay time.Duration 8875 } 8876 8877 func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string, delay time.Duration) *RescheduleEvent { 8878 return &RescheduleEvent{RescheduleTime: rescheduleTime, 8879 PrevAllocID: prevAllocID, 8880 PrevNodeID: prevNodeID, 8881 Delay: delay} 8882 } 8883 8884 func (re *RescheduleEvent) Copy() *RescheduleEvent { 8885 if re == nil { 8886 return nil 8887 } 8888 copy := new(RescheduleEvent) 8889 *copy = *re 8890 return copy 8891 } 8892 8893 // DesiredTransition is used to mark an allocation as having a desired state 8894 // transition. This information can be used by the scheduler to make the 8895 // correct decision. 8896 type DesiredTransition struct { 8897 // Migrate is used to indicate that this allocation should be stopped and 8898 // migrated to another node. 8899 Migrate *bool 8900 8901 // Reschedule is used to indicate that this allocation is eligible to be 8902 // rescheduled. Most allocations are automatically eligible for 8903 // rescheduling, so this field is only required when an allocation is not 8904 // automatically eligible. An example is an allocation that is part of a 8905 // deployment. 8906 Reschedule *bool 8907 8908 // ForceReschedule is used to indicate that this allocation must be rescheduled. 8909 // This field is only used when operators want to force a placement even if 8910 // a failed allocation is not eligible to be rescheduled 8911 ForceReschedule *bool 8912 } 8913 8914 // Merge merges the two desired transitions, preferring the values from the 8915 // passed in object. 8916 func (d *DesiredTransition) Merge(o *DesiredTransition) { 8917 if o.Migrate != nil { 8918 d.Migrate = o.Migrate 8919 } 8920 8921 if o.Reschedule != nil { 8922 d.Reschedule = o.Reschedule 8923 } 8924 8925 if o.ForceReschedule != nil { 8926 d.ForceReschedule = o.ForceReschedule 8927 } 8928 } 8929 8930 // ShouldMigrate returns whether the transition object dictates a migration. 8931 func (d *DesiredTransition) ShouldMigrate() bool { 8932 return d.Migrate != nil && *d.Migrate 8933 } 8934 8935 // ShouldReschedule returns whether the transition object dictates a 8936 // rescheduling. 8937 func (d *DesiredTransition) ShouldReschedule() bool { 8938 return d.Reschedule != nil && *d.Reschedule 8939 } 8940 8941 // ShouldForceReschedule returns whether the transition object dictates a 8942 // forced rescheduling. 8943 func (d *DesiredTransition) ShouldForceReschedule() bool { 8944 if d == nil { 8945 return false 8946 } 8947 return d.ForceReschedule != nil && *d.ForceReschedule 8948 } 8949 8950 const ( 8951 AllocDesiredStatusRun = "run" // Allocation should run 8952 AllocDesiredStatusStop = "stop" // Allocation should stop 8953 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 8954 ) 8955 8956 const ( 8957 AllocClientStatusPending = "pending" 8958 AllocClientStatusRunning = "running" 8959 AllocClientStatusComplete = "complete" 8960 AllocClientStatusFailed = "failed" 8961 AllocClientStatusLost = "lost" 8962 ) 8963 8964 // Allocation is used to allocate the placement of a task group to a node. 8965 type Allocation struct { 8966 // msgpack omit empty fields during serialization 8967 _struct bool `codec:",omitempty"` // nolint: structcheck 8968 8969 // ID of the allocation (UUID) 8970 ID string 8971 8972 // Namespace is the namespace the allocation is created in 8973 Namespace string 8974 8975 // ID of the evaluation that generated this allocation 8976 EvalID string 8977 8978 // Name is a logical name of the allocation. 8979 Name string 8980 8981 // NodeID is the node this is being placed on 8982 NodeID string 8983 8984 // NodeName is the name of the node this is being placed on. 8985 NodeName string 8986 8987 // Job is the parent job of the task group being allocated. 8988 // This is copied at allocation time to avoid issues if the job 8989 // definition is updated. 8990 JobID string 8991 Job *Job 8992 8993 // TaskGroup is the name of the task group that should be run 8994 TaskGroup string 8995 8996 // COMPAT(0.11): Remove in 0.11 8997 // Resources is the total set of resources allocated as part 8998 // of this allocation of the task group. Dynamic ports will be set by 8999 // the scheduler. 9000 Resources *Resources 9001 9002 // SharedResources are the resources that are shared by all the tasks in an 9003 // allocation 9004 // Deprecated: use AllocatedResources.Shared instead. 9005 // Keep field to allow us to handle upgrade paths from old versions 9006 SharedResources *Resources 9007 9008 // TaskResources is the set of resources allocated to each 9009 // task. These should sum to the total Resources. Dynamic ports will be 9010 // set by the scheduler. 9011 // Deprecated: use AllocatedResources.Tasks instead. 9012 // Keep field to allow us to handle upgrade paths from old versions 9013 TaskResources map[string]*Resources 9014 9015 // AllocatedResources is the total resources allocated for the task group. 9016 AllocatedResources *AllocatedResources 9017 9018 // Metrics associated with this allocation 9019 Metrics *AllocMetric 9020 9021 // Desired Status of the allocation on the client 9022 DesiredStatus string 9023 9024 // DesiredStatusDescription is meant to provide more human useful information 9025 DesiredDescription string 9026 9027 // DesiredTransition is used to indicate that a state transition 9028 // is desired for a given reason. 9029 DesiredTransition DesiredTransition 9030 9031 // Status of the allocation on the client 9032 ClientStatus string 9033 9034 // ClientStatusDescription is meant to provide more human useful information 9035 ClientDescription string 9036 9037 // TaskStates stores the state of each task, 9038 TaskStates map[string]*TaskState 9039 9040 // AllocStates track meta data associated with changes to the state of the whole allocation, like becoming lost 9041 AllocStates []*AllocState 9042 9043 // PreviousAllocation is the allocation that this allocation is replacing 9044 PreviousAllocation string 9045 9046 // NextAllocation is the allocation that this allocation is being replaced by 9047 NextAllocation string 9048 9049 // DeploymentID identifies an allocation as being created from a 9050 // particular deployment 9051 DeploymentID string 9052 9053 // DeploymentStatus captures the status of the allocation as part of the 9054 // given deployment 9055 DeploymentStatus *AllocDeploymentStatus 9056 9057 // RescheduleTrackers captures details of previous reschedule attempts of the allocation 9058 RescheduleTracker *RescheduleTracker 9059 9060 // NetworkStatus captures networking details of an allocation known at runtime 9061 NetworkStatus *AllocNetworkStatus 9062 9063 // FollowupEvalID captures a follow up evaluation created to handle a failed allocation 9064 // that can be rescheduled in the future 9065 FollowupEvalID string 9066 9067 // PreemptedAllocations captures IDs of any allocations that were preempted 9068 // in order to place this allocation 9069 PreemptedAllocations []string 9070 9071 // PreemptedByAllocation tracks the alloc ID of the allocation that caused this allocation 9072 // to stop running because it got preempted 9073 PreemptedByAllocation string 9074 9075 // Raft Indexes 9076 CreateIndex uint64 9077 ModifyIndex uint64 9078 9079 // AllocModifyIndex is not updated when the client updates allocations. This 9080 // lets the client pull only the allocs updated by the server. 9081 AllocModifyIndex uint64 9082 9083 // CreateTime is the time the allocation has finished scheduling and been 9084 // verified by the plan applier. 9085 CreateTime int64 9086 9087 // ModifyTime is the time the allocation was last updated. 9088 ModifyTime int64 9089 } 9090 9091 // Index returns the index of the allocation. If the allocation is from a task 9092 // group with count greater than 1, there will be multiple allocations for it. 9093 func (a *Allocation) Index() uint { 9094 l := len(a.Name) 9095 prefix := len(a.JobID) + len(a.TaskGroup) + 2 9096 if l <= 3 || l <= prefix { 9097 return uint(0) 9098 } 9099 9100 strNum := a.Name[prefix : len(a.Name)-1] 9101 num, _ := strconv.Atoi(strNum) 9102 return uint(num) 9103 } 9104 9105 // Copy provides a copy of the allocation and deep copies the job 9106 func (a *Allocation) Copy() *Allocation { 9107 return a.copyImpl(true) 9108 } 9109 9110 // CopySkipJob provides a copy of the allocation but doesn't deep copy the job 9111 func (a *Allocation) CopySkipJob() *Allocation { 9112 return a.copyImpl(false) 9113 } 9114 9115 // Canonicalize Allocation to ensure fields are initialized to the expectations 9116 // of this version of Nomad. Should be called when restoring persisted 9117 // Allocations or receiving Allocations from Nomad agents potentially on an 9118 // older version of Nomad. 9119 func (a *Allocation) Canonicalize() { 9120 if a.AllocatedResources == nil && a.TaskResources != nil { 9121 ar := AllocatedResources{} 9122 9123 tasks := make(map[string]*AllocatedTaskResources, len(a.TaskResources)) 9124 for name, tr := range a.TaskResources { 9125 atr := AllocatedTaskResources{} 9126 atr.Cpu.CpuShares = int64(tr.CPU) 9127 atr.Memory.MemoryMB = int64(tr.MemoryMB) 9128 atr.Networks = tr.Networks.Copy() 9129 9130 tasks[name] = &atr 9131 } 9132 ar.Tasks = tasks 9133 9134 if a.SharedResources != nil { 9135 ar.Shared.DiskMB = int64(a.SharedResources.DiskMB) 9136 ar.Shared.Networks = a.SharedResources.Networks.Copy() 9137 } 9138 9139 a.AllocatedResources = &ar 9140 } 9141 9142 a.Job.Canonicalize() 9143 } 9144 9145 func (a *Allocation) copyImpl(job bool) *Allocation { 9146 if a == nil { 9147 return nil 9148 } 9149 na := new(Allocation) 9150 *na = *a 9151 9152 if job { 9153 na.Job = na.Job.Copy() 9154 } 9155 9156 na.AllocatedResources = na.AllocatedResources.Copy() 9157 na.Resources = na.Resources.Copy() 9158 na.SharedResources = na.SharedResources.Copy() 9159 9160 if a.TaskResources != nil { 9161 tr := make(map[string]*Resources, len(na.TaskResources)) 9162 for task, resource := range na.TaskResources { 9163 tr[task] = resource.Copy() 9164 } 9165 na.TaskResources = tr 9166 } 9167 9168 na.Metrics = na.Metrics.Copy() 9169 na.DeploymentStatus = na.DeploymentStatus.Copy() 9170 9171 if a.TaskStates != nil { 9172 ts := make(map[string]*TaskState, len(na.TaskStates)) 9173 for task, state := range na.TaskStates { 9174 ts[task] = state.Copy() 9175 } 9176 na.TaskStates = ts 9177 } 9178 9179 na.RescheduleTracker = a.RescheduleTracker.Copy() 9180 na.PreemptedAllocations = helper.CopySliceString(a.PreemptedAllocations) 9181 return na 9182 } 9183 9184 // TerminalStatus returns if the desired or actual status is terminal and 9185 // will no longer transition. 9186 func (a *Allocation) TerminalStatus() bool { 9187 // First check the desired state and if that isn't terminal, check client 9188 // state. 9189 return a.ServerTerminalStatus() || a.ClientTerminalStatus() 9190 } 9191 9192 // ServerTerminalStatus returns true if the desired state of the allocation is terminal 9193 func (a *Allocation) ServerTerminalStatus() bool { 9194 switch a.DesiredStatus { 9195 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 9196 return true 9197 default: 9198 return false 9199 } 9200 } 9201 9202 // ClientTerminalStatus returns if the client status is terminal and will no longer transition 9203 func (a *Allocation) ClientTerminalStatus() bool { 9204 switch a.ClientStatus { 9205 case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost: 9206 return true 9207 default: 9208 return false 9209 } 9210 } 9211 9212 // ShouldReschedule returns if the allocation is eligible to be rescheduled according 9213 // to its status and ReschedulePolicy given its failure time 9214 func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 9215 // First check the desired state 9216 switch a.DesiredStatus { 9217 case AllocDesiredStatusStop, AllocDesiredStatusEvict: 9218 return false 9219 default: 9220 } 9221 switch a.ClientStatus { 9222 case AllocClientStatusFailed: 9223 return a.RescheduleEligible(reschedulePolicy, failTime) 9224 default: 9225 return false 9226 } 9227 } 9228 9229 // RescheduleEligible returns if the allocation is eligible to be rescheduled according 9230 // to its ReschedulePolicy and the current state of its reschedule trackers 9231 func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool { 9232 if reschedulePolicy == nil { 9233 return false 9234 } 9235 attempts := reschedulePolicy.Attempts 9236 interval := reschedulePolicy.Interval 9237 enabled := attempts > 0 || reschedulePolicy.Unlimited 9238 if !enabled { 9239 return false 9240 } 9241 if reschedulePolicy.Unlimited { 9242 return true 9243 } 9244 // Early return true if there are no attempts yet and the number of allowed attempts is > 0 9245 if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 { 9246 return true 9247 } 9248 attempted := 0 9249 for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- { 9250 lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime 9251 timeDiff := failTime.UTC().UnixNano() - lastAttempt 9252 if timeDiff < interval.Nanoseconds() { 9253 attempted += 1 9254 } 9255 } 9256 return attempted < attempts 9257 } 9258 9259 // LastEventTime is the time of the last task event in the allocation. 9260 // It is used to determine allocation failure time. If the FinishedAt field 9261 // is not set, the alloc's modify time is used 9262 func (a *Allocation) LastEventTime() time.Time { 9263 var lastEventTime time.Time 9264 if a.TaskStates != nil { 9265 for _, s := range a.TaskStates { 9266 if lastEventTime.IsZero() || s.FinishedAt.After(lastEventTime) { 9267 lastEventTime = s.FinishedAt 9268 } 9269 } 9270 } 9271 9272 if lastEventTime.IsZero() { 9273 return time.Unix(0, a.ModifyTime).UTC() 9274 } 9275 return lastEventTime 9276 } 9277 9278 // ReschedulePolicy returns the reschedule policy based on the task group 9279 func (a *Allocation) ReschedulePolicy() *ReschedulePolicy { 9280 tg := a.Job.LookupTaskGroup(a.TaskGroup) 9281 if tg == nil { 9282 return nil 9283 } 9284 return tg.ReschedulePolicy 9285 } 9286 9287 // MigrateStrategy returns the migrate strategy based on the task group 9288 func (a *Allocation) MigrateStrategy() *MigrateStrategy { 9289 tg := a.Job.LookupTaskGroup(a.TaskGroup) 9290 if tg == nil { 9291 return nil 9292 } 9293 return tg.Migrate 9294 } 9295 9296 // NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled, 9297 // and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules 9298 func (a *Allocation) NextRescheduleTime() (time.Time, bool) { 9299 failTime := a.LastEventTime() 9300 reschedulePolicy := a.ReschedulePolicy() 9301 if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil { 9302 return time.Time{}, false 9303 } 9304 9305 nextDelay := a.NextDelay() 9306 nextRescheduleTime := failTime.Add(nextDelay) 9307 rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil) 9308 if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil { 9309 // Check for eligibility based on the interval if max attempts is set 9310 attempted := 0 9311 for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- { 9312 lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime 9313 timeDiff := failTime.UTC().UnixNano() - lastAttempt 9314 if timeDiff < reschedulePolicy.Interval.Nanoseconds() { 9315 attempted += 1 9316 } 9317 } 9318 rescheduleEligible = attempted < reschedulePolicy.Attempts && nextDelay < reschedulePolicy.Interval 9319 } 9320 return nextRescheduleTime, rescheduleEligible 9321 } 9322 9323 // ShouldClientStop tests an alloc for StopAfterClientDisconnect configuration 9324 func (a *Allocation) ShouldClientStop() bool { 9325 tg := a.Job.LookupTaskGroup(a.TaskGroup) 9326 if tg == nil || 9327 tg.StopAfterClientDisconnect == nil || 9328 *tg.StopAfterClientDisconnect == 0*time.Nanosecond { 9329 return false 9330 } 9331 return true 9332 } 9333 9334 // WaitClientStop uses the reschedule delay mechanism to block rescheduling until 9335 // StopAfterClientDisconnect's block interval passes 9336 func (a *Allocation) WaitClientStop() time.Time { 9337 tg := a.Job.LookupTaskGroup(a.TaskGroup) 9338 9339 // An alloc can only be marked lost once, so use the first lost transition 9340 var t time.Time 9341 for _, s := range a.AllocStates { 9342 if s.Field == AllocStateFieldClientStatus && 9343 s.Value == AllocClientStatusLost { 9344 t = s.Time 9345 break 9346 } 9347 } 9348 9349 // On the first pass, the alloc hasn't been marked lost yet, and so we start 9350 // counting from now 9351 if t.IsZero() { 9352 t = time.Now().UTC() 9353 } 9354 9355 // Find the max kill timeout 9356 kill := DefaultKillTimeout 9357 for _, t := range tg.Tasks { 9358 if t.KillTimeout > kill { 9359 kill = t.KillTimeout 9360 } 9361 } 9362 9363 return t.Add(*tg.StopAfterClientDisconnect + kill) 9364 } 9365 9366 // NextDelay returns a duration after which the allocation can be rescheduled. 9367 // It is calculated according to the delay function and previous reschedule attempts. 9368 func (a *Allocation) NextDelay() time.Duration { 9369 policy := a.ReschedulePolicy() 9370 // Can be nil if the task group was updated to remove its reschedule policy 9371 if policy == nil { 9372 return 0 9373 } 9374 delayDur := policy.Delay 9375 if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 { 9376 return delayDur 9377 } 9378 events := a.RescheduleTracker.Events 9379 switch policy.DelayFunction { 9380 case "exponential": 9381 delayDur = a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1].Delay * 2 9382 case "fibonacci": 9383 if len(events) >= 2 { 9384 fibN1Delay := events[len(events)-1].Delay 9385 fibN2Delay := events[len(events)-2].Delay 9386 // Handle reset of delay ceiling which should cause 9387 // a new series to start 9388 if fibN2Delay == policy.MaxDelay && fibN1Delay == policy.Delay { 9389 delayDur = fibN1Delay 9390 } else { 9391 delayDur = fibN1Delay + fibN2Delay 9392 } 9393 } 9394 default: 9395 return delayDur 9396 } 9397 if policy.MaxDelay > 0 && delayDur > policy.MaxDelay { 9398 delayDur = policy.MaxDelay 9399 // check if delay needs to be reset 9400 9401 lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] 9402 timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime 9403 if timeDiff > delayDur.Nanoseconds() { 9404 delayDur = policy.Delay 9405 } 9406 9407 } 9408 9409 return delayDur 9410 } 9411 9412 // Terminated returns if the allocation is in a terminal state on a client. 9413 func (a *Allocation) Terminated() bool { 9414 if a.ClientStatus == AllocClientStatusFailed || 9415 a.ClientStatus == AllocClientStatusComplete || 9416 a.ClientStatus == AllocClientStatusLost { 9417 return true 9418 } 9419 return false 9420 } 9421 9422 // SetStopped updates the allocation in place to a DesiredStatus stop, with the ClientStatus 9423 func (a *Allocation) SetStop(clientStatus, clientDesc string) { 9424 a.DesiredStatus = AllocDesiredStatusStop 9425 a.ClientStatus = clientStatus 9426 a.ClientDescription = clientDesc 9427 a.AppendState(AllocStateFieldClientStatus, clientStatus) 9428 } 9429 9430 // AppendState creates and appends an AllocState entry recording the time of the state 9431 // transition. Used to mark the transition to lost 9432 func (a *Allocation) AppendState(field AllocStateField, value string) { 9433 a.AllocStates = append(a.AllocStates, &AllocState{ 9434 Field: field, 9435 Value: value, 9436 Time: time.Now().UTC(), 9437 }) 9438 } 9439 9440 // RanSuccessfully returns whether the client has ran the allocation and all 9441 // tasks finished successfully. Critically this function returns whether the 9442 // allocation has ran to completion and not just that the alloc has converged to 9443 // its desired state. That is to say that a batch allocation must have finished 9444 // with exit code 0 on all task groups. This doesn't really have meaning on a 9445 // non-batch allocation because a service and system allocation should not 9446 // finish. 9447 func (a *Allocation) RanSuccessfully() bool { 9448 // Handle the case the client hasn't started the allocation. 9449 if len(a.TaskStates) == 0 { 9450 return false 9451 } 9452 9453 // Check to see if all the tasks finished successfully in the allocation 9454 allSuccess := true 9455 for _, state := range a.TaskStates { 9456 allSuccess = allSuccess && state.Successful() 9457 } 9458 9459 return allSuccess 9460 } 9461 9462 // ShouldMigrate returns if the allocation needs data migration 9463 func (a *Allocation) ShouldMigrate() bool { 9464 if a.PreviousAllocation == "" { 9465 return false 9466 } 9467 9468 if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict { 9469 return false 9470 } 9471 9472 tg := a.Job.LookupTaskGroup(a.TaskGroup) 9473 9474 // if the task group is nil or the ephemeral disk block isn't present then 9475 // we won't migrate 9476 if tg == nil || tg.EphemeralDisk == nil { 9477 return false 9478 } 9479 9480 // We won't migrate any data is the user hasn't enabled migration or the 9481 // disk is not marked as sticky 9482 if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky { 9483 return false 9484 } 9485 9486 return true 9487 } 9488 9489 // SetEventDisplayMessage populates the display message if its not already set, 9490 // a temporary fix to handle old allocations that don't have it. 9491 // This method will be removed in a future release. 9492 func (a *Allocation) SetEventDisplayMessages() { 9493 setDisplayMsg(a.TaskStates) 9494 } 9495 9496 // COMPAT(0.11): Remove in 0.11 9497 // ComparableResources returns the resources on the allocation 9498 // handling upgrade paths. After 0.11 calls to this should be replaced with: 9499 // alloc.AllocatedResources.Comparable() 9500 func (a *Allocation) ComparableResources() *ComparableResources { 9501 // ALloc already has 0.9+ behavior 9502 if a.AllocatedResources != nil { 9503 return a.AllocatedResources.Comparable() 9504 } 9505 9506 var resources *Resources 9507 if a.Resources != nil { 9508 resources = a.Resources 9509 } else if a.TaskResources != nil { 9510 resources = new(Resources) 9511 resources.Add(a.SharedResources) 9512 for _, taskResource := range a.TaskResources { 9513 resources.Add(taskResource) 9514 } 9515 } 9516 9517 // Upgrade path 9518 return &ComparableResources{ 9519 Flattened: AllocatedTaskResources{ 9520 Cpu: AllocatedCpuResources{ 9521 CpuShares: int64(resources.CPU), 9522 }, 9523 Memory: AllocatedMemoryResources{ 9524 MemoryMB: int64(resources.MemoryMB), 9525 }, 9526 Networks: resources.Networks, 9527 }, 9528 Shared: AllocatedSharedResources{ 9529 DiskMB: int64(resources.DiskMB), 9530 }, 9531 } 9532 } 9533 9534 // LookupTask by name from the Allocation. Returns nil if the Job is not set, the 9535 // TaskGroup does not exist, or the task name cannot be found. 9536 func (a *Allocation) LookupTask(name string) *Task { 9537 if a.Job == nil { 9538 return nil 9539 } 9540 9541 tg := a.Job.LookupTaskGroup(a.TaskGroup) 9542 if tg == nil { 9543 return nil 9544 } 9545 9546 return tg.LookupTask(name) 9547 } 9548 9549 // Stub returns a list stub for the allocation 9550 func (a *Allocation) Stub(fields *AllocStubFields) *AllocListStub { 9551 s := &AllocListStub{ 9552 ID: a.ID, 9553 EvalID: a.EvalID, 9554 Name: a.Name, 9555 Namespace: a.Namespace, 9556 NodeID: a.NodeID, 9557 NodeName: a.NodeName, 9558 JobID: a.JobID, 9559 JobType: a.Job.Type, 9560 JobVersion: a.Job.Version, 9561 TaskGroup: a.TaskGroup, 9562 DesiredStatus: a.DesiredStatus, 9563 DesiredDescription: a.DesiredDescription, 9564 ClientStatus: a.ClientStatus, 9565 ClientDescription: a.ClientDescription, 9566 DesiredTransition: a.DesiredTransition, 9567 TaskStates: a.TaskStates, 9568 DeploymentStatus: a.DeploymentStatus, 9569 FollowupEvalID: a.FollowupEvalID, 9570 RescheduleTracker: a.RescheduleTracker, 9571 PreemptedAllocations: a.PreemptedAllocations, 9572 PreemptedByAllocation: a.PreemptedByAllocation, 9573 CreateIndex: a.CreateIndex, 9574 ModifyIndex: a.ModifyIndex, 9575 CreateTime: a.CreateTime, 9576 ModifyTime: a.ModifyTime, 9577 } 9578 9579 if fields != nil { 9580 if fields.Resources { 9581 s.AllocatedResources = a.AllocatedResources 9582 } 9583 if !fields.TaskStates { 9584 s.TaskStates = nil 9585 } 9586 } 9587 9588 return s 9589 } 9590 9591 // AllocationDiff converts an Allocation type to an AllocationDiff type 9592 // If at any time, modification are made to AllocationDiff so that an 9593 // Allocation can no longer be safely converted to AllocationDiff, 9594 // this method should be changed accordingly. 9595 func (a *Allocation) AllocationDiff() *AllocationDiff { 9596 return (*AllocationDiff)(a) 9597 } 9598 9599 // AllocationDiff is another named type for Allocation (to use the same fields), 9600 // which is used to represent the delta for an Allocation. If you need a method 9601 // defined on the al 9602 type AllocationDiff Allocation 9603 9604 // AllocListStub is used to return a subset of alloc information 9605 type AllocListStub struct { 9606 ID string 9607 EvalID string 9608 Name string 9609 Namespace string 9610 NodeID string 9611 NodeName string 9612 JobID string 9613 JobType string 9614 JobVersion uint64 9615 TaskGroup string 9616 AllocatedResources *AllocatedResources `json:",omitempty"` 9617 DesiredStatus string 9618 DesiredDescription string 9619 ClientStatus string 9620 ClientDescription string 9621 DesiredTransition DesiredTransition 9622 TaskStates map[string]*TaskState 9623 DeploymentStatus *AllocDeploymentStatus 9624 FollowupEvalID string 9625 RescheduleTracker *RescheduleTracker 9626 PreemptedAllocations []string 9627 PreemptedByAllocation string 9628 CreateIndex uint64 9629 ModifyIndex uint64 9630 CreateTime int64 9631 ModifyTime int64 9632 } 9633 9634 // SetEventDisplayMessage populates the display message if its not already set, 9635 // a temporary fix to handle old allocations that don't have it. 9636 // This method will be removed in a future release. 9637 func (a *AllocListStub) SetEventDisplayMessages() { 9638 setDisplayMsg(a.TaskStates) 9639 } 9640 9641 func setDisplayMsg(taskStates map[string]*TaskState) { 9642 for _, taskState := range taskStates { 9643 for _, event := range taskState.Events { 9644 event.PopulateEventDisplayMessage() 9645 } 9646 } 9647 } 9648 9649 // AllocStubFields defines which fields are included in the AllocListStub. 9650 type AllocStubFields struct { 9651 // Resources includes resource-related fields if true. 9652 Resources bool 9653 9654 // TaskStates removes the TaskStates field if false (default is to 9655 // include TaskStates). 9656 TaskStates bool 9657 } 9658 9659 func NewAllocStubFields() *AllocStubFields { 9660 return &AllocStubFields{ 9661 // Maintain backward compatibility by retaining task states by 9662 // default. 9663 TaskStates: true, 9664 } 9665 } 9666 9667 // AllocMetric is used to track various metrics while attempting 9668 // to make an allocation. These are used to debug a job, or to better 9669 // understand the pressure within the system. 9670 type AllocMetric struct { 9671 // NodesEvaluated is the number of nodes that were evaluated 9672 NodesEvaluated int 9673 9674 // NodesFiltered is the number of nodes filtered due to a constraint 9675 NodesFiltered int 9676 9677 // NodesAvailable is the number of nodes available for evaluation per DC. 9678 NodesAvailable map[string]int 9679 9680 // ClassFiltered is the number of nodes filtered by class 9681 ClassFiltered map[string]int 9682 9683 // ConstraintFiltered is the number of failures caused by constraint 9684 ConstraintFiltered map[string]int 9685 9686 // NodesExhausted is the number of nodes skipped due to being 9687 // exhausted of at least one resource 9688 NodesExhausted int 9689 9690 // ClassExhausted is the number of nodes exhausted by class 9691 ClassExhausted map[string]int 9692 9693 // DimensionExhausted provides the count by dimension or reason 9694 DimensionExhausted map[string]int 9695 9696 // QuotaExhausted provides the exhausted dimensions 9697 QuotaExhausted []string 9698 9699 // Scores is the scores of the final few nodes remaining 9700 // for placement. The top score is typically selected. 9701 // Deprecated: Replaced by ScoreMetaData in Nomad 0.9 9702 Scores map[string]float64 9703 9704 // ScoreMetaData is a slice of top scoring nodes displayed in the CLI 9705 ScoreMetaData []*NodeScoreMeta 9706 9707 // nodeScoreMeta is used to keep scores for a single node id. It is cleared out after 9708 // we receive normalized score during the last step of the scoring stack. 9709 nodeScoreMeta *NodeScoreMeta 9710 9711 // topScores is used to maintain a heap of the top K nodes with 9712 // the highest normalized score 9713 topScores *kheap.ScoreHeap 9714 9715 // AllocationTime is a measure of how long the allocation 9716 // attempt took. This can affect performance and SLAs. 9717 AllocationTime time.Duration 9718 9719 // CoalescedFailures indicates the number of other 9720 // allocations that were coalesced into this failed allocation. 9721 // This is to prevent creating many failed allocations for a 9722 // single task group. 9723 CoalescedFailures int 9724 } 9725 9726 func (a *AllocMetric) Copy() *AllocMetric { 9727 if a == nil { 9728 return nil 9729 } 9730 na := new(AllocMetric) 9731 *na = *a 9732 na.NodesAvailable = helper.CopyMapStringInt(na.NodesAvailable) 9733 na.ClassFiltered = helper.CopyMapStringInt(na.ClassFiltered) 9734 na.ConstraintFiltered = helper.CopyMapStringInt(na.ConstraintFiltered) 9735 na.ClassExhausted = helper.CopyMapStringInt(na.ClassExhausted) 9736 na.DimensionExhausted = helper.CopyMapStringInt(na.DimensionExhausted) 9737 na.QuotaExhausted = helper.CopySliceString(na.QuotaExhausted) 9738 na.Scores = helper.CopyMapStringFloat64(na.Scores) 9739 na.ScoreMetaData = CopySliceNodeScoreMeta(na.ScoreMetaData) 9740 return na 9741 } 9742 9743 func (a *AllocMetric) EvaluateNode() { 9744 a.NodesEvaluated += 1 9745 } 9746 9747 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 9748 a.NodesFiltered += 1 9749 if node != nil && node.NodeClass != "" { 9750 if a.ClassFiltered == nil { 9751 a.ClassFiltered = make(map[string]int) 9752 } 9753 a.ClassFiltered[node.NodeClass] += 1 9754 } 9755 if constraint != "" { 9756 if a.ConstraintFiltered == nil { 9757 a.ConstraintFiltered = make(map[string]int) 9758 } 9759 a.ConstraintFiltered[constraint] += 1 9760 } 9761 } 9762 9763 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 9764 a.NodesExhausted += 1 9765 if node != nil && node.NodeClass != "" { 9766 if a.ClassExhausted == nil { 9767 a.ClassExhausted = make(map[string]int) 9768 } 9769 a.ClassExhausted[node.NodeClass] += 1 9770 } 9771 if dimension != "" { 9772 if a.DimensionExhausted == nil { 9773 a.DimensionExhausted = make(map[string]int) 9774 } 9775 a.DimensionExhausted[dimension] += 1 9776 } 9777 } 9778 9779 func (a *AllocMetric) ExhaustQuota(dimensions []string) { 9780 if a.QuotaExhausted == nil { 9781 a.QuotaExhausted = make([]string, 0, len(dimensions)) 9782 } 9783 9784 a.QuotaExhausted = append(a.QuotaExhausted, dimensions...) 9785 } 9786 9787 // ScoreNode is used to gather top K scoring nodes in a heap 9788 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 9789 // Create nodeScoreMeta lazily if its the first time or if its a new node 9790 if a.nodeScoreMeta == nil || a.nodeScoreMeta.NodeID != node.ID { 9791 a.nodeScoreMeta = &NodeScoreMeta{ 9792 NodeID: node.ID, 9793 Scores: make(map[string]float64), 9794 } 9795 } 9796 if name == NormScorerName { 9797 a.nodeScoreMeta.NormScore = score 9798 // Once we have the normalized score we can push to the heap 9799 // that tracks top K by normalized score 9800 9801 // Create the heap if its not there already 9802 if a.topScores == nil { 9803 a.topScores = kheap.NewScoreHeap(MaxRetainedNodeScores) 9804 } 9805 heap.Push(a.topScores, a.nodeScoreMeta) 9806 9807 // Clear out this entry because its now in the heap 9808 a.nodeScoreMeta = nil 9809 } else { 9810 a.nodeScoreMeta.Scores[name] = score 9811 } 9812 } 9813 9814 // PopulateScoreMetaData populates a map of scorer to scoring metadata 9815 // The map is populated by popping elements from a heap of top K scores 9816 // maintained per scorer 9817 func (a *AllocMetric) PopulateScoreMetaData() { 9818 if a.topScores == nil { 9819 return 9820 } 9821 9822 if a.ScoreMetaData == nil { 9823 a.ScoreMetaData = make([]*NodeScoreMeta, a.topScores.Len()) 9824 } 9825 heapItems := a.topScores.GetItemsReverse() 9826 for i, item := range heapItems { 9827 a.ScoreMetaData[i] = item.(*NodeScoreMeta) 9828 } 9829 } 9830 9831 // NodeScoreMeta captures scoring meta data derived from 9832 // different scoring factors. 9833 type NodeScoreMeta struct { 9834 NodeID string 9835 Scores map[string]float64 9836 NormScore float64 9837 } 9838 9839 func (s *NodeScoreMeta) Copy() *NodeScoreMeta { 9840 if s == nil { 9841 return nil 9842 } 9843 ns := new(NodeScoreMeta) 9844 *ns = *s 9845 return ns 9846 } 9847 9848 func (s *NodeScoreMeta) String() string { 9849 return fmt.Sprintf("%s %f %v", s.NodeID, s.NormScore, s.Scores) 9850 } 9851 9852 func (s *NodeScoreMeta) Score() float64 { 9853 return s.NormScore 9854 } 9855 9856 func (s *NodeScoreMeta) Data() interface{} { 9857 return s 9858 } 9859 9860 // AllocNetworkStatus captures the status of an allocation's network during runtime. 9861 // Depending on the network mode, an allocation's address may need to be known to other 9862 // systems in Nomad such as service registration. 9863 type AllocNetworkStatus struct { 9864 InterfaceName string 9865 Address string 9866 DNS *DNSConfig 9867 } 9868 9869 func (a *AllocNetworkStatus) Copy() *AllocNetworkStatus { 9870 if a == nil { 9871 return nil 9872 } 9873 return &AllocNetworkStatus{ 9874 InterfaceName: a.InterfaceName, 9875 Address: a.Address, 9876 DNS: a.DNS.Copy(), 9877 } 9878 } 9879 9880 // AllocDeploymentStatus captures the status of the allocation as part of the 9881 // deployment. This can include things like if the allocation has been marked as 9882 // healthy. 9883 type AllocDeploymentStatus struct { 9884 // Healthy marks whether the allocation has been marked healthy or unhealthy 9885 // as part of a deployment. It can be unset if it has neither been marked 9886 // healthy or unhealthy. 9887 Healthy *bool 9888 9889 // Timestamp is the time at which the health status was set. 9890 Timestamp time.Time 9891 9892 // Canary marks whether the allocation is a canary or not. A canary that has 9893 // been promoted will have this field set to false. 9894 Canary bool 9895 9896 // ModifyIndex is the raft index in which the deployment status was last 9897 // changed. 9898 ModifyIndex uint64 9899 } 9900 9901 // HasHealth returns true if the allocation has its health set. 9902 func (a *AllocDeploymentStatus) HasHealth() bool { 9903 return a != nil && a.Healthy != nil 9904 } 9905 9906 // IsHealthy returns if the allocation is marked as healthy as part of a 9907 // deployment 9908 func (a *AllocDeploymentStatus) IsHealthy() bool { 9909 if a == nil { 9910 return false 9911 } 9912 9913 return a.Healthy != nil && *a.Healthy 9914 } 9915 9916 // IsUnhealthy returns if the allocation is marked as unhealthy as part of a 9917 // deployment 9918 func (a *AllocDeploymentStatus) IsUnhealthy() bool { 9919 if a == nil { 9920 return false 9921 } 9922 9923 return a.Healthy != nil && !*a.Healthy 9924 } 9925 9926 // IsCanary returns if the allocation is marked as a canary 9927 func (a *AllocDeploymentStatus) IsCanary() bool { 9928 if a == nil { 9929 return false 9930 } 9931 9932 return a.Canary 9933 } 9934 9935 func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus { 9936 if a == nil { 9937 return nil 9938 } 9939 9940 c := new(AllocDeploymentStatus) 9941 *c = *a 9942 9943 if a.Healthy != nil { 9944 c.Healthy = helper.BoolToPtr(*a.Healthy) 9945 } 9946 9947 return c 9948 } 9949 9950 const ( 9951 EvalStatusBlocked = "blocked" 9952 EvalStatusPending = "pending" 9953 EvalStatusComplete = "complete" 9954 EvalStatusFailed = "failed" 9955 EvalStatusCancelled = "canceled" 9956 ) 9957 9958 const ( 9959 EvalTriggerJobRegister = "job-register" 9960 EvalTriggerJobDeregister = "job-deregister" 9961 EvalTriggerPeriodicJob = "periodic-job" 9962 EvalTriggerNodeDrain = "node-drain" 9963 EvalTriggerNodeUpdate = "node-update" 9964 EvalTriggerAllocStop = "alloc-stop" 9965 EvalTriggerScheduled = "scheduled" 9966 EvalTriggerRollingUpdate = "rolling-update" 9967 EvalTriggerDeploymentWatcher = "deployment-watcher" 9968 EvalTriggerFailedFollowUp = "failed-follow-up" 9969 EvalTriggerMaxPlans = "max-plan-attempts" 9970 EvalTriggerRetryFailedAlloc = "alloc-failure" 9971 EvalTriggerQueuedAllocs = "queued-allocs" 9972 EvalTriggerPreemption = "preemption" 9973 EvalTriggerScaling = "job-scaling" 9974 ) 9975 9976 const ( 9977 // CoreJobEvalGC is used for the garbage collection of evaluations 9978 // and allocations. We periodically scan evaluations in a terminal state, 9979 // in which all the corresponding allocations are also terminal. We 9980 // delete these out of the system to bound the state. 9981 CoreJobEvalGC = "eval-gc" 9982 9983 // CoreJobNodeGC is used for the garbage collection of failed nodes. 9984 // We periodically scan nodes in a terminal state, and if they have no 9985 // corresponding allocations we delete these out of the system. 9986 CoreJobNodeGC = "node-gc" 9987 9988 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 9989 // periodically scan garbage collectible jobs and check if both their 9990 // evaluations and allocations are terminal. If so, we delete these out of 9991 // the system. 9992 CoreJobJobGC = "job-gc" 9993 9994 // CoreJobDeploymentGC is used for the garbage collection of eligible 9995 // deployments. We periodically scan garbage collectible deployments and 9996 // check if they are terminal. If so, we delete these out of the system. 9997 CoreJobDeploymentGC = "deployment-gc" 9998 9999 // CoreJobCSIVolumeClaimGC is use for the garbage collection of CSI 10000 // volume claims. We periodically scan volumes to see if no allocs are 10001 // claiming them. If so, we unclaim the volume. 10002 CoreJobCSIVolumeClaimGC = "csi-volume-claim-gc" 10003 10004 // CoreJobCSIPluginGC is use for the garbage collection of CSI plugins. 10005 // We periodically scan plugins to see if they have no associated volumes 10006 // or allocs running them. If so, we delete the plugin. 10007 CoreJobCSIPluginGC = "csi-plugin-gc" 10008 10009 // CoreJobForceGC is used to force garbage collection of all GCable objects. 10010 CoreJobForceGC = "force-gc" 10011 ) 10012 10013 // Evaluation is used anytime we need to apply business logic as a result 10014 // of a change to our desired state (job specification) or the emergent state 10015 // (registered nodes). When the inputs change, we need to "evaluate" them, 10016 // potentially taking action (allocation of work) or doing nothing if the state 10017 // of the world does not require it. 10018 type Evaluation struct { 10019 // msgpack omit empty fields during serialization 10020 _struct bool `codec:",omitempty"` // nolint: structcheck 10021 10022 // ID is a randomly generated UUID used for this evaluation. This 10023 // is assigned upon the creation of the evaluation. 10024 ID string 10025 10026 // Namespace is the namespace the evaluation is created in 10027 Namespace string 10028 10029 // Priority is used to control scheduling importance and if this job 10030 // can preempt other jobs. 10031 Priority int 10032 10033 // Type is used to control which schedulers are available to handle 10034 // this evaluation. 10035 Type string 10036 10037 // TriggeredBy is used to give some insight into why this Eval 10038 // was created. (Job change, node failure, alloc failure, etc). 10039 TriggeredBy string 10040 10041 // JobID is the job this evaluation is scoped to. Evaluations cannot 10042 // be run in parallel for a given JobID, so we serialize on this. 10043 JobID string 10044 10045 // JobModifyIndex is the modify index of the job at the time 10046 // the evaluation was created 10047 JobModifyIndex uint64 10048 10049 // NodeID is the node that was affected triggering the evaluation. 10050 NodeID string 10051 10052 // NodeModifyIndex is the modify index of the node at the time 10053 // the evaluation was created 10054 NodeModifyIndex uint64 10055 10056 // DeploymentID is the ID of the deployment that triggered the evaluation. 10057 DeploymentID string 10058 10059 // Status of the evaluation 10060 Status string 10061 10062 // StatusDescription is meant to provide more human useful information 10063 StatusDescription string 10064 10065 // Wait is a minimum wait time for running the eval. This is used to 10066 // support a rolling upgrade in versions prior to 0.7.0 10067 // Deprecated 10068 Wait time.Duration 10069 10070 // WaitUntil is the time when this eval should be run. This is used to 10071 // supported delayed rescheduling of failed allocations 10072 WaitUntil time.Time 10073 10074 // NextEval is the evaluation ID for the eval created to do a followup. 10075 // This is used to support rolling upgrades and failed-follow-up evals, where 10076 // we need a chain of evaluations. 10077 NextEval string 10078 10079 // PreviousEval is the evaluation ID for the eval creating this one to do a followup. 10080 // This is used to support rolling upgrades and failed-follow-up evals, where 10081 // we need a chain of evaluations. 10082 PreviousEval string 10083 10084 // BlockedEval is the evaluation ID for a created blocked eval. A 10085 // blocked eval will be created if all allocations could not be placed due 10086 // to constraints or lacking resources. 10087 BlockedEval string 10088 10089 // FailedTGAllocs are task groups which have allocations that could not be 10090 // made, but the metrics are persisted so that the user can use the feedback 10091 // to determine the cause. 10092 FailedTGAllocs map[string]*AllocMetric 10093 10094 // ClassEligibility tracks computed node classes that have been explicitly 10095 // marked as eligible or ineligible. 10096 ClassEligibility map[string]bool 10097 10098 // QuotaLimitReached marks whether a quota limit was reached for the 10099 // evaluation. 10100 QuotaLimitReached string 10101 10102 // EscapedComputedClass marks whether the job has constraints that are not 10103 // captured by computed node classes. 10104 EscapedComputedClass bool 10105 10106 // AnnotatePlan triggers the scheduler to provide additional annotations 10107 // during the evaluation. This should not be set during normal operations. 10108 AnnotatePlan bool 10109 10110 // QueuedAllocations is the number of unplaced allocations at the time the 10111 // evaluation was processed. The map is keyed by Task Group names. 10112 QueuedAllocations map[string]int 10113 10114 // LeaderACL provides the ACL token to when issuing RPCs back to the 10115 // leader. This will be a valid management token as long as the leader is 10116 // active. This should not ever be exposed via the API. 10117 LeaderACL string 10118 10119 // SnapshotIndex is the Raft index of the snapshot used to process the 10120 // evaluation. The index will either be set when it has gone through the 10121 // scheduler or if a blocked evaluation is being created. The index is set 10122 // in this case so we can determine if an early unblocking is required since 10123 // capacity has changed since the evaluation was created. This can result in 10124 // the SnapshotIndex being less than the CreateIndex. 10125 SnapshotIndex uint64 10126 10127 // Raft Indexes 10128 CreateIndex uint64 10129 ModifyIndex uint64 10130 10131 CreateTime int64 10132 ModifyTime int64 10133 } 10134 10135 // TerminalStatus returns if the current status is terminal and 10136 // will no longer transition. 10137 func (e *Evaluation) TerminalStatus() bool { 10138 switch e.Status { 10139 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 10140 return true 10141 default: 10142 return false 10143 } 10144 } 10145 10146 func (e *Evaluation) GoString() string { 10147 return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace) 10148 } 10149 10150 func (e *Evaluation) Copy() *Evaluation { 10151 if e == nil { 10152 return nil 10153 } 10154 ne := new(Evaluation) 10155 *ne = *e 10156 10157 // Copy ClassEligibility 10158 if e.ClassEligibility != nil { 10159 classes := make(map[string]bool, len(e.ClassEligibility)) 10160 for class, elig := range e.ClassEligibility { 10161 classes[class] = elig 10162 } 10163 ne.ClassEligibility = classes 10164 } 10165 10166 // Copy FailedTGAllocs 10167 if e.FailedTGAllocs != nil { 10168 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 10169 for tg, metric := range e.FailedTGAllocs { 10170 failedTGs[tg] = metric.Copy() 10171 } 10172 ne.FailedTGAllocs = failedTGs 10173 } 10174 10175 // Copy queued allocations 10176 if e.QueuedAllocations != nil { 10177 queuedAllocations := make(map[string]int, len(e.QueuedAllocations)) 10178 for tg, num := range e.QueuedAllocations { 10179 queuedAllocations[tg] = num 10180 } 10181 ne.QueuedAllocations = queuedAllocations 10182 } 10183 10184 return ne 10185 } 10186 10187 // ShouldEnqueue checks if a given evaluation should be enqueued into the 10188 // eval_broker 10189 func (e *Evaluation) ShouldEnqueue() bool { 10190 switch e.Status { 10191 case EvalStatusPending: 10192 return true 10193 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 10194 return false 10195 default: 10196 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 10197 } 10198 } 10199 10200 // ShouldBlock checks if a given evaluation should be entered into the blocked 10201 // eval tracker. 10202 func (e *Evaluation) ShouldBlock() bool { 10203 switch e.Status { 10204 case EvalStatusBlocked: 10205 return true 10206 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 10207 return false 10208 default: 10209 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 10210 } 10211 } 10212 10213 // MakePlan is used to make a plan from the given evaluation 10214 // for a given Job 10215 func (e *Evaluation) MakePlan(j *Job) *Plan { 10216 p := &Plan{ 10217 EvalID: e.ID, 10218 Priority: e.Priority, 10219 Job: j, 10220 NodeUpdate: make(map[string][]*Allocation), 10221 NodeAllocation: make(map[string][]*Allocation), 10222 NodePreemptions: make(map[string][]*Allocation), 10223 } 10224 if j != nil { 10225 p.AllAtOnce = j.AllAtOnce 10226 } 10227 return p 10228 } 10229 10230 // NextRollingEval creates an evaluation to followup this eval for rolling updates 10231 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 10232 now := time.Now().UTC().UnixNano() 10233 return &Evaluation{ 10234 ID: uuid.Generate(), 10235 Namespace: e.Namespace, 10236 Priority: e.Priority, 10237 Type: e.Type, 10238 TriggeredBy: EvalTriggerRollingUpdate, 10239 JobID: e.JobID, 10240 JobModifyIndex: e.JobModifyIndex, 10241 Status: EvalStatusPending, 10242 Wait: wait, 10243 PreviousEval: e.ID, 10244 CreateTime: now, 10245 ModifyTime: now, 10246 } 10247 } 10248 10249 // CreateBlockedEval creates a blocked evaluation to followup this eval to place any 10250 // failed allocations. It takes the classes marked explicitly eligible or 10251 // ineligible, whether the job has escaped computed node classes and whether the 10252 // quota limit was reached. 10253 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, 10254 escaped bool, quotaReached string) *Evaluation { 10255 now := time.Now().UTC().UnixNano() 10256 return &Evaluation{ 10257 ID: uuid.Generate(), 10258 Namespace: e.Namespace, 10259 Priority: e.Priority, 10260 Type: e.Type, 10261 TriggeredBy: EvalTriggerQueuedAllocs, 10262 JobID: e.JobID, 10263 JobModifyIndex: e.JobModifyIndex, 10264 Status: EvalStatusBlocked, 10265 PreviousEval: e.ID, 10266 ClassEligibility: classEligibility, 10267 EscapedComputedClass: escaped, 10268 QuotaLimitReached: quotaReached, 10269 CreateTime: now, 10270 ModifyTime: now, 10271 } 10272 } 10273 10274 // CreateFailedFollowUpEval creates a follow up evaluation when the current one 10275 // has been marked as failed because it has hit the delivery limit and will not 10276 // be retried by the eval_broker. Callers should copy the created eval's ID to 10277 // into the old eval's NextEval field. 10278 func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation { 10279 now := time.Now().UTC().UnixNano() 10280 return &Evaluation{ 10281 ID: uuid.Generate(), 10282 Namespace: e.Namespace, 10283 Priority: e.Priority, 10284 Type: e.Type, 10285 TriggeredBy: EvalTriggerFailedFollowUp, 10286 JobID: e.JobID, 10287 JobModifyIndex: e.JobModifyIndex, 10288 Status: EvalStatusPending, 10289 Wait: wait, 10290 PreviousEval: e.ID, 10291 CreateTime: now, 10292 ModifyTime: now, 10293 } 10294 } 10295 10296 // UpdateModifyTime takes into account that clocks on different servers may be 10297 // slightly out of sync. Even in case of a leader change, this method will 10298 // guarantee that ModifyTime will always be after CreateTime. 10299 func (e *Evaluation) UpdateModifyTime() { 10300 now := time.Now().UTC().UnixNano() 10301 if now <= e.CreateTime { 10302 e.ModifyTime = e.CreateTime + 1 10303 } else { 10304 e.ModifyTime = now 10305 } 10306 } 10307 10308 // Plan is used to submit a commit plan for task allocations. These 10309 // are submitted to the leader which verifies that resources have 10310 // not been overcommitted before admitting the plan. 10311 type Plan struct { 10312 // msgpack omit empty fields during serialization 10313 _struct bool `codec:",omitempty"` // nolint: structcheck 10314 10315 // EvalID is the evaluation ID this plan is associated with 10316 EvalID string 10317 10318 // EvalToken is used to prevent a split-brain processing of 10319 // an evaluation. There should only be a single scheduler running 10320 // an Eval at a time, but this could be violated after a leadership 10321 // transition. This unique token is used to reject plans that are 10322 // being submitted from a different leader. 10323 EvalToken string 10324 10325 // Priority is the priority of the upstream job 10326 Priority int 10327 10328 // AllAtOnce is used to control if incremental scheduling of task groups 10329 // is allowed or if we must do a gang scheduling of the entire job. 10330 // If this is false, a plan may be partially applied. Otherwise, the 10331 // entire plan must be able to make progress. 10332 AllAtOnce bool 10333 10334 // Job is the parent job of all the allocations in the Plan. 10335 // Since a Plan only involves a single Job, we can reduce the size 10336 // of the plan by only including it once. 10337 Job *Job 10338 10339 // NodeUpdate contains all the allocations for each node. For each node, 10340 // this is a list of the allocations to update to either stop or evict. 10341 NodeUpdate map[string][]*Allocation 10342 10343 // NodeAllocation contains all the allocations for each node. 10344 // The evicts must be considered prior to the allocations. 10345 NodeAllocation map[string][]*Allocation 10346 10347 // Annotations contains annotations by the scheduler to be used by operators 10348 // to understand the decisions made by the scheduler. 10349 Annotations *PlanAnnotations 10350 10351 // Deployment is the deployment created or updated by the scheduler that 10352 // should be applied by the planner. 10353 Deployment *Deployment 10354 10355 // DeploymentUpdates is a set of status updates to apply to the given 10356 // deployments. This allows the scheduler to cancel any unneeded deployment 10357 // because the job is stopped or the update block is removed. 10358 DeploymentUpdates []*DeploymentStatusUpdate 10359 10360 // NodePreemptions is a map from node id to a set of allocations from other 10361 // lower priority jobs that are preempted. Preempted allocations are marked 10362 // as evicted. 10363 NodePreemptions map[string][]*Allocation 10364 10365 // SnapshotIndex is the Raft index of the snapshot used to create the 10366 // Plan. The leader will wait to evaluate the plan until its StateStore 10367 // has reached at least this index. 10368 SnapshotIndex uint64 10369 } 10370 10371 // AppendStoppedAlloc marks an allocation to be stopped. The clientStatus of the 10372 // allocation may be optionally set by passing in a non-empty value. 10373 func (p *Plan) AppendStoppedAlloc(alloc *Allocation, desiredDesc, clientStatus, followupEvalID string) { 10374 newAlloc := new(Allocation) 10375 *newAlloc = *alloc 10376 10377 // If the job is not set in the plan we are deregistering a job so we 10378 // extract the job from the allocation. 10379 if p.Job == nil && newAlloc.Job != nil { 10380 p.Job = newAlloc.Job 10381 } 10382 10383 // Normalize the job 10384 newAlloc.Job = nil 10385 10386 // Strip the resources as it can be rebuilt. 10387 newAlloc.Resources = nil 10388 10389 newAlloc.DesiredStatus = AllocDesiredStatusStop 10390 newAlloc.DesiredDescription = desiredDesc 10391 10392 if clientStatus != "" { 10393 newAlloc.ClientStatus = clientStatus 10394 } 10395 10396 newAlloc.AppendState(AllocStateFieldClientStatus, clientStatus) 10397 10398 if followupEvalID != "" { 10399 newAlloc.FollowupEvalID = followupEvalID 10400 } 10401 10402 node := alloc.NodeID 10403 existing := p.NodeUpdate[node] 10404 p.NodeUpdate[node] = append(existing, newAlloc) 10405 } 10406 10407 // AppendPreemptedAlloc is used to append an allocation that's being preempted to the plan. 10408 // To minimize the size of the plan, this only sets a minimal set of fields in the allocation 10409 func (p *Plan) AppendPreemptedAlloc(alloc *Allocation, preemptingAllocID string) { 10410 newAlloc := &Allocation{} 10411 newAlloc.ID = alloc.ID 10412 newAlloc.JobID = alloc.JobID 10413 newAlloc.Namespace = alloc.Namespace 10414 newAlloc.DesiredStatus = AllocDesiredStatusEvict 10415 newAlloc.PreemptedByAllocation = preemptingAllocID 10416 10417 desiredDesc := fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocID) 10418 newAlloc.DesiredDescription = desiredDesc 10419 10420 // TaskResources are needed by the plan applier to check if allocations fit 10421 // after removing preempted allocations 10422 if alloc.AllocatedResources != nil { 10423 newAlloc.AllocatedResources = alloc.AllocatedResources 10424 } else { 10425 // COMPAT Remove in version 0.11 10426 newAlloc.TaskResources = alloc.TaskResources 10427 newAlloc.SharedResources = alloc.SharedResources 10428 } 10429 10430 // Append this alloc to slice for this node 10431 node := alloc.NodeID 10432 existing := p.NodePreemptions[node] 10433 p.NodePreemptions[node] = append(existing, newAlloc) 10434 } 10435 10436 func (p *Plan) PopUpdate(alloc *Allocation) { 10437 existing := p.NodeUpdate[alloc.NodeID] 10438 n := len(existing) 10439 if n > 0 && existing[n-1].ID == alloc.ID { 10440 existing = existing[:n-1] 10441 if len(existing) > 0 { 10442 p.NodeUpdate[alloc.NodeID] = existing 10443 } else { 10444 delete(p.NodeUpdate, alloc.NodeID) 10445 } 10446 } 10447 } 10448 10449 // AppendAlloc appends the alloc to the plan allocations. 10450 // Uses the passed job if explicitly passed, otherwise 10451 // it is assumed the alloc will use the plan Job version. 10452 func (p *Plan) AppendAlloc(alloc *Allocation, job *Job) { 10453 node := alloc.NodeID 10454 existing := p.NodeAllocation[node] 10455 10456 alloc.Job = job 10457 10458 p.NodeAllocation[node] = append(existing, alloc) 10459 } 10460 10461 // IsNoOp checks if this plan would do nothing 10462 func (p *Plan) IsNoOp() bool { 10463 return len(p.NodeUpdate) == 0 && 10464 len(p.NodeAllocation) == 0 && 10465 p.Deployment == nil && 10466 len(p.DeploymentUpdates) == 0 10467 } 10468 10469 // NormalizeAllocations normalizes allocations to remove fields that can 10470 // be fetched from the MemDB instead of sending over the wire 10471 func (p *Plan) NormalizeAllocations() { 10472 for _, allocs := range p.NodeUpdate { 10473 for i, alloc := range allocs { 10474 allocs[i] = &Allocation{ 10475 ID: alloc.ID, 10476 DesiredDescription: alloc.DesiredDescription, 10477 ClientStatus: alloc.ClientStatus, 10478 FollowupEvalID: alloc.FollowupEvalID, 10479 } 10480 } 10481 } 10482 10483 for _, allocs := range p.NodePreemptions { 10484 for i, alloc := range allocs { 10485 allocs[i] = &Allocation{ 10486 ID: alloc.ID, 10487 PreemptedByAllocation: alloc.PreemptedByAllocation, 10488 } 10489 } 10490 } 10491 } 10492 10493 // PlanResult is the result of a plan submitted to the leader. 10494 type PlanResult struct { 10495 // NodeUpdate contains all the updates that were committed. 10496 NodeUpdate map[string][]*Allocation 10497 10498 // NodeAllocation contains all the allocations that were committed. 10499 NodeAllocation map[string][]*Allocation 10500 10501 // Deployment is the deployment that was committed. 10502 Deployment *Deployment 10503 10504 // DeploymentUpdates is the set of deployment updates that were committed. 10505 DeploymentUpdates []*DeploymentStatusUpdate 10506 10507 // NodePreemptions is a map from node id to a set of allocations from other 10508 // lower priority jobs that are preempted. Preempted allocations are marked 10509 // as stopped. 10510 NodePreemptions map[string][]*Allocation 10511 10512 // RefreshIndex is the index the worker should refresh state up to. 10513 // This allows all evictions and allocations to be materialized. 10514 // If any allocations were rejected due to stale data (node state, 10515 // over committed) this can be used to force a worker refresh. 10516 RefreshIndex uint64 10517 10518 // AllocIndex is the Raft index in which the evictions and 10519 // allocations took place. This is used for the write index. 10520 AllocIndex uint64 10521 } 10522 10523 // IsNoOp checks if this plan result would do nothing 10524 func (p *PlanResult) IsNoOp() bool { 10525 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 && 10526 len(p.DeploymentUpdates) == 0 && p.Deployment == nil 10527 } 10528 10529 // FullCommit is used to check if all the allocations in a plan 10530 // were committed as part of the result. Returns if there was 10531 // a match, and the number of expected and actual allocations. 10532 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) { 10533 expected := 0 10534 actual := 0 10535 for name, allocList := range plan.NodeAllocation { 10536 didAlloc := p.NodeAllocation[name] 10537 expected += len(allocList) 10538 actual += len(didAlloc) 10539 } 10540 return actual == expected, expected, actual 10541 } 10542 10543 // PlanAnnotations holds annotations made by the scheduler to give further debug 10544 // information to operators. 10545 type PlanAnnotations struct { 10546 // DesiredTGUpdates is the set of desired updates per task group. 10547 DesiredTGUpdates map[string]*DesiredUpdates 10548 10549 // PreemptedAllocs is the set of allocations to be preempted to make the placement successful. 10550 PreemptedAllocs []*AllocListStub 10551 } 10552 10553 // DesiredUpdates is the set of changes the scheduler would like to make given 10554 // sufficient resources and cluster capacity. 10555 type DesiredUpdates struct { 10556 Ignore uint64 10557 Place uint64 10558 Migrate uint64 10559 Stop uint64 10560 InPlaceUpdate uint64 10561 DestructiveUpdate uint64 10562 Canary uint64 10563 Preemptions uint64 10564 } 10565 10566 func (d *DesiredUpdates) GoString() string { 10567 return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)", 10568 d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary) 10569 } 10570 10571 // msgpackHandle is a shared handle for encoding/decoding of structs 10572 var MsgpackHandle = func() *codec.MsgpackHandle { 10573 h := &codec.MsgpackHandle{} 10574 h.RawToString = true 10575 10576 // maintain binary format from time prior to upgrading latest ugorji 10577 h.BasicHandle.TimeNotBuiltin = true 10578 10579 // Sets the default type for decoding a map into a nil interface{}. 10580 // This is necessary in particular because we store the driver configs as a 10581 // nil interface{}. 10582 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 10583 10584 // only review struct codec tags 10585 h.TypeInfos = codec.NewTypeInfos([]string{"codec"}) 10586 10587 return h 10588 }() 10589 10590 var ( 10591 // JsonHandle and JsonHandlePretty are the codec handles to JSON encode 10592 // structs. The pretty handle will add indents for easier human consumption. 10593 JsonHandle = &codec.JsonHandle{ 10594 HTMLCharsAsIs: true, 10595 } 10596 JsonHandlePretty = &codec.JsonHandle{ 10597 HTMLCharsAsIs: true, 10598 Indent: 4, 10599 } 10600 ) 10601 10602 // Decode is used to decode a MsgPack encoded object 10603 func Decode(buf []byte, out interface{}) error { 10604 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out) 10605 } 10606 10607 // Encode is used to encode a MsgPack object with type prefix 10608 func Encode(t MessageType, msg interface{}) ([]byte, error) { 10609 var buf bytes.Buffer 10610 buf.WriteByte(uint8(t)) 10611 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg) 10612 return buf.Bytes(), err 10613 } 10614 10615 // KeyringResponse is a unified key response and can be used for install, 10616 // remove, use, as well as listing key queries. 10617 type KeyringResponse struct { 10618 Messages map[string]string 10619 Keys map[string]int 10620 NumNodes int 10621 } 10622 10623 // KeyringRequest is request objects for serf key operations. 10624 type KeyringRequest struct { 10625 Key string 10626 } 10627 10628 // RecoverableError wraps an error and marks whether it is recoverable and could 10629 // be retried or it is fatal. 10630 type RecoverableError struct { 10631 Err string 10632 Recoverable bool 10633 } 10634 10635 // NewRecoverableError is used to wrap an error and mark it as recoverable or 10636 // not. 10637 func NewRecoverableError(e error, recoverable bool) error { 10638 if e == nil { 10639 return nil 10640 } 10641 10642 return &RecoverableError{ 10643 Err: e.Error(), 10644 Recoverable: recoverable, 10645 } 10646 } 10647 10648 // WrapRecoverable wraps an existing error in a new RecoverableError with a new 10649 // message. If the error was recoverable before the returned error is as well; 10650 // otherwise it is unrecoverable. 10651 func WrapRecoverable(msg string, err error) error { 10652 return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)} 10653 } 10654 10655 func (r *RecoverableError) Error() string { 10656 return r.Err 10657 } 10658 10659 func (r *RecoverableError) IsRecoverable() bool { 10660 return r.Recoverable 10661 } 10662 10663 func (r *RecoverableError) IsUnrecoverable() bool { 10664 return !r.Recoverable 10665 } 10666 10667 // Recoverable is an interface for errors to implement to indicate whether or 10668 // not they are fatal or recoverable. 10669 type Recoverable interface { 10670 error 10671 IsRecoverable() bool 10672 } 10673 10674 // IsRecoverable returns true if error is a RecoverableError with 10675 // Recoverable=true. Otherwise false is returned. 10676 func IsRecoverable(e error) bool { 10677 if re, ok := e.(Recoverable); ok { 10678 return re.IsRecoverable() 10679 } 10680 return false 10681 } 10682 10683 // WrappedServerError wraps an error and satisfies 10684 // both the Recoverable and the ServerSideError interfaces 10685 type WrappedServerError struct { 10686 Err error 10687 } 10688 10689 // NewWrappedServerError is used to create a wrapped server side error 10690 func NewWrappedServerError(e error) error { 10691 return &WrappedServerError{ 10692 Err: e, 10693 } 10694 } 10695 10696 func (r *WrappedServerError) IsRecoverable() bool { 10697 return IsRecoverable(r.Err) 10698 } 10699 10700 func (r *WrappedServerError) Error() string { 10701 return r.Err.Error() 10702 } 10703 10704 func (r *WrappedServerError) IsServerSide() bool { 10705 return true 10706 } 10707 10708 // ServerSideError is an interface for errors to implement to indicate 10709 // errors occurring after the request makes it to a server 10710 type ServerSideError interface { 10711 error 10712 IsServerSide() bool 10713 } 10714 10715 // IsServerSide returns true if error is a wrapped 10716 // server side error 10717 func IsServerSide(e error) bool { 10718 if se, ok := e.(ServerSideError); ok { 10719 return se.IsServerSide() 10720 } 10721 return false 10722 } 10723 10724 // ACLPolicy is used to represent an ACL policy 10725 type ACLPolicy struct { 10726 Name string // Unique name 10727 Description string // Human readable 10728 Rules string // HCL or JSON format 10729 RulesJSON *acl.Policy // Generated from Rules on read 10730 Hash []byte 10731 CreateIndex uint64 10732 ModifyIndex uint64 10733 } 10734 10735 // SetHash is used to compute and set the hash of the ACL policy 10736 func (c *ACLPolicy) SetHash() []byte { 10737 // Initialize a 256bit Blake2 hash (32 bytes) 10738 hash, err := blake2b.New256(nil) 10739 if err != nil { 10740 panic(err) 10741 } 10742 10743 // Write all the user set fields 10744 _, _ = hash.Write([]byte(c.Name)) 10745 _, _ = hash.Write([]byte(c.Description)) 10746 _, _ = hash.Write([]byte(c.Rules)) 10747 10748 // Finalize the hash 10749 hashVal := hash.Sum(nil) 10750 10751 // Set and return the hash 10752 c.Hash = hashVal 10753 return hashVal 10754 } 10755 10756 func (a *ACLPolicy) Stub() *ACLPolicyListStub { 10757 return &ACLPolicyListStub{ 10758 Name: a.Name, 10759 Description: a.Description, 10760 Hash: a.Hash, 10761 CreateIndex: a.CreateIndex, 10762 ModifyIndex: a.ModifyIndex, 10763 } 10764 } 10765 10766 func (a *ACLPolicy) Validate() error { 10767 var mErr multierror.Error 10768 if !validPolicyName.MatchString(a.Name) { 10769 err := fmt.Errorf("invalid name '%s'", a.Name) 10770 mErr.Errors = append(mErr.Errors, err) 10771 } 10772 if _, err := acl.Parse(a.Rules); err != nil { 10773 err = fmt.Errorf("failed to parse rules: %v", err) 10774 mErr.Errors = append(mErr.Errors, err) 10775 } 10776 if len(a.Description) > maxPolicyDescriptionLength { 10777 err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength) 10778 mErr.Errors = append(mErr.Errors, err) 10779 } 10780 return mErr.ErrorOrNil() 10781 } 10782 10783 // ACLPolicyListStub is used to for listing ACL policies 10784 type ACLPolicyListStub struct { 10785 Name string 10786 Description string 10787 Hash []byte 10788 CreateIndex uint64 10789 ModifyIndex uint64 10790 } 10791 10792 // ACLPolicyListRequest is used to request a list of policies 10793 type ACLPolicyListRequest struct { 10794 QueryOptions 10795 } 10796 10797 // ACLPolicySpecificRequest is used to query a specific policy 10798 type ACLPolicySpecificRequest struct { 10799 Name string 10800 QueryOptions 10801 } 10802 10803 // ACLPolicySetRequest is used to query a set of policies 10804 type ACLPolicySetRequest struct { 10805 Names []string 10806 QueryOptions 10807 } 10808 10809 // ACLPolicyListResponse is used for a list request 10810 type ACLPolicyListResponse struct { 10811 Policies []*ACLPolicyListStub 10812 QueryMeta 10813 } 10814 10815 // SingleACLPolicyResponse is used to return a single policy 10816 type SingleACLPolicyResponse struct { 10817 Policy *ACLPolicy 10818 QueryMeta 10819 } 10820 10821 // ACLPolicySetResponse is used to return a set of policies 10822 type ACLPolicySetResponse struct { 10823 Policies map[string]*ACLPolicy 10824 QueryMeta 10825 } 10826 10827 // ACLPolicyDeleteRequest is used to delete a set of policies 10828 type ACLPolicyDeleteRequest struct { 10829 Names []string 10830 WriteRequest 10831 } 10832 10833 // ACLPolicyUpsertRequest is used to upsert a set of policies 10834 type ACLPolicyUpsertRequest struct { 10835 Policies []*ACLPolicy 10836 WriteRequest 10837 } 10838 10839 // ACLToken represents a client token which is used to Authenticate 10840 type ACLToken struct { 10841 AccessorID string // Public Accessor ID (UUID) 10842 SecretID string // Secret ID, private (UUID) 10843 Name string // Human friendly name 10844 Type string // Client or Management 10845 Policies []string // Policies this token ties to 10846 Global bool // Global or Region local 10847 Hash []byte 10848 CreateTime time.Time // Time of creation 10849 CreateIndex uint64 10850 ModifyIndex uint64 10851 } 10852 10853 func (a *ACLToken) Copy() *ACLToken { 10854 c := new(ACLToken) 10855 *c = *a 10856 10857 c.Policies = make([]string, len(a.Policies)) 10858 copy(c.Policies, a.Policies) 10859 c.Hash = make([]byte, len(a.Hash)) 10860 copy(c.Hash, a.Hash) 10861 10862 return c 10863 } 10864 10865 var ( 10866 // AnonymousACLToken is used no SecretID is provided, and the 10867 // request is made anonymously. 10868 AnonymousACLToken = &ACLToken{ 10869 AccessorID: "anonymous", 10870 Name: "Anonymous Token", 10871 Type: ACLClientToken, 10872 Policies: []string{"anonymous"}, 10873 Global: false, 10874 } 10875 ) 10876 10877 type ACLTokenListStub struct { 10878 AccessorID string 10879 Name string 10880 Type string 10881 Policies []string 10882 Global bool 10883 Hash []byte 10884 CreateTime time.Time 10885 CreateIndex uint64 10886 ModifyIndex uint64 10887 } 10888 10889 // SetHash is used to compute and set the hash of the ACL token 10890 func (a *ACLToken) SetHash() []byte { 10891 // Initialize a 256bit Blake2 hash (32 bytes) 10892 hash, err := blake2b.New256(nil) 10893 if err != nil { 10894 panic(err) 10895 } 10896 10897 // Write all the user set fields 10898 _, _ = hash.Write([]byte(a.Name)) 10899 _, _ = hash.Write([]byte(a.Type)) 10900 for _, policyName := range a.Policies { 10901 _, _ = hash.Write([]byte(policyName)) 10902 } 10903 if a.Global { 10904 _, _ = hash.Write([]byte("global")) 10905 } else { 10906 _, _ = hash.Write([]byte("local")) 10907 } 10908 10909 // Finalize the hash 10910 hashVal := hash.Sum(nil) 10911 10912 // Set and return the hash 10913 a.Hash = hashVal 10914 return hashVal 10915 } 10916 10917 func (a *ACLToken) Stub() *ACLTokenListStub { 10918 return &ACLTokenListStub{ 10919 AccessorID: a.AccessorID, 10920 Name: a.Name, 10921 Type: a.Type, 10922 Policies: a.Policies, 10923 Global: a.Global, 10924 Hash: a.Hash, 10925 CreateTime: a.CreateTime, 10926 CreateIndex: a.CreateIndex, 10927 ModifyIndex: a.ModifyIndex, 10928 } 10929 } 10930 10931 // Validate is used to sanity check a token 10932 func (a *ACLToken) Validate() error { 10933 var mErr multierror.Error 10934 if len(a.Name) > maxTokenNameLength { 10935 mErr.Errors = append(mErr.Errors, fmt.Errorf("token name too long")) 10936 } 10937 switch a.Type { 10938 case ACLClientToken: 10939 if len(a.Policies) == 0 { 10940 mErr.Errors = append(mErr.Errors, fmt.Errorf("client token missing policies")) 10941 } 10942 case ACLManagementToken: 10943 if len(a.Policies) != 0 { 10944 mErr.Errors = append(mErr.Errors, fmt.Errorf("management token cannot be associated with policies")) 10945 } 10946 default: 10947 mErr.Errors = append(mErr.Errors, fmt.Errorf("token type must be client or management")) 10948 } 10949 return mErr.ErrorOrNil() 10950 } 10951 10952 // PolicySubset checks if a given set of policies is a subset of the token 10953 func (a *ACLToken) PolicySubset(policies []string) bool { 10954 // Hot-path the management tokens, superset of all policies. 10955 if a.Type == ACLManagementToken { 10956 return true 10957 } 10958 associatedPolicies := make(map[string]struct{}, len(a.Policies)) 10959 for _, policy := range a.Policies { 10960 associatedPolicies[policy] = struct{}{} 10961 } 10962 for _, policy := range policies { 10963 if _, ok := associatedPolicies[policy]; !ok { 10964 return false 10965 } 10966 } 10967 return true 10968 } 10969 10970 // ACLTokenListRequest is used to request a list of tokens 10971 type ACLTokenListRequest struct { 10972 GlobalOnly bool 10973 QueryOptions 10974 } 10975 10976 // ACLTokenSpecificRequest is used to query a specific token 10977 type ACLTokenSpecificRequest struct { 10978 AccessorID string 10979 QueryOptions 10980 } 10981 10982 // ACLTokenSetRequest is used to query a set of tokens 10983 type ACLTokenSetRequest struct { 10984 AccessorIDS []string 10985 QueryOptions 10986 } 10987 10988 // ACLTokenListResponse is used for a list request 10989 type ACLTokenListResponse struct { 10990 Tokens []*ACLTokenListStub 10991 QueryMeta 10992 } 10993 10994 // SingleACLTokenResponse is used to return a single token 10995 type SingleACLTokenResponse struct { 10996 Token *ACLToken 10997 QueryMeta 10998 } 10999 11000 // ACLTokenSetResponse is used to return a set of token 11001 type ACLTokenSetResponse struct { 11002 Tokens map[string]*ACLToken // Keyed by Accessor ID 11003 QueryMeta 11004 } 11005 11006 // ResolveACLTokenRequest is used to resolve a specific token 11007 type ResolveACLTokenRequest struct { 11008 SecretID string 11009 QueryOptions 11010 } 11011 11012 // ResolveACLTokenResponse is used to resolve a single token 11013 type ResolveACLTokenResponse struct { 11014 Token *ACLToken 11015 QueryMeta 11016 } 11017 11018 // ACLTokenDeleteRequest is used to delete a set of tokens 11019 type ACLTokenDeleteRequest struct { 11020 AccessorIDs []string 11021 WriteRequest 11022 } 11023 11024 // ACLTokenBootstrapRequest is used to bootstrap ACLs 11025 type ACLTokenBootstrapRequest struct { 11026 Token *ACLToken // Not client specifiable 11027 ResetIndex uint64 // Reset index is used to clear the bootstrap token 11028 WriteRequest 11029 } 11030 11031 // ACLTokenUpsertRequest is used to upsert a set of tokens 11032 type ACLTokenUpsertRequest struct { 11033 Tokens []*ACLToken 11034 WriteRequest 11035 } 11036 11037 // ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest 11038 type ACLTokenUpsertResponse struct { 11039 Tokens []*ACLToken 11040 WriteMeta 11041 } 11042 11043 // RpcError is used for serializing errors with a potential error code 11044 type RpcError struct { 11045 Message string 11046 Code *int64 11047 } 11048 11049 func NewRpcError(err error, code *int64) *RpcError { 11050 return &RpcError{ 11051 Message: err.Error(), 11052 Code: code, 11053 } 11054 } 11055 11056 func (r *RpcError) Error() string { 11057 return r.Message 11058 }