github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/nomad/structs/structs.go (about) 1 package structs 2 3 import ( 4 "bytes" 5 "container/heap" 6 "crypto/md5" 7 "crypto/sha1" 8 "crypto/sha256" 9 "crypto/sha512" 10 "encoding/base32" 11 "encoding/base64" 12 "encoding/hex" 13 "errors" 14 "fmt" 15 "hash" 16 "hash/crc32" 17 "math" 18 "net" 19 "os" 20 "reflect" 21 "regexp" 22 "sort" 23 "strconv" 24 "strings" 25 "time" 26 27 jwt "github.com/golang-jwt/jwt/v4" 28 "github.com/hashicorp/cronexpr" 29 "github.com/hashicorp/go-msgpack/codec" 30 "github.com/hashicorp/go-multierror" 31 "github.com/hashicorp/go-set" 32 "github.com/hashicorp/go-version" 33 "github.com/hashicorp/nomad/acl" 34 "github.com/hashicorp/nomad/command/agent/host" 35 "github.com/hashicorp/nomad/command/agent/pprof" 36 "github.com/hashicorp/nomad/helper" 37 "github.com/hashicorp/nomad/helper/args" 38 "github.com/hashicorp/nomad/helper/constraints/semver" 39 "github.com/hashicorp/nomad/helper/escapingfs" 40 "github.com/hashicorp/nomad/helper/pointer" 41 "github.com/hashicorp/nomad/helper/uuid" 42 "github.com/hashicorp/nomad/lib/cpuset" 43 "github.com/hashicorp/nomad/lib/kheap" 44 psstructs "github.com/hashicorp/nomad/plugins/shared/structs" 45 "github.com/miekg/dns" 46 "github.com/mitchellh/copystructure" 47 "golang.org/x/crypto/blake2b" 48 "golang.org/x/exp/maps" 49 "golang.org/x/exp/slices" 50 ) 51 52 var ( 53 // validPolicyName is used to validate a policy name 54 validPolicyName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$") 55 56 // b32 is a lowercase base32 encoding for use in URL friendly service hashes 57 b32 = base32.NewEncoding(strings.ToLower("abcdefghijklmnopqrstuvwxyz234567")) 58 ) 59 60 type MessageType uint8 61 62 // note: new raft message types need to be added to the end of this 63 // list of contents 64 const ( 65 NodeRegisterRequestType MessageType = 0 66 NodeDeregisterRequestType MessageType = 1 67 NodeUpdateStatusRequestType MessageType = 2 68 NodeUpdateDrainRequestType MessageType 
= 3 69 JobRegisterRequestType MessageType = 4 70 JobDeregisterRequestType MessageType = 5 71 EvalUpdateRequestType MessageType = 6 72 EvalDeleteRequestType MessageType = 7 73 AllocUpdateRequestType MessageType = 8 74 AllocClientUpdateRequestType MessageType = 9 75 ReconcileJobSummariesRequestType MessageType = 10 76 VaultAccessorRegisterRequestType MessageType = 11 77 VaultAccessorDeregisterRequestType MessageType = 12 78 ApplyPlanResultsRequestType MessageType = 13 79 DeploymentStatusUpdateRequestType MessageType = 14 80 DeploymentPromoteRequestType MessageType = 15 81 DeploymentAllocHealthRequestType MessageType = 16 82 DeploymentDeleteRequestType MessageType = 17 83 JobStabilityRequestType MessageType = 18 84 ACLPolicyUpsertRequestType MessageType = 19 85 ACLPolicyDeleteRequestType MessageType = 20 86 ACLTokenUpsertRequestType MessageType = 21 87 ACLTokenDeleteRequestType MessageType = 22 88 ACLTokenBootstrapRequestType MessageType = 23 89 AutopilotRequestType MessageType = 24 90 UpsertNodeEventsType MessageType = 25 91 JobBatchDeregisterRequestType MessageType = 26 92 AllocUpdateDesiredTransitionRequestType MessageType = 27 93 NodeUpdateEligibilityRequestType MessageType = 28 94 BatchNodeUpdateDrainRequestType MessageType = 29 95 SchedulerConfigRequestType MessageType = 30 96 NodeBatchDeregisterRequestType MessageType = 31 97 ClusterMetadataRequestType MessageType = 32 98 ServiceIdentityAccessorRegisterRequestType MessageType = 33 99 ServiceIdentityAccessorDeregisterRequestType MessageType = 34 100 CSIVolumeRegisterRequestType MessageType = 35 101 CSIVolumeDeregisterRequestType MessageType = 36 102 CSIVolumeClaimRequestType MessageType = 37 103 ScalingEventRegisterRequestType MessageType = 38 104 CSIVolumeClaimBatchRequestType MessageType = 39 105 CSIPluginDeleteRequestType MessageType = 40 106 EventSinkUpsertRequestType MessageType = 41 107 EventSinkDeleteRequestType MessageType = 42 108 BatchEventSinkUpdateProgressType MessageType = 43 109 
OneTimeTokenUpsertRequestType MessageType = 44 110 OneTimeTokenDeleteRequestType MessageType = 45 111 OneTimeTokenExpireRequestType MessageType = 46 112 ServiceRegistrationUpsertRequestType MessageType = 47 113 ServiceRegistrationDeleteByIDRequestType MessageType = 48 114 ServiceRegistrationDeleteByNodeIDRequestType MessageType = 49 115 VarApplyStateRequestType MessageType = 50 116 RootKeyMetaUpsertRequestType MessageType = 51 117 RootKeyMetaDeleteRequestType MessageType = 52 118 ACLRolesUpsertRequestType MessageType = 53 119 ACLRolesDeleteByIDRequestType MessageType = 54 120 ACLAuthMethodsUpsertRequestType MessageType = 55 121 ACLAuthMethodsDeleteRequestType MessageType = 56 122 ACLBindingRulesUpsertRequestType MessageType = 57 123 ACLBindingRulesDeleteRequestType MessageType = 58 124 125 // Namespace types were moved from enterprise and therefore start at 64 126 NamespaceUpsertRequestType MessageType = 64 127 NamespaceDeleteRequestType MessageType = 65 128 ) 129 130 const ( 131 // IgnoreUnknownTypeFlag is set along with a MessageType 132 // to indicate that the message type can be safely ignored 133 // if it is not recognized. This is for future proofing, so 134 // that new commands can be added in a way that won't cause 135 // old servers to crash when the FSM attempts to process them. 
136 IgnoreUnknownTypeFlag MessageType = 128 137 138 // MsgTypeTestSetup is used during testing when calling state store 139 // methods directly that require an FSM MessageType 140 MsgTypeTestSetup MessageType = IgnoreUnknownTypeFlag 141 142 GetterModeAny = "any" 143 GetterModeFile = "file" 144 GetterModeDir = "dir" 145 146 // maxPolicyDescriptionLength limits a policy description length 147 maxPolicyDescriptionLength = 256 148 149 // maxTokenNameLength limits a ACL token name length 150 maxTokenNameLength = 256 151 152 // ACLClientToken and ACLManagementToken are the only types of tokens 153 ACLClientToken = "client" 154 ACLManagementToken = "management" 155 156 // DefaultNamespace is the default namespace. 157 DefaultNamespace = "default" 158 DefaultNamespaceDescription = "Default shared namespace" 159 160 // AllNamespacesSentinel is the value used as a namespace RPC value 161 // to indicate that endpoints must search in all namespaces 162 // 163 // Also defined in acl/acl.go to avoid circular dependencies. If modified 164 // it should be updated there as well. 165 AllNamespacesSentinel = "*" 166 167 // maxNamespaceDescriptionLength limits a namespace description length 168 maxNamespaceDescriptionLength = 256 169 170 // JitterFraction is a the limit to the amount of jitter we apply 171 // to a user specified MaxQueryTime. We divide the specified time by 172 // the fraction. So 16 == 6.25% limit of jitter. This jitter is also 173 // applied to RPCHoldTimeout. 
174 JitterFraction = 16 175 176 // MaxRetainedNodeEvents is the maximum number of node events that will be 177 // retained for a single node 178 MaxRetainedNodeEvents = 10 179 180 // MaxRetainedNodeScores is the number of top scoring nodes for which we 181 // retain scoring metadata 182 MaxRetainedNodeScores = 5 183 184 // Normalized scorer name 185 NormScorerName = "normalized-score" 186 187 // MaxBlockingRPCQueryTime is used to bound the limit of a blocking query 188 MaxBlockingRPCQueryTime = 300 * time.Second 189 190 // DefaultBlockingRPCQueryTime is the amount of time we block waiting for a change 191 // if no time is specified. Previously we would wait the MaxBlockingRPCQueryTime. 192 DefaultBlockingRPCQueryTime = 300 * time.Second 193 ) 194 195 var ( 196 // validNamespaceName is used to validate a namespace name 197 validNamespaceName = regexp.MustCompile("^[a-zA-Z0-9-]{1,128}$") 198 ) 199 200 // NamespacedID is a tuple of an ID and a namespace 201 type NamespacedID struct { 202 ID string 203 Namespace string 204 } 205 206 // NewNamespacedID returns a new namespaced ID given the ID and namespace 207 func NewNamespacedID(id, ns string) NamespacedID { 208 return NamespacedID{ 209 ID: id, 210 Namespace: ns, 211 } 212 } 213 214 func (n NamespacedID) String() string { 215 return fmt.Sprintf("<ns: %q, id: %q>", n.Namespace, n.ID) 216 } 217 218 // RPCInfo is used to describe common information about query 219 type RPCInfo interface { 220 RequestRegion() string 221 IsRead() bool 222 AllowStaleRead() bool 223 IsForwarded() bool 224 SetForwarded() 225 TimeToBlock() time.Duration 226 // SetTimeToBlock sets how long this request can block. The requested time may not be possible, 227 // so Callers should readback TimeToBlock. E.g. you cannot set time to block at all on WriteRequests 228 // and it cannot exceed MaxBlockingRPCQueryTime 229 SetTimeToBlock(t time.Duration) 230 } 231 232 // InternalRpcInfo allows adding internal RPC metadata to an RPC. 
This struct 233 // should NOT be replicated in the API package as it is internal only. 234 type InternalRpcInfo struct { 235 // Forwarded marks whether the RPC has been forwarded. 236 Forwarded bool 237 } 238 239 // IsForwarded returns whether the RPC is forwarded from another server. 240 func (i *InternalRpcInfo) IsForwarded() bool { 241 return i.Forwarded 242 } 243 244 // SetForwarded marks that the RPC is being forwarded from another server. 245 func (i *InternalRpcInfo) SetForwarded() { 246 i.Forwarded = true 247 } 248 249 // QueryOptions is used to specify various flags for read queries 250 type QueryOptions struct { 251 // The target region for this query 252 Region string 253 254 // Namespace is the target namespace for the query. 255 // 256 // Since handlers do not have a default value set they should access 257 // the Namespace via the RequestNamespace method. 258 // 259 // Requests accessing specific namespaced objects must check ACLs 260 // against the namespace of the object, not the namespace in the 261 // request. 262 Namespace string 263 264 // If set, wait until query exceeds given index. Must be provided 265 // with MaxQueryTime. 266 MinQueryIndex uint64 267 268 // Provided with MinQueryIndex to wait for change. 269 MaxQueryTime time.Duration 270 271 // If set, any follower can service the request. Results 272 // may be arbitrarily stale. 273 AllowStale bool 274 275 // If set, used as prefix for resource list searches 276 Prefix string 277 278 // AuthToken is secret portion of the ACL token used for the request 279 AuthToken string 280 281 // Filter specifies the go-bexpr filter expression to be used for 282 // filtering the data prior to returning a response 283 Filter string 284 285 // PerPage is the number of entries to be returned in queries that support 286 // paginated lists. 287 PerPage int32 288 289 // NextToken is the token used to indicate where to start paging 290 // for queries that support paginated lists. 
This token should be 291 // the ID of the next object after the last one seen in the 292 // previous response. 293 NextToken string 294 295 // Reverse is used to reverse the default order of list results. 296 Reverse bool 297 298 identity *AuthenticatedIdentity 299 300 InternalRpcInfo 301 } 302 303 // TimeToBlock returns MaxQueryTime adjusted for maximums and defaults 304 // it will return 0 if this is not a blocking query 305 func (q QueryOptions) TimeToBlock() time.Duration { 306 if q.MinQueryIndex == 0 { 307 return 0 308 } 309 if q.MaxQueryTime > MaxBlockingRPCQueryTime { 310 return MaxBlockingRPCQueryTime 311 } else if q.MaxQueryTime <= 0 { 312 return DefaultBlockingRPCQueryTime 313 } 314 return q.MaxQueryTime 315 } 316 317 func (q *QueryOptions) SetTimeToBlock(t time.Duration) { 318 q.MaxQueryTime = t 319 } 320 321 func (q QueryOptions) RequestRegion() string { 322 return q.Region 323 } 324 325 // RequestNamespace returns the request's namespace or the default namespace if 326 // no explicit namespace was sent. 327 // 328 // Requests accessing specific namespaced objects must check ACLs against the 329 // namespace of the object, not the namespace in the request. 330 func (q QueryOptions) RequestNamespace() string { 331 if q.Namespace == "" { 332 return DefaultNamespace 333 } 334 return q.Namespace 335 } 336 337 // IsRead only applies to reads, so always true. 338 func (q QueryOptions) IsRead() bool { 339 return true 340 } 341 342 func (q QueryOptions) AllowStaleRead() bool { 343 return q.AllowStale 344 } 345 346 func (q *QueryOptions) SetIdentity(identity *AuthenticatedIdentity) { 347 q.identity = identity 348 } 349 350 func (q QueryOptions) GetIdentity() *AuthenticatedIdentity { 351 return q.identity 352 } 353 354 // AgentPprofRequest is used to request a pprof report for a given node. 
355 type AgentPprofRequest struct { 356 // ReqType specifies the profile to use 357 ReqType pprof.ReqType 358 359 // Profile specifies the runtime/pprof profile to lookup and generate. 360 Profile string 361 362 // Seconds is the number of seconds to capture a profile 363 Seconds int 364 365 // Debug specifies if pprof profile should inclue debug output 366 Debug int 367 368 // GC specifies if the profile should call runtime.GC() before 369 // running its profile. This is only used for "heap" profiles 370 GC int 371 372 // NodeID is the node we want to track the logs of 373 NodeID string 374 375 // ServerID is the server we want to track the logs of 376 ServerID string 377 378 QueryOptions 379 } 380 381 // AgentPprofResponse is used to return a generated pprof profile 382 type AgentPprofResponse struct { 383 // ID of the agent that fulfilled the request 384 AgentID string 385 386 // Payload is the generated pprof profile 387 Payload []byte 388 389 // HTTPHeaders are a set of key value pairs to be applied as 390 // HTTP headers for a specific runtime profile 391 HTTPHeaders map[string]string 392 } 393 394 type WriteRequest struct { 395 // The target region for this write 396 Region string 397 398 // Namespace is the target namespace for the write. 399 // 400 // Since RPC handlers do not have a default value set they should 401 // access the Namespace via the RequestNamespace method. 402 // 403 // Requests accessing specific namespaced objects must check ACLs 404 // against the namespace of the object, not the namespace in the 405 // request. 406 Namespace string 407 408 // AuthToken is secret portion of the ACL token used for the request 409 AuthToken string 410 411 // IdempotencyToken can be used to ensure the write is idempotent. 
412 IdempotencyToken string 413 414 identity *AuthenticatedIdentity 415 416 InternalRpcInfo 417 } 418 419 func (w WriteRequest) TimeToBlock() time.Duration { 420 return 0 421 } 422 423 func (w WriteRequest) SetTimeToBlock(_ time.Duration) { 424 } 425 426 func (w WriteRequest) RequestRegion() string { 427 // The target region for this request 428 return w.Region 429 } 430 431 // RequestNamespace returns the request's namespace or the default namespace if 432 // no explicit namespace was sent. 433 // 434 // Requests accessing specific namespaced objects must check ACLs against the 435 // namespace of the object, not the namespace in the request. 436 func (w WriteRequest) RequestNamespace() string { 437 if w.Namespace == "" { 438 return DefaultNamespace 439 } 440 return w.Namespace 441 } 442 443 // IsRead only applies to writes, always false. 444 func (w WriteRequest) IsRead() bool { 445 return false 446 } 447 448 func (w WriteRequest) AllowStaleRead() bool { 449 return false 450 } 451 452 func (w *WriteRequest) SetIdentity(identity *AuthenticatedIdentity) { 453 w.identity = identity 454 } 455 456 func (w WriteRequest) GetIdentity() *AuthenticatedIdentity { 457 return w.identity 458 } 459 460 // AuthenticatedIdentity is returned by the Authenticate method on server to 461 // return a wrapper around the various elements that can be resolved as an 462 // identity. RPC handlers will use the relevant fields for performing 463 // authorization. 
464 type AuthenticatedIdentity struct { 465 ACLToken *ACLToken 466 Claims *IdentityClaims 467 ClientID string 468 ServerID string 469 TLSName string 470 RemoteIP net.IP 471 } 472 473 func (ai *AuthenticatedIdentity) GetACLToken() *ACLToken { 474 if ai == nil { 475 return nil 476 } 477 return ai.ACLToken 478 } 479 480 func (ai *AuthenticatedIdentity) GetClaims() *IdentityClaims { 481 if ai == nil { 482 return nil 483 } 484 return ai.Claims 485 } 486 487 // QueryMeta allows a query response to include potentially 488 // useful metadata about a query 489 type QueryMeta struct { 490 // This is the index associated with the read 491 Index uint64 492 493 // If AllowStale is used, this is time elapsed since 494 // last contact between the follower and leader. This 495 // can be used to gauge staleness. 496 LastContact time.Duration 497 498 // Used to indicate if there is a known leader node 499 KnownLeader bool 500 501 // NextToken is the token returned with queries that support 502 // paginated lists. To resume paging from this point, pass 503 // this token in the next request's QueryOptions. 504 NextToken string 505 } 506 507 // WriteMeta allows a write response to include potentially 508 // useful metadata about the write 509 type WriteMeta struct { 510 // This is the index associated with the write 511 Index uint64 512 } 513 514 // NodeRegisterRequest is used for Node.Register endpoint 515 // to register a node as being a schedulable entity. 516 type NodeRegisterRequest struct { 517 Node *Node 518 NodeEvent *NodeEvent 519 WriteRequest 520 } 521 522 // NodeDeregisterRequest is used for Node.Deregister endpoint 523 // to deregister a node as being a schedulable entity. 524 type NodeDeregisterRequest struct { 525 NodeID string 526 WriteRequest 527 } 528 529 // NodeBatchDeregisterRequest is used for Node.BatchDeregister endpoint 530 // to deregister a batch of nodes from being schedulable entities. 
531 type NodeBatchDeregisterRequest struct { 532 NodeIDs []string 533 WriteRequest 534 } 535 536 // NodeServerInfo is used to in NodeUpdateResponse to return Nomad server 537 // information used in RPC server lists. 538 type NodeServerInfo struct { 539 // RPCAdvertiseAddr is the IP endpoint that a Nomad Server wishes to 540 // be contacted at for RPCs. 541 RPCAdvertiseAddr string 542 543 // RpcMajorVersion is the major version number the Nomad Server 544 // supports 545 RPCMajorVersion int32 546 547 // RpcMinorVersion is the minor version number the Nomad Server 548 // supports 549 RPCMinorVersion int32 550 551 // Datacenter is the datacenter that a Nomad server belongs to 552 Datacenter string 553 } 554 555 // NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint 556 // to update the status of a node. 557 type NodeUpdateStatusRequest struct { 558 NodeID string 559 Status string 560 NodeEvent *NodeEvent 561 UpdatedAt int64 562 WriteRequest 563 } 564 565 // NodeUpdateDrainRequest is used for updating the drain strategy 566 type NodeUpdateDrainRequest struct { 567 NodeID string 568 DrainStrategy *DrainStrategy 569 570 // MarkEligible marks the node as eligible if removing the drain strategy. 
571 MarkEligible bool 572 573 // NodeEvent is the event added to the node 574 NodeEvent *NodeEvent 575 576 // UpdatedAt represents server time of receiving request 577 UpdatedAt int64 578 579 // Meta is user-provided metadata relating to the drain operation 580 Meta map[string]string 581 582 WriteRequest 583 } 584 585 // BatchNodeUpdateDrainRequest is used for updating the drain strategy for a 586 // batch of nodes 587 type BatchNodeUpdateDrainRequest struct { 588 // Updates is a mapping of nodes to their updated drain strategy 589 Updates map[string]*DrainUpdate 590 591 // NodeEvents is a mapping of the node to the event to add to the node 592 NodeEvents map[string]*NodeEvent 593 594 // UpdatedAt represents server time of receiving request 595 UpdatedAt int64 596 597 WriteRequest 598 } 599 600 // DrainUpdate is used to update the drain of a node 601 type DrainUpdate struct { 602 // DrainStrategy is the new strategy for the node 603 DrainStrategy *DrainStrategy 604 605 // MarkEligible marks the node as eligible if removing the drain strategy. 606 MarkEligible bool 607 } 608 609 // NodeUpdateEligibilityRequest is used for updating the scheduling eligibility 610 type NodeUpdateEligibilityRequest struct { 611 NodeID string 612 Eligibility string 613 614 // NodeEvent is the event added to the node 615 NodeEvent *NodeEvent 616 617 // UpdatedAt represents server time of receiving request 618 UpdatedAt int64 619 620 WriteRequest 621 } 622 623 // NodeEvaluateRequest is used to re-evaluate the node 624 type NodeEvaluateRequest struct { 625 NodeID string 626 WriteRequest 627 } 628 629 // NodeSpecificRequest is used when we just need to specify a target node 630 type NodeSpecificRequest struct { 631 NodeID string 632 SecretID string 633 QueryOptions 634 } 635 636 // JobRegisterRequest is used for Job.Register endpoint 637 // to register a job as being a schedulable entity. 
638 type JobRegisterRequest struct { 639 Job *Job 640 641 // If EnforceIndex is set then the job will only be registered if the passed 642 // JobModifyIndex matches the current Jobs index. If the index is zero, the 643 // register only occurs if the job is new. 644 EnforceIndex bool 645 JobModifyIndex uint64 646 647 // PreserveCounts indicates that during job update, existing task group 648 // counts should be preserved, over those specified in the new job spec 649 // PreserveCounts is ignored for newly created jobs. 650 PreserveCounts bool 651 652 // PolicyOverride is set when the user is attempting to override any policies 653 PolicyOverride bool 654 655 // EvalPriority is an optional priority to use on any evaluation created as 656 // a result on this job registration. This value must be between 1-100 657 // inclusively, where a larger value corresponds to a higher priority. This 658 // is useful when an operator wishes to push through a job registration in 659 // busy clusters with a large evaluation backlog. This avoids needing to 660 // change the job priority which also impacts preemption. 661 EvalPriority int 662 663 // Eval is the evaluation that is associated with the job registration 664 Eval *Evaluation 665 666 // Deployment is the deployment to be create when the job is registered. If 667 // there is an active deployment for the job it will be canceled. 668 Deployment *Deployment 669 670 WriteRequest 671 } 672 673 // JobDeregisterRequest is used for Job.Deregister endpoint 674 // to deregister a job as being a schedulable entity. 675 type JobDeregisterRequest struct { 676 JobID string 677 678 // Purge controls whether the deregister purges the job from the system or 679 // whether the job is just marked as stopped and will be removed by the 680 // garbage collector 681 Purge bool 682 683 // Global controls whether all regions of a multi-region job are 684 // deregistered. It is ignored for single-region jobs. 
685 Global bool 686 687 // EvalPriority is an optional priority to use on any evaluation created as 688 // a result on this job deregistration. This value must be between 1-100 689 // inclusively, where a larger value corresponds to a higher priority. This 690 // is useful when an operator wishes to push through a job deregistration 691 // in busy clusters with a large evaluation backlog. 692 EvalPriority int 693 694 // NoShutdownDelay, if set to true, will override the group and 695 // task shutdown_delay configuration and ignore the delay for any 696 // allocations stopped as a result of this Deregister call. 697 NoShutdownDelay bool 698 699 // Eval is the evaluation to create that's associated with job deregister 700 Eval *Evaluation 701 702 WriteRequest 703 } 704 705 // JobBatchDeregisterRequest is used to batch deregister jobs and upsert 706 // evaluations. 707 type JobBatchDeregisterRequest struct { 708 // Jobs is the set of jobs to deregister 709 Jobs map[NamespacedID]*JobDeregisterOptions 710 711 // Evals is the set of evaluations to create. 712 Evals []*Evaluation 713 714 WriteRequest 715 } 716 717 // JobDeregisterOptions configures how a job is deregistered. 
718 type JobDeregisterOptions struct { 719 // Purge controls whether the deregister purges the job from the system or 720 // whether the job is just marked as stopped and will be removed by the 721 // garbage collector 722 Purge bool 723 } 724 725 // JobEvaluateRequest is used when we just need to re-evaluate a target job 726 type JobEvaluateRequest struct { 727 JobID string 728 EvalOptions EvalOptions 729 WriteRequest 730 } 731 732 // EvalOptions is used to encapsulate options when forcing a job evaluation 733 type EvalOptions struct { 734 ForceReschedule bool 735 } 736 737 // JobSpecificRequest is used when we just need to specify a target job 738 type JobSpecificRequest struct { 739 JobID string 740 All bool 741 QueryOptions 742 } 743 744 // JobListRequest is used to parameterize a list request 745 type JobListRequest struct { 746 QueryOptions 747 Fields *JobStubFields 748 } 749 750 // Stub returns a summarized version of the job 751 type JobStubFields struct { 752 Meta bool 753 } 754 755 // JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run 756 // evaluation of the Job. 
757 type JobPlanRequest struct { 758 Job *Job 759 Diff bool // Toggles an annotated diff 760 // PolicyOverride is set when the user is attempting to override any policies 761 PolicyOverride bool 762 WriteRequest 763 } 764 765 // JobScaleRequest is used for the Job.Scale endpoint to scale one of the 766 // scaling targets in a job 767 type JobScaleRequest struct { 768 JobID string 769 Target map[string]string 770 Count *int64 771 Message string 772 Error bool 773 Meta map[string]interface{} 774 // PolicyOverride is set when the user is attempting to override any policies 775 PolicyOverride bool 776 WriteRequest 777 } 778 779 // Validate is used to validate the arguments in the request 780 func (r *JobScaleRequest) Validate() error { 781 namespace := r.Target[ScalingTargetNamespace] 782 if namespace != "" && namespace != r.RequestNamespace() { 783 return NewErrRPCCoded(400, "namespace in payload did not match header") 784 } 785 786 jobID := r.Target[ScalingTargetJob] 787 if jobID != "" && jobID != r.JobID { 788 return fmt.Errorf("job ID in payload did not match URL") 789 } 790 791 groupName := r.Target[ScalingTargetGroup] 792 if groupName == "" { 793 return NewErrRPCCoded(400, "missing task group name for scaling action") 794 } 795 796 if r.Count != nil { 797 if *r.Count < 0 { 798 return NewErrRPCCoded(400, "scaling action count can't be negative") 799 } 800 801 if r.Error { 802 return NewErrRPCCoded(400, "scaling action should not contain count if error is true") 803 } 804 805 truncCount := int(*r.Count) 806 if int64(truncCount) != *r.Count { 807 return NewErrRPCCoded(400, 808 fmt.Sprintf("new scaling count is too large for TaskGroup.Count (int): %v", r.Count)) 809 } 810 } 811 812 return nil 813 } 814 815 // JobSummaryRequest is used when we just need to get a specific job summary 816 type JobSummaryRequest struct { 817 JobID string 818 QueryOptions 819 } 820 821 // JobScaleStatusRequest is used to get the scale status for a job 822 type JobScaleStatusRequest struct 
{ 823 JobID string 824 QueryOptions 825 } 826 827 // JobDispatchRequest is used to dispatch a job based on a parameterized job 828 type JobDispatchRequest struct { 829 JobID string 830 Payload []byte 831 Meta map[string]string 832 WriteRequest 833 IdPrefixTemplate string 834 } 835 836 // JobValidateRequest is used to validate a job 837 type JobValidateRequest struct { 838 Job *Job 839 WriteRequest 840 } 841 842 // JobRevertRequest is used to revert a job to a prior version. 843 type JobRevertRequest struct { 844 // JobID is the ID of the job being reverted 845 JobID string 846 847 // JobVersion the version to revert to. 848 JobVersion uint64 849 850 // EnforcePriorVersion if set will enforce that the job is at the given 851 // version before reverting. 852 EnforcePriorVersion *uint64 853 854 // ConsulToken is the Consul token that proves the submitter of the job revert 855 // has access to the Service Identity policies associated with the job's 856 // Consul Connect enabled services. This field is only used to transfer the 857 // token and is not stored after the Job revert. 858 ConsulToken string 859 860 // VaultToken is the Vault token that proves the submitter of the job revert 861 // has access to any Vault policies specified in the targeted job version. This 862 // field is only used to transfer the token and is not stored after the Job 863 // revert. 864 VaultToken string 865 866 WriteRequest 867 } 868 869 // JobStabilityRequest is used to marked a job as stable. 870 type JobStabilityRequest struct { 871 // Job to set the stability on 872 JobID string 873 JobVersion uint64 874 875 // Set the stability 876 Stable bool 877 WriteRequest 878 } 879 880 // JobStabilityResponse is the response when marking a job as stable. 
type JobStabilityResponse struct {
	WriteMeta
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
	QueryOptions

	Fields *NodeStubFields
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalReapRequest is used for reaping evaluations and allocation. This struct
// is used by the Eval.Reap RPC endpoint as a request argument, and also when
// performing eval reap or deletes via Raft. This is because Eval.Reap and
// Eval.Delete use the same Raft message when performing deletes so we do not
// need more Raft message types.
type EvalReapRequest struct {
	Evals  []string // slice of Evaluation IDs
	Allocs []string // slice of Allocation IDs

	// Filter specifies the go-bexpr filter expression to be used for
	// filtering the data prior to returning a response
	Filter    string
	PerPage   int32
	NextToken string

	// UserInitiated tracks whether this reap request is the result of an
	// operator request. If this is true, the FSM needs to ensure the eval
	// broker is paused as the request can include non-terminal allocations.
	UserInitiated bool

	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID         string
	IncludeRelated bool
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers       []string
	Timeout          time.Duration
	SchedulerVersion uint16
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	FilterJobID      string
	FilterEvalStatus string
	QueryOptions
}

// ShouldBeFiltered indicates that the eval should be filtered (that
// is, removed) from the results
func (req *EvalListRequest) ShouldBeFiltered(e *Evaluation) bool {
	if req.FilterJobID != "" && req.FilterJobID != e.JobID {
		return true
	}
	if req.FilterEvalStatus != "" && req.FilterEvalStatus != e.Status {
		return true
	}
	return false
}

// EvalCountRequest is used to count evaluations
type EvalCountRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// ApplyPlanResultsRequest is used by the planner to apply a Raft transaction
// committing the result of a plan.
type ApplyPlanResultsRequest struct {
	// AllocUpdateRequest holds the allocation updates to be made by the
	// scheduler.
	AllocUpdateRequest

	// Deployment is the deployment created or updated as a result of a
	// scheduling event.
	Deployment *Deployment

	// DeploymentUpdates is a set of status updates to apply to the given
	// deployments. This allows the scheduler to cancel any unneeded deployment
	// because the job is stopped or the update block is removed.
	DeploymentUpdates []*DeploymentStatusUpdate

	// EvalID is the eval ID of the plan being applied. The modify index of the
	// evaluation is updated as part of applying the plan to ensure that subsequent
	// scheduling events for the same job will wait for the index that last produced
	// state changes. This is necessary for blocked evaluations since they can be
	// processed many times, potentially making state updates, without the state of
	// the evaluation itself being updated.
	EvalID string

	// COMPAT 0.11
	// NodePreemptions is a slice of allocations from other lower priority jobs
	// that are preempted. Preempted allocations are marked as evicted.
	// Deprecated: Replaced with AllocsPreempted which contains only the diff
	NodePreemptions []*Allocation

	// AllocsPreempted is a slice of allocation diffs from other lower priority jobs
	// that are preempted. Preempted allocations are marked as evicted.
	AllocsPreempted []*AllocationDiff

	// PreemptionEvals is a slice of follow up evals for jobs whose allocations
	// have been preempted to place allocs in this plan
	PreemptionEvals []*Evaluation

	// IneligibleNodes are nodes the plan applier has repeatedly rejected
	// placements for and should therefore be considered ineligible by workers
	// to avoid retrying them repeatedly.
	IneligibleNodes []string

	// UpdatedAt represents server time of receiving request.
	UpdatedAt int64
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// COMPAT 0.11
	// Alloc is the list of new allocations to assign
	// Deprecated: Replaced with two separate slices, one containing stopped allocations
	// and another containing updated allocations
	Alloc []*Allocation

	// Allocations to stop. Contains only the diff, not the entire allocation
	AllocsStopped []*AllocationDiff

	// New or updated allocations
	AllocsUpdated []*Allocation

	// Evals is the list of new evaluations to create
	// Evals are valid only when used in the Raft RPC
	Evals []*Evaluation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	WriteRequest
}

// AllocUpdateDesiredTransitionRequest is used to submit changes to allocations
// desired transition state.
type AllocUpdateDesiredTransitionRequest struct {
	// Allocs is the mapping of allocation ids to their desired state
	// transition
	Allocs map[string]*DesiredTransition

	// Evals is the set of evaluations to create
	Evals []*Evaluation

	WriteRequest
}

// AllocStopRequest is used to stop and reschedule a running Allocation.
type AllocStopRequest struct {
	AllocID         string
	NoShutdownDelay bool

	WriteRequest
}

// AllocStopResponse is the response to an `AllocStopRequest`
type AllocStopResponse struct {
	// EvalID is the id of the follow up evaluation for the rescheduled alloc.
	EvalID string

	WriteMeta
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions

	Fields *AllocStubFields
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// AllocSignalRequest is used to signal a specific allocation
type AllocSignalRequest struct {
	AllocID string
	Task    string
	Signal  string
	QueryOptions
}

// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}

// AllocRestartRequest is used to restart a specific allocations tasks.
type AllocRestartRequest struct {
	AllocID  string
	TaskName string
	AllTasks bool

	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}

// ServerMembersResponse has the list of servers in a cluster
type ServerMembersResponse struct {
	ServerName   string
	ServerRegion string
	ServerDC     string
	Members      []*ServerMember
}

// ServerMember holds information about a Nomad server agent in a cluster
type ServerMember struct {
	Name        string
	Addr        net.IP
	Port        uint16
	Tags        map[string]string
	Status      string
	ProtocolMin uint8
	ProtocolMax uint8
	ProtocolCur uint8
	DelegateMin uint8
	DelegateMax uint8
	DelegateCur uint8
}

// ClusterMetadata is used to store per-cluster metadata.
type ClusterMetadata struct {
	ClusterID  string
	CreateTime int64
}

// DeriveVaultTokenRequest is used to request wrapped Vault tokens for the
// following tasks in the given allocation
type DeriveVaultTokenRequest struct {
	NodeID   string
	SecretID string
	AllocID  string
	Tasks    []string
	QueryOptions
}

// VaultAccessorsRequest is used to operate on a set of Vault accessors
type VaultAccessorsRequest struct {
	Accessors []*VaultAccessor
}

// VaultAccessor is a reference to a created Vault token on behalf of
// an allocation's task.
type VaultAccessor struct {
	AllocID     string
	Task        string
	NodeID      string
	Accessor    string
	CreationTTL int

	// Raft Indexes
	CreateIndex uint64
}

// DeriveVaultTokenResponse returns the wrapped tokens for each requested task
type DeriveVaultTokenResponse struct {
	// Tasks is a mapping between the task name and the wrapped token
	Tasks map[string]string

	// Error stores any error that occurred. Errors are stored here so we can
	// communicate whether it is retryable
	Error *RecoverableError

	QueryMeta
}

// GenericRequest is used to request where no
// specific information is needed.
type GenericRequest struct {
	QueryOptions
}

// DeploymentListRequest is used to list the deployments
type DeploymentListRequest struct {
	QueryOptions
}

// DeploymentDeleteRequest is used for deleting deployments.
type DeploymentDeleteRequest struct {
	Deployments []string
	WriteRequest
}

// DeploymentStatusUpdateRequest is used to update the status of a deployment as
// well as optionally creating an evaluation atomically.
type DeploymentStatusUpdateRequest struct {
	// Eval, if set, is used to create an evaluation at the same time as
	// updating the status of a deployment.
	Eval *Evaluation

	// DeploymentUpdate is a status update to apply to the given
	// deployment.
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job
}

// DeploymentAllocHealthRequest is used to set the health of a set of
// allocations as part of a deployment.
type DeploymentAllocHealthRequest struct {
	DeploymentID string

	// Marks these allocations as healthy, allow further allocations
	// to be rolled.
	HealthyAllocationIDs []string

	// Any unhealthy allocations fail the deployment
	UnhealthyAllocationIDs []string

	WriteRequest
}

// ApplyDeploymentAllocHealthRequest is used to apply an alloc health request via Raft
type ApplyDeploymentAllocHealthRequest struct {
	DeploymentAllocHealthRequest

	// Timestamp is the timestamp to use when setting the allocations health.
	Timestamp time.Time

	// An optional field to update the status of a deployment
	DeploymentUpdate *DeploymentStatusUpdate

	// Job is used to optionally upsert a job. This is used when setting the
	// allocation health results in a deployment failure and the deployment
	// auto-reverts to the latest stable job.
	Job *Job

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}

// DeploymentPromoteRequest is used to promote task groups in a deployment
type DeploymentPromoteRequest struct {
	DeploymentID string

	// All is to promote all task groups
	All bool

	// Groups is used to set the promotion status per task group
	Groups []string

	WriteRequest
}

// ApplyDeploymentPromoteRequest is used to apply a promotion request via Raft
type ApplyDeploymentPromoteRequest struct {
	DeploymentPromoteRequest

	// An optional evaluation to create after promoting the canaries
	Eval *Evaluation
}

// DeploymentPauseRequest is used to pause a deployment
type DeploymentPauseRequest struct {
	DeploymentID string

	// Pause sets the pause status
	Pause bool

	WriteRequest
}

// DeploymentRunRequest is used to remotely start a pending deployment.
// Used only for multiregion deployments.
type DeploymentRunRequest struct {
	DeploymentID string

	WriteRequest
}

// DeploymentUnblockRequest is used to remotely unblock a deployment.
// Used only for multiregion deployments.
type DeploymentUnblockRequest struct {
	DeploymentID string

	WriteRequest
}

// DeploymentCancelRequest is used to remotely cancel a deployment.
// Used only for multiregion deployments.
type DeploymentCancelRequest struct {
	DeploymentID string

	WriteRequest
}

// DeploymentSpecificRequest is used to make a request specific to a particular
// deployment
type DeploymentSpecificRequest struct {
	DeploymentID string
	QueryOptions
}

// DeploymentFailRequest is used to fail a particular deployment
type DeploymentFailRequest struct {
	DeploymentID string
	WriteRequest
}

// ScalingPolicySpecificRequest is used when we just need to specify a target scaling policy
type ScalingPolicySpecificRequest struct {
	ID string
	QueryOptions
}

// SingleScalingPolicyResponse is used to return a single scaling policy
type SingleScalingPolicyResponse struct {
	Policy *ScalingPolicy
	QueryMeta
}

// ScalingPolicyListRequest is used to parameterize a scaling policy list request
type ScalingPolicyListRequest struct {
	Job  string
	Type string
	QueryOptions
}

// ScalingPolicyListResponse is used for a list request
type ScalingPolicyListResponse struct {
	Policies []*ScalingPolicyListStub
	QueryMeta
}

// SingleDeploymentResponse is used to respond with a single deployment
type SingleDeploymentResponse struct {
	Deployment *Deployment
	QueryMeta
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	VolumeEvalID    string
	VolumeEvalIndex uint64
	QueryMeta
}

// JobBatchDeregisterResponse is used to respond to a batch job deregistration
type JobBatchDeregisterResponse struct {
	// JobEvals maps the job to its created evaluation
	JobEvals map[NamespacedID]string
	QueryMeta
}

// JobValidateResponse is the response from validate request
type JobValidateResponse struct {
	// DriverConfigValidated indicates whether the agent validated the driver
	// config
	DriverConfigValidated bool

	// ValidationErrors is a list of validation errors
	ValidationErrors []string

	// Error is a string version of any error that may have occurred
	Error string

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string
}

// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64

	// Features informs clients what enterprise features are allowed
	Features uint64

	// LeaderRPCAddr is the RPC address of the current Raft Leader. If
	// empty, the current Nomad Server is in the minority of a partition.
	LeaderRPCAddr string

	// NumNodes is the number of Nomad nodes attached to this quorum of
	// Nomad Servers at the time of the response. This value can
	// fluctuate based on the health of the cluster between heartbeats.
	NumNodes int32

	// Servers is the full list of known Nomad servers in the local
	// region.
	Servers []*NodeServerInfo

	// SchedulingEligibility is used to inform clients what the server-side
	// has for their scheduling status during heartbeats.
	SchedulingEligibility string

	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// NodeEligibilityUpdateResponse is used to respond to a node eligibility update
type NodeEligibilityUpdateResponse struct {
	NodeModifyIndex uint64
	EvalIDs         []string
	EvalCreateIndex uint64
	WriteMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
	Allocs map[string]uint64

	// MigrateTokens are used when ACLs are enabled to allow cross node,
	// authenticated access to sticky volumes
	MigrateTokens map[string]string

	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobSummaryResponse is used to return a single job summary
type JobSummaryResponse struct {
	JobSummary *JobSummary
	QueryMeta
}

// JobScaleStatusResponse is used to return the scale status for a job
type JobScaleStatusResponse struct {
	JobScaleStatus *JobScaleStatus
	QueryMeta
}

// JobScaleStatus holds the job-level indexes plus the per-task-group scale
// status returned in a JobScaleStatusResponse.
type JobScaleStatus struct {
	JobID          string
	Namespace      string
	JobCreateIndex uint64
	JobModifyIndex uint64
	JobStopped     bool
	TaskGroups     map[string]*TaskGroupScaleStatus
}

// TaskGroupScaleStatus is used to return the scale status for a given task group
type TaskGroupScaleStatus struct {
	Desired   int
	Placed    int
	Running   int
	Healthy   int
	Unhealthy int
	Events    []*ScalingEvent
}

// JobDispatchResponse is used to respond to a job dispatch request.
type JobDispatchResponse struct {
	DispatchedJobID string
	EvalID          string
	EvalCreateIndex uint64
	JobCreateIndex  uint64
	WriteMeta
}

// JobListResponse is used for a list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// JobVersionsRequest is used to get a jobs versions
type JobVersionsRequest struct {
	JobID string
	Diffs bool
	QueryOptions
}

// JobVersionsResponse is used for a job get versions request
type JobVersionsResponse struct {
	Versions []*Job
	Diffs    []*JobDiff
	QueryMeta
}

// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// FailedTGAllocs is the placement failures per task group.
	FailedTGAllocs map[string]*AllocMetric

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	// NextPeriodicLaunch is the time duration till the job would be launched if
	// submitted.
	NextPeriodicLaunch time.Time

	// Warnings contains any warnings about the given job. These may include
	// deprecation warnings.
	Warnings string

	WriteMeta
}

// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string

	// WaitIndex is the Raft index the worker should wait until invoking the
	// scheduler.
	WaitIndex uint64

	QueryMeta
}

// GetWaitIndex is used to retrieve the Raft index in which state should be at
// or beyond before invoking the scheduler.
func (e *EvalDequeueResponse) GetWaitIndex() uint64 {
	// Prefer the wait index sent. This will be populated on all responses from
	// 0.7.0 and above
	if e.WaitIndex != 0 {
		return e.WaitIndex
	} else if e.Eval != nil {
		return e.Eval.ModifyIndex
	}

	// This should never happen
	return 1
}

// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}

// AllocListResponse is used for a list request
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// DeploymentListResponse is used for a list request
type DeploymentListResponse struct {
	Deployments []*Deployment
	QueryMeta
}

// EvalListResponse is used for a list request
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalCountResponse is used for a count request
type EvalCountResponse struct {
	Count int
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}

// DeploymentUpdateResponse is used to respond to a deployment change. The
// response will include the modify index of the deployment as well as details
// of any triggered evaluation.
type DeploymentUpdateResponse struct {
	EvalID                string
	EvalCreateIndex       uint64
	DeploymentModifyIndex uint64

	// RevertedJobVersion is the version the job was reverted to. If unset, the
	// job wasn't reverted
	RevertedJobVersion *uint64

	WriteMeta
}

// NodeConnQueryResponse is used to respond to a query of whether a server has
// a connection to a specific Node
type NodeConnQueryResponse struct {
	// Connected indicates whether a connection to the Client exists
	Connected bool

	// Established marks the time at which the connection was established
	Established time.Time

	QueryMeta
}

// HostDataRequest is used by /agent/host to retrieve data about the agent's host system. If
// ServerID or NodeID is specified, the request is forwarded to the remote agent
type HostDataRequest struct {
	ServerID string
	NodeID   string
	QueryOptions
}

// HostDataResponse contains the HostData content
type HostDataResponse struct {
	AgentID  string
	HostData *host.HostData
}

// EmitNodeEventsRequest is a request to update the node events source
// with a new client-side event
type EmitNodeEventsRequest struct {
	// NodeEvents are a map where the key is a node id, and value is a list of
	// events for that node
	NodeEvents map[string][]*NodeEvent

	WriteRequest
}

// EmitNodeEventsResponse is a response to the client about the status of
// the node event source update.
type EmitNodeEventsResponse struct {
	WriteMeta
}

// NodeEventSubsystem values identify the subsystem a NodeEvent originated
// from.
const (
	NodeEventSubsystemDrain     = "Drain"
	NodeEventSubsystemDriver    = "Driver"
	NodeEventSubsystemHeartbeat = "Heartbeat"
	NodeEventSubsystemCluster   = "Cluster"
	NodeEventSubsystemScheduler = "Scheduler"
	NodeEventSubsystemStorage   = "Storage"
)

// NodeEvent is a single unit representing a node's state change
type NodeEvent struct {
	Message     string
	Subsystem   string
	Details     map[string]string
	Timestamp   time.Time
	CreateIndex uint64
}

// String returns a single-line, human-readable rendering of the event.
func (ne *NodeEvent) String() string {
	var details []string
	for k, v := range ne.Details {
		details = append(details, fmt.Sprintf("%s: %s", k, v))
	}

	return fmt.Sprintf("Message: %s, Subsystem: %s, Details: %s, Timestamp: %s", ne.Message, ne.Subsystem, strings.Join(details, ","), ne.Timestamp.String())
}

// Copy returns a copy of the event with its own clone of the Details map.
func (ne *NodeEvent) Copy() *NodeEvent {
	c := new(NodeEvent)
	*c = *ne
	c.Details = maps.Clone(ne.Details)
	return c
}

// NewNodeEvent generates a new node event storing the current time as the
// timestamp
func NewNodeEvent() *NodeEvent {
	return &NodeEvent{Timestamp: time.Now()}
}

// SetMessage is used to set the message on the node event
func (ne *NodeEvent) SetMessage(msg string) *NodeEvent {
	ne.Message = msg
	return ne
}

// SetSubsystem is used to set the subsystem on the node event
func (ne *NodeEvent) SetSubsystem(sys string) *NodeEvent {
	ne.Subsystem = sys
	return ne
}

// SetTimestamp is used to set the timestamp on the node event
func (ne *NodeEvent) SetTimestamp(ts time.Time) *NodeEvent {
	ne.Timestamp = ts
	return ne
}

// AddDetail is used to add a detail to the node event
func (ne *NodeEvent) AddDetail(k, v string) *NodeEvent {
	if ne.Details == nil {
		ne.Details = make(map[string]string, 1)
	}
	ne.Details[k] = v
	return ne
}

// NodeStatus* are the valid values for Node.Status.
const (
	NodeStatusInit         = "initializing"
	NodeStatusReady        = "ready"
	NodeStatusDown         = "down"
	NodeStatusDisconnected = "disconnected"
)

// ShouldDrainNode checks if a given node status should trigger an
// evaluation. Some states don't require any further action.
func ShouldDrainNode(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady, NodeStatusDisconnected:
		return false
	case NodeStatusDown:
		return true
	default:
		panic(fmt.Sprintf("unhandled node status %s", status))
	}
}

// ValidNodeStatus is used to check if a node status is valid
func ValidNodeStatus(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady, NodeStatusDown, NodeStatusDisconnected:
		return true
	default:
		return false
	}
}

const (
	// NodeSchedulingEligible and Ineligible marks the node as eligible or not,
	// respectively, for receiving allocations. This is orthogonal to the node
	// status being ready.
	NodeSchedulingEligible   = "eligible"
	NodeSchedulingIneligible = "ineligible"
)

// DrainSpec describes a Node's desired drain behavior.
type DrainSpec struct {
	// Deadline is the duration after StartTime when the remaining
	// allocations on a draining Node should be told to stop.
	Deadline time.Duration

	// IgnoreSystemJobs allows systems jobs to remain on the node even though it
	// has been marked for draining.
	IgnoreSystemJobs bool
}

// DrainStrategy describes a Node's drain behavior.
type DrainStrategy struct {
	// DrainSpec is the user declared drain specification
	DrainSpec

	// ForceDeadline is the deadline time for the drain after which drains will
	// be forced
	ForceDeadline time.Time

	// StartedAt is the time the drain process started
	StartedAt time.Time
}

// Copy returns a shallow copy of the strategy; nil-safe.
func (d *DrainStrategy) Copy() *DrainStrategy {
	if d == nil {
		return nil
	}

	nd := new(DrainStrategy)
	*nd = *d
	return nd
}

// DeadlineTime returns a boolean whether the drain strategy allows an infinite
// duration or otherwise the deadline time. The force drain is captured by the
// deadline time being in the past.
func (d *DrainStrategy) DeadlineTime() (infinite bool, deadline time.Time) {
	// Treat the nil case as a force drain so during an upgrade where a node may
	// not have a drain strategy but has Drain set to true, it is treated as a
	// force to mimic old behavior.
	if d == nil {
		return false, time.Time{}
	}

	ns := d.Deadline.Nanoseconds()
	switch {
	case ns < 0: // Force
		return false, time.Time{}
	case ns == 0: // Infinite
		return true, time.Time{}
	default:
		return false, d.ForceDeadline
	}
}

// Equal reports whether both strategies are nil, or both are non-nil with
// identical deadlines and system-job handling.
func (d *DrainStrategy) Equal(o *DrainStrategy) bool {
	if d == nil && o == nil {
		return true
	} else if o != nil && d == nil {
		return false
	} else if d != nil && o == nil {
		return false
	}

	// Compare values
	if d.ForceDeadline != o.ForceDeadline {
		return false
	} else if d.Deadline != o.Deadline {
		return false
	} else if d.IgnoreSystemJobs != o.IgnoreSystemJobs {
		return false
	}

	return true
}

const (
	// DrainStatuses are the various states a drain can be in, as reflected in
	// DrainMetadata
	DrainStatusDraining DrainStatus = "draining"
	DrainStatusComplete DrainStatus = "complete"
	DrainStatusCanceled DrainStatus = "canceled"
)

type DrainStatus string

// DrainMetadata contains information about the most recent drain operation for a given Node.
type DrainMetadata struct {
	// StartedAt is the time that the drain operation started. This is equal to Node.DrainStrategy.StartedAt,
	// if it exists
	StartedAt time.Time

	// UpdatedAt is the time that this struct was most recently updated, either via API action
	// or drain completion
	UpdatedAt time.Time

	// Status reflects the status of the drain operation.
	Status DrainStatus

	// AccessorID is the accessor ID of the ACL token used in the most recent API operation against this drain
	AccessorID string

	// Meta includes the operator-submitted metadata about this drain operation
	Meta map[string]string
}

// Copy returns a copy of the metadata with its own clone of the Meta map;
// nil-safe.
func (m *DrainMetadata) Copy() *DrainMetadata {
	if m == nil {
		return nil
	}
	c := new(DrainMetadata)
	*c = *m
	c.Meta = maps.Clone(m.Meta)
	return c
}

// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// SecretID is an ID that is only known by the Node and the set of Servers.
	// It is not accessible via the API and is used to authenticate nodes
	// conducting privileged activities.
	SecretID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// CgroupParent for this node (linux only)
	CgroupParent string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// TLSEnabled indicates if the Agent has TLS enabled for the HTTP API
	TLSEnabled bool

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// NodeResources captures the available resources on the client.
	NodeResources *NodeResources

	// ReservedResources captures the set resources on the client that are
	// reserved from scheduling.
	ReservedResources *NodeReservedResources

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	// COMPAT(0.10): Remove after 0.10
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	// COMPAT(0.10): Remove after 0.10
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// DrainStrategy determines the node's draining behavior.
	// Will be non-nil only while draining.
	DrainStrategy *DrainStrategy

	// SchedulingEligibility determines whether this node will receive new
	// placements.
	SchedulingEligibility string

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// StatusUpdatedAt is the time stamp at which the state of the node was
	// updated
	StatusUpdatedAt int64

	// Events is the most recent set of events generated for the node,
	// retaining only MaxRetainedNodeEvents number at a time
	Events []*NodeEvent

	// Drivers is a map of driver names to current driver information
	Drivers map[string]*DriverInfo

	// CSIControllerPlugins is a map of plugin names to current CSI Plugin info
	CSIControllerPlugins map[string]*CSIInfo
	// CSINodePlugins is a map of plugin names to current CSI Plugin info
	CSINodePlugins map[string]*CSIInfo

	// HostVolumes is a map of host volume names to their configuration
	HostVolumes map[string]*ClientHostVolumeConfig

	// HostNetworks is a map of host_network names to their configuration
	HostNetworks map[string]*ClientHostNetworkConfig

	// LastDrain contains metadata about the most recent drain operation
	LastDrain *DrainMetadata

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// GetID is a helper for getting the ID when the object may be nil and is
// required for pagination.
func (n *Node) GetID() string {
	if n == nil {
		return ""
	}
	return n.ID
}

// Sanitize returns a copy of the Node omitting confidential fields
// It only returns a copy if the Node contains the confidential fields
func (n *Node) Sanitize() *Node {
	if n == nil {
		return nil
	}
	if n.SecretID == "" {
		return n
	}
	clean := n.Copy()
	clean.SecretID = ""
	return clean
}

// Ready returns true if the node is ready for running allocations
func (n *Node) Ready() bool {
	return n.Status == NodeStatusReady && n.DrainStrategy == nil && n.SchedulingEligibility == NodeSchedulingEligible
}

func (n *Node) Canonicalize() {
	if n == nil {
		return
	}

	// Ensure SchedulingEligibility is correctly set whenever draining so the plan applier and other scheduling logic
2115 if n.DrainStrategy != nil { 2116 n.SchedulingEligibility = NodeSchedulingIneligible 2117 } else if n.SchedulingEligibility == "" { 2118 n.SchedulingEligibility = NodeSchedulingEligible 2119 } 2120 2121 // COMPAT remove in 1.0 2122 // In v0.12.0 we introduced a separate node specific network resource struct 2123 // so we need to covert any pre 0.12 clients to the correct struct 2124 if n.NodeResources != nil && n.NodeResources.NodeNetworks == nil { 2125 if n.NodeResources.Networks != nil { 2126 for _, nr := range n.NodeResources.Networks { 2127 nnr := &NodeNetworkResource{ 2128 Mode: nr.Mode, 2129 Speed: nr.MBits, 2130 Device: nr.Device, 2131 } 2132 if nr.IP != "" { 2133 nnr.Addresses = []NodeNetworkAddress{ 2134 { 2135 Alias: "default", 2136 Address: nr.IP, 2137 }, 2138 } 2139 } 2140 n.NodeResources.NodeNetworks = append(n.NodeResources.NodeNetworks, nnr) 2141 } 2142 } 2143 } 2144 } 2145 2146 func (n *Node) Copy() *Node { 2147 if n == nil { 2148 return nil 2149 } 2150 nn := *n 2151 nn.Attributes = maps.Clone(nn.Attributes) 2152 nn.NodeResources = nn.NodeResources.Copy() 2153 nn.ReservedResources = nn.ReservedResources.Copy() 2154 nn.Resources = nn.Resources.Copy() 2155 nn.Reserved = nn.Reserved.Copy() 2156 nn.Links = maps.Clone(nn.Links) 2157 nn.Meta = maps.Clone(nn.Meta) 2158 nn.DrainStrategy = nn.DrainStrategy.Copy() 2159 nn.Events = helper.CopySlice(n.Events) 2160 nn.Drivers = helper.DeepCopyMap(n.Drivers) 2161 nn.CSIControllerPlugins = helper.DeepCopyMap(nn.CSIControllerPlugins) 2162 nn.CSINodePlugins = helper.DeepCopyMap(nn.CSINodePlugins) 2163 nn.HostVolumes = helper.DeepCopyMap(n.HostVolumes) 2164 nn.HostNetworks = helper.DeepCopyMap(n.HostNetworks) 2165 nn.LastDrain = nn.LastDrain.Copy() 2166 return &nn 2167 } 2168 2169 // TerminalStatus returns if the current status is terminal and 2170 // will no longer transition. 
2171 func (n *Node) TerminalStatus() bool { 2172 switch n.Status { 2173 case NodeStatusDown: 2174 return true 2175 default: 2176 return false 2177 } 2178 } 2179 2180 // ComparableReservedResources returns the reserved resouces on the node 2181 // handling upgrade paths. Reserved networks must be handled separately. After 2182 // 0.11 calls to this should be replaced with: 2183 // node.ReservedResources.Comparable() 2184 // 2185 // COMPAT(0.11): Remove in 0.11 2186 func (n *Node) ComparableReservedResources() *ComparableResources { 2187 // See if we can no-op 2188 if n.Reserved == nil && n.ReservedResources == nil { 2189 return nil 2190 } 2191 2192 // Node already has 0.9+ behavior 2193 if n.ReservedResources != nil { 2194 return n.ReservedResources.Comparable() 2195 } 2196 2197 // Upgrade path 2198 return &ComparableResources{ 2199 Flattened: AllocatedTaskResources{ 2200 Cpu: AllocatedCpuResources{ 2201 CpuShares: int64(n.Reserved.CPU), 2202 }, 2203 Memory: AllocatedMemoryResources{ 2204 MemoryMB: int64(n.Reserved.MemoryMB), 2205 }, 2206 }, 2207 Shared: AllocatedSharedResources{ 2208 DiskMB: int64(n.Reserved.DiskMB), 2209 }, 2210 } 2211 } 2212 2213 // ComparableResources returns the resouces on the node 2214 // handling upgrade paths. Networking must be handled separately. 
After 0.11 2215 // calls to this should be replaced with: node.NodeResources.Comparable() 2216 // 2217 // // COMPAT(0.11): Remove in 0.11 2218 func (n *Node) ComparableResources() *ComparableResources { 2219 // Node already has 0.9+ behavior 2220 if n.NodeResources != nil { 2221 return n.NodeResources.Comparable() 2222 } 2223 2224 // Upgrade path 2225 return &ComparableResources{ 2226 Flattened: AllocatedTaskResources{ 2227 Cpu: AllocatedCpuResources{ 2228 CpuShares: int64(n.Resources.CPU), 2229 }, 2230 Memory: AllocatedMemoryResources{ 2231 MemoryMB: int64(n.Resources.MemoryMB), 2232 }, 2233 }, 2234 Shared: AllocatedSharedResources{ 2235 DiskMB: int64(n.Resources.DiskMB), 2236 }, 2237 } 2238 } 2239 2240 // Stub returns a summarized version of the node 2241 func (n *Node) Stub(fields *NodeStubFields) *NodeListStub { 2242 2243 addr, _, _ := net.SplitHostPort(n.HTTPAddr) 2244 2245 s := &NodeListStub{ 2246 Address: addr, 2247 ID: n.ID, 2248 Datacenter: n.Datacenter, 2249 Name: n.Name, 2250 NodeClass: n.NodeClass, 2251 Version: n.Attributes["nomad.version"], 2252 Drain: n.DrainStrategy != nil, 2253 SchedulingEligibility: n.SchedulingEligibility, 2254 Status: n.Status, 2255 StatusDescription: n.StatusDescription, 2256 Drivers: n.Drivers, 2257 HostVolumes: n.HostVolumes, 2258 LastDrain: n.LastDrain, 2259 CreateIndex: n.CreateIndex, 2260 ModifyIndex: n.ModifyIndex, 2261 } 2262 2263 if fields != nil { 2264 if fields.Resources { 2265 s.NodeResources = n.NodeResources 2266 s.ReservedResources = n.ReservedResources 2267 } 2268 2269 // Fetch key attributes from the main Attributes map. 
2270 if fields.OS { 2271 m := make(map[string]string) 2272 m["os.name"] = n.Attributes["os.name"] 2273 s.Attributes = m 2274 } 2275 } 2276 2277 return s 2278 } 2279 2280 // NodeListStub is used to return a subset of job information 2281 // for the job list 2282 type NodeListStub struct { 2283 Address string 2284 ID string 2285 Attributes map[string]string `json:",omitempty"` 2286 Datacenter string 2287 Name string 2288 NodeClass string 2289 Version string 2290 Drain bool 2291 SchedulingEligibility string 2292 Status string 2293 StatusDescription string 2294 Drivers map[string]*DriverInfo 2295 HostVolumes map[string]*ClientHostVolumeConfig 2296 NodeResources *NodeResources `json:",omitempty"` 2297 ReservedResources *NodeReservedResources `json:",omitempty"` 2298 LastDrain *DrainMetadata 2299 CreateIndex uint64 2300 ModifyIndex uint64 2301 } 2302 2303 // NodeStubFields defines which fields are included in the NodeListStub. 2304 type NodeStubFields struct { 2305 Resources bool 2306 OS bool 2307 } 2308 2309 // Resources is used to define the resources available 2310 // on a client 2311 type Resources struct { 2312 CPU int 2313 Cores int 2314 MemoryMB int 2315 MemoryMaxMB int 2316 DiskMB int 2317 IOPS int // COMPAT(0.10): Only being used to issue warnings 2318 Networks Networks 2319 Devices ResourceDevices 2320 } 2321 2322 const ( 2323 BytesInMegabyte = 1024 * 1024 2324 ) 2325 2326 // DefaultResources is a small resources object that contains the 2327 // default resources requests that we will provide to an object. 2328 // --- THIS FUNCTION IS REPLICATED IN api/resources.go and should 2329 // be kept in sync. 2330 func DefaultResources() *Resources { 2331 return &Resources{ 2332 CPU: 100, 2333 Cores: 0, 2334 MemoryMB: 300, 2335 } 2336 } 2337 2338 // MinResources is a small resources object that contains the 2339 // absolute minimum resources that we will provide to an object. 
2340 // This should not be confused with the defaults which are 2341 // provided in Canonicalize() --- THIS FUNCTION IS REPLICATED IN 2342 // api/resources.go and should be kept in sync. 2343 func MinResources() *Resources { 2344 return &Resources{ 2345 CPU: 1, 2346 Cores: 0, 2347 MemoryMB: 10, 2348 } 2349 } 2350 2351 // DiskInBytes returns the amount of disk resources in bytes. 2352 func (r *Resources) DiskInBytes() int64 { 2353 return int64(r.DiskMB * BytesInMegabyte) 2354 } 2355 2356 func (r *Resources) Validate() error { 2357 var mErr multierror.Error 2358 2359 if r.Cores > 0 && r.CPU > 0 { 2360 mErr.Errors = append(mErr.Errors, errors.New("Task can only ask for 'cpu' or 'cores' resource, not both.")) 2361 } 2362 2363 if err := r.MeetsMinResources(); err != nil { 2364 mErr.Errors = append(mErr.Errors, err) 2365 } 2366 2367 // Ensure the task isn't asking for disk resources 2368 if r.DiskMB > 0 { 2369 mErr.Errors = append(mErr.Errors, errors.New("Task can't ask for disk resources, they have to be specified at the task group level.")) 2370 } 2371 2372 for i, d := range r.Devices { 2373 if err := d.Validate(); err != nil { 2374 mErr.Errors = append(mErr.Errors, fmt.Errorf("device %d failed validation: %v", i+1, err)) 2375 } 2376 } 2377 2378 if r.MemoryMaxMB != 0 && r.MemoryMaxMB < r.MemoryMB { 2379 mErr.Errors = append(mErr.Errors, fmt.Errorf("MemoryMaxMB value (%d) should be larger than MemoryMB value (%d)", r.MemoryMaxMB, r.MemoryMB)) 2380 } 2381 2382 return mErr.ErrorOrNil() 2383 } 2384 2385 // Merge merges this resource with another resource. 
// COMPAT(0.10): Remove in 0.10
func (r *Resources) Merge(other *Resources) {
	// Field-wise overwrite: a non-zero (or non-empty) value in `other`
	// replaces the value in `r`; zero values leave `r` untouched.
	if other.CPU != 0 {
		r.CPU = other.CPU
	}
	if other.Cores != 0 {
		r.Cores = other.Cores
	}
	if other.MemoryMB != 0 {
		r.MemoryMB = other.MemoryMB
	}
	if other.MemoryMaxMB != 0 {
		r.MemoryMaxMB = other.MemoryMaxMB
	}
	if other.DiskMB != 0 {
		r.DiskMB = other.DiskMB
	}
	if len(other.Networks) != 0 {
		r.Networks = other.Networks
	}
	if len(other.Devices) != 0 {
		r.Devices = other.Devices
	}
}

// Equal Resources.
//
// COMPAT(0.10): Remove in 0.10
func (r *Resources) Equal(o *Resources) bool {
	if r == o {
		return true
	}
	if r == nil || o == nil {
		return false
	}
	return r.CPU == o.CPU &&
		r.Cores == o.Cores &&
		r.MemoryMB == o.MemoryMB &&
		r.MemoryMaxMB == o.MemoryMaxMB &&
		r.DiskMB == o.DiskMB &&
		r.IOPS == o.IOPS &&
		r.Networks.Equal(&o.Networks) &&
		r.Devices.Equal(&o.Devices)
}

// ResourceDevices are part of Resources.
//
// COMPAT(0.10): Remove in 0.10.
type ResourceDevices []*RequestedDevice

// Equal ResourceDevices as set keyed by Name.
//
// COMPAT(0.10): Remove in 0.10
func (d *ResourceDevices) Equal(o *ResourceDevices) bool {
	if d == o {
		return true
	}
	if d == nil || o == nil {
		return false
	}
	if len(*d) != len(*o) {
		return false
	}
	// Index by Name, then verify every element of `o` matches. Combined with
	// the length check this is set equality (assuming names are unique).
	m := make(map[string]*RequestedDevice, len(*d))
	for _, e := range *d {
		m[e.Name] = e
	}
	for _, oe := range *o {
		de, ok := m[oe.Name]
		if !ok || !de.Equal(oe) {
			return false
		}
	}
	return true
}

// Canonicalize the Resources struct.
//
// COMPAT(0.10): Remove in 0.10
func (r *Resources) Canonicalize() {
	// Ensure that an empty and nil slices are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
	if len(r.Networks) == 0 {
		r.Networks = nil
	}
	if len(r.Devices) == 0 {
		r.Devices = nil
	}

	for _, n := range r.Networks {
		n.Canonicalize()
	}
}

// MeetsMinResources returns an error if the resources specified are less than
// the minimum allowed.
// This is based on the minimums defined in the Resources type
// COMPAT(0.10): Remove in 0.10
func (r *Resources) MeetsMinResources() error {
	var mErr multierror.Error
	minResources := MinResources()
	// CPU minimum is only enforced when cores are not requested.
	if r.CPU < minResources.CPU && r.Cores == 0 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is %d; got %d", minResources.CPU, r.CPU))
	}
	if r.MemoryMB < minResources.MemoryMB {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is %d; got %d", minResources.MemoryMB, r.MemoryMB))
	}
	return mErr.ErrorOrNil()
}

// Copy returns a deep copy of the resources
func (r *Resources) Copy() *Resources {
	if r == nil {
		return nil
	}
	newR := new(Resources)
	*newR = *r

	// Copy the network objects
	newR.Networks = r.Networks.Copy()

	// Copy the devices
	if r.Devices != nil {
		n := len(r.Devices)
		newR.Devices = make([]*RequestedDevice, n)
		for i := 0; i < n; i++ {
			newR.Devices[i] = r.Devices[i].Copy()
		}
	}

	return newR
}

// NetIndex finds the matching net index using device name
// COMPAT(0.10): Remove in 0.10
func (r *Resources) NetIndex(n *NetworkResource) int {
	return r.Networks.NetIndex(n)
}

// Add adds the resources of the delta to this, potentially
// returning an error if not possible.
// COMPAT(0.10): Remove in 0.10
func (r *Resources) Add(delta *Resources) {
	if delta == nil {
		return
	}

	r.CPU += delta.CPU
	r.MemoryMB += delta.MemoryMB
	// A delta without an explicit max contributes its regular memory to the
	// max as well, so MemoryMaxMB stays >= MemoryMB after accumulation.
	if delta.MemoryMaxMB > 0 {
		r.MemoryMaxMB += delta.MemoryMaxMB
	} else {
		r.MemoryMaxMB += delta.MemoryMB
	}
	r.DiskMB += delta.DiskMB

	for _, n := range delta.Networks {
		// Find the matching interface by IP or CIDR
		idx := r.NetIndex(n)
		if idx == -1 {
			r.Networks = append(r.Networks, n.Copy())
		} else {
			r.Networks[idx].Add(n)
		}
	}
}

// GoString returns the string representation of the Resources struct.
// NOTE(review): dereferences r without a nil check, unlike the other methods
// on this type — callers must not invoke it on a nil *Resources.
//
// COMPAT(0.10): Remove in 0.10
func (r *Resources) GoString() string {
	return fmt.Sprintf("*%#v", *r)
}

// NodeNetworkResource is used to describe a fingerprinted network of a node
type NodeNetworkResource struct {
	Mode string // host for physical networks, cni/<name> for cni networks

	// The following apply only to host networks
	Device     string // interface name
	MacAddress string
	Speed      int

	Addresses []NodeNetworkAddress // not valid for cni, for bridge there will only be 1 ip
}

// Equal reports deep (reflect.DeepEqual) equality of two
// NodeNetworkResources, including all addresses.
func (n *NodeNetworkResource) Equal(o *NodeNetworkResource) bool {
	return reflect.DeepEqual(n, o)
}

// Copy returns a deep copy of the network resource, or nil for a nil
// receiver. Addresses are value types, so a slice copy suffices.
func (n *NodeNetworkResource) Copy() *NodeNetworkResource {
	if n == nil {
		return nil
	}

	c := new(NodeNetworkResource)
	*c = *n

	if n.Addresses != nil {
		c.Addresses = make([]NodeNetworkAddress, len(n.Addresses))
		copy(c.Addresses, n.Addresses)
	}

	return c
}

// HasAlias returns true if any of the network's addresses is registered
// under the given alias.
func (n *NodeNetworkResource) HasAlias(alias string) bool {
	for _, addr := range n.Addresses {
		if addr.Alias == alias {
			return true
		}
	}
	return false
}

// NodeNetworkAF is the address family of a fingerprinted node network
// address ("ipv4" or "ipv6").
type NodeNetworkAF string

const (
	NodeNetworkAF_IPv4 NodeNetworkAF = "ipv4"
	NodeNetworkAF_IPv6 NodeNetworkAF = "ipv6"
)

// NodeNetworkAddress is a single fingerprinted address on a node network.
type NodeNetworkAddress struct {
	Family        NodeNetworkAF
	Alias         string
	Address       string
	ReservedPorts string
	Gateway       string // default route for this address
}

// AllocatedPortMapping describes a single port allocation on a host.
type AllocatedPortMapping struct {
	Label  string
	Value  int
	To     int
	HostIP string
}

// AllocatedPorts is the set of port mappings allocated to a task or
// allocation.
type AllocatedPorts []AllocatedPortMapping

// Get returns the port mapping with the given label and true, or a zero
// value and false if no mapping has that label.
func (p AllocatedPorts) Get(label string) (AllocatedPortMapping, bool) {
	for _, port := range p {
		if port.Label == label {
			return port, true
		}
	}

	return AllocatedPortMapping{}, false
}

type Port struct {
	// Label is the key for HCL port stanzas: port "foo" {}
	Label string

	// Value is the static or dynamic port value. For dynamic ports this
	// will be 0 in the jobspec and set by the scheduler.
	Value int

	// To is the port inside a network namespace where this port is
	// forwarded. -1 is an internal sentinel value used by Consul Connect
	// to mean "same as the host port."
	To int

	// HostNetwork is the name of the network this port should be assigned
	// to. Jobs with a HostNetwork set can only be placed on nodes with
	// that host network available.
	HostNetwork string
}

// DNSConfig describes DNS resolver settings applied to a task's network
// namespace.
type DNSConfig struct {
	Servers  []string
	Searches []string
	Options  []string
}

// Copy returns a deep copy of the DNSConfig, or nil for a nil receiver.
// NOTE(review): nil slices in the source become empty non-nil slices in the
// copy (make with length 0) — confirm no caller distinguishes the two.
func (d *DNSConfig) Copy() *DNSConfig {
	if d == nil {
		return nil
	}
	newD := new(DNSConfig)
	newD.Servers = make([]string, len(d.Servers))
	copy(newD.Servers, d.Servers)
	newD.Searches = make([]string, len(d.Searches))
	copy(newD.Searches, d.Searches)
	newD.Options = make([]string, len(d.Options))
	copy(newD.Options, d.Options)
	return newD
}

// NetworkResource is used to represent available network
// resources
type NetworkResource struct {
	Mode          string     // Mode of the network
	Device        string     // Name of the device
	CIDR          string     // CIDR block of addresses
	IP            string     // Host IP address
	Hostname      string     `json:",omitempty"` // Hostname of the network namespace
	MBits         int        // Throughput
	DNS           *DNSConfig // DNS Configuration
	ReservedPorts []Port     // Host Reserved ports
	DynamicPorts  []Port     // Host Dynamically assigned ports
}

// Hash returns a CRC32 checksum over the identifying fields and port lists.
// The digest is order-sensitive with respect to the port slices, so two
// resources with reordered ports hash differently.
func (n *NetworkResource) Hash() uint32 {
	var data []byte
	data = append(data, []byte(fmt.Sprintf("%s%s%s%s%s%d", n.Mode, n.Device, n.CIDR, n.IP, n.Hostname, n.MBits))...)

	for i, port := range n.ReservedPorts {
		data = append(data, []byte(fmt.Sprintf("r%d%s%d%d", i, port.Label, port.Value, port.To))...)
	}

	for i, port := range n.DynamicPorts {
		data = append(data, []byte(fmt.Sprintf("d%d%s%d%d", i, port.Label, port.Value, port.To))...)
	}

	return crc32.ChecksumIEEE(data)
}

// Equal compares two network resources by their Hash values.
// NOTE(review): equality by CRC32 digest admits a small collision
// probability and ignores fields not folded into Hash (e.g. DNS).
func (n *NetworkResource) Equal(other *NetworkResource) bool {
	return n.Hash() == other.Hash()
}

// Canonicalize normalizes the network resource: empty port slices become
// nil and ports without an explicit HostNetwork default to "default".
func (n *NetworkResource) Canonicalize() {
	// Ensure that an empty and nil slices are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
	if len(n.ReservedPorts) == 0 {
		n.ReservedPorts = nil
	}
	if len(n.DynamicPorts) == 0 {
		n.DynamicPorts = nil
	}

	for i, p := range n.DynamicPorts {
		if p.HostNetwork == "" {
			n.DynamicPorts[i].HostNetwork = "default"
		}
	}
	for i, p := range n.ReservedPorts {
		if p.HostNetwork == "" {
			n.ReservedPorts[i].HostNetwork = "default"
		}
	}
}

// Copy returns a deep copy of the network resource
func (n *NetworkResource) Copy() *NetworkResource {
	if n == nil {
		return nil
	}
	newR := new(NetworkResource)
	*newR = *n
	newR.DNS = n.DNS.Copy()
	if n.ReservedPorts != nil {
		newR.ReservedPorts = make([]Port, len(n.ReservedPorts))
		copy(newR.ReservedPorts, n.ReservedPorts)
	}
	if n.DynamicPorts != nil {
		newR.DynamicPorts = make([]Port, len(n.DynamicPorts))
		copy(newR.DynamicPorts, n.DynamicPorts)
	}
	return newR
}

// Add adds the resources of the delta to this, potentially
// returning an error if not possible.
func (n *NetworkResource) Add(delta *NetworkResource) {
	if len(delta.ReservedPorts) > 0 {
		n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...)
	}
	n.MBits += delta.MBits
	n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...)
}

// GoString returns a Go-syntax representation of the network resource.
// NOTE(review): dereferences n without a nil check.
func (n *NetworkResource) GoString() string {
	return fmt.Sprintf("*%#v", *n)
}

// PortLabels returns a map of port labels to their assigned host ports.
func (n *NetworkResource) PortLabels() map[string]int {
	num := len(n.ReservedPorts) + len(n.DynamicPorts)
	labelValues := make(map[string]int, num)
	for _, port := range n.ReservedPorts {
		labelValues[port.Label] = port.Value
	}
	for _, port := range n.DynamicPorts {
		labelValues[port.Label] = port.Value
	}
	return labelValues
}

// Networks defined for a task on the Resources struct.
2776 type Networks []*NetworkResource 2777 2778 func (ns Networks) Copy() Networks { 2779 if len(ns) == 0 { 2780 return nil 2781 } 2782 2783 out := make([]*NetworkResource, len(ns)) 2784 for i := range ns { 2785 out[i] = ns[i].Copy() 2786 } 2787 return out 2788 } 2789 2790 // Port assignment and IP for the given label or empty values. 2791 func (ns Networks) Port(label string) AllocatedPortMapping { 2792 for _, n := range ns { 2793 for _, p := range n.ReservedPorts { 2794 if p.Label == label { 2795 return AllocatedPortMapping{ 2796 Label: label, 2797 Value: p.Value, 2798 To: p.To, 2799 HostIP: n.IP, 2800 } 2801 } 2802 } 2803 for _, p := range n.DynamicPorts { 2804 if p.Label == label { 2805 return AllocatedPortMapping{ 2806 Label: label, 2807 Value: p.Value, 2808 To: p.To, 2809 HostIP: n.IP, 2810 } 2811 } 2812 } 2813 } 2814 return AllocatedPortMapping{} 2815 } 2816 2817 func (ns Networks) NetIndex(n *NetworkResource) int { 2818 for idx, net := range ns { 2819 if net.Device == n.Device { 2820 return idx 2821 } 2822 } 2823 return -1 2824 } 2825 2826 // RequestedDevice is used to request a device for a task. 2827 type RequestedDevice struct { 2828 // Name is the request name. The possible values are as follows: 2829 // * <type>: A single value only specifies the type of request. 2830 // * <vendor>/<type>: A single slash delimiter assumes the vendor and type of device is specified. 2831 // * <vendor>/<type>/<name>: Two slash delimiters assume vendor, type and specific model are specified. 2832 // 2833 // Examples are as follows: 2834 // * "gpu" 2835 // * "nvidia/gpu" 2836 // * "nvidia/gpu/GTX2080Ti" 2837 Name string 2838 2839 // Count is the number of requested devices 2840 Count uint64 2841 2842 // Constraints are a set of constraints to apply when selecting the device 2843 // to use. 2844 Constraints Constraints 2845 2846 // Affinities are a set of affinities to apply when selecting the device 2847 // to use. 
2848 Affinities Affinities 2849 } 2850 2851 func (r *RequestedDevice) Equal(o *RequestedDevice) bool { 2852 if r == o { 2853 return true 2854 } 2855 if r == nil || o == nil { 2856 return false 2857 } 2858 return r.Name == o.Name && 2859 r.Count == o.Count && 2860 r.Constraints.Equal(&o.Constraints) && 2861 r.Affinities.Equal(&o.Affinities) 2862 } 2863 2864 func (r *RequestedDevice) Copy() *RequestedDevice { 2865 if r == nil { 2866 return nil 2867 } 2868 2869 nr := *r 2870 nr.Constraints = CopySliceConstraints(nr.Constraints) 2871 nr.Affinities = CopySliceAffinities(nr.Affinities) 2872 2873 return &nr 2874 } 2875 2876 func (r *RequestedDevice) ID() *DeviceIdTuple { 2877 if r == nil || r.Name == "" { 2878 return nil 2879 } 2880 2881 parts := strings.SplitN(r.Name, "/", 3) 2882 switch len(parts) { 2883 case 1: 2884 return &DeviceIdTuple{ 2885 Type: parts[0], 2886 } 2887 case 2: 2888 return &DeviceIdTuple{ 2889 Vendor: parts[0], 2890 Type: parts[1], 2891 } 2892 default: 2893 return &DeviceIdTuple{ 2894 Vendor: parts[0], 2895 Type: parts[1], 2896 Name: parts[2], 2897 } 2898 } 2899 } 2900 2901 func (r *RequestedDevice) Validate() error { 2902 if r == nil { 2903 return nil 2904 } 2905 2906 var mErr multierror.Error 2907 if r.Name == "" { 2908 _ = multierror.Append(&mErr, errors.New("device name must be given as one of the following: type, vendor/type, or vendor/type/name")) 2909 } 2910 2911 for idx, constr := range r.Constraints { 2912 // Ensure that the constraint doesn't use an operand we do not allow 2913 switch constr.Operand { 2914 case ConstraintDistinctHosts, ConstraintDistinctProperty: 2915 outer := fmt.Errorf("Constraint %d validation failed: using unsupported operand %q", idx+1, constr.Operand) 2916 _ = multierror.Append(&mErr, outer) 2917 default: 2918 if err := constr.Validate(); err != nil { 2919 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 2920 _ = multierror.Append(&mErr, outer) 2921 } 2922 } 2923 } 2924 for idx, affinity := range 
r.Affinities { 2925 if err := affinity.Validate(); err != nil { 2926 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 2927 _ = multierror.Append(&mErr, outer) 2928 } 2929 } 2930 2931 return mErr.ErrorOrNil() 2932 } 2933 2934 // NodeResources is used to define the resources available on a client node. 2935 type NodeResources struct { 2936 Cpu NodeCpuResources 2937 Memory NodeMemoryResources 2938 Disk NodeDiskResources 2939 Devices []*NodeDeviceResource 2940 2941 // NodeNetworks was added in Nomad 0.12 to support multiple interfaces. 2942 // It is the superset of host_networks, fingerprinted networks, and the 2943 // node's default interface. 2944 NodeNetworks []*NodeNetworkResource 2945 2946 // Networks is the node's bridge network and default interface. It is 2947 // only used when scheduling jobs with a deprecated 2948 // task.resources.network stanza. 2949 Networks Networks 2950 2951 // MinDynamicPort and MaxDynamicPort represent the inclusive port range 2952 // to select dynamic ports from across all networks. 2953 MinDynamicPort int 2954 MaxDynamicPort int 2955 } 2956 2957 func (n *NodeResources) Copy() *NodeResources { 2958 if n == nil { 2959 return nil 2960 } 2961 2962 newN := new(NodeResources) 2963 *newN = *n 2964 newN.Cpu = n.Cpu.Copy() 2965 newN.Networks = n.Networks.Copy() 2966 2967 if n.NodeNetworks != nil { 2968 newN.NodeNetworks = make([]*NodeNetworkResource, len(n.NodeNetworks)) 2969 for i, nn := range n.NodeNetworks { 2970 newN.NodeNetworks[i] = nn.Copy() 2971 } 2972 } 2973 2974 // Copy the devices 2975 if n.Devices != nil { 2976 devices := len(n.Devices) 2977 newN.Devices = make([]*NodeDeviceResource, devices) 2978 for i := 0; i < devices; i++ { 2979 newN.Devices[i] = n.Devices[i].Copy() 2980 } 2981 } 2982 2983 return newN 2984 } 2985 2986 // Comparable returns a comparable version of the nodes resources. This 2987 // conversion can be lossy so care must be taken when using it. 
2988 func (n *NodeResources) Comparable() *ComparableResources { 2989 if n == nil { 2990 return nil 2991 } 2992 2993 c := &ComparableResources{ 2994 Flattened: AllocatedTaskResources{ 2995 Cpu: AllocatedCpuResources{ 2996 CpuShares: n.Cpu.CpuShares, 2997 ReservedCores: n.Cpu.ReservableCpuCores, 2998 }, 2999 Memory: AllocatedMemoryResources{ 3000 MemoryMB: n.Memory.MemoryMB, 3001 }, 3002 Networks: n.Networks, 3003 }, 3004 Shared: AllocatedSharedResources{ 3005 DiskMB: n.Disk.DiskMB, 3006 }, 3007 } 3008 return c 3009 } 3010 3011 func (n *NodeResources) Merge(o *NodeResources) { 3012 if o == nil { 3013 return 3014 } 3015 3016 n.Cpu.Merge(&o.Cpu) 3017 n.Memory.Merge(&o.Memory) 3018 n.Disk.Merge(&o.Disk) 3019 3020 if len(o.Networks) != 0 { 3021 n.Networks = append(n.Networks, o.Networks...) 3022 } 3023 3024 if len(o.Devices) != 0 { 3025 n.Devices = o.Devices 3026 } 3027 3028 if len(o.NodeNetworks) != 0 { 3029 for _, nw := range o.NodeNetworks { 3030 if i, nnw := lookupNetworkByDevice(n.NodeNetworks, nw.Device); nnw != nil { 3031 n.NodeNetworks[i] = nw 3032 } else { 3033 n.NodeNetworks = append(n.NodeNetworks, nw) 3034 } 3035 } 3036 } 3037 } 3038 3039 func lookupNetworkByDevice(nets []*NodeNetworkResource, name string) (int, *NodeNetworkResource) { 3040 for i, nw := range nets { 3041 if nw.Device == name { 3042 return i, nw 3043 } 3044 } 3045 return 0, nil 3046 } 3047 3048 func (n *NodeResources) Equal(o *NodeResources) bool { 3049 if o == nil && n == nil { 3050 return true 3051 } else if o == nil { 3052 return false 3053 } else if n == nil { 3054 return false 3055 } 3056 3057 if !n.Cpu.Equal(&o.Cpu) { 3058 return false 3059 } 3060 if !n.Memory.Equal(&o.Memory) { 3061 return false 3062 } 3063 if !n.Disk.Equal(&o.Disk) { 3064 return false 3065 } 3066 if !n.Networks.Equal(&o.Networks) { 3067 return false 3068 } 3069 3070 // Check the devices 3071 if !DevicesEquals(n.Devices, o.Devices) { 3072 return false 3073 } 3074 3075 if !NodeNetworksEquals(n.NodeNetworks, 
o.NodeNetworks) { 3076 return false 3077 } 3078 3079 return true 3080 } 3081 3082 // Equal equates Networks as a set 3083 func (ns *Networks) Equal(o *Networks) bool { 3084 if ns == o { 3085 return true 3086 } 3087 if ns == nil || o == nil { 3088 return false 3089 } 3090 if len(*ns) != len(*o) { 3091 return false 3092 } 3093 SETEQUALS: 3094 for _, ne := range *ns { 3095 for _, oe := range *o { 3096 if ne.Equal(oe) { 3097 continue SETEQUALS 3098 } 3099 } 3100 return false 3101 } 3102 return true 3103 } 3104 3105 // DevicesEquals returns true if the two device arrays are set equal 3106 func DevicesEquals(d1, d2 []*NodeDeviceResource) bool { 3107 if len(d1) != len(d2) { 3108 return false 3109 } 3110 idMap := make(map[DeviceIdTuple]*NodeDeviceResource, len(d1)) 3111 for _, d := range d1 { 3112 idMap[*d.ID()] = d 3113 } 3114 for _, otherD := range d2 { 3115 if d, ok := idMap[*otherD.ID()]; !ok || !d.Equal(otherD) { 3116 return false 3117 } 3118 } 3119 3120 return true 3121 } 3122 3123 func NodeNetworksEquals(n1, n2 []*NodeNetworkResource) bool { 3124 if len(n1) != len(n2) { 3125 return false 3126 } 3127 3128 netMap := make(map[string]*NodeNetworkResource, len(n1)) 3129 for _, n := range n1 { 3130 netMap[n.Device] = n 3131 } 3132 for _, otherN := range n2 { 3133 if n, ok := netMap[otherN.Device]; !ok || !n.Equal(otherN) { 3134 return false 3135 } 3136 } 3137 3138 return true 3139 3140 } 3141 3142 // NodeCpuResources captures the CPU resources of the node. 3143 type NodeCpuResources struct { 3144 // CpuShares is the CPU shares available. This is calculated by number of 3145 // cores multiplied by the core frequency. 3146 CpuShares int64 3147 3148 // TotalCpuCores is the total number of cores on the machine. This includes cores not in 3149 // the agent's cpuset if on a linux platform 3150 TotalCpuCores uint16 3151 3152 // ReservableCpuCores is the set of cpus which are available to be reserved on the Node. 
3153 // This value is currently only reported on Linux platforms which support cgroups and is 3154 // discovered by inspecting the cpuset of the agent's cgroup. 3155 ReservableCpuCores []uint16 3156 } 3157 3158 func (n NodeCpuResources) Copy() NodeCpuResources { 3159 newN := n 3160 if n.ReservableCpuCores != nil { 3161 newN.ReservableCpuCores = make([]uint16, len(n.ReservableCpuCores)) 3162 copy(newN.ReservableCpuCores, n.ReservableCpuCores) 3163 } 3164 3165 return newN 3166 } 3167 3168 func (n *NodeCpuResources) Merge(o *NodeCpuResources) { 3169 if o == nil { 3170 return 3171 } 3172 3173 if o.CpuShares != 0 { 3174 n.CpuShares = o.CpuShares 3175 } 3176 3177 if o.TotalCpuCores != 0 { 3178 n.TotalCpuCores = o.TotalCpuCores 3179 } 3180 3181 if len(o.ReservableCpuCores) != 0 { 3182 n.ReservableCpuCores = o.ReservableCpuCores 3183 } 3184 } 3185 3186 func (n *NodeCpuResources) Equal(o *NodeCpuResources) bool { 3187 if o == nil && n == nil { 3188 return true 3189 } else if o == nil { 3190 return false 3191 } else if n == nil { 3192 return false 3193 } 3194 3195 if n.CpuShares != o.CpuShares { 3196 return false 3197 } 3198 3199 if n.TotalCpuCores != o.TotalCpuCores { 3200 return false 3201 } 3202 3203 if len(n.ReservableCpuCores) != len(o.ReservableCpuCores) { 3204 return false 3205 } 3206 for i := range n.ReservableCpuCores { 3207 if n.ReservableCpuCores[i] != o.ReservableCpuCores[i] { 3208 return false 3209 } 3210 } 3211 return true 3212 } 3213 3214 func (n *NodeCpuResources) SharesPerCore() int64 { 3215 return n.CpuShares / int64(n.TotalCpuCores) 3216 } 3217 3218 // NodeMemoryResources captures the memory resources of the node 3219 type NodeMemoryResources struct { 3220 // MemoryMB is the total available memory on the node 3221 MemoryMB int64 3222 } 3223 3224 func (n *NodeMemoryResources) Merge(o *NodeMemoryResources) { 3225 if o == nil { 3226 return 3227 } 3228 3229 if o.MemoryMB != 0 { 3230 n.MemoryMB = o.MemoryMB 3231 } 3232 } 3233 3234 func (n *NodeMemoryResources) 
Equal(o *NodeMemoryResources) bool { 3235 if o == nil && n == nil { 3236 return true 3237 } else if o == nil { 3238 return false 3239 } else if n == nil { 3240 return false 3241 } 3242 3243 if n.MemoryMB != o.MemoryMB { 3244 return false 3245 } 3246 3247 return true 3248 } 3249 3250 // NodeDiskResources captures the disk resources of the node 3251 type NodeDiskResources struct { 3252 // DiskMB is the total available disk space on the node 3253 DiskMB int64 3254 } 3255 3256 func (n *NodeDiskResources) Merge(o *NodeDiskResources) { 3257 if o == nil { 3258 return 3259 } 3260 if o.DiskMB != 0 { 3261 n.DiskMB = o.DiskMB 3262 } 3263 } 3264 3265 func (n *NodeDiskResources) Equal(o *NodeDiskResources) bool { 3266 if o == nil && n == nil { 3267 return true 3268 } else if o == nil { 3269 return false 3270 } else if n == nil { 3271 return false 3272 } 3273 3274 if n.DiskMB != o.DiskMB { 3275 return false 3276 } 3277 3278 return true 3279 } 3280 3281 // DeviceIdTuple is the tuple that identifies a device 3282 type DeviceIdTuple struct { 3283 Vendor string 3284 Type string 3285 Name string 3286 } 3287 3288 func (id *DeviceIdTuple) String() string { 3289 if id == nil { 3290 return "" 3291 } 3292 3293 return fmt.Sprintf("%s/%s/%s", id.Vendor, id.Type, id.Name) 3294 } 3295 3296 // Matches returns if this Device ID is a superset of the passed ID. 3297 func (id *DeviceIdTuple) Matches(other *DeviceIdTuple) bool { 3298 if other == nil { 3299 return false 3300 } 3301 3302 if other.Name != "" && other.Name != id.Name { 3303 return false 3304 } 3305 3306 if other.Vendor != "" && other.Vendor != id.Vendor { 3307 return false 3308 } 3309 3310 if other.Type != "" && other.Type != id.Type { 3311 return false 3312 } 3313 3314 return true 3315 } 3316 3317 // Equal returns if this Device ID is the same as the passed ID. 
3318 func (id *DeviceIdTuple) Equal(o *DeviceIdTuple) bool { 3319 if id == nil && o == nil { 3320 return true 3321 } else if id == nil || o == nil { 3322 return false 3323 } 3324 3325 return o.Vendor == id.Vendor && o.Type == id.Type && o.Name == id.Name 3326 } 3327 3328 // NodeDeviceResource captures a set of devices sharing a common 3329 // vendor/type/device_name tuple. 3330 type NodeDeviceResource struct { 3331 Vendor string 3332 Type string 3333 Name string 3334 Instances []*NodeDevice 3335 Attributes map[string]*psstructs.Attribute 3336 } 3337 3338 func (n *NodeDeviceResource) ID() *DeviceIdTuple { 3339 if n == nil { 3340 return nil 3341 } 3342 3343 return &DeviceIdTuple{ 3344 Vendor: n.Vendor, 3345 Type: n.Type, 3346 Name: n.Name, 3347 } 3348 } 3349 3350 func (n *NodeDeviceResource) Copy() *NodeDeviceResource { 3351 if n == nil { 3352 return nil 3353 } 3354 3355 // Copy the primitives 3356 nn := *n 3357 3358 // Copy the device instances 3359 if l := len(nn.Instances); l != 0 { 3360 nn.Instances = make([]*NodeDevice, 0, l) 3361 for _, d := range n.Instances { 3362 nn.Instances = append(nn.Instances, d.Copy()) 3363 } 3364 } 3365 3366 // Copy the Attributes 3367 nn.Attributes = psstructs.CopyMapStringAttribute(nn.Attributes) 3368 3369 return &nn 3370 } 3371 3372 func (n *NodeDeviceResource) Equal(o *NodeDeviceResource) bool { 3373 if o == nil && n == nil { 3374 return true 3375 } else if o == nil { 3376 return false 3377 } else if n == nil { 3378 return false 3379 } 3380 3381 if n.Vendor != o.Vendor { 3382 return false 3383 } else if n.Type != o.Type { 3384 return false 3385 } else if n.Name != o.Name { 3386 return false 3387 } 3388 3389 // Check the attributes 3390 if len(n.Attributes) != len(o.Attributes) { 3391 return false 3392 } 3393 for k, v := range n.Attributes { 3394 if otherV, ok := o.Attributes[k]; !ok || v != otherV { 3395 return false 3396 } 3397 } 3398 3399 // Check the instances 3400 if len(n.Instances) != len(o.Instances) { 3401 return false 
3402 } 3403 idMap := make(map[string]*NodeDevice, len(n.Instances)) 3404 for _, d := range n.Instances { 3405 idMap[d.ID] = d 3406 } 3407 for _, otherD := range o.Instances { 3408 if d, ok := idMap[otherD.ID]; !ok || !d.Equal(otherD) { 3409 return false 3410 } 3411 } 3412 3413 return true 3414 } 3415 3416 // NodeDevice is an instance of a particular device. 3417 type NodeDevice struct { 3418 // ID is the ID of the device. 3419 ID string 3420 3421 // Healthy captures whether the device is healthy. 3422 Healthy bool 3423 3424 // HealthDescription is used to provide a human readable description of why 3425 // the device may be unhealthy. 3426 HealthDescription string 3427 3428 // Locality stores HW locality information for the node to optionally be 3429 // used when making placement decisions. 3430 Locality *NodeDeviceLocality 3431 } 3432 3433 func (n *NodeDevice) Equal(o *NodeDevice) bool { 3434 if o == nil && n == nil { 3435 return true 3436 } else if o == nil { 3437 return false 3438 } else if n == nil { 3439 return false 3440 } 3441 3442 if n.ID != o.ID { 3443 return false 3444 } else if n.Healthy != o.Healthy { 3445 return false 3446 } else if n.HealthDescription != o.HealthDescription { 3447 return false 3448 } else if !n.Locality.Equal(o.Locality) { 3449 return false 3450 } 3451 3452 return false 3453 } 3454 3455 func (n *NodeDevice) Copy() *NodeDevice { 3456 if n == nil { 3457 return nil 3458 } 3459 3460 // Copy the primitives 3461 nn := *n 3462 3463 // Copy the locality 3464 nn.Locality = nn.Locality.Copy() 3465 3466 return &nn 3467 } 3468 3469 // NodeDeviceLocality stores information about the devices hardware locality on 3470 // the node. 3471 type NodeDeviceLocality struct { 3472 // PciBusID is the PCI Bus ID for the device. 
3473 PciBusID string 3474 } 3475 3476 func (n *NodeDeviceLocality) Equal(o *NodeDeviceLocality) bool { 3477 if o == nil && n == nil { 3478 return true 3479 } else if o == nil { 3480 return false 3481 } else if n == nil { 3482 return false 3483 } 3484 3485 if n.PciBusID != o.PciBusID { 3486 return false 3487 } 3488 3489 return true 3490 } 3491 3492 func (n *NodeDeviceLocality) Copy() *NodeDeviceLocality { 3493 if n == nil { 3494 return nil 3495 } 3496 3497 // Copy the primitives 3498 nn := *n 3499 return &nn 3500 } 3501 3502 // NodeReservedResources is used to capture the resources on a client node that 3503 // should be reserved and not made available to jobs. 3504 type NodeReservedResources struct { 3505 Cpu NodeReservedCpuResources 3506 Memory NodeReservedMemoryResources 3507 Disk NodeReservedDiskResources 3508 Networks NodeReservedNetworkResources 3509 } 3510 3511 func (n *NodeReservedResources) Copy() *NodeReservedResources { 3512 if n == nil { 3513 return nil 3514 } 3515 newN := new(NodeReservedResources) 3516 *newN = *n 3517 return newN 3518 } 3519 3520 // Comparable returns a comparable version of the node's reserved resources. The 3521 // returned resources doesn't contain any network information. This conversion 3522 // can be lossy so care must be taken when using it. 3523 func (n *NodeReservedResources) Comparable() *ComparableResources { 3524 if n == nil { 3525 return nil 3526 } 3527 3528 c := &ComparableResources{ 3529 Flattened: AllocatedTaskResources{ 3530 Cpu: AllocatedCpuResources{ 3531 CpuShares: n.Cpu.CpuShares, 3532 ReservedCores: n.Cpu.ReservedCpuCores, 3533 }, 3534 Memory: AllocatedMemoryResources{ 3535 MemoryMB: n.Memory.MemoryMB, 3536 }, 3537 }, 3538 Shared: AllocatedSharedResources{ 3539 DiskMB: n.Disk.DiskMB, 3540 }, 3541 } 3542 return c 3543 } 3544 3545 // NodeReservedCpuResources captures the reserved CPU resources of the node. 
3546 type NodeReservedCpuResources struct { 3547 CpuShares int64 3548 ReservedCpuCores []uint16 3549 } 3550 3551 // NodeReservedMemoryResources captures the reserved memory resources of the node. 3552 type NodeReservedMemoryResources struct { 3553 MemoryMB int64 3554 } 3555 3556 // NodeReservedDiskResources captures the reserved disk resources of the node. 3557 type NodeReservedDiskResources struct { 3558 DiskMB int64 3559 } 3560 3561 // NodeReservedNetworkResources captures the reserved network resources of the node. 3562 type NodeReservedNetworkResources struct { 3563 // ReservedHostPorts is the set of ports reserved on all host network 3564 // interfaces. Its format is a comma separate list of integers or integer 3565 // ranges. (80,443,1000-2000,2005) 3566 ReservedHostPorts string 3567 } 3568 3569 // ParseReservedHostPorts returns the reserved host ports. 3570 func (n *NodeReservedNetworkResources) ParseReservedHostPorts() ([]uint64, error) { 3571 return ParsePortRanges(n.ReservedHostPorts) 3572 } 3573 3574 // AllocatedResources is the set of resources to be used by an allocation. 3575 type AllocatedResources struct { 3576 // Tasks is a mapping of task name to the resources for the task. 3577 Tasks map[string]*AllocatedTaskResources 3578 TaskLifecycles map[string]*TaskLifecycleConfig 3579 3580 // Shared is the set of resource that are shared by all tasks in the group. 
3581 Shared AllocatedSharedResources 3582 } 3583 3584 func (a *AllocatedResources) Copy() *AllocatedResources { 3585 if a == nil { 3586 return nil 3587 } 3588 3589 out := AllocatedResources{ 3590 Shared: a.Shared.Copy(), 3591 } 3592 3593 if a.Tasks != nil { 3594 out.Tasks = make(map[string]*AllocatedTaskResources, len(out.Tasks)) 3595 for task, resource := range a.Tasks { 3596 out.Tasks[task] = resource.Copy() 3597 } 3598 } 3599 if a.TaskLifecycles != nil { 3600 out.TaskLifecycles = make(map[string]*TaskLifecycleConfig, len(out.TaskLifecycles)) 3601 for task, lifecycle := range a.TaskLifecycles { 3602 out.TaskLifecycles[task] = lifecycle.Copy() 3603 } 3604 3605 } 3606 3607 return &out 3608 } 3609 3610 // Comparable returns a comparable version of the allocations allocated 3611 // resources. This conversion can be lossy so care must be taken when using it. 3612 func (a *AllocatedResources) Comparable() *ComparableResources { 3613 if a == nil { 3614 return nil 3615 } 3616 3617 c := &ComparableResources{ 3618 Shared: a.Shared, 3619 } 3620 3621 prestartSidecarTasks := &AllocatedTaskResources{} 3622 prestartEphemeralTasks := &AllocatedTaskResources{} 3623 main := &AllocatedTaskResources{} 3624 poststopTasks := &AllocatedTaskResources{} 3625 3626 for taskName, r := range a.Tasks { 3627 lc := a.TaskLifecycles[taskName] 3628 if lc == nil { 3629 main.Add(r) 3630 } else if lc.Hook == TaskLifecycleHookPrestart { 3631 if lc.Sidecar { 3632 prestartSidecarTasks.Add(r) 3633 } else { 3634 prestartEphemeralTasks.Add(r) 3635 } 3636 } else if lc.Hook == TaskLifecycleHookPoststop { 3637 poststopTasks.Add(r) 3638 } 3639 } 3640 3641 // update this loop to account for lifecycle hook 3642 prestartEphemeralTasks.Max(main) 3643 prestartEphemeralTasks.Max(poststopTasks) 3644 prestartSidecarTasks.Add(prestartEphemeralTasks) 3645 c.Flattened.Add(prestartSidecarTasks) 3646 3647 // Add network resources that are at the task group level 3648 for _, network := range a.Shared.Networks { 3649 
c.Flattened.Add(&AllocatedTaskResources{ 3650 Networks: []*NetworkResource{network}, 3651 }) 3652 } 3653 3654 return c 3655 } 3656 3657 // OldTaskResources returns the pre-0.9.0 map of task resources 3658 func (a *AllocatedResources) OldTaskResources() map[string]*Resources { 3659 m := make(map[string]*Resources, len(a.Tasks)) 3660 for name, res := range a.Tasks { 3661 m[name] = &Resources{ 3662 CPU: int(res.Cpu.CpuShares), 3663 MemoryMB: int(res.Memory.MemoryMB), 3664 MemoryMaxMB: int(res.Memory.MemoryMaxMB), 3665 Networks: res.Networks, 3666 } 3667 } 3668 3669 return m 3670 } 3671 3672 func (a *AllocatedResources) Canonicalize() { 3673 a.Shared.Canonicalize() 3674 3675 for _, r := range a.Tasks { 3676 for _, nw := range r.Networks { 3677 for _, port := range append(nw.DynamicPorts, nw.ReservedPorts...) { 3678 a.Shared.Ports = append(a.Shared.Ports, AllocatedPortMapping{ 3679 Label: port.Label, 3680 Value: port.Value, 3681 To: port.To, 3682 HostIP: nw.IP, 3683 }) 3684 } 3685 } 3686 } 3687 } 3688 3689 // AllocatedTaskResources are the set of resources allocated to a task. 
3690 type AllocatedTaskResources struct { 3691 Cpu AllocatedCpuResources 3692 Memory AllocatedMemoryResources 3693 Networks Networks 3694 Devices []*AllocatedDeviceResource 3695 } 3696 3697 func (a *AllocatedTaskResources) Copy() *AllocatedTaskResources { 3698 if a == nil { 3699 return nil 3700 } 3701 newA := new(AllocatedTaskResources) 3702 *newA = *a 3703 3704 // Copy the networks 3705 newA.Networks = a.Networks.Copy() 3706 3707 // Copy the devices 3708 if newA.Devices != nil { 3709 n := len(a.Devices) 3710 newA.Devices = make([]*AllocatedDeviceResource, n) 3711 for i := 0; i < n; i++ { 3712 newA.Devices[i] = a.Devices[i].Copy() 3713 } 3714 } 3715 3716 return newA 3717 } 3718 3719 // NetIndex finds the matching net index using device name 3720 func (a *AllocatedTaskResources) NetIndex(n *NetworkResource) int { 3721 return a.Networks.NetIndex(n) 3722 } 3723 3724 func (a *AllocatedTaskResources) Add(delta *AllocatedTaskResources) { 3725 if delta == nil { 3726 return 3727 } 3728 3729 a.Cpu.Add(&delta.Cpu) 3730 a.Memory.Add(&delta.Memory) 3731 3732 for _, n := range delta.Networks { 3733 // Find the matching interface by IP or CIDR 3734 idx := a.NetIndex(n) 3735 if idx == -1 { 3736 a.Networks = append(a.Networks, n.Copy()) 3737 } else { 3738 a.Networks[idx].Add(n) 3739 } 3740 } 3741 3742 for _, d := range delta.Devices { 3743 // Find the matching device 3744 idx := AllocatedDevices(a.Devices).Index(d) 3745 if idx == -1 { 3746 a.Devices = append(a.Devices, d.Copy()) 3747 } else { 3748 a.Devices[idx].Add(d) 3749 } 3750 } 3751 } 3752 3753 func (a *AllocatedTaskResources) Max(other *AllocatedTaskResources) { 3754 if other == nil { 3755 return 3756 } 3757 3758 a.Cpu.Max(&other.Cpu) 3759 a.Memory.Max(&other.Memory) 3760 3761 for _, n := range other.Networks { 3762 // Find the matching interface by IP or CIDR 3763 idx := a.NetIndex(n) 3764 if idx == -1 { 3765 a.Networks = append(a.Networks, n.Copy()) 3766 } else { 3767 a.Networks[idx].Add(n) 3768 } 3769 } 3770 3771 for _, d 
:= range other.Devices { 3772 // Find the matching device 3773 idx := AllocatedDevices(a.Devices).Index(d) 3774 if idx == -1 { 3775 a.Devices = append(a.Devices, d.Copy()) 3776 } else { 3777 a.Devices[idx].Add(d) 3778 } 3779 } 3780 } 3781 3782 // Comparable turns AllocatedTaskResources into ComparableResources 3783 // as a helper step in preemption 3784 func (a *AllocatedTaskResources) Comparable() *ComparableResources { 3785 ret := &ComparableResources{ 3786 Flattened: AllocatedTaskResources{ 3787 Cpu: AllocatedCpuResources{ 3788 CpuShares: a.Cpu.CpuShares, 3789 ReservedCores: a.Cpu.ReservedCores, 3790 }, 3791 Memory: AllocatedMemoryResources{ 3792 MemoryMB: a.Memory.MemoryMB, 3793 MemoryMaxMB: a.Memory.MemoryMaxMB, 3794 }, 3795 }, 3796 } 3797 ret.Flattened.Networks = append(ret.Flattened.Networks, a.Networks...) 3798 return ret 3799 } 3800 3801 // Subtract only subtracts CPU and Memory resources. Network utilization 3802 // is managed separately in NetworkIndex 3803 func (a *AllocatedTaskResources) Subtract(delta *AllocatedTaskResources) { 3804 if delta == nil { 3805 return 3806 } 3807 3808 a.Cpu.Subtract(&delta.Cpu) 3809 a.Memory.Subtract(&delta.Memory) 3810 } 3811 3812 // AllocatedSharedResources are the set of resources allocated to a task group. 3813 type AllocatedSharedResources struct { 3814 Networks Networks 3815 DiskMB int64 3816 Ports AllocatedPorts 3817 } 3818 3819 func (a AllocatedSharedResources) Copy() AllocatedSharedResources { 3820 return AllocatedSharedResources{ 3821 Networks: a.Networks.Copy(), 3822 DiskMB: a.DiskMB, 3823 Ports: a.Ports, 3824 } 3825 } 3826 3827 func (a *AllocatedSharedResources) Add(delta *AllocatedSharedResources) { 3828 if delta == nil { 3829 return 3830 } 3831 a.Networks = append(a.Networks, delta.Networks...) 
3832 a.DiskMB += delta.DiskMB 3833 3834 } 3835 3836 func (a *AllocatedSharedResources) Subtract(delta *AllocatedSharedResources) { 3837 if delta == nil { 3838 return 3839 } 3840 3841 diff := map[*NetworkResource]bool{} 3842 for _, n := range delta.Networks { 3843 diff[n] = true 3844 } 3845 var nets Networks 3846 for _, n := range a.Networks { 3847 if _, ok := diff[n]; !ok { 3848 nets = append(nets, n) 3849 } 3850 } 3851 a.Networks = nets 3852 a.DiskMB -= delta.DiskMB 3853 } 3854 3855 func (a *AllocatedSharedResources) Canonicalize() { 3856 if len(a.Networks) > 0 { 3857 if len(a.Networks[0].DynamicPorts)+len(a.Networks[0].ReservedPorts) > 0 && len(a.Ports) == 0 { 3858 for _, ports := range [][]Port{a.Networks[0].DynamicPorts, a.Networks[0].ReservedPorts} { 3859 for _, p := range ports { 3860 a.Ports = append(a.Ports, AllocatedPortMapping{ 3861 Label: p.Label, 3862 Value: p.Value, 3863 To: p.To, 3864 HostIP: a.Networks[0].IP, 3865 }) 3866 } 3867 } 3868 } 3869 } 3870 } 3871 3872 // AllocatedCpuResources captures the allocated CPU resources. 
3873 type AllocatedCpuResources struct { 3874 CpuShares int64 3875 ReservedCores []uint16 3876 } 3877 3878 func (a *AllocatedCpuResources) Add(delta *AllocatedCpuResources) { 3879 if delta == nil { 3880 return 3881 } 3882 3883 a.CpuShares += delta.CpuShares 3884 3885 a.ReservedCores = cpuset.New(a.ReservedCores...).Union(cpuset.New(delta.ReservedCores...)).ToSlice() 3886 } 3887 3888 func (a *AllocatedCpuResources) Subtract(delta *AllocatedCpuResources) { 3889 if delta == nil { 3890 return 3891 } 3892 3893 a.CpuShares -= delta.CpuShares 3894 a.ReservedCores = cpuset.New(a.ReservedCores...).Difference(cpuset.New(delta.ReservedCores...)).ToSlice() 3895 } 3896 3897 func (a *AllocatedCpuResources) Max(other *AllocatedCpuResources) { 3898 if other == nil { 3899 return 3900 } 3901 3902 if other.CpuShares > a.CpuShares { 3903 a.CpuShares = other.CpuShares 3904 } 3905 3906 if len(other.ReservedCores) > len(a.ReservedCores) { 3907 a.ReservedCores = other.ReservedCores 3908 } 3909 } 3910 3911 // AllocatedMemoryResources captures the allocated memory resources. 
3912 type AllocatedMemoryResources struct { 3913 MemoryMB int64 3914 MemoryMaxMB int64 3915 } 3916 3917 func (a *AllocatedMemoryResources) Add(delta *AllocatedMemoryResources) { 3918 if delta == nil { 3919 return 3920 } 3921 3922 a.MemoryMB += delta.MemoryMB 3923 if delta.MemoryMaxMB != 0 { 3924 a.MemoryMaxMB += delta.MemoryMaxMB 3925 } else { 3926 a.MemoryMaxMB += delta.MemoryMB 3927 } 3928 } 3929 3930 func (a *AllocatedMemoryResources) Subtract(delta *AllocatedMemoryResources) { 3931 if delta == nil { 3932 return 3933 } 3934 3935 a.MemoryMB -= delta.MemoryMB 3936 if delta.MemoryMaxMB != 0 { 3937 a.MemoryMaxMB -= delta.MemoryMaxMB 3938 } else { 3939 a.MemoryMaxMB -= delta.MemoryMB 3940 } 3941 } 3942 3943 func (a *AllocatedMemoryResources) Max(other *AllocatedMemoryResources) { 3944 if other == nil { 3945 return 3946 } 3947 3948 if other.MemoryMB > a.MemoryMB { 3949 a.MemoryMB = other.MemoryMB 3950 } 3951 if other.MemoryMaxMB > a.MemoryMaxMB { 3952 a.MemoryMaxMB = other.MemoryMaxMB 3953 } 3954 } 3955 3956 type AllocatedDevices []*AllocatedDeviceResource 3957 3958 // Index finds the matching index using the passed device. If not found, -1 is 3959 // returned. 3960 func (a AllocatedDevices) Index(d *AllocatedDeviceResource) int { 3961 if d == nil { 3962 return -1 3963 } 3964 3965 for i, o := range a { 3966 if o.ID().Equal(d.ID()) { 3967 return i 3968 } 3969 } 3970 3971 return -1 3972 } 3973 3974 // AllocatedDeviceResource captures a set of allocated devices. 3975 type AllocatedDeviceResource struct { 3976 // Vendor, Type, and Name are used to select the plugin to request the 3977 // device IDs from. 
3978 Vendor string 3979 Type string 3980 Name string 3981 3982 // DeviceIDs is the set of allocated devices 3983 DeviceIDs []string 3984 } 3985 3986 func (a *AllocatedDeviceResource) ID() *DeviceIdTuple { 3987 if a == nil { 3988 return nil 3989 } 3990 3991 return &DeviceIdTuple{ 3992 Vendor: a.Vendor, 3993 Type: a.Type, 3994 Name: a.Name, 3995 } 3996 } 3997 3998 func (a *AllocatedDeviceResource) Add(delta *AllocatedDeviceResource) { 3999 if delta == nil { 4000 return 4001 } 4002 4003 a.DeviceIDs = append(a.DeviceIDs, delta.DeviceIDs...) 4004 } 4005 4006 func (a *AllocatedDeviceResource) Copy() *AllocatedDeviceResource { 4007 if a == nil { 4008 return a 4009 } 4010 4011 na := *a 4012 4013 // Copy the devices 4014 na.DeviceIDs = make([]string, len(a.DeviceIDs)) 4015 copy(na.DeviceIDs, a.DeviceIDs) 4016 return &na 4017 } 4018 4019 // ComparableResources is the set of resources allocated to a task group but 4020 // not keyed by Task, making it easier to compare. 4021 type ComparableResources struct { 4022 Flattened AllocatedTaskResources 4023 Shared AllocatedSharedResources 4024 } 4025 4026 func (c *ComparableResources) Add(delta *ComparableResources) { 4027 if delta == nil { 4028 return 4029 } 4030 4031 c.Flattened.Add(&delta.Flattened) 4032 c.Shared.Add(&delta.Shared) 4033 } 4034 4035 func (c *ComparableResources) Subtract(delta *ComparableResources) { 4036 if delta == nil { 4037 return 4038 } 4039 4040 c.Flattened.Subtract(&delta.Flattened) 4041 c.Shared.Subtract(&delta.Shared) 4042 } 4043 4044 func (c *ComparableResources) Copy() *ComparableResources { 4045 if c == nil { 4046 return nil 4047 } 4048 newR := new(ComparableResources) 4049 *newR = *c 4050 return newR 4051 } 4052 4053 // Superset checks if one set of resources is a superset of another. This 4054 // ignores network resources, and the NetworkIndex should be used for that. 
4055 func (c *ComparableResources) Superset(other *ComparableResources) (bool, string) { 4056 if c.Flattened.Cpu.CpuShares < other.Flattened.Cpu.CpuShares { 4057 return false, "cpu" 4058 } 4059 4060 if len(c.Flattened.Cpu.ReservedCores) > 0 && !cpuset.New(c.Flattened.Cpu.ReservedCores...).IsSupersetOf(cpuset.New(other.Flattened.Cpu.ReservedCores...)) { 4061 return false, "cores" 4062 } 4063 if c.Flattened.Memory.MemoryMB < other.Flattened.Memory.MemoryMB { 4064 return false, "memory" 4065 } 4066 if c.Shared.DiskMB < other.Shared.DiskMB { 4067 return false, "disk" 4068 } 4069 return true, "" 4070 } 4071 4072 // NetIndex finds the matching net index using device name 4073 func (c *ComparableResources) NetIndex(n *NetworkResource) int { 4074 return c.Flattened.Networks.NetIndex(n) 4075 } 4076 4077 const ( 4078 // JobTypeCore is reserved for internal system tasks and is 4079 // always handled by the CoreScheduler. 4080 JobTypeCore = "_core" 4081 JobTypeService = "service" 4082 JobTypeBatch = "batch" 4083 JobTypeSystem = "system" 4084 JobTypeSysBatch = "sysbatch" 4085 ) 4086 4087 const ( 4088 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 4089 JobStatusRunning = "running" // Running means the job has non-terminal allocations 4090 JobStatusDead = "dead" // Dead means all evaluation's and allocations are terminal 4091 ) 4092 4093 const ( 4094 // JobMinPriority is the minimum allowed priority 4095 JobMinPriority = 1 4096 4097 // JobDefaultPriority is the default priority if not 4098 // not specified. 4099 JobDefaultPriority = 50 4100 4101 // JobMaxPriority is the maximum allowed priority 4102 JobMaxPriority = 100 4103 4104 // CoreJobPriority should be higher than any user 4105 // specified job so that it gets priority. This is important 4106 // for the system to remain healthy. 4107 CoreJobPriority = JobMaxPriority * 2 4108 4109 // JobTrackedVersions is the number of historic job versions that are 4110 // kept. 
4111 JobTrackedVersions = 6 4112 4113 // JobTrackedScalingEvents is the number of scaling events that are 4114 // kept for a single task group. 4115 JobTrackedScalingEvents = 20 4116 ) 4117 4118 // Job is the scope of a scheduling request to Nomad. It is the largest 4119 // scoped object, and is a named collection of task groups. Each task group 4120 // is further composed of tasks. A task group (TG) is the unit of scheduling 4121 // however. 4122 type Job struct { 4123 // Stop marks whether the user has stopped the job. A stopped job will 4124 // have all created allocations stopped and acts as a way to stop a job 4125 // without purging it from the system. This allows existing allocs to be 4126 // queried and the job to be inspected as it is being killed. 4127 Stop bool 4128 4129 // Region is the Nomad region that handles scheduling this job 4130 Region string 4131 4132 // Namespace is the namespace the job is submitted into. 4133 Namespace string 4134 4135 // ID is a unique identifier for the job per region. It can be 4136 // specified hierarchically like LineOfBiz/OrgName/Team/Project 4137 ID string 4138 4139 // ParentID is the unique identifier of the job that spawned this job. 4140 ParentID string 4141 4142 // Name is the logical name of the job used to refer to it. This is unique 4143 // per region, but not unique globally. 4144 Name string 4145 4146 // Type is used to control various behaviors about the job. Most jobs 4147 // are service jobs, meaning they are expected to be long lived. 4148 // Some jobs are batch oriented meaning they run and then terminate. 4149 // This can be extended in the future to support custom schedulers. 4150 Type string 4151 4152 // Priority is used to control scheduling importance and if this job 4153 // can preempt other jobs. 4154 Priority int 4155 4156 // AllAtOnce is used to control if incremental scheduling of task groups 4157 // is allowed or if we must do a gang scheduling of the entire job. 
This 4158 // can slow down larger jobs if resources are not available. 4159 AllAtOnce bool 4160 4161 // Datacenters contains all the datacenters this job is allowed to span 4162 Datacenters []string 4163 4164 // Constraints can be specified at a job level and apply to 4165 // all the task groups and tasks. 4166 Constraints []*Constraint 4167 4168 // Affinities can be specified at the job level to express 4169 // scheduling preferences that apply to all groups and tasks 4170 Affinities []*Affinity 4171 4172 // Spread can be specified at the job level to express spreading 4173 // allocations across a desired attribute, such as datacenter 4174 Spreads []*Spread 4175 4176 // TaskGroups are the collections of task groups that this job needs 4177 // to run. Each task group is an atomic unit of scheduling and placement. 4178 TaskGroups []*TaskGroup 4179 4180 // See agent.ApiJobToStructJob 4181 // Update provides defaults for the TaskGroup Update stanzas 4182 Update UpdateStrategy 4183 4184 Multiregion *Multiregion 4185 4186 // Periodic is used to define the interval the job is run at. 4187 Periodic *PeriodicConfig 4188 4189 // ParameterizedJob is used to specify the job as a parameterized job 4190 // for dispatching. 4191 ParameterizedJob *ParameterizedJobConfig 4192 4193 // Dispatched is used to identify if the Job has been dispatched from a 4194 // parameterized job. 4195 Dispatched bool 4196 4197 // DispatchIdempotencyToken is optionally used to ensure that a dispatched job does not have any 4198 // non-terminal siblings which have the same token value. 4199 DispatchIdempotencyToken string 4200 4201 // Payload is the payload supplied when the job was dispatched. 4202 Payload []byte 4203 4204 // Meta is used to associate arbitrary metadata with this 4205 // job. This is opaque to Nomad. 
4206 Meta map[string]string 4207 4208 // ConsulToken is the Consul token that proves the submitter of the job has 4209 // access to the Service Identity policies associated with the job's 4210 // Consul Connect enabled services. This field is only used to transfer the 4211 // token and is not stored after Job submission. 4212 ConsulToken string 4213 4214 // ConsulNamespace is the Consul namespace 4215 ConsulNamespace string 4216 4217 // VaultToken is the Vault token that proves the submitter of the job has 4218 // access to the specified Vault policies. This field is only used to 4219 // transfer the token and is not stored after Job submission. 4220 VaultToken string 4221 4222 // VaultNamespace is the Vault namespace 4223 VaultNamespace string 4224 4225 // NomadTokenID is the Accessor ID of the ACL token (if any) 4226 // used to register this version of the job. Used by deploymentwatcher. 4227 NomadTokenID string 4228 4229 // Job status 4230 Status string 4231 4232 // StatusDescription is meant to provide more human useful information 4233 StatusDescription string 4234 4235 // Stable marks a job as stable. Stability is only defined on "service" and 4236 // "system" jobs. The stability of a job will be set automatically as part 4237 // of a deployment and can be manually set via APIs. This field is updated 4238 // when the status of a corresponding deployment transitions to Failed 4239 // or Successful. This field is not meaningful for jobs that don't have an 4240 // update stanza. 4241 Stable bool 4242 4243 // Version is a monotonically increasing version number that is incremented 4244 // on each job register. 
	Version uint64

	// SubmitTime is the time at which the job was submitted as a UnixNano in
	// UTC
	SubmitTime int64

	// Raft Indexes
	CreateIndex    uint64
	ModifyIndex    uint64
	JobModifyIndex uint64
}

// NamespacedID returns the namespaced id useful for logging
func (j *Job) NamespacedID() NamespacedID {
	return NamespacedID{
		ID:        j.ID,
		Namespace: j.Namespace,
	}
}

// GetID implements the IDGetter interface, required for pagination.
// A nil receiver is valid and yields the empty string.
func (j *Job) GetID() string {
	if j == nil {
		return ""
	}
	return j.ID
}

// GetNamespace implements the NamespaceGetter interface, required for
// pagination and filtering namespaces in endpoints that support glob namespace
// requests using tokens with limited access. A nil receiver is valid and
// yields the empty string.
func (j *Job) GetNamespace() string {
	if j == nil {
		return ""
	}
	return j.Namespace
}

// GetCreateIndex implements the CreateIndexGetter interface, required for
// pagination. A nil receiver is valid and yields zero.
func (j *Job) GetCreateIndex() uint64 {
	if j == nil {
		return 0
	}
	return j.CreateIndex
}

// Canonicalize is used to canonicalize fields in the Job. This should be
// called when registering a Job. It mutates the receiver in place and also
// canonicalizes every task group and the optional parameterized, multiregion,
// and periodic configurations.
func (j *Job) Canonicalize() {
	if j == nil {
		return
	}

	// Ensure that an empty and nil map are treated the same to avoid scheduling
	// problems since we use reflect DeepEquals.
	if len(j.Meta) == 0 {
		j.Meta = nil
	}

	// Ensure the job is in a namespace.
	if j.Namespace == "" {
		j.Namespace = DefaultNamespace
	}

	for _, tg := range j.TaskGroups {
		tg.Canonicalize(j)
	}

	if j.ParameterizedJob != nil {
		j.ParameterizedJob.Canonicalize()
	}

	if j.Multiregion != nil {
		j.Multiregion.Canonicalize()
	}

	if j.Periodic != nil {
		j.Periodic.Canonicalize()
	}
}

// Copy returns a deep copy of the Job. It is expected that callers use recover.
// This job can panic if the deep copy failed as it uses reflection.
// Fields not explicitly cloned below are shallow-copied by the struct
// assignment; the slices/maps listed here are the ones given fresh storage.
func (j *Job) Copy() *Job {
	if j == nil {
		return nil
	}
	nj := new(Job)
	*nj = *j
	nj.Datacenters = slices.Clone(nj.Datacenters)
	nj.Constraints = CopySliceConstraints(nj.Constraints)
	nj.Affinities = CopySliceAffinities(nj.Affinities)
	nj.Multiregion = nj.Multiregion.Copy()

	if j.TaskGroups != nil {
		tgs := make([]*TaskGroup, len(nj.TaskGroups))
		for i, tg := range nj.TaskGroups {
			tgs[i] = tg.Copy()
		}
		nj.TaskGroups = tgs
	}

	nj.Periodic = nj.Periodic.Copy()
	nj.Meta = maps.Clone(nj.Meta)
	nj.ParameterizedJob = nj.ParameterizedJob.Copy()
	return nj
}

// Validate is used to check a job for reasonable configuration. All problems
// found are accumulated into a single multierror; a nil return means the job
// passed every check.
func (j *Job) Validate() error {
	var mErr multierror.Error

	if j.Region == "" && j.Multiregion == nil {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job region"))
	}
	if j.ID == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job ID"))
	} else if strings.Contains(j.ID, " ") {
		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space"))
	} else if strings.Contains(j.ID, "\000") {
		mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a null character"))
	}
	if j.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job name"))
	} else if strings.Contains(j.Name, "\000") {
		mErr.Errors = append(mErr.Errors, errors.New("Job Name contains a null character"))
	}
	if j.Namespace == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Job must be in a namespace"))
	}
	switch j.Type {
	case JobTypeCore, JobTypeService, JobTypeBatch, JobTypeSystem, JobTypeSysBatch:
	case "":
		mErr.Errors = append(mErr.Errors, errors.New("Missing job type"))
	default:
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Invalid job type: %q", j.Type))
	}
	if j.Priority < JobMinPriority || j.Priority > JobMaxPriority {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority))
	}
	// Multiregion jobs may omit datacenters; otherwise at least one non-empty
	// datacenter is required.
	if len(j.Datacenters) == 0 && !j.IsMultiregion() {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters"))
	} else {
		for _, v := range j.Datacenters {
			if v == "" {
				mErr.Errors = append(mErr.Errors, errors.New("Job datacenter must be non-empty string"))
			}
		}
	}
	if len(j.TaskGroups) == 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups"))
	}
	for idx, constr := range j.Constraints {
		if err := constr.Validate(); err != nil {
			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}
	// Affinities are not supported by the system scheduler.
	if j.Type == JobTypeSystem {
		if j.Affinities != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza"))
		}
	} else {
		for idx, affinity := range j.Affinities {
			if err := affinity.Validate(); err != nil {
				outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}

	// Spreads are likewise not supported by the system scheduler.
	if j.Type == JobTypeSystem {
		if j.Spreads != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza"))
		}
	} else {
		for idx, spread := range j.Spreads {
			if err := spread.Validate(); err != nil {
				outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}

	// Check for duplicate task groups
	taskGroups := make(map[string]int)
	for idx, tg := range j.TaskGroups {
		if tg.Name == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d missing name", idx+1))
		} else if existing, ok := taskGroups[tg.Name]; ok {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1))
		} else {
			taskGroups[tg.Name] = idx
		}

		if tg.ShutdownDelay != nil && *tg.ShutdownDelay < 0 {
			mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value"))
		}

		// stop_after_client_disconnect: zero means unset; positive values are
		// only valid for batch and service jobs; negative values are invalid.
		if tg.StopAfterClientDisconnect != nil && *tg.StopAfterClientDisconnect != 0 {
			if *tg.StopAfterClientDisconnect > 0 &&
				!(j.Type == JobTypeBatch || j.Type == JobTypeService) {
				mErr.Errors = append(mErr.Errors, errors.New("stop_after_client_disconnect can only be set in batch and service jobs"))
			} else if *tg.StopAfterClientDisconnect < 0 {
				mErr.Errors = append(mErr.Errors, errors.New("stop_after_client_disconnect must be a positive value"))
			}
		}

		if j.Type == "system" && tg.Count > 1 {
			mErr.Errors = append(mErr.Errors,
				fmt.Errorf("Job task group %s has count %d. Count cannot exceed 1 with system scheduler",
					tg.Name, tg.Count))
		}
	}

	// Validate the task group
	for _, tg := range j.TaskGroups {
		if err := tg.Validate(j); err != nil {
			outer := fmt.Errorf("Task group %s validation failed: %v", tg.Name, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	// Validate periodic is only used with batch or sysbatch jobs.
	if j.IsPeriodic() && j.Periodic.Enabled {
		if j.Type != JobTypeBatch && j.Type != JobTypeSysBatch {
			mErr.Errors = append(mErr.Errors, fmt.Errorf(
				"Periodic can only be used with %q or %q scheduler", JobTypeBatch, JobTypeSysBatch,
			))
		}

		if err := j.Periodic.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	if j.IsParameterized() {
		if j.Type != JobTypeBatch && j.Type != JobTypeSysBatch {
			mErr.Errors = append(mErr.Errors, fmt.Errorf(
				"Parameterized job can only be used with %q or %q scheduler", JobTypeBatch, JobTypeSysBatch,
			))
		}

		if err := j.ParameterizedJob.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	if j.IsMultiregion() {
		if err := j.Multiregion.Validate(j.Type, j.Datacenters); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	return mErr.ErrorOrNil()
}

// Warnings returns a list of warnings that may be from dubious settings or
// deprecation warnings. Unlike Validate, a non-nil result does not make the
// job unregisterable.
func (j *Job) Warnings() error {
	var mErr multierror.Error

	// Check the groups
	hasAutoPromote, allAutoPromote := false, true

	for _, tg := range j.TaskGroups {
		if err := tg.Warnings(j); err != nil {
			outer := fmt.Errorf("Group %q has warnings: %v", tg.Name, err)
			mErr.Errors = append(mErr.Errors, outer)
		}

		if u := tg.Update; u != nil {
			hasAutoPromote = hasAutoPromote || u.AutoPromote

			// Having no canaries implies auto-promotion since there are no canaries to promote.
			allAutoPromote = allAutoPromote && (u.Canary == 0 || u.AutoPromote)
		}
	}

	// Check AutoPromote, should be all or none
	if hasAutoPromote && !allAutoPromote {
		err := fmt.Errorf("auto_promote must be true for all groups to enable automatic promotion")
		mErr.Errors = append(mErr.Errors, err)
	}

	return mErr.ErrorOrNil()
}

// LookupTaskGroup finds a task group by name. Returns nil when no group
// matches. Linear scan; the number of groups per job is small.
func (j *Job) LookupTaskGroup(name string) *TaskGroup {
	for _, tg := range j.TaskGroups {
		if tg.Name == name {
			return tg
		}
	}
	return nil
}

// CombinedTaskMeta takes a TaskGroup and Task name and returns the combined
// meta data for the task. When joining Job, Group and Task Meta, the precedence
// is by deepest scope (Task > Group > Job). If the group is not found, the
// job-level meta map is returned directly (not a copy).
func (j *Job) CombinedTaskMeta(groupName, taskName string) map[string]string {
	group := j.LookupTaskGroup(groupName)
	if group == nil {
		return j.Meta
	}

	var meta map[string]string

	task := group.LookupTask(taskName)
	if task != nil {
		meta = maps.Clone(task.Meta)
	}

	if meta == nil {
		meta = make(map[string]string, len(group.Meta)+len(j.Meta))
	}

	// Add the group specific meta
	for k, v := range group.Meta {
		if _, ok := meta[k]; !ok {
			meta[k] = v
		}
	}

	// Add the job specific meta
	for k, v := range j.Meta {
		if _, ok := meta[k]; !ok {
			meta[k] = v
		}
	}

	return meta
}

// Stopped returns if a job is stopped.
4582 func (j *Job) Stopped() bool { 4583 return j == nil || j.Stop 4584 } 4585 4586 // HasUpdateStrategy returns if any task group in the job has an update strategy 4587 func (j *Job) HasUpdateStrategy() bool { 4588 for _, tg := range j.TaskGroups { 4589 if !tg.Update.IsEmpty() { 4590 return true 4591 } 4592 } 4593 4594 return false 4595 } 4596 4597 // Stub is used to return a summary of the job 4598 func (j *Job) Stub(summary *JobSummary, fields *JobStubFields) *JobListStub { 4599 jobStub := &JobListStub{ 4600 ID: j.ID, 4601 Namespace: j.Namespace, 4602 ParentID: j.ParentID, 4603 Name: j.Name, 4604 Datacenters: j.Datacenters, 4605 Multiregion: j.Multiregion, 4606 Type: j.Type, 4607 Priority: j.Priority, 4608 Periodic: j.IsPeriodic(), 4609 ParameterizedJob: j.IsParameterized(), 4610 Stop: j.Stop, 4611 Status: j.Status, 4612 StatusDescription: j.StatusDescription, 4613 CreateIndex: j.CreateIndex, 4614 ModifyIndex: j.ModifyIndex, 4615 JobModifyIndex: j.JobModifyIndex, 4616 SubmitTime: j.SubmitTime, 4617 JobSummary: summary, 4618 } 4619 4620 if fields != nil { 4621 if fields.Meta { 4622 jobStub.Meta = j.Meta 4623 } 4624 } 4625 4626 return jobStub 4627 } 4628 4629 // IsPeriodic returns whether a job is periodic. 4630 func (j *Job) IsPeriodic() bool { 4631 return j.Periodic != nil 4632 } 4633 4634 // IsPeriodicActive returns whether the job is an active periodic job that will 4635 // create child jobs 4636 func (j *Job) IsPeriodicActive() bool { 4637 return j.IsPeriodic() && j.Periodic.Enabled && !j.Stopped() && !j.IsParameterized() 4638 } 4639 4640 // IsParameterized returns whether a job is parameterized job. 
4641 func (j *Job) IsParameterized() bool { 4642 return j.ParameterizedJob != nil && !j.Dispatched 4643 } 4644 4645 // IsMultiregion returns whether a job is multiregion 4646 func (j *Job) IsMultiregion() bool { 4647 return j.Multiregion != nil && j.Multiregion.Regions != nil && len(j.Multiregion.Regions) > 0 4648 } 4649 4650 // IsPlugin returns whether a job is implements a plugin (currently just CSI) 4651 func (j *Job) IsPlugin() bool { 4652 for _, tg := range j.TaskGroups { 4653 for _, task := range tg.Tasks { 4654 if task.CSIPluginConfig != nil { 4655 return true 4656 } 4657 } 4658 } 4659 return false 4660 } 4661 4662 // Vault returns the set of Vault blocks per task group, per task 4663 func (j *Job) Vault() map[string]map[string]*Vault { 4664 blocks := make(map[string]map[string]*Vault, len(j.TaskGroups)) 4665 4666 for _, tg := range j.TaskGroups { 4667 tgBlocks := make(map[string]*Vault, len(tg.Tasks)) 4668 4669 for _, task := range tg.Tasks { 4670 if task.Vault == nil { 4671 continue 4672 } 4673 4674 tgBlocks[task.Name] = task.Vault 4675 } 4676 4677 if len(tgBlocks) != 0 { 4678 blocks[tg.Name] = tgBlocks 4679 } 4680 } 4681 4682 return blocks 4683 } 4684 4685 // ConnectTasks returns the set of Consul Connect enabled tasks defined on the 4686 // job that will require a Service Identity token in the case that Consul ACLs 4687 // are enabled. The TaskKind.Value is the name of the Consul service. 4688 // 4689 // This method is meaningful only after the Job has passed through the job 4690 // submission Mutator functions. 
4691 func (j *Job) ConnectTasks() []TaskKind { 4692 var kinds []TaskKind 4693 for _, tg := range j.TaskGroups { 4694 for _, task := range tg.Tasks { 4695 if task.Kind.IsConnectProxy() || 4696 task.Kind.IsConnectNative() || 4697 task.Kind.IsAnyConnectGateway() { 4698 kinds = append(kinds, task.Kind) 4699 } 4700 } 4701 } 4702 return kinds 4703 } 4704 4705 // RequiredSignals returns a mapping of task groups to tasks to their required 4706 // set of signals 4707 func (j *Job) RequiredSignals() map[string]map[string][]string { 4708 signals := make(map[string]map[string][]string) 4709 4710 for _, tg := range j.TaskGroups { 4711 for _, task := range tg.Tasks { 4712 // Use this local one as a set 4713 taskSignals := make(map[string]struct{}) 4714 4715 // Check if the Vault change mode uses signals 4716 if task.Vault != nil && task.Vault.ChangeMode == VaultChangeModeSignal { 4717 taskSignals[task.Vault.ChangeSignal] = struct{}{} 4718 } 4719 4720 // If a user has specified a KillSignal, add it to required signals 4721 if task.KillSignal != "" { 4722 taskSignals[task.KillSignal] = struct{}{} 4723 } 4724 4725 // Check if any template change mode uses signals 4726 for _, t := range task.Templates { 4727 if t.ChangeMode != TemplateChangeModeSignal { 4728 continue 4729 } 4730 4731 taskSignals[t.ChangeSignal] = struct{}{} 4732 } 4733 4734 // Flatten and sort the signals 4735 l := len(taskSignals) 4736 if l == 0 { 4737 continue 4738 } 4739 4740 flat := make([]string, 0, l) 4741 for sig := range taskSignals { 4742 flat = append(flat, sig) 4743 } 4744 4745 sort.Strings(flat) 4746 tgSignals, ok := signals[tg.Name] 4747 if !ok { 4748 tgSignals = make(map[string][]string) 4749 signals[tg.Name] = tgSignals 4750 } 4751 tgSignals[task.Name] = flat 4752 } 4753 4754 } 4755 4756 return signals 4757 } 4758 4759 // SpecChanged determines if the functional specification has changed between 4760 // two job versions. 
func (j *Job) SpecChanged(new *Job) bool {
	if j == nil {
		return new != nil
	}

	// Create a copy of the new job
	c := new.Copy()

	// Update the new job so we can do a reflect: server-maintained fields are
	// overwritten with the old job's values so only the user-controlled spec
	// participates in the comparison.
	c.Status = j.Status
	c.StatusDescription = j.StatusDescription
	c.Stable = j.Stable
	c.Version = j.Version
	c.CreateIndex = j.CreateIndex
	c.ModifyIndex = j.ModifyIndex
	c.JobModifyIndex = j.JobModifyIndex
	c.SubmitTime = j.SubmitTime

	// cgbaker: FINISH: probably need some consideration of scaling policy ID here

	// Deep equals the jobs
	return !reflect.DeepEqual(j, c)
}

// SetSubmitTime records the current wall-clock time (UTC, UnixNano) as the
// job's submission time.
func (j *Job) SetSubmitTime() {
	j.SubmitTime = time.Now().UTC().UnixNano()
}

// JobListStub is used to return a subset of job information
// for the job list
type JobListStub struct {
	ID                string
	ParentID          string
	Name              string
	Namespace         string `json:",omitempty"`
	Datacenters       []string
	Multiregion       *Multiregion
	Type              string
	Priority          int
	Periodic          bool
	ParameterizedJob  bool
	Stop              bool
	Status            string
	StatusDescription string
	JobSummary        *JobSummary
	CreateIndex       uint64
	ModifyIndex       uint64
	JobModifyIndex    uint64
	SubmitTime        int64
	Meta              map[string]string `json:",omitempty"`
}

// JobSummary summarizes the state of the allocations of a job
type JobSummary struct {
	// JobID is the ID of the job the summary is for
	JobID string

	// Namespace is the namespace of the job and its summary
	Namespace string

	// Summary contains the summary per task group for the Job
	Summary map[string]TaskGroupSummary

	// Children contains a summary for the children of this job.
	Children *JobChildrenSummary

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// Copy returns a new copy of JobSummary.
// NOTE(review): unlike most Copy methods in this file, there is no nil-receiver
// guard here; calling Copy on a nil *JobSummary panics.
func (js *JobSummary) Copy() *JobSummary {
	newJobSummary := new(JobSummary)
	*newJobSummary = *js
	newTGSummary := make(map[string]TaskGroupSummary, len(js.Summary))
	for k, v := range js.Summary {
		newTGSummary[k] = v
	}
	newJobSummary.Summary = newTGSummary
	newJobSummary.Children = newJobSummary.Children.Copy()
	return newJobSummary
}

// JobChildrenSummary contains the summary of children job statuses
type JobChildrenSummary struct {
	Pending int64
	Running int64
	Dead    int64
}

// Copy returns a new copy of a JobChildrenSummary
func (jc *JobChildrenSummary) Copy() *JobChildrenSummary {
	if jc == nil {
		return nil
	}

	njc := new(JobChildrenSummary)
	*njc = *jc
	return njc
}

// TaskGroupSummary summarizes the state of all the allocations of a particular
// TaskGroup
type TaskGroupSummary struct {
	Queued   int
	Complete int
	Failed   int
	Running  int
	Starting int
	Lost     int
	Unknown  int
}

const (
	// Checks uses any registered health check state in combination with task
	// states to determine if an allocation is healthy.
	UpdateStrategyHealthCheck_Checks = "checks"

	// TaskStates uses the task states of an allocation to determine if the
	// allocation is healthy.
	UpdateStrategyHealthCheck_TaskStates = "task_states"

	// Manual allows the operator to manually signal to Nomad when an
	// allocations is healthy. This allows more advanced health checking that is
	// outside of the scope of Nomad.
	UpdateStrategyHealthCheck_Manual = "manual"
)

var (
	// DefaultUpdateStrategy provides a baseline that can be used to upgrade
	// jobs with the old policy or for populating field defaults.
	DefaultUpdateStrategy = &UpdateStrategy{
		Stagger:          30 * time.Second,
		MaxParallel:      1,
		HealthCheck:      UpdateStrategyHealthCheck_Checks,
		MinHealthyTime:   10 * time.Second,
		HealthyDeadline:  5 * time.Minute,
		ProgressDeadline: 10 * time.Minute,
		AutoRevert:       false,
		AutoPromote:      false,
		Canary:           0,
	}
)

// UpdateStrategy is used to modify how updates are done
type UpdateStrategy struct {
	// Stagger is used to determine the rate at which allocations are migrated
	// due to down or draining nodes.
	Stagger time.Duration

	// MaxParallel is how many updates can be done in parallel
	MaxParallel int

	// HealthCheck specifies the mechanism in which allocations are marked
	// healthy or unhealthy as part of a deployment.
	HealthCheck string

	// MinHealthyTime is the minimum time an allocation must be in the healthy
	// state before it is marked as healthy, unblocking more allocations to be
	// rolled.
	MinHealthyTime time.Duration

	// HealthyDeadline is the time in which an allocation must be marked as
	// healthy before it is automatically transitioned to unhealthy. This time
	// period doesn't count against the MinHealthyTime.
	HealthyDeadline time.Duration

	// ProgressDeadline is the time in which an allocation as part of the
	// deployment must transition to healthy. If no allocation becomes healthy
	// after the deadline, the deployment is marked as failed. If the deadline
	// is zero, the first failure causes the deployment to fail.
	ProgressDeadline time.Duration

	// AutoRevert declares that if a deployment fails because of unhealthy
	// allocations, there should be an attempt to auto-revert the job to a
	// stable version.
	AutoRevert bool

	// AutoPromote declares that the deployment should be promoted when all canaries are
	// healthy
	AutoPromote bool

	// Canary is the number of canaries to deploy when a change to the task
	// group is detected.
	Canary int
}

// Copy returns a shallow copy of the strategy (all fields are value types).
// A nil receiver is valid and yields nil.
func (u *UpdateStrategy) Copy() *UpdateStrategy {
	if u == nil {
		return nil
	}

	c := new(UpdateStrategy)
	*c = *u
	return c
}

// Validate checks the strategy for invalid field combinations, accumulating
// all problems into a single multierror. A nil receiver is valid.
func (u *UpdateStrategy) Validate() error {
	if u == nil {
		return nil
	}

	var mErr multierror.Error
	switch u.HealthCheck {
	case UpdateStrategyHealthCheck_Checks, UpdateStrategyHealthCheck_TaskStates, UpdateStrategyHealthCheck_Manual:
	default:
		_ = multierror.Append(&mErr, fmt.Errorf("Invalid health check given: %q", u.HealthCheck))
	}

	if u.MaxParallel < 0 {
		_ = multierror.Append(&mErr, fmt.Errorf("Max parallel can not be less than zero: %d < 0", u.MaxParallel))
	}
	if u.Canary < 0 {
		_ = multierror.Append(&mErr, fmt.Errorf("Canary count can not be less than zero: %d < 0", u.Canary))
	}
	if u.Canary == 0 && u.AutoPromote {
		_ = multierror.Append(&mErr, fmt.Errorf("Auto Promote requires a Canary count greater than zero"))
	}
	if u.MinHealthyTime < 0 {
		_ = multierror.Append(&mErr, fmt.Errorf("Minimum healthy time may not be less than zero: %v", u.MinHealthyTime))
	}
	if u.HealthyDeadline <= 0 {
		_ = multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be greater than zero: %v", u.HealthyDeadline))
	}
	if u.ProgressDeadline < 0 {
		_ = multierror.Append(&mErr, fmt.Errorf("Progress deadline must be zero or greater: %v", u.ProgressDeadline))
	}
	// NOTE(review): the next two checks use >= but their messages print ">";
	// the equality case is reported with a slightly misleading message.
	if u.MinHealthyTime >= u.HealthyDeadline {
		_ = multierror.Append(&mErr, fmt.Errorf("Minimum healthy time must be less than healthy deadline: %v > %v", u.MinHealthyTime, u.HealthyDeadline))
	}
	if u.ProgressDeadline != 0 && u.HealthyDeadline >= u.ProgressDeadline {
		_ = multierror.Append(&mErr, fmt.Errorf("Healthy deadline must be less than progress deadline: %v > %v", u.HealthyDeadline, u.ProgressDeadline))
	}
	if u.Stagger <= 0 {
		_ = multierror.Append(&mErr, fmt.Errorf("Stagger must be greater than zero: %v", u.Stagger))
	}

	return mErr.ErrorOrNil()
}

// IsEmpty reports whether the strategy is unset (nil or zero MaxParallel).
func (u *UpdateStrategy) IsEmpty() bool {
	if u == nil {
		return true
	}

	return u.MaxParallel == 0
}

// Rolling returns if a rolling strategy should be used.
// TODO(alexdadgar): Remove once no longer used by the scheduler.
func (u *UpdateStrategy) Rolling() bool {
	return u.Stagger > 0 && u.MaxParallel > 0
}

type Multiregion struct {
	Strategy *MultiregionStrategy
	Regions  []*MultiregionRegion
}

// Canonicalize replaces nil Strategy/Regions with empty values so downstream
// code can rely on them being non-nil.
func (m *Multiregion) Canonicalize() {
	if m.Strategy == nil {
		m.Strategy = &MultiregionStrategy{}
	}
	if m.Regions == nil {
		m.Regions = []*MultiregionRegion{}
	}
}

// Diff indicates whether the multiregion config has changed
func (m *Multiregion) Diff(m2 *Multiregion) bool {
	return !reflect.DeepEqual(m, m2)
}

// Copy returns a deep copy of the multiregion config. A nil receiver is valid
// and yields nil.
// NOTE(review): the local variable shadows the builtin copy; harmless here
// since the builtin is not used in this function.
func (m *Multiregion) Copy() *Multiregion {
	if m == nil {
		return nil
	}
	copy := new(Multiregion)
	if m.Strategy != nil {
		copy.Strategy = &MultiregionStrategy{
			MaxParallel: m.Strategy.MaxParallel,
			OnFailure:   m.Strategy.OnFailure,
		}
	}
	for _, region := range m.Regions {
		copyRegion := &MultiregionRegion{
			Name:        region.Name,
			Count:       region.Count,
			Datacenters: []string{},
			Meta:        map[string]string{},
		}
		copyRegion.Datacenters = append(copyRegion.Datacenters, region.Datacenters...)
		for k, v := range region.Meta {
			copyRegion.Meta[k] = v
		}
		copy.Regions = append(copy.Regions, copyRegion)
	}
	return copy
}

type MultiregionStrategy struct {
	MaxParallel int
	OnFailure   string
}

type MultiregionRegion struct {
	Name        string
	Count       int
	Datacenters []string
	Meta        map[string]string
}

// Namespace allows logically grouping jobs and their associated objects.
type Namespace struct {
	// Name is the name of the namespace
	Name string

	// Description is a human readable description of the namespace
	Description string

	// Quota is the quota specification that the namespace should account
	// against.
	Quota string

	// Capabilities is the set of capabilities allowed for this namespace
	Capabilities *NamespaceCapabilities

	// Meta is the set of metadata key/value pairs that attached to the namespace
	Meta map[string]string

	// Hash is the hash of the namespace which is used to efficiently replicate
	// cross-regions.
	Hash []byte

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// NamespaceCapabilities represents a set of capabilities allowed for this
// namespace, to be checked at job submission time.
type NamespaceCapabilities struct {
	EnabledTaskDrivers  []string
	DisabledTaskDrivers []string
}

// Validate checks the namespace name against validNamespaceName and bounds the
// description length, accumulating problems into a multierror.
func (n *Namespace) Validate() error {
	var mErr multierror.Error

	// Validate the name and description
	if !validNamespaceName.MatchString(n.Name) {
		err := fmt.Errorf("invalid name %q. Must match regex %s", n.Name, validNamespaceName)
		mErr.Errors = append(mErr.Errors, err)
	}
	if len(n.Description) > maxNamespaceDescriptionLength {
		err := fmt.Errorf("description longer than %d", maxNamespaceDescriptionLength)
		mErr.Errors = append(mErr.Errors, err)
	}

	return mErr.ErrorOrNil()
}

// SetHash is used to compute and set the hash of the namespace
func (n *Namespace) SetHash() []byte {
	// Initialize a 256bit Blake2 hash (32 bytes)
	hash, err := blake2b.New256(nil)
	if err != nil {
		panic(err)
	}

	// Write all the user set fields
	_, _ = hash.Write([]byte(n.Name))
	_, _ = hash.Write([]byte(n.Description))
	_, _ = hash.Write([]byte(n.Quota))
	if n.Capabilities != nil {
		for _, driver := range n.Capabilities.EnabledTaskDrivers {
			_, _ = hash.Write([]byte(driver))
		}
		for _, driver := range n.Capabilities.DisabledTaskDrivers {
			_, _ = hash.Write([]byte(driver))
		}
	}

	// sort keys to ensure hash stability when meta is stored later
	var keys []string
	for k := range n.Meta {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	for _, k := range keys {
		_, _ = hash.Write([]byte(k))
		_, _ = hash.Write([]byte(n.Meta[k]))
	}

	// Finalize the hash
	hashVal := hash.Sum(nil)

	// Set and return the hash
	n.Hash = hashVal
	return hashVal
}

// Copy returns a deep copy of the namespace, cloning the capabilities, meta
// map, and hash bytes.
func (n *Namespace) Copy() *Namespace {
	nc := new(Namespace)
	*nc = *n
	nc.Hash = make([]byte, len(n.Hash))
	if n.Capabilities != nil {
		c := new(NamespaceCapabilities)
		*c = *n.Capabilities
		c.EnabledTaskDrivers = slices.Clone(n.Capabilities.EnabledTaskDrivers)
		c.DisabledTaskDrivers = slices.Clone(n.Capabilities.DisabledTaskDrivers)
		nc.Capabilities = c
	}
	if n.Meta != nil {
		nc.Meta = make(map[string]string, len(n.Meta))
		for k, v := range n.Meta {
			nc.Meta[k] = v
		}
	}
	copy(nc.Hash, n.Hash)
	return nc
}

// NamespaceListRequest is used to request a list of namespaces
type NamespaceListRequest struct {
	QueryOptions
}

// NamespaceListResponse is used for a list request
type NamespaceListResponse struct {
	Namespaces []*Namespace
	QueryMeta
}

// NamespaceSpecificRequest is used to query a specific namespace
type NamespaceSpecificRequest struct {
	Name string
	QueryOptions
}

// SingleNamespaceResponse is used to return a single namespace
type SingleNamespaceResponse struct {
	Namespace *Namespace
	QueryMeta
}

// NamespaceSetRequest is used to query a set of namespaces
type NamespaceSetRequest struct {
	Namespaces []string
	QueryOptions
}

// NamespaceSetResponse is used to return a set of namespaces
type NamespaceSetResponse struct {
	Namespaces map[string]*Namespace // Keyed by namespace Name
	QueryMeta
}

// NamespaceDeleteRequest is used to delete a set of namespaces
type NamespaceDeleteRequest struct {
	Namespaces []string
	WriteRequest
}

// NamespaceUpsertRequest is used to upsert a set of namespaces
type NamespaceUpsertRequest struct {
	Namespaces []*Namespace
	WriteRequest
}

const (
	// PeriodicSpecCron is used for a cron spec.
	PeriodicSpecCron = "cron"

	// PeriodicSpecTest is only used by unit tests. It is a sorted, comma
	// separated list of unix timestamps at which to launch.
	PeriodicSpecTest = "_internal_test"
)

// Periodic defines the interval a job should be run at.
type PeriodicConfig struct {
	// Enabled determines if the job should be run periodically.
	Enabled bool

	// Spec specifies the interval the job should be run as. It is parsed based
	// on the SpecType.
	Spec string

	// SpecType defines the format of the spec.
	SpecType string

	// ProhibitOverlap enforces that spawned jobs do not run in parallel.
	ProhibitOverlap bool

	// TimeZone is the user specified string that determines the time zone to
	// launch against. The time zones must be specified from IANA Time Zone
	// database, such as "America/New_York".
	// Reference: https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
	// Reference: https://www.iana.org/time-zones
	TimeZone string

	// location is the time zone to evaluate the launch time against
	location *time.Location
}

// Copy returns a shallow copy of the config (the location pointer is shared).
// A nil receiver is valid and yields nil.
func (p *PeriodicConfig) Copy() *PeriodicConfig {
	if p == nil {
		return nil
	}
	np := new(PeriodicConfig)
	*np = *p
	return np
}

// Validate checks the spec, spec type, and time zone. Disabled configs are
// always valid.
func (p *PeriodicConfig) Validate() error {
	if !p.Enabled {
		return nil
	}

	var mErr multierror.Error
	if p.Spec == "" {
		_ = multierror.Append(&mErr, fmt.Errorf("Must specify a spec"))
	}

	// Check if we got a valid time zone
	if p.TimeZone != "" {
		if _, err := time.LoadLocation(p.TimeZone); err != nil {
			_ = multierror.Append(&mErr, fmt.Errorf("Invalid time zone %q: %v", p.TimeZone, err))
		}
	}

	switch p.SpecType {
	case PeriodicSpecCron:
		// Validate the cron spec
		if _, err := cronexpr.Parse(p.Spec); err != nil {
			_ = multierror.Append(&mErr, fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err))
		}
	case PeriodicSpecTest:
		// No-op
	default:
		_ = multierror.Append(&mErr, fmt.Errorf("Unknown periodic specification type %q", p.SpecType))
	}

	return mErr.ErrorOrNil()
}

// Canonicalize resolves TimeZone into the cached location field.
// NOTE(review): when LoadLocation fails, the UTC assignment in the if-branch
// is immediately overwritten by `p.location = l` with a nil l — a dead store.
// GetLocation compensates by treating a nil location as UTC, so behavior is
// still UTC, but the fallback here never takes effect.
func (p *PeriodicConfig) Canonicalize() {
	// Load the location
	l, err := time.LoadLocation(p.TimeZone)
	if err != nil {
		p.location = time.UTC
	}

	p.location = l
}

// CronParseNext is a helper that parses the next time for the given expression
// but captures any panic that may occur in the underlying library.
func CronParseNext(e *cronexpr.Expression, fromTime time.Time, spec string) (t time.Time, err error) {
	defer func() {
		if recover() != nil {
			t = time.Time{}
			err = fmt.Errorf("failed parsing cron expression: %q", spec)
		}
	}()

	return e.Next(fromTime), nil
}

// Next returns the closest time instant matching the spec that is after the
// passed time. If no matching instance exists, the zero value of time.Time is
// returned. The `time.Location` of the returned value matches that of the
// passed time.
func (p *PeriodicConfig) Next(fromTime time.Time) (time.Time, error) {
	switch p.SpecType {
	case PeriodicSpecCron:
		e, err := cronexpr.Parse(p.Spec)
		if err != nil {
			return time.Time{}, fmt.Errorf("failed parsing cron expression: %q: %v", p.Spec, err)
		}
		return CronParseNext(e, fromTime, p.Spec)
	case PeriodicSpecTest:
		split := strings.Split(p.Spec, ",")
		if len(split) == 1 && split[0] == "" {
			return time.Time{}, nil
		}

		// Parse the times
		times := make([]time.Time, len(split))
		for i, s := range split {
			unix, err := strconv.Atoi(s)
			if err != nil {
				// NOTE(review): a malformed test spec is silently swallowed
				// (zero time, nil error) rather than reported; acceptable only
				// because this spec type is test-internal.
				return time.Time{}, nil
			}

			times[i] = time.Unix(int64(unix), 0)
		}

		// Find the next match
		for _, next := range times {
			if fromTime.Before(next) {
				return next, nil
			}
		}
	}

	return time.Time{}, nil
}

// GetLocation returns the location to use for determining the time zone to run
// the periodic job against.
func (p *PeriodicConfig) GetLocation() *time.Location {
	// Jobs pre 0.5.5 will not have this; a nil location is treated as UTC.
	if p.location != nil {
		return p.location
	}

	return time.UTC
}

const (
	// PeriodicLaunchSuffix is the string appended to the periodic jobs ID
	// when launching derived instances of it.
	PeriodicLaunchSuffix = "/periodic-"
)

// PeriodicLaunch tracks the last launch time of a periodic job.
type PeriodicLaunch struct {
	ID        string    // ID of the periodic job.
	Namespace string    // Namespace of the periodic job
	Launch    time.Time // The last launch time.

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

const (
	DispatchPayloadForbidden = "forbidden"
	DispatchPayloadOptional  = "optional"
	DispatchPayloadRequired  = "required"

	// DispatchLaunchSuffix is the string appended to the parameterized job's ID
	// when dispatching instances of it.
	DispatchLaunchSuffix = "/dispatch-"
)

// ParameterizedJobConfig is used to configure the parameterized job
type ParameterizedJobConfig struct {
	// Payload configures the payload requirements
	Payload string

	// MetaRequired is metadata keys that must be specified by the dispatcher
	MetaRequired []string

	// MetaOptional is metadata keys that may be specified by the dispatcher
	MetaOptional []string
}

// Validate checks that Payload is one of the Dispatch* constants and that the
// required and optional meta key sets do not overlap.
func (d *ParameterizedJobConfig) Validate() error {
	var mErr multierror.Error
	switch d.Payload {
	case DispatchPayloadOptional, DispatchPayloadRequired, DispatchPayloadForbidden:
	default:
		_ = multierror.Append(&mErr, fmt.Errorf("Unknown payload requirement: %q", d.Payload))
	}

	// Check that the meta configurations are disjoint sets
	disjoint, offending := helper.IsDisjoint(d.MetaRequired, d.MetaOptional)
	if !disjoint {
		_ = multierror.Append(&mErr, fmt.Errorf("Required and optional meta keys should be disjoint. Following keys exist in both: %v", offending))
	}

	return mErr.ErrorOrNil()
}

// Canonicalize defaults an unset Payload to DispatchPayloadOptional.
func (d *ParameterizedJobConfig) Canonicalize() {
	if d.Payload == "" {
		d.Payload = DispatchPayloadOptional
	}
}

// Copy returns a deep copy of the config, cloning both meta key slices. A nil
// receiver is valid and yields nil.
func (d *ParameterizedJobConfig) Copy() *ParameterizedJobConfig {
	if d == nil {
		return nil
	}
	nd := new(ParameterizedJobConfig)
	*nd = *d
	nd.MetaOptional = slices.Clone(nd.MetaOptional)
	nd.MetaRequired = slices.Clone(nd.MetaRequired)
	return nd
}

// DispatchedID returns an ID appropriate for a job dispatched against a
// particular parameterized job. The ID combines the template ID, an optional
// caller-supplied prefix, the dispatch time, and a random 8-char suffix.
func DispatchedID(templateID, idPrefixTemplate string, t time.Time) string {
	u := uuid.Generate()[:8]

	if idPrefixTemplate != "" {
		return fmt.Sprintf("%s%s%s-%d-%s", templateID, DispatchLaunchSuffix, idPrefixTemplate, t.Unix(), u)
	}

	return fmt.Sprintf("%s%s%d-%s", templateID, DispatchLaunchSuffix, t.Unix(), u)
}

// DispatchPayloadConfig configures how a task gets its input from a job dispatch
type DispatchPayloadConfig struct {
	// File specifies a relative path to where the input data should be written
	File string
}

// Copy returns a shallow copy of the config. A nil receiver is valid and
// yields nil.
func (d *DispatchPayloadConfig) Copy() *DispatchPayloadConfig {
	if d == nil {
		return nil
	}
	nd := new(DispatchPayloadConfig)
	*nd = *d
	return nd
}

// Validate ensures the destination file stays inside the allocation's
// task/local/ directory.
func (d *DispatchPayloadConfig) Validate() error {
	// Verify the destination doesn't escape
	escaped, err := escapingfs.PathEscapesAllocViaRelative("task/local/", d.File)
	if err != nil {
		return fmt.Errorf("invalid destination path: %v", err)
	} else if escaped {
		return fmt.Errorf("destination escapes allocation directory")
	}

	return nil
}

const (
	TaskLifecycleHookPrestart  = "prestart"
	TaskLifecycleHookPoststart = "poststart"
	TaskLifecycleHookPoststop  = "poststop"
)

type TaskLifecycleConfig struct {
	Hook    string
	Sidecar bool
}

// Copy returns a shallow copy of the config. A nil receiver is valid and
// yields nil.
func (d *TaskLifecycleConfig) Copy() *TaskLifecycleConfig {
	if d == nil {
		return nil
	}
	nd := new(TaskLifecycleConfig)
	*nd = *d
	return nd
}

// Validate checks the hook is one of the known TaskLifecycleHook* values.
// A nil receiver (no lifecycle configured) is valid.
func (d *TaskLifecycleConfig) Validate() error {
	if d == nil {
		return nil
	}

	switch d.Hook {
	case TaskLifecycleHookPrestart:
	case TaskLifecycleHookPoststart:
	case TaskLifecycleHookPoststop:
	case "":
		return fmt.Errorf("no lifecycle hook provided")
	default:
		return fmt.Errorf("invalid hook: %v", d.Hook)
	}

	return nil
}

var (
	// These default restart policies need to be in sync with
	// Canonicalize in api/tasks.go

	DefaultServiceJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 2,
		Interval: 30 * time.Minute,
		Mode:     RestartPolicyModeFail,
	}
	DefaultBatchJobRestartPolicy = RestartPolicy{
		Delay:    15 * time.Second,
		Attempts: 3,
		Interval: 24 * time.Hour,
		Mode:     RestartPolicyModeFail,
	}
)

var (
	// These default reschedule policies need to be in sync with
	// NewDefaultReschedulePolicy in api/tasks.go

	DefaultServiceJobReschedulePolicy = ReschedulePolicy{
		Delay:         30 * time.Second,
		DelayFunction: "exponential",
		MaxDelay:      1 * time.Hour,
		Unlimited:     true,
	}
	DefaultBatchJobReschedulePolicy = ReschedulePolicy{
		Attempts:      1,
		Interval:      24 * time.Hour,
		Delay:         5 * time.Second,
		DelayFunction: "constant",
	}
)

const (
	// RestartPolicyModeDelay causes an artificial delay till the next interval is
	// reached when the specified attempts have been reached in the interval.
5572 RestartPolicyModeDelay = "delay" 5573 5574 // RestartPolicyModeFail causes a job to fail if the specified number of 5575 // attempts are reached within an interval. 5576 RestartPolicyModeFail = "fail" 5577 5578 // RestartPolicyMinInterval is the minimum interval that is accepted for a 5579 // restart policy. 5580 RestartPolicyMinInterval = 5 * time.Second 5581 5582 // ReasonWithinPolicy describes restart events that are within policy 5583 ReasonWithinPolicy = "Restart within policy" 5584 ) 5585 5586 // JobScalingEvents contains the scaling events for a given job 5587 type JobScalingEvents struct { 5588 Namespace string 5589 JobID string 5590 5591 // This map is indexed by target; currently, this is just task group 5592 // the indexed array is sorted from newest to oldest event 5593 // the array should have less than JobTrackedScalingEvents entries 5594 ScalingEvents map[string][]*ScalingEvent 5595 5596 // Raft index 5597 ModifyIndex uint64 5598 } 5599 5600 // NewScalingEvent method for ScalingEvent objects. 
func NewScalingEvent(message string) *ScalingEvent {
	return &ScalingEvent{
		Time:    time.Now().Unix(),
		Message: message,
	}
}

// ScalingEvent describes a scaling event against a Job
type ScalingEvent struct {
	// Time is the Unix timestamp (in seconds) of the scaling event.
	// NOTE(review): a previous comment said nanoseconds, but NewScalingEvent
	// stores time.Now().Unix() — confirm no consumer expects nanoseconds.
	Time int64

	// Count is the new scaling count, if provided
	Count *int64

	// PreviousCount is the count at the time of the scaling event
	PreviousCount int64

	// Message is the message describing a scaling event
	Message string

	// Error indicates an error state for this scaling event
	Error bool

	// Meta is a map of metadata returned during a scaling event
	Meta map[string]interface{}

	// EvalID is the ID for an evaluation if one was created as part of a scaling event
	EvalID *string

	// Raft index
	CreateIndex uint64
}

// SetError sets the error flag and returns the event for chaining.
func (e *ScalingEvent) SetError(error bool) *ScalingEvent {
	e.Error = error
	return e
}

// SetMeta sets the event metadata and returns the event for chaining.
func (e *ScalingEvent) SetMeta(meta map[string]interface{}) *ScalingEvent {
	e.Meta = meta
	return e
}

// SetEvalID records the evaluation ID and returns the event for chaining.
func (e *ScalingEvent) SetEvalID(evalID string) *ScalingEvent {
	e.EvalID = &evalID
	return e
}

// ScalingEventRequest is used by the Job.Scale endpoint
// to register scaling events
type ScalingEventRequest struct {
	Namespace string
	JobID     string
	TaskGroup string

	ScalingEvent *ScalingEvent
}

// ScalingPolicy specifies the scaling policy for a scaling target
type ScalingPolicy struct {
	// ID is a generated UUID used for looking up the scaling policy
	ID string

	// Type is the type of scaling performed by the policy
	Type string

	// Target contains information about the target of the scaling policy, like job and group
	Target map[string]string

	// Policy is an opaque description of the scaling policy, passed to the autoscaler
	Policy map[string]interface{}

	// Min is the minimum allowable scaling count for this target
	Min int64

	// Max is the maximum allowable scaling count for this target
	Max int64

	// Enabled indicates whether this policy has been enabled/disabled
	Enabled bool

	CreateIndex uint64
	ModifyIndex uint64
}

// JobKey returns a key that is unique to a job-scoped target, useful as a map
// key. This uses the policy type, plus target (group and task).
func (p *ScalingPolicy) JobKey() string {
	// NUL separators cannot appear in the joined names (see TaskGroup name
	// validation), so the key is unambiguous.
	return p.Type + "\000" +
		p.Target[ScalingTargetGroup] + "\000" +
		p.Target[ScalingTargetTask]
}

const (
	ScalingTargetNamespace = "Namespace"
	ScalingTargetJob       = "Job"
	ScalingTargetGroup     = "Group"
	ScalingTargetTask      = "Task"

	ScalingPolicyTypeHorizontal = "horizontal"
)

// Canonicalize defaults the policy type to horizontal scaling when unset.
func (p *ScalingPolicy) Canonicalize() {
	if p.Type == "" {
		p.Type = ScalingPolicyTypeHorizontal
	}
}

// Copy returns a deep copy of the scaling policy, including the opaque
// Policy document and the Target map.
func (p *ScalingPolicy) Copy() *ScalingPolicy {
	if p == nil {
		return nil
	}

	// Policy is opaque/nested, so use copystructure for a deep copy. A
	// failure here indicates an un-copyable value, which is a programmer
	// error, hence the panic.
	opaquePolicyConfig, err := copystructure.Copy(p.Policy)
	if err != nil {
		panic(err.Error())
	}

	c := ScalingPolicy{
		ID:          p.ID,
		Policy:      opaquePolicyConfig.(map[string]interface{}),
		Enabled:     p.Enabled,
		Type:        p.Type,
		Min:         p.Min,
		Max:         p.Max,
		CreateIndex: p.CreateIndex,
		ModifyIndex: p.ModifyIndex,
	}
	// Note: this always produces a non-nil Target map, even when p.Target is
	// nil; callers compare with reflect.DeepEqual elsewhere, so keep as-is.
	c.Target = make(map[string]string, len(p.Target))
	for k, v := range p.Target {
		c.Target[k] = v
	}
	return &c
}

// Validate checks the policy type and that the Min/Max bounds are
// non-negative and consistent.
func (p *ScalingPolicy) Validate() error {
	if p == nil {
		return nil
	}

	var mErr multierror.Error

	// Check policy type and target
	if p.Type == "" {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("missing scaling policy type"))
	} else {
		mErr.Errors = append(mErr.Errors, p.validateType().Errors...)
	}

	// Check Min and Max
	if p.Max < 0 {
		mErr.Errors = append(mErr.Errors,
			fmt.Errorf("maximum count must be specified and non-negative"))
	} else if p.Max < p.Min {
		mErr.Errors = append(mErr.Errors,
			fmt.Errorf("maximum count must not be less than minimum count"))
	}

	if p.Min < 0 {
		mErr.Errors = append(mErr.Errors,
			fmt.Errorf("minimum count must be specified and non-negative"))
	}

	return mErr.ErrorOrNil()
}

// validateTargetHorizontal checks that a Nomad horizontal policy names its
// namespace, job, and group targets. An empty Target is tolerated (probably
// not a Nomad horizontal policy).
func (p *ScalingPolicy) validateTargetHorizontal() (mErr multierror.Error) {
	if len(p.Target) == 0 {
		// This is probably not a Nomad horizontal policy
		return
	}

	// Nomad horizontal policies should have Namespace, Job and TaskGroup
	if p.Target[ScalingTargetNamespace] == "" {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("missing target namespace"))
	}
	if p.Target[ScalingTargetJob] == "" {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("missing target job"))
	}
	if p.Target[ScalingTargetGroup] == "" {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("missing target group"))
	}
	return
}

// Diff indicates whether the specification for a given scaling policy has changed
func (p *ScalingPolicy) Diff(p2 *ScalingPolicy) bool {
	// Neutralize identity/index fields so only the specification is compared.
	copy := *p2
	copy.ID = p.ID
	copy.CreateIndex = p.CreateIndex
	copy.ModifyIndex = p.ModifyIndex
	return !reflect.DeepEqual(*p, copy)
}

// TargetTaskGroup updates a ScalingPolicy target to specify a given task group
func (p *ScalingPolicy) TargetTaskGroup(job *Job, tg *TaskGroup) *ScalingPolicy {
	p.Target = map[string]string{
		ScalingTargetNamespace: job.Namespace,
		ScalingTargetJob:       job.ID,
		ScalingTargetGroup:     tg.Name,
	}
	return p
}

// TargetTask updates a ScalingPolicy target to specify a given task
func (p *ScalingPolicy) TargetTask(job *Job, tg *TaskGroup, task *Task) *ScalingPolicy {
	p.TargetTaskGroup(job, tg)
	p.Target[ScalingTargetTask] = task.Name
	return p
}

// Stub returns a list-stub view of the policy (identity, type, target, and
// indexes; the opaque Policy document is omitted).
func (p *ScalingPolicy) Stub() *ScalingPolicyListStub {
	stub := &ScalingPolicyListStub{
		ID:          p.ID,
		Type:        p.Type,
		Target:      make(map[string]string),
		Enabled:     p.Enabled,
		CreateIndex: p.CreateIndex,
		ModifyIndex: p.ModifyIndex,
	}
	for k, v := range p.Target {
		stub.Target[k] = v
	}
	return stub
}

// GetScalingPolicies returns a slice of all scaling policies for this job
func (j *Job) GetScalingPolicies() []*ScalingPolicy {
	ret := make([]*ScalingPolicy, 0)

	for _, tg := range j.TaskGroups {
		if tg.Scaling != nil {
			ret = append(ret, tg.Scaling)
		}
	}

	ret = append(ret, j.GetEntScalingPolicies()...)

	return ret
}

// UsesDeployments returns a boolean indicating whether the job configuration
// results in a deployment during scheduling.
func (j *Job) UsesDeployments() bool {
	switch j.Type {
	case JobTypeService:
		return true
	default:
		return false
	}
}

// ScalingPolicyListStub is used to return a subset of scaling policy information
// for the scaling policy list
type ScalingPolicyListStub struct {
	ID          string
	Enabled     bool
	Type        string
	Target      map[string]string
	CreateIndex uint64
	ModifyIndex uint64
}

// RestartPolicy configures how Tasks are restarted when they crash or fail.
type RestartPolicy struct {
	// Attempts is the number of restart that will occur in an interval.
	Attempts int

	// Interval is a duration in which we can limit the number of restarts
	// within.
	Interval time.Duration

	// Delay is the time between a failure and a restart.
	Delay time.Duration

	// Mode controls what happens when the task restarts more than attempt times
	// in an interval.
	Mode string
}

// Copy returns a copy of the policy; all fields are value types so a shallow
// copy suffices.
func (r *RestartPolicy) Copy() *RestartPolicy {
	if r == nil {
		return nil
	}
	nrp := new(RestartPolicy)
	*nrp = *r
	return nrp
}

// Validate checks the restart mode, minimum interval, and that the attempts
// actually fit within the configured interval.
func (r *RestartPolicy) Validate() error {
	var mErr multierror.Error
	switch r.Mode {
	case RestartPolicyModeDelay, RestartPolicyModeFail:
	default:
		_ = multierror.Append(&mErr, fmt.Errorf("Unsupported restart mode: %q", r.Mode))
	}

	// Check for ambiguous/confusing settings
	if r.Attempts == 0 && r.Mode != RestartPolicyModeFail {
		_ = multierror.Append(&mErr, fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts))
	}

	if r.Interval.Nanoseconds() < RestartPolicyMinInterval.Nanoseconds() {
		_ = multierror.Append(&mErr, fmt.Errorf("Interval can not be less than %v (got %v)", RestartPolicyMinInterval, r.Interval))
	}
	if time.Duration(r.Attempts)*r.Delay > r.Interval {
		_ = multierror.Append(&mErr,
			fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay))
	}
	return mErr.ErrorOrNil()
}

// NewRestartPolicy returns the default restart policy for the given job
// type, or nil for unknown job types.
func NewRestartPolicy(jobType string) *RestartPolicy {
	switch jobType {
	case JobTypeService, JobTypeSystem:
		rp := DefaultServiceJobRestartPolicy
		return &rp
	case JobTypeBatch:
		rp := DefaultBatchJobRestartPolicy
		return &rp
	}
	return nil
}

const ReschedulePolicyMinInterval = 15 * time.Second
const ReschedulePolicyMinDelay = 5 * time.Second

var RescheduleDelayFunctions = [...]string{"constant", "exponential", "fibonacci"}

// ReschedulePolicy configures how Tasks are rescheduled when they crash or fail.
5932 type ReschedulePolicy struct { 5933 // Attempts limits the number of rescheduling attempts that can occur in an interval. 5934 Attempts int 5935 5936 // Interval is a duration in which we can limit the number of reschedule attempts. 5937 Interval time.Duration 5938 5939 // Delay is a minimum duration to wait between reschedule attempts. 5940 // The delay function determines how much subsequent reschedule attempts are delayed by. 5941 Delay time.Duration 5942 5943 // DelayFunction determines how the delay progressively changes on subsequent reschedule 5944 // attempts. Valid values are "exponential", "constant", and "fibonacci". 5945 DelayFunction string 5946 5947 // MaxDelay is an upper bound on the delay. 5948 MaxDelay time.Duration 5949 5950 // Unlimited allows infinite rescheduling attempts. Only allowed when delay is set 5951 // between reschedule attempts. 5952 Unlimited bool 5953 } 5954 5955 func (r *ReschedulePolicy) Copy() *ReschedulePolicy { 5956 if r == nil { 5957 return nil 5958 } 5959 nrp := new(ReschedulePolicy) 5960 *nrp = *r 5961 return nrp 5962 } 5963 5964 func (r *ReschedulePolicy) Enabled() bool { 5965 enabled := r != nil && (r.Attempts > 0 || r.Unlimited) 5966 return enabled 5967 } 5968 5969 // Validate uses different criteria to validate the reschedule policy 5970 // Delay must be a minimum of 5 seconds 5971 // Delay Ceiling is ignored if Delay Function is "constant" 5972 // Number of possible attempts is validated, given the interval, delay and delay function 5973 func (r *ReschedulePolicy) Validate() error { 5974 if !r.Enabled() { 5975 return nil 5976 } 5977 var mErr multierror.Error 5978 // Check for ambiguous/confusing settings 5979 if r.Attempts > 0 { 5980 if r.Interval <= 0 { 5981 _ = multierror.Append(&mErr, fmt.Errorf("Interval must be a non zero value if Attempts > 0")) 5982 } 5983 if r.Unlimited { 5984 _ = multierror.Append(&mErr, fmt.Errorf("Reschedule Policy with Attempts = %v, Interval = %v, "+ 5985 "and Unlimited = %v is 
ambiguous", r.Attempts, r.Interval, r.Unlimited)) 5986 _ = multierror.Append(&mErr, errors.New("If Attempts >0, Unlimited cannot also be set to true")) 5987 } 5988 } 5989 5990 delayPreCheck := true 5991 // Delay should be bigger than the default 5992 if r.Delay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 5993 _ = multierror.Append(&mErr, fmt.Errorf("Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 5994 delayPreCheck = false 5995 } 5996 5997 // Must use a valid delay function 5998 if !isValidDelayFunction(r.DelayFunction) { 5999 _ = multierror.Append(&mErr, fmt.Errorf("Invalid delay function %q, must be one of %q", r.DelayFunction, RescheduleDelayFunctions)) 6000 delayPreCheck = false 6001 } 6002 6003 // Validate MaxDelay if not using linear delay progression 6004 if r.DelayFunction != "constant" { 6005 if r.MaxDelay.Nanoseconds() < ReschedulePolicyMinDelay.Nanoseconds() { 6006 _ = multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than %v (got %v)", ReschedulePolicyMinDelay, r.Delay)) 6007 delayPreCheck = false 6008 } 6009 if r.MaxDelay < r.Delay { 6010 _ = multierror.Append(&mErr, fmt.Errorf("Max Delay cannot be less than Delay %v (got %v)", r.Delay, r.MaxDelay)) 6011 delayPreCheck = false 6012 } 6013 6014 } 6015 6016 // Validate Interval and other delay parameters if attempts are limited 6017 if !r.Unlimited { 6018 if r.Interval.Nanoseconds() < ReschedulePolicyMinInterval.Nanoseconds() { 6019 _ = multierror.Append(&mErr, fmt.Errorf("Interval cannot be less than %v (got %v)", ReschedulePolicyMinInterval, r.Interval)) 6020 } 6021 if !delayPreCheck { 6022 // We can't cross validate the rest of the delay params if delayPreCheck fails, so return early 6023 return mErr.ErrorOrNil() 6024 } 6025 crossValidationErr := r.validateDelayParams() 6026 if crossValidationErr != nil { 6027 _ = multierror.Append(&mErr, crossValidationErr) 6028 } 6029 } 6030 return mErr.ErrorOrNil() 6031 } 6032 6033 func 
isValidDelayFunction(delayFunc string) bool { 6034 for _, value := range RescheduleDelayFunctions { 6035 if value == delayFunc { 6036 return true 6037 } 6038 } 6039 return false 6040 } 6041 6042 func (r *ReschedulePolicy) validateDelayParams() error { 6043 ok, possibleAttempts, recommendedInterval := r.viableAttempts() 6044 if ok { 6045 return nil 6046 } 6047 var mErr multierror.Error 6048 if r.DelayFunction == "constant" { 6049 _ = multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v and "+ 6050 "delay function %q", possibleAttempts, r.Interval, r.Delay, r.DelayFunction)) 6051 } else { 6052 _ = multierror.Append(&mErr, fmt.Errorf("Nomad can only make %v attempts in %v with initial delay %v, "+ 6053 "delay function %q, and delay ceiling %v", possibleAttempts, r.Interval, r.Delay, r.DelayFunction, r.MaxDelay)) 6054 } 6055 _ = multierror.Append(&mErr, fmt.Errorf("Set the interval to at least %v to accommodate %v attempts", recommendedInterval.Round(time.Second), r.Attempts)) 6056 return mErr.ErrorOrNil() 6057 } 6058 6059 func (r *ReschedulePolicy) viableAttempts() (bool, int, time.Duration) { 6060 var possibleAttempts int 6061 var recommendedInterval time.Duration 6062 valid := true 6063 switch r.DelayFunction { 6064 case "constant": 6065 recommendedInterval = time.Duration(r.Attempts) * r.Delay 6066 if r.Interval < recommendedInterval { 6067 possibleAttempts = int(r.Interval / r.Delay) 6068 valid = false 6069 } 6070 case "exponential": 6071 for i := 0; i < r.Attempts; i++ { 6072 nextDelay := time.Duration(math.Pow(2, float64(i))) * r.Delay 6073 if nextDelay > r.MaxDelay { 6074 nextDelay = r.MaxDelay 6075 recommendedInterval += nextDelay 6076 } else { 6077 recommendedInterval = nextDelay 6078 } 6079 if recommendedInterval < r.Interval { 6080 possibleAttempts++ 6081 } 6082 } 6083 if possibleAttempts < r.Attempts { 6084 valid = false 6085 } 6086 case "fibonacci": 6087 var slots []time.Duration 6088 slots = append(slots, 
r.Delay) 6089 slots = append(slots, r.Delay) 6090 reachedCeiling := false 6091 for i := 2; i < r.Attempts; i++ { 6092 var nextDelay time.Duration 6093 if reachedCeiling { 6094 //switch to linear 6095 nextDelay = slots[i-1] + r.MaxDelay 6096 } else { 6097 nextDelay = slots[i-1] + slots[i-2] 6098 if nextDelay > r.MaxDelay { 6099 nextDelay = r.MaxDelay 6100 reachedCeiling = true 6101 } 6102 } 6103 slots = append(slots, nextDelay) 6104 } 6105 recommendedInterval = slots[len(slots)-1] 6106 if r.Interval < recommendedInterval { 6107 valid = false 6108 // calculate possible attempts 6109 for i := 0; i < len(slots); i++ { 6110 if slots[i] > r.Interval { 6111 possibleAttempts = i 6112 break 6113 } 6114 } 6115 } 6116 default: 6117 return false, 0, 0 6118 } 6119 if possibleAttempts < 0 { // can happen if delay is bigger than interval 6120 possibleAttempts = 0 6121 } 6122 return valid, possibleAttempts, recommendedInterval 6123 } 6124 6125 func NewReschedulePolicy(jobType string) *ReschedulePolicy { 6126 switch jobType { 6127 case JobTypeService: 6128 rp := DefaultServiceJobReschedulePolicy 6129 return &rp 6130 case JobTypeBatch: 6131 rp := DefaultBatchJobReschedulePolicy 6132 return &rp 6133 } 6134 return nil 6135 } 6136 6137 const ( 6138 MigrateStrategyHealthChecks = "checks" 6139 MigrateStrategyHealthStates = "task_states" 6140 ) 6141 6142 type MigrateStrategy struct { 6143 MaxParallel int 6144 HealthCheck string 6145 MinHealthyTime time.Duration 6146 HealthyDeadline time.Duration 6147 } 6148 6149 // DefaultMigrateStrategy is used for backwards compat with pre-0.8 Allocations 6150 // that lack an update strategy. 
6151 // 6152 // This function should match its counterpart in api/tasks.go 6153 func DefaultMigrateStrategy() *MigrateStrategy { 6154 return &MigrateStrategy{ 6155 MaxParallel: 1, 6156 HealthCheck: MigrateStrategyHealthChecks, 6157 MinHealthyTime: 10 * time.Second, 6158 HealthyDeadline: 5 * time.Minute, 6159 } 6160 } 6161 6162 func (m *MigrateStrategy) Validate() error { 6163 var mErr multierror.Error 6164 6165 if m.MaxParallel < 0 { 6166 _ = multierror.Append(&mErr, fmt.Errorf("MaxParallel must be >= 0 but found %d", m.MaxParallel)) 6167 } 6168 6169 switch m.HealthCheck { 6170 case MigrateStrategyHealthChecks, MigrateStrategyHealthStates: 6171 // ok 6172 case "": 6173 if m.MaxParallel > 0 { 6174 _ = multierror.Append(&mErr, fmt.Errorf("Missing HealthCheck")) 6175 } 6176 default: 6177 _ = multierror.Append(&mErr, fmt.Errorf("Invalid HealthCheck: %q", m.HealthCheck)) 6178 } 6179 6180 if m.MinHealthyTime < 0 { 6181 _ = multierror.Append(&mErr, fmt.Errorf("MinHealthyTime is %s and must be >= 0", m.MinHealthyTime)) 6182 } 6183 6184 if m.HealthyDeadline < 0 { 6185 _ = multierror.Append(&mErr, fmt.Errorf("HealthyDeadline is %s and must be >= 0", m.HealthyDeadline)) 6186 } 6187 6188 if m.MinHealthyTime > m.HealthyDeadline { 6189 _ = multierror.Append(&mErr, fmt.Errorf("MinHealthyTime must be less than HealthyDeadline")) 6190 } 6191 6192 return mErr.ErrorOrNil() 6193 } 6194 6195 // TaskGroup is an atomic unit of placement. Each task group belongs to 6196 // a job and may contain any number of tasks. A task group support running 6197 // in many replicas using the same configuration.. 6198 type TaskGroup struct { 6199 // Name of the task group 6200 Name string 6201 6202 // Count is the number of replicas of this task group that should 6203 // be scheduled. 
6204 Count int 6205 6206 // Update is used to control the update strategy for this task group 6207 Update *UpdateStrategy 6208 6209 // Migrate is used to control the migration strategy for this task group 6210 Migrate *MigrateStrategy 6211 6212 // Constraints can be specified at a task group level and apply to 6213 // all the tasks contained. 6214 Constraints []*Constraint 6215 6216 // Scaling is the list of autoscaling policies for the TaskGroup 6217 Scaling *ScalingPolicy 6218 6219 // RestartPolicy of a TaskGroup 6220 RestartPolicy *RestartPolicy 6221 6222 // Tasks are the collection of tasks that this task group needs to run 6223 Tasks []*Task 6224 6225 // EphemeralDisk is the disk resources that the task group requests 6226 EphemeralDisk *EphemeralDisk 6227 6228 // Meta is used to associate arbitrary metadata with this 6229 // task group. This is opaque to Nomad. 6230 Meta map[string]string 6231 6232 // ReschedulePolicy is used to configure how the scheduler should 6233 // retry failed allocations. 6234 ReschedulePolicy *ReschedulePolicy 6235 6236 // Affinities can be specified at the task group level to express 6237 // scheduling preferences. 6238 Affinities []*Affinity 6239 6240 // Spread can be specified at the task group level to express spreading 6241 // allocations across a desired attribute, such as datacenter 6242 Spreads []*Spread 6243 6244 // Networks are the network configuration for the task group. This can be 6245 // overridden in the task. 6246 Networks Networks 6247 6248 // Consul configuration specific to this task group 6249 Consul *Consul 6250 6251 // Services this group provides 6252 Services []*Service 6253 6254 // Volumes is a map of volumes that have been requested by the task group. 6255 Volumes map[string]*VolumeRequest 6256 6257 // ShutdownDelay is the amount of time to wait between deregistering 6258 // group services in consul and stopping tasks. 
6259 ShutdownDelay *time.Duration 6260 6261 // StopAfterClientDisconnect, if set, configures the client to stop the task group 6262 // after this duration since the last known good heartbeat 6263 StopAfterClientDisconnect *time.Duration 6264 6265 // MaxClientDisconnect, if set, configures the client to allow placed 6266 // allocations for tasks in this group to attempt to resume running without a restart. 6267 MaxClientDisconnect *time.Duration 6268 } 6269 6270 func (tg *TaskGroup) Copy() *TaskGroup { 6271 if tg == nil { 6272 return nil 6273 } 6274 ntg := new(TaskGroup) 6275 *ntg = *tg 6276 ntg.Update = ntg.Update.Copy() 6277 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 6278 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 6279 ntg.ReschedulePolicy = ntg.ReschedulePolicy.Copy() 6280 ntg.Affinities = CopySliceAffinities(ntg.Affinities) 6281 ntg.Spreads = CopySliceSpreads(ntg.Spreads) 6282 ntg.Volumes = CopyMapVolumeRequest(ntg.Volumes) 6283 ntg.Scaling = ntg.Scaling.Copy() 6284 ntg.Consul = ntg.Consul.Copy() 6285 6286 // Copy the network objects 6287 if tg.Networks != nil { 6288 n := len(tg.Networks) 6289 ntg.Networks = make([]*NetworkResource, n) 6290 for i := 0; i < n; i++ { 6291 ntg.Networks[i] = tg.Networks[i].Copy() 6292 } 6293 } 6294 6295 if tg.Tasks != nil { 6296 tasks := make([]*Task, len(ntg.Tasks)) 6297 for i, t := range ntg.Tasks { 6298 tasks[i] = t.Copy() 6299 } 6300 ntg.Tasks = tasks 6301 } 6302 6303 ntg.Meta = maps.Clone(ntg.Meta) 6304 6305 if tg.EphemeralDisk != nil { 6306 ntg.EphemeralDisk = tg.EphemeralDisk.Copy() 6307 } 6308 6309 if tg.Services != nil { 6310 ntg.Services = make([]*Service, len(tg.Services)) 6311 for i, s := range tg.Services { 6312 ntg.Services[i] = s.Copy() 6313 } 6314 } 6315 6316 if tg.ShutdownDelay != nil { 6317 ntg.ShutdownDelay = tg.ShutdownDelay 6318 } 6319 6320 if tg.StopAfterClientDisconnect != nil { 6321 ntg.StopAfterClientDisconnect = tg.StopAfterClientDisconnect 6322 } 6323 6324 if tg.MaxClientDisconnect != nil { 
6325 ntg.MaxClientDisconnect = tg.MaxClientDisconnect 6326 } 6327 6328 return ntg 6329 } 6330 6331 // Canonicalize is used to canonicalize fields in the TaskGroup. 6332 func (tg *TaskGroup) Canonicalize(job *Job) { 6333 // Ensure that an empty and nil map are treated the same to avoid scheduling 6334 // problems since we use reflect DeepEquals. 6335 if len(tg.Meta) == 0 { 6336 tg.Meta = nil 6337 } 6338 6339 // Set the default restart policy. 6340 if tg.RestartPolicy == nil { 6341 tg.RestartPolicy = NewRestartPolicy(job.Type) 6342 } 6343 6344 if tg.ReschedulePolicy == nil { 6345 tg.ReschedulePolicy = NewReschedulePolicy(job.Type) 6346 } 6347 6348 // Canonicalize Migrate for service jobs 6349 if job.Type == JobTypeService && tg.Migrate == nil { 6350 tg.Migrate = DefaultMigrateStrategy() 6351 } 6352 6353 // Set a default ephemeral disk object if the user has not requested for one 6354 if tg.EphemeralDisk == nil { 6355 tg.EphemeralDisk = DefaultEphemeralDisk() 6356 } 6357 6358 if tg.Scaling != nil { 6359 tg.Scaling.Canonicalize() 6360 } 6361 6362 for _, service := range tg.Services { 6363 service.Canonicalize(job.Name, tg.Name, "group", job.Namespace) 6364 } 6365 6366 for _, network := range tg.Networks { 6367 network.Canonicalize() 6368 } 6369 6370 for _, task := range tg.Tasks { 6371 task.Canonicalize(job, tg) 6372 } 6373 } 6374 6375 // NomadServices returns a list of all group and task - level services in tg that 6376 // are making use of the nomad service provider. 
func (tg *TaskGroup) NomadServices() []*Service {
	var services []*Service
	// Group-level services first, then task-level services.
	for _, service := range tg.Services {
		if service.Provider == ServiceProviderNomad {
			services = append(services, service)
		}
	}
	for _, task := range tg.Tasks {
		for _, service := range task.Services {
			if service.Provider == ServiceProviderNomad {
				services = append(services, service)
			}
		}
	}
	return services
}

// Validate is used to check a task group for reasonable configuration
func (tg *TaskGroup) Validate(j *Job) error {
	var mErr multierror.Error
	if tg.Name == "" {
		mErr.Errors = append(mErr.Errors, errors.New("Missing task group name"))
	} else if strings.Contains(tg.Name, "\000") {
		// NUL is used as a separator in ScalingPolicy.JobKey, so it must not
		// appear in group names.
		mErr.Errors = append(mErr.Errors, errors.New("Task group name contains null character"))
	}
	if tg.Count < 0 {
		mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative"))
	}
	if len(tg.Tasks) == 0 {
		// could be a lone consul gateway inserted by the connect mutator
		mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group"))
	}

	// max_client_disconnect and stop_after_client_disconnect are mutually
	// exclusive client-disconnect behaviors.
	if tg.MaxClientDisconnect != nil && tg.StopAfterClientDisconnect != nil {
		mErr.Errors = append(mErr.Errors, errors.New("Task group cannot be configured with both max_client_disconnect and stop_after_client_disconnect"))
	}

	if tg.MaxClientDisconnect != nil && *tg.MaxClientDisconnect < 0 {
		mErr.Errors = append(mErr.Errors, errors.New("max_client_disconnect cannot be negative"))
	}

	for idx, constr := range tg.Constraints {
		if err := constr.Validate(); err != nil {
			outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	// Affinities are not allowed for system jobs.
	if j.Type == JobTypeSystem {
		if tg.Affinities != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza"))
		}
	} else {
		for idx, affinity := range tg.Affinities {
			if err := affinity.Validate(); err != nil {
				outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}

	// A restart policy is required (Canonicalize installs a default).
	if tg.RestartPolicy != nil {
		if err := tg.RestartPolicy.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	} else {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name))
	}

	// Spreads are not allowed for system jobs.
	if j.Type == JobTypeSystem {
		if tg.Spreads != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have a spread stanza"))
		}
	} else {
		for idx, spread := range tg.Spreads {
			if err := spread.Validate(); err != nil {
				outer := fmt.Errorf("Spread %d validation failed: %s", idx+1, err)
				mErr.Errors = append(mErr.Errors, outer)
			}
		}
	}

	// System jobs must not have a reschedule policy; all other job types
	// require one (Canonicalize installs a default).
	if j.Type == JobTypeSystem {
		if tg.ReschedulePolicy != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs should not have a reschedule policy"))
		}
	} else {
		if tg.ReschedulePolicy != nil {
			if err := tg.ReschedulePolicy.Validate(); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
		} else {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a reschedule policy", tg.Name))
		}
	}

	if tg.EphemeralDisk != nil {
		if err := tg.EphemeralDisk.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	} else {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have an ephemeral disk object", tg.Name))
	}

	// Validate the update strategy
	if u := tg.Update; u != nil {
		switch j.Type {
		case JobTypeService, JobTypeSystem:
		default:
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow update block", j.Type))
		}
		if err := u.Validate(); err != nil {
			mErr.Errors = append(mErr.Errors, err)
		}
	}

	// Validate the migration strategy
	switch j.Type {
	case JobTypeService:
		if tg.Migrate != nil {
			if err := tg.Migrate.Validate(); err != nil {
				mErr.Errors = append(mErr.Errors, err)
			}
		}
	default:
		if tg.Migrate != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Job type %q does not allow migrate block", j.Type))
		}
	}

	// Check that there is only one leader task if any
	tasks := make(map[string]int)
	leaderTasks := 0
	for idx, task := range tg.Tasks {
		if task.Name == "" {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1))
		} else if existing, ok := tasks[task.Name]; ok {
			mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1))
		} else {
			tasks[task.Name] = idx
		}

		if task.Leader {
			leaderTasks++
		}
	}

	if leaderTasks > 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("Only one task may be marked as leader"))
	}

	// Validate the volume requests
	var canaries int
	if tg.Update != nil {
		canaries = tg.Update.Canary
	}
	for name, volReq := range tg.Volumes {
		if err := volReq.Validate(tg.Count, canaries); err != nil {
			mErr.Errors = append(mErr.Errors, fmt.Errorf(
				"Task group volume validation for %s failed: %v", name, err))
		}
	}

	// Validate task group and task network resources
	if err := tg.validateNetworks(); err != nil {
		outer := fmt.Errorf("Task group network validation failed: %v", err)
		mErr.Errors = append(mErr.Errors, outer)
	}

	// Validate task group and task services
	if err := tg.validateServices(); err != nil {
		outer := fmt.Errorf("Task group service validation failed: %v", err)
		mErr.Errors = append(mErr.Errors, outer)
	}

	// Validate group service script-checks
	if err := tg.validateScriptChecksInGroupServices(); err != nil {
		outer := fmt.Errorf("Task group service check validation failed: %v", err)
		mErr.Errors = append(mErr.Errors, outer)
	}

	// Validate the scaling policy
	if err := tg.validateScalingPolicy(j); err != nil {
		outer := fmt.Errorf("Task group scaling policy validation failed: %v", err)
		mErr.Errors = append(mErr.Errors, outer)
	}

	// Validate the tasks
	for _, task := range tg.Tasks {
		// Validate the task does not reference undefined volume mounts
		for i, mnt := range task.VolumeMounts {
			if mnt.Volume == "" {
				mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing an empty volume", task.Name, i))
				continue
			}

			if _, ok := tg.Volumes[mnt.Volume]; !ok {
				mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %s has a volume mount (%d) referencing undefined volume %s", task.Name, i, mnt.Volume))
				continue
			}
		}

		if err := task.Validate(tg.EphemeralDisk, j.Type, tg.Services, tg.Networks); err != nil {
			outer := fmt.Errorf("Task %s validation failed: %v", task.Name, err)
			mErr.Errors = append(mErr.Errors, outer)
		}
	}

	return mErr.ErrorOrNil()
}

// validateNetworks checks group networks for duplicate port labels and
// conflicting or out-of-range static/mapped ports across host networks.
func (tg *TaskGroup) validateNetworks() error {
	var mErr multierror.Error
	portLabels := make(map[string]string)
	// host_network -> static port tracking
	staticPortsIndex := make(map[string]map[int]string)

	for _, net := range tg.Networks {
		for _, port := range append(net.ReservedPorts, net.DynamicPorts...)
{ 6595 if other, ok := portLabels[port.Label]; ok { 6596 mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other)) 6597 } else { 6598 portLabels[port.Label] = "taskgroup network" 6599 } 6600 6601 if port.Value != 0 { 6602 hostNetwork := port.HostNetwork 6603 if hostNetwork == "" { 6604 hostNetwork = "default" 6605 } 6606 staticPorts, ok := staticPortsIndex[hostNetwork] 6607 if !ok { 6608 staticPorts = make(map[int]string) 6609 } 6610 // static port 6611 if other, ok := staticPorts[port.Value]; ok { 6612 err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other) 6613 mErr.Errors = append(mErr.Errors, err) 6614 } else if port.Value > math.MaxUint16 { 6615 err := fmt.Errorf("Port %s (%d) cannot be greater than %d", port.Label, port.Value, math.MaxUint16) 6616 mErr.Errors = append(mErr.Errors, err) 6617 } else { 6618 staticPorts[port.Value] = fmt.Sprintf("taskgroup network:%s", port.Label) 6619 staticPortsIndex[hostNetwork] = staticPorts 6620 } 6621 } 6622 6623 if port.To < -1 { 6624 err := fmt.Errorf("Port %q cannot be mapped to negative value %d", port.Label, port.To) 6625 mErr.Errors = append(mErr.Errors, err) 6626 } else if port.To > math.MaxUint16 { 6627 err := fmt.Errorf("Port %q cannot be mapped to a port (%d) greater than %d", port.Label, port.To, math.MaxUint16) 6628 mErr.Errors = append(mErr.Errors, err) 6629 } 6630 } 6631 6632 // Validate the hostname field to be a valid DNS name. If the parameter 6633 // looks like it includes an interpolation value, we skip this. It 6634 // would be nice to validate additional parameters, but this isn't the 6635 // right place. 
6636 if net.Hostname != "" && !strings.Contains(net.Hostname, "${") { 6637 if _, ok := dns.IsDomainName(net.Hostname); !ok { 6638 mErr.Errors = append(mErr.Errors, errors.New("Hostname is not a valid DNS name")) 6639 } 6640 } 6641 } 6642 6643 // Check for duplicate tasks or port labels, and no duplicated static ports 6644 for _, task := range tg.Tasks { 6645 if task.Resources == nil { 6646 continue 6647 } 6648 6649 for _, net := range task.Resources.Networks { 6650 for _, port := range append(net.ReservedPorts, net.DynamicPorts...) { 6651 if other, ok := portLabels[port.Label]; ok { 6652 mErr.Errors = append(mErr.Errors, fmt.Errorf("Port label %s already in use by %s", port.Label, other)) 6653 } 6654 6655 if port.Value != 0 { 6656 hostNetwork := port.HostNetwork 6657 if hostNetwork == "" { 6658 hostNetwork = "default" 6659 } 6660 staticPorts, ok := staticPortsIndex[hostNetwork] 6661 if !ok { 6662 staticPorts = make(map[int]string) 6663 } 6664 if other, ok := staticPorts[port.Value]; ok { 6665 err := fmt.Errorf("Static port %d already reserved by %s", port.Value, other) 6666 mErr.Errors = append(mErr.Errors, err) 6667 } else if port.Value > math.MaxUint16 { 6668 err := fmt.Errorf("Port %s (%d) cannot be greater than %d", port.Label, port.Value, math.MaxUint16) 6669 mErr.Errors = append(mErr.Errors, err) 6670 } else { 6671 staticPorts[port.Value] = fmt.Sprintf("%s:%s", task.Name, port.Label) 6672 staticPortsIndex[hostNetwork] = staticPorts 6673 } 6674 } 6675 } 6676 } 6677 } 6678 return mErr.ErrorOrNil() 6679 } 6680 6681 // validateServices runs Service.Validate() on group-level services, checks 6682 // group service checks that refer to tasks only refer to tasks that exist. 6683 func (tg *TaskGroup) validateServices() error { 6684 var mErr multierror.Error 6685 6686 // Accumulate task names in this group 6687 taskSet := set.New[string](len(tg.Tasks)) 6688 6689 // each service in a group must be unique (i.e. 
used in MakeAllocServiceID) 6690 type unique struct { 6691 name string 6692 task string 6693 port string 6694 } 6695 6696 // Accumulate service IDs in this group 6697 idSet := set.New[unique](0) 6698 6699 // Accumulate IDs that are duplicates 6700 idDuplicateSet := set.New[unique](0) 6701 6702 // Accumulate the providers used for this task group. Currently, Nomad only 6703 // allows the use of a single service provider within a task group. 6704 providerSet := set.New[string](1) 6705 6706 // Create a map of known tasks and their services so we can compare 6707 // vs the group-level services and checks 6708 for _, task := range tg.Tasks { 6709 taskSet.Insert(task.Name) 6710 6711 if len(task.Services) == 0 { 6712 continue 6713 } 6714 6715 for _, service := range task.Services { 6716 6717 // Ensure no task-level service can only specify the task it belongs to. 6718 if service.TaskName != "" && service.TaskName != task.Name { 6719 mErr.Errors = append(mErr.Errors, 6720 fmt.Errorf("Service %s is invalid: may only specify task the service belongs to, got %q", service.Name, service.TaskName), 6721 ) 6722 } 6723 6724 // Ensure no task-level checks can only specify the task they belong to. 
6725 for _, check := range service.Checks { 6726 if check.TaskName != "" && check.TaskName != task.Name { 6727 mErr.Errors = append(mErr.Errors, 6728 fmt.Errorf("Check %s is invalid: may only specify task the check belongs to, got %q", check.Name, check.TaskName), 6729 ) 6730 } 6731 } 6732 6733 // Track that we have seen this service id 6734 id := unique{service.Name, task.Name, service.PortLabel} 6735 if !idSet.Insert(id) { 6736 // accumulate duplicates for a single error later on 6737 idDuplicateSet.Insert(id) 6738 } 6739 6740 // Track that we have seen this service provider 6741 providerSet.Insert(service.Provider) 6742 } 6743 } 6744 6745 for i, service := range tg.Services { 6746 6747 // Track that we have seen this service id 6748 id := unique{service.Name, "group", service.PortLabel} 6749 if !idSet.Insert(id) { 6750 // accumulate duplicates for a single error later on 6751 idDuplicateSet.Insert(id) 6752 } 6753 6754 // Track that we have seen this service provider 6755 providerSet.Insert(service.Provider) 6756 6757 if err := service.Validate(); err != nil { 6758 outer := fmt.Errorf("Service[%d] %s validation failed: %s", i, service.Name, err) 6759 mErr.Errors = append(mErr.Errors, outer) 6760 // we break here to avoid the risk of crashing on null-pointer 6761 // access in a later step, accepting that we might miss out on 6762 // error messages to provide the user. 
6763 continue 6764 } 6765 if service.AddressMode == AddressModeDriver { 6766 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot use address_mode=\"driver\", only services defined in a \"task\" block can use this mode", service.Name)) 6767 } 6768 6769 for _, check := range service.Checks { 6770 if check.TaskName != "" { 6771 if check.AddressMode == AddressModeDriver { 6772 mErr.Errors = append(mErr.Errors, fmt.Errorf("Check %q invalid: cannot use address_mode=\"driver\", only checks defined in a \"task\" service block can use this mode", service.Name)) 6773 } 6774 if !taskSet.Contains(check.TaskName) { 6775 mErr.Errors = append(mErr.Errors, 6776 fmt.Errorf("Check %s invalid: refers to non-existent task %s", check.Name, check.TaskName)) 6777 } 6778 } 6779 } 6780 } 6781 6782 // Produce an error of any services which are not unique enough in the group 6783 // i.e. have same <task, name, port> 6784 if idDuplicateSet.Size() > 0 { 6785 mErr.Errors = append(mErr.Errors, 6786 fmt.Errorf( 6787 "Services are not unique: %s", 6788 idDuplicateSet.String( 6789 func(u unique) string { 6790 s := u.task + "->" + u.name 6791 if u.port != "" { 6792 s += ":" + u.port 6793 } 6794 return s 6795 }, 6796 ), 6797 ), 6798 ) 6799 } 6800 6801 // The initial feature release of native service discovery only allows for 6802 // a single service provider to be used across all services in a task 6803 // group. 6804 if providerSet.Size() > 1 { 6805 mErr.Errors = append(mErr.Errors, 6806 errors.New("Multiple service providers used: task group services must use the same provider")) 6807 } 6808 6809 return mErr.ErrorOrNil() 6810 } 6811 6812 // validateScriptChecksInGroupServices ensures group-level services with script 6813 // checks know what task driver to use. Either the service.task or service.check.task 6814 // parameter must be configured. 
6815 func (tg *TaskGroup) validateScriptChecksInGroupServices() error { 6816 var mErr multierror.Error 6817 for _, service := range tg.Services { 6818 if service.TaskName == "" { 6819 for _, check := range service.Checks { 6820 if check.Type == "script" && check.TaskName == "" { 6821 mErr.Errors = append(mErr.Errors, 6822 fmt.Errorf("Service [%s]->%s or Check %s must specify task parameter", 6823 tg.Name, service.Name, check.Name, 6824 )) 6825 } 6826 } 6827 } 6828 } 6829 return mErr.ErrorOrNil() 6830 } 6831 6832 // validateScalingPolicy ensures that the scaling policy has consistent 6833 // min and max, not in conflict with the task group count 6834 func (tg *TaskGroup) validateScalingPolicy(j *Job) error { 6835 if tg.Scaling == nil { 6836 return nil 6837 } 6838 6839 var mErr multierror.Error 6840 6841 err := tg.Scaling.Validate() 6842 if err != nil { 6843 // prefix scaling policy errors 6844 if me, ok := err.(*multierror.Error); ok { 6845 for _, e := range me.Errors { 6846 mErr.Errors = append(mErr.Errors, fmt.Errorf("Scaling policy invalid: %s", e)) 6847 } 6848 } 6849 } 6850 6851 if tg.Scaling.Max < int64(tg.Count) { 6852 mErr.Errors = append(mErr.Errors, 6853 fmt.Errorf("Scaling policy invalid: task group count must not be greater than maximum count in scaling policy")) 6854 } 6855 6856 if int64(tg.Count) < tg.Scaling.Min && !(j.IsMultiregion() && tg.Count == 0 && j.Region == "global") { 6857 mErr.Errors = append(mErr.Errors, 6858 fmt.Errorf("Scaling policy invalid: task group count must not be less than minimum count in scaling policy")) 6859 } 6860 6861 return mErr.ErrorOrNil() 6862 } 6863 6864 // Warnings returns a list of warnings that may be from dubious settings or 6865 // deprecation warnings. 
6866 func (tg *TaskGroup) Warnings(j *Job) error { 6867 var mErr multierror.Error 6868 6869 // Validate the update strategy 6870 if u := tg.Update; u != nil { 6871 // Check the counts are appropriate 6872 if tg.Count > 1 && u.MaxParallel > tg.Count && !(j.IsMultiregion() && tg.Count == 0) { 6873 mErr.Errors = append(mErr.Errors, 6874 fmt.Errorf("Update max parallel count is greater than task group count (%d > %d). "+ 6875 "A destructive change would result in the simultaneous replacement of all allocations.", u.MaxParallel, tg.Count)) 6876 } 6877 } 6878 6879 // Check for mbits network field 6880 if len(tg.Networks) > 0 && tg.Networks[0].MBits > 0 { 6881 mErr.Errors = append(mErr.Errors, fmt.Errorf("mbits has been deprecated as of Nomad 0.12.0. Please remove mbits from the network block")) 6882 } 6883 6884 for _, t := range tg.Tasks { 6885 if err := t.Warnings(); err != nil { 6886 err = multierror.Prefix(err, fmt.Sprintf("Task %q:", t.Name)) 6887 mErr.Errors = append(mErr.Errors, err) 6888 } 6889 } 6890 6891 return mErr.ErrorOrNil() 6892 } 6893 6894 // LookupTask finds a task by name 6895 func (tg *TaskGroup) LookupTask(name string) *Task { 6896 for _, t := range tg.Tasks { 6897 if t.Name == name { 6898 return t 6899 } 6900 } 6901 return nil 6902 } 6903 6904 // UsesConnect for convenience returns true if the TaskGroup contains at least 6905 // one service that makes use of Consul Connect features. 6906 // 6907 // Currently used for validating that the task group contains one or more connect 6908 // aware services before generating a service identity token. 
func (tg *TaskGroup) UsesConnect() bool {
	for _, service := range tg.Services {
		if service.Connect != nil {
			// Any of native, sidecar proxy, or gateway counts as "uses Connect".
			if service.Connect.IsNative() || service.Connect.HasSidecar() || service.Connect.IsGateway() {
				return true
			}
		}
	}
	return false
}

// UsesConnectGateway for convenience returns true if the TaskGroup contains at
// least one service that makes use of Consul Connect Gateway features.
func (tg *TaskGroup) UsesConnectGateway() bool {
	for _, service := range tg.Services {
		if service.Connect != nil {
			if service.Connect.IsGateway() {
				return true
			}
		}
	}
	return false
}

// GoString implements fmt.GoStringer, printing the dereferenced struct.
func (tg *TaskGroup) GoString() string {
	return fmt.Sprintf("*%#v", *tg)
}

// CheckRestart describes if and when a task should be restarted based on
// failing health checks.
type CheckRestart struct {
	Limit          int           // Restart task after this many unhealthy intervals
	Grace          time.Duration // Grace time to give tasks after starting to get healthy
	IgnoreWarnings bool          // If true treat checks in `warning` as passing
}

// Copy returns a shallow copy of the CheckRestart, or nil if it is nil.
// A shallow copy is sufficient because all fields are value types.
func (c *CheckRestart) Copy() *CheckRestart {
	if c == nil {
		return nil
	}

	nc := new(CheckRestart)
	*nc = *c
	return nc
}

// Equal reports whether two CheckRestart configurations are field-for-field
// identical. Two nil pointers are considered equal.
func (c *CheckRestart) Equal(o *CheckRestart) bool {
	if c == nil || o == nil {
		return c == o
	}

	if c.Limit != o.Limit {
		return false
	}

	if c.Grace != o.Grace {
		return false
	}

	if c.IgnoreWarnings != o.IgnoreWarnings {
		return false
	}

	return true
}

// Validate checks that Limit and Grace are non-negative. A nil CheckRestart
// is valid (the feature is simply unset).
func (c *CheckRestart) Validate() error {
	if c == nil {
		return nil
	}

	var mErr multierror.Error
	if c.Limit < 0 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("limit must be greater than or equal to 0 but found %d", c.Limit))
	}

	if c.Grace < 0 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("grace period must be greater than or equal to 0 but found %d", c.Grace))
	}

	return mErr.ErrorOrNil()
}

const (
	// DefaultKillTimeout is the default timeout between signaling a task it
	// will be killed and killing it.
	DefaultKillTimeout = 5 * time.Second
)

// LogConfig provides configuration for log rotation
type LogConfig struct {
	MaxFiles      int // maximum number of rotated log files to retain
	MaxFileSizeMB int // maximum size of each log file, in megabytes
}

// Equal reports whether two LogConfigs are field-for-field identical. Two nil
// pointers are considered equal.
func (l *LogConfig) Equal(o *LogConfig) bool {
	if l == nil || o == nil {
		return l == o
	}

	if l.MaxFiles != o.MaxFiles {
		return false
	}

	if l.MaxFileSizeMB != o.MaxFileSizeMB {
		return false
	}

	return true
}

// Copy returns a copy of the LogConfig, or nil if it is nil.
func (l *LogConfig) Copy() *LogConfig {
	if l == nil {
		return nil
	}
	return &LogConfig{
		MaxFiles:      l.MaxFiles,
		MaxFileSizeMB: l.MaxFileSizeMB,
	}
}

// DefaultLogConfig returns the default LogConfig values.
func DefaultLogConfig() *LogConfig {
	return &LogConfig{
		MaxFiles:      10,
		MaxFileSizeMB: 10,
	}
}

// Validate returns an error if the log config specified are less than
// the minimum allowed.
// NOTE(review): unlike Copy/Equal, this method does not nil-check the
// receiver; callers appear to guard against nil before calling — confirm.
func (l *LogConfig) Validate() error {
	var mErr multierror.Error
	if l.MaxFiles < 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles))
	}
	if l.MaxFileSizeMB < 1 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB))
	}
	return mErr.ErrorOrNil()
}

// Task is a single process typically that is executed as part of a task group.
type Task struct {
	// Name of the task
	Name string

	// Driver is used to control which driver is used
	Driver string

	// User is used to determine which user will run the task. It defaults to
	// the same user the Nomad client is being run as.
	User string

	// Config is provided to the driver to initialize
	Config map[string]interface{}

	// Map of environment variables to be used by the driver
	Env map[string]string

	// List of service definitions exposed by the Task
	Services []*Service

	// Vault is used to define the set of Vault policies that this task should
	// have access to.
	Vault *Vault

	// Templates are the set of templates to be rendered for the task.
	Templates []*Template

	// Constraints can be specified at a task level and apply only to
	// the particular task.
	Constraints []*Constraint

	// Affinities can be specified at the task level to express
	// scheduling preferences
	Affinities []*Affinity

	// Resources is the resources needed by this task
	Resources *Resources

	// RestartPolicy of a TaskGroup
	RestartPolicy *RestartPolicy

	// DispatchPayload configures how the task retrieves its input from a dispatch
	DispatchPayload *DispatchPayloadConfig

	// Lifecycle, when set, marks the task as a lifecycle (prestart/poststart/
	// poststop) task rather than a main task.
	Lifecycle *TaskLifecycleConfig

	// Meta is used to associate arbitrary metadata with this
	// task. This is opaque to Nomad.
	Meta map[string]string

	// KillTimeout is the time between signaling a task that it will be
	// killed and killing it.
	KillTimeout time.Duration

	// LogConfig provides configuration for log rotation
	LogConfig *LogConfig

	// Artifacts is a list of artifacts to download and extract before running
	// the task.
	Artifacts []*TaskArtifact

	// Leader marks the task as the leader within the group. When the leader
	// task exits, other tasks will be gracefully terminated.
	Leader bool

	// ShutdownDelay is the duration of the delay between de-registering a
	// task from Consul and sending it a signal to shutdown. See #2441
	ShutdownDelay time.Duration

	// VolumeMounts is a list of Volume name <-> mount configurations that will be
	// attached to this task.
	VolumeMounts []*VolumeMount

	// ScalingPolicies is a list of scaling policies scoped to this task
	ScalingPolicies []*ScalingPolicy

	// KillSignal is the kill signal to use for the task. This is an optional
	// specification and defaults to SIGINT
	KillSignal string

	// Used internally to manage tasks according to their TaskKind. Initial use case
	// is for Consul Connect
	Kind TaskKind

	// CSIPluginConfig is used to configure the plugin supervisor for the task.
	CSIPluginConfig *TaskCSIPluginConfig
}

// UsesConnect is for conveniently detecting if the Task is able to make use
// of Consul Connect features. This will be indicated in the TaskKind of the
// Task, which exports known types of Tasks. UsesConnect will be true if the
// task is a connect proxy, connect native, or is a connect gateway.
7144 func (t *Task) UsesConnect() bool { 7145 return t.Kind.IsConnectNative() || t.UsesConnectSidecar() 7146 } 7147 7148 func (t *Task) UsesConnectSidecar() bool { 7149 return t.Kind.IsConnectProxy() || t.Kind.IsAnyConnectGateway() 7150 } 7151 7152 func (t *Task) IsPrestart() bool { 7153 return t != nil && t.Lifecycle != nil && 7154 t.Lifecycle.Hook == TaskLifecycleHookPrestart 7155 } 7156 7157 func (t *Task) IsMain() bool { 7158 return t != nil && (t.Lifecycle == nil || t.Lifecycle.Hook == "") 7159 } 7160 7161 func (t *Task) IsPoststart() bool { 7162 return t != nil && t.Lifecycle != nil && 7163 t.Lifecycle.Hook == TaskLifecycleHookPoststart 7164 } 7165 7166 func (t *Task) IsPoststop() bool { 7167 return t != nil && t.Lifecycle != nil && 7168 t.Lifecycle.Hook == TaskLifecycleHookPoststop 7169 } 7170 7171 func (t *Task) Copy() *Task { 7172 if t == nil { 7173 return nil 7174 } 7175 nt := new(Task) 7176 *nt = *t 7177 nt.Env = maps.Clone(nt.Env) 7178 7179 if t.Services != nil { 7180 services := make([]*Service, len(nt.Services)) 7181 for i, s := range nt.Services { 7182 services[i] = s.Copy() 7183 } 7184 nt.Services = services 7185 } 7186 7187 nt.Constraints = CopySliceConstraints(nt.Constraints) 7188 nt.Affinities = CopySliceAffinities(nt.Affinities) 7189 nt.VolumeMounts = CopySliceVolumeMount(nt.VolumeMounts) 7190 nt.CSIPluginConfig = nt.CSIPluginConfig.Copy() 7191 7192 nt.Vault = nt.Vault.Copy() 7193 nt.Resources = nt.Resources.Copy() 7194 nt.LogConfig = nt.LogConfig.Copy() 7195 nt.Meta = maps.Clone(nt.Meta) 7196 nt.DispatchPayload = nt.DispatchPayload.Copy() 7197 nt.Lifecycle = nt.Lifecycle.Copy() 7198 7199 if t.Artifacts != nil { 7200 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 7201 for _, a := range nt.Artifacts { 7202 artifacts = append(artifacts, a.Copy()) 7203 } 7204 nt.Artifacts = artifacts 7205 } 7206 7207 if i, err := copystructure.Copy(nt.Config); err != nil { 7208 panic(err.Error()) 7209 } else { 7210 nt.Config = i.(map[string]interface{}) 
7211 } 7212 7213 if t.Templates != nil { 7214 templates := make([]*Template, len(t.Templates)) 7215 for i, tmpl := range nt.Templates { 7216 templates[i] = tmpl.Copy() 7217 } 7218 nt.Templates = templates 7219 } 7220 7221 return nt 7222 } 7223 7224 // Canonicalize canonicalizes fields in the task. 7225 func (t *Task) Canonicalize(job *Job, tg *TaskGroup) { 7226 // Ensure that an empty and nil map are treated the same to avoid scheduling 7227 // problems since we use reflect DeepEquals. 7228 if len(t.Meta) == 0 { 7229 t.Meta = nil 7230 } 7231 if len(t.Config) == 0 { 7232 t.Config = nil 7233 } 7234 if len(t.Env) == 0 { 7235 t.Env = nil 7236 } 7237 7238 for _, service := range t.Services { 7239 service.Canonicalize(job.Name, tg.Name, t.Name, job.Namespace) 7240 } 7241 7242 // If Resources are nil initialize them to defaults, otherwise canonicalize 7243 if t.Resources == nil { 7244 t.Resources = DefaultResources() 7245 } else { 7246 t.Resources.Canonicalize() 7247 } 7248 7249 if t.RestartPolicy == nil { 7250 t.RestartPolicy = tg.RestartPolicy 7251 } 7252 7253 // Set the default timeout if it is not specified. 7254 if t.KillTimeout == 0 { 7255 t.KillTimeout = DefaultKillTimeout 7256 } 7257 7258 if t.Vault != nil { 7259 t.Vault.Canonicalize() 7260 } 7261 7262 for _, template := range t.Templates { 7263 template.Canonicalize() 7264 } 7265 } 7266 7267 func (t *Task) GoString() string { 7268 return fmt.Sprintf("*%#v", *t) 7269 } 7270 7271 // Validate is used to check a task for reasonable configuration 7272 func (t *Task) Validate(ephemeralDisk *EphemeralDisk, jobType string, tgServices []*Service, tgNetworks Networks) error { 7273 var mErr multierror.Error 7274 if t.Name == "" { 7275 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 7276 } 7277 if strings.ContainsAny(t.Name, `/\`) { 7278 // We enforce this so that when creating the directory on disk it will 7279 // not have any slashes. 
7280 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include slashes")) 7281 } else if strings.Contains(t.Name, "\000") { 7282 mErr.Errors = append(mErr.Errors, errors.New("Task name cannot include null characters")) 7283 } 7284 if t.Driver == "" { 7285 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 7286 } 7287 if t.KillTimeout < 0 { 7288 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 7289 } 7290 if t.ShutdownDelay < 0 { 7291 mErr.Errors = append(mErr.Errors, errors.New("ShutdownDelay must be a positive value")) 7292 } 7293 7294 // Validate the resources. 7295 if t.Resources == nil { 7296 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 7297 } else if err := t.Resources.Validate(); err != nil { 7298 mErr.Errors = append(mErr.Errors, err) 7299 } 7300 7301 // Validate the log config 7302 if t.LogConfig == nil { 7303 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 7304 } else if err := t.LogConfig.Validate(); err != nil { 7305 mErr.Errors = append(mErr.Errors, err) 7306 } 7307 7308 for idx, constr := range t.Constraints { 7309 if err := constr.Validate(); err != nil { 7310 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 7311 mErr.Errors = append(mErr.Errors, outer) 7312 } 7313 7314 switch constr.Operand { 7315 case ConstraintDistinctHosts, ConstraintDistinctProperty: 7316 outer := fmt.Errorf("Constraint %d has disallowed Operand at task level: %s", idx+1, constr.Operand) 7317 mErr.Errors = append(mErr.Errors, outer) 7318 } 7319 } 7320 7321 if jobType == JobTypeSystem { 7322 if t.Affinities != nil { 7323 mErr.Errors = append(mErr.Errors, fmt.Errorf("System jobs may not have an affinity stanza")) 7324 } 7325 } else { 7326 for idx, affinity := range t.Affinities { 7327 if err := affinity.Validate(); err != nil { 7328 outer := fmt.Errorf("Affinity %d validation failed: %s", idx+1, err) 7329 mErr.Errors = append(mErr.Errors, outer) 
7330 } 7331 } 7332 } 7333 7334 // Validate Services 7335 if err := validateServices(t, tgNetworks); err != nil { 7336 mErr.Errors = append(mErr.Errors, err) 7337 } 7338 7339 if t.LogConfig != nil && ephemeralDisk != nil { 7340 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 7341 if ephemeralDisk.SizeMB <= logUsage { 7342 mErr.Errors = append(mErr.Errors, 7343 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 7344 logUsage, ephemeralDisk.SizeMB)) 7345 } 7346 } 7347 7348 for idx, artifact := range t.Artifacts { 7349 if err := artifact.Validate(); err != nil { 7350 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 7351 mErr.Errors = append(mErr.Errors, outer) 7352 } 7353 } 7354 7355 if t.Vault != nil { 7356 if err := t.Vault.Validate(); err != nil { 7357 mErr.Errors = append(mErr.Errors, fmt.Errorf("Vault validation failed: %v", err)) 7358 } 7359 } 7360 7361 destinations := make(map[string]int, len(t.Templates)) 7362 for idx, tmpl := range t.Templates { 7363 if err := tmpl.Validate(); err != nil { 7364 outer := fmt.Errorf("Template %d validation failed: %s", idx+1, err) 7365 mErr.Errors = append(mErr.Errors, outer) 7366 } 7367 7368 if other, ok := destinations[tmpl.DestPath]; ok { 7369 outer := fmt.Errorf("Template %d has same destination as %d", idx+1, other) 7370 mErr.Errors = append(mErr.Errors, outer) 7371 } else { 7372 destinations[tmpl.DestPath] = idx + 1 7373 } 7374 } 7375 7376 // Validate the dispatch payload block if there 7377 if t.DispatchPayload != nil { 7378 if err := t.DispatchPayload.Validate(); err != nil { 7379 mErr.Errors = append(mErr.Errors, fmt.Errorf("Dispatch Payload validation failed: %v", err)) 7380 } 7381 } 7382 7383 // Validate the Lifecycle block if there 7384 if t.Lifecycle != nil { 7385 if err := t.Lifecycle.Validate(); err != nil { 7386 mErr.Errors = append(mErr.Errors, fmt.Errorf("Lifecycle validation failed: %v", err)) 7387 } 7388 7389 } 7390 7391 // Validation for 
TaskKind field which is used for Consul Connect integration 7392 if t.Kind.IsConnectProxy() { 7393 // This task is a Connect proxy so it should not have service stanzas 7394 if len(t.Services) > 0 { 7395 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have a service stanza")) 7396 } 7397 if t.Leader { 7398 mErr.Errors = append(mErr.Errors, fmt.Errorf("Connect proxy task must not have leader set")) 7399 } 7400 7401 // Ensure the proxy task has a corresponding service entry 7402 serviceErr := ValidateConnectProxyService(t.Kind.Value(), tgServices) 7403 if serviceErr != nil { 7404 mErr.Errors = append(mErr.Errors, serviceErr) 7405 } 7406 } 7407 7408 // Validation for volumes 7409 for idx, vm := range t.VolumeMounts { 7410 if !MountPropagationModeIsValid(vm.PropagationMode) { 7411 mErr.Errors = append(mErr.Errors, fmt.Errorf("Volume Mount (%d) has an invalid propagation mode: \"%s\"", idx, vm.PropagationMode)) 7412 } 7413 } 7414 7415 // Validate CSI Plugin Config 7416 if t.CSIPluginConfig != nil { 7417 if t.CSIPluginConfig.ID == "" { 7418 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig must have a non-empty PluginID")) 7419 } 7420 7421 if !CSIPluginTypeIsValid(t.CSIPluginConfig.Type) { 7422 mErr.Errors = append(mErr.Errors, fmt.Errorf("CSIPluginConfig PluginType must be one of 'node', 'controller', or 'monolith', got: \"%s\"", t.CSIPluginConfig.Type)) 7423 } 7424 7425 // TODO: Investigate validation of the PluginMountDir. Not much we can do apart from check IsAbs until after we understand its execution environment though :( 7426 } 7427 7428 return mErr.ErrorOrNil() 7429 } 7430 7431 // validateServices takes a task and validates the services within it are valid 7432 // and reference ports that exist. 7433 func validateServices(t *Task, tgNetworks Networks) error { 7434 var mErr multierror.Error 7435 7436 // Ensure that services don't ask for nonexistent ports and their names are 7437 // unique. 
7438 servicePorts := make(map[string]map[string]struct{}) 7439 addServicePort := func(label, service string) { 7440 if _, ok := servicePorts[label]; !ok { 7441 servicePorts[label] = map[string]struct{}{} 7442 } 7443 servicePorts[label][service] = struct{}{} 7444 } 7445 knownServices := make(map[string]struct{}) 7446 for i, service := range t.Services { 7447 if err := service.Validate(); err != nil { 7448 outer := fmt.Errorf("service[%d] %+q validation failed: %s", i, service.Name, err) 7449 mErr.Errors = append(mErr.Errors, outer) 7450 } 7451 7452 if service.AddressMode == AddressModeAlloc { 7453 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot use address_mode=\"alloc\", only services defined in a \"group\" block can use this mode", service.Name)) 7454 } 7455 7456 // Ensure that services with the same name are not being registered for 7457 // the same port 7458 if _, ok := knownServices[service.Name+service.PortLabel]; ok { 7459 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 7460 } 7461 knownServices[service.Name+service.PortLabel] = struct{}{} 7462 7463 if service.PortLabel != "" { 7464 if service.AddressMode == "driver" { 7465 // Numeric port labels are valid for address_mode=driver 7466 _, err := strconv.Atoi(service.PortLabel) 7467 if err != nil { 7468 // Not a numeric port label, add it to list to check 7469 addServicePort(service.PortLabel, service.Name) 7470 } 7471 } else { 7472 addServicePort(service.PortLabel, service.Name) 7473 } 7474 } 7475 7476 // connect block is only allowed on group level 7477 if service.Connect != nil { 7478 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q cannot have \"connect\" block, only services defined in a \"group\" block can", service.Name)) 7479 } 7480 7481 // Ensure that check names are unique and have valid ports 7482 knownChecks := make(map[string]struct{}) 7483 for _, check := range service.Checks { 7484 if _, ok := knownChecks[check.Name]; ok { 7485 
mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 7486 } 7487 knownChecks[check.Name] = struct{}{} 7488 7489 if check.AddressMode == AddressModeAlloc { 7490 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q cannot use address_mode=\"alloc\", only checks defined in a \"group\" service block can use this mode", service.Name)) 7491 } 7492 7493 if !check.RequiresPort() { 7494 // No need to continue validating check if it doesn't need a port 7495 continue 7496 } 7497 7498 effectivePort := check.PortLabel 7499 if effectivePort == "" { 7500 // Inherits from service 7501 effectivePort = service.PortLabel 7502 } 7503 7504 if effectivePort == "" { 7505 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is missing a port", check.Name)) 7506 continue 7507 } 7508 7509 isNumeric := false 7510 portNumber, err := strconv.Atoi(effectivePort) 7511 if err == nil { 7512 isNumeric = true 7513 } 7514 7515 // Numeric ports are fine for address_mode = "driver" 7516 if check.AddressMode == "driver" && isNumeric { 7517 if portNumber <= 0 { 7518 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q has invalid numeric port %d", check.Name, portNumber)) 7519 } 7520 continue 7521 } 7522 7523 if isNumeric { 7524 mErr.Errors = append(mErr.Errors, fmt.Errorf(`check %q cannot use a numeric port %d without setting address_mode="driver"`, check.Name, portNumber)) 7525 continue 7526 } 7527 7528 // PortLabel must exist, report errors by its parent service 7529 addServicePort(effectivePort, service.Name) 7530 } 7531 } 7532 7533 // Get the set of group port labels. 7534 portLabels := make(map[string]struct{}) 7535 if len(tgNetworks) > 0 { 7536 ports := tgNetworks[0].PortLabels() 7537 for portLabel := range ports { 7538 portLabels[portLabel] = struct{}{} 7539 } 7540 } 7541 7542 // COMPAT(0.13) 7543 // Append the set of task port labels. 
(Note that network resources on the 7544 // task resources are deprecated, but we must let them continue working; a 7545 // warning will be emitted on job submission). 7546 if t.Resources != nil { 7547 for _, network := range t.Resources.Networks { 7548 for portLabel := range network.PortLabels() { 7549 portLabels[portLabel] = struct{}{} 7550 } 7551 } 7552 } 7553 7554 // Iterate over a sorted list of keys to make error listings stable 7555 keys := make([]string, 0, len(servicePorts)) 7556 for p := range servicePorts { 7557 keys = append(keys, p) 7558 } 7559 sort.Strings(keys) 7560 7561 // Ensure all ports referenced in services exist. 7562 for _, servicePort := range keys { 7563 services := servicePorts[servicePort] 7564 _, ok := portLabels[servicePort] 7565 if !ok { 7566 names := make([]string, 0, len(services)) 7567 for name := range services { 7568 names = append(names, name) 7569 } 7570 7571 // Keep order deterministic 7572 sort.Strings(names) 7573 joined := strings.Join(names, ", ") 7574 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 7575 mErr.Errors = append(mErr.Errors, err) 7576 } 7577 } 7578 7579 // Ensure address mode is valid 7580 return mErr.ErrorOrNil() 7581 } 7582 7583 func (t *Task) Warnings() error { 7584 var mErr multierror.Error 7585 7586 // Validate the resources 7587 if t.Resources != nil && t.Resources.IOPS != 0 { 7588 mErr.Errors = append(mErr.Errors, fmt.Errorf("IOPS has been deprecated as of Nomad 0.9.0. Please remove IOPS from resource stanza.")) 7589 } 7590 7591 if t.Resources != nil && len(t.Resources.Networks) != 0 { 7592 mErr.Errors = append(mErr.Errors, fmt.Errorf("task network resources have been deprecated as of Nomad 0.12.0. 
Please configure networking via group network block.")) 7593 } 7594 7595 for idx, tmpl := range t.Templates { 7596 if err := tmpl.Warnings(); err != nil { 7597 err = multierror.Prefix(err, fmt.Sprintf("Template[%d]", idx)) 7598 mErr.Errors = append(mErr.Errors, err) 7599 } 7600 } 7601 7602 return mErr.ErrorOrNil() 7603 } 7604 7605 // TaskKind identifies the special kinds of tasks using the following format: 7606 // '<kind_name>(:<identifier>)`. The TaskKind can optionally include an identifier that 7607 // is opaque to the Task. This identifier can be used to relate the task to some 7608 // other entity based on the kind. 7609 // 7610 // For example, a task may have the TaskKind of `connect-proxy:service` where 7611 // 'connect-proxy' is the kind name and 'service' is the identifier that relates the 7612 // task to the service name of which it is a connect proxy for. 7613 type TaskKind string 7614 7615 func NewTaskKind(name, identifier string) TaskKind { 7616 return TaskKind(fmt.Sprintf("%s:%s", name, identifier)) 7617 } 7618 7619 // Name returns the kind name portion of the TaskKind 7620 func (k TaskKind) Name() string { 7621 return strings.Split(string(k), ":")[0] 7622 } 7623 7624 // Value returns the identifier of the TaskKind or an empty string if it doesn't 7625 // include one. 7626 func (k TaskKind) Value() string { 7627 if s := strings.SplitN(string(k), ":", 2); len(s) > 1 { 7628 return s[1] 7629 } 7630 return "" 7631 } 7632 7633 func (k TaskKind) hasPrefix(prefix string) bool { 7634 return strings.HasPrefix(string(k), prefix+":") && len(k) > len(prefix)+1 7635 } 7636 7637 // IsConnectProxy returns true if the TaskKind is connect-proxy. 7638 func (k TaskKind) IsConnectProxy() bool { 7639 return k.hasPrefix(ConnectProxyPrefix) 7640 } 7641 7642 // IsConnectNative returns true if the TaskKind is connect-native. 
func (k TaskKind) IsConnectNative() bool {
	return k.hasPrefix(ConnectNativePrefix)
}

// IsConnectIngress returns true if the TaskKind is connect-ingress.
func (k TaskKind) IsConnectIngress() bool {
	return k.hasPrefix(ConnectIngressPrefix)
}

// IsConnectTerminating returns true if the TaskKind is connect-terminating.
func (k TaskKind) IsConnectTerminating() bool {
	return k.hasPrefix(ConnectTerminatingPrefix)
}

// IsConnectMesh returns true if the TaskKind is connect-mesh.
func (k TaskKind) IsConnectMesh() bool {
	return k.hasPrefix(ConnectMeshPrefix)
}

// IsAnyConnectGateway returns true if the TaskKind represents any one of the
// supported connect gateway types.
func (k TaskKind) IsAnyConnectGateway() bool {
	switch {
	case k.IsConnectIngress():
		return true
	case k.IsConnectTerminating():
		return true
	case k.IsConnectMesh():
		return true
	default:
		return false
	}
}

const (
	// ConnectProxyPrefix is the prefix used for fields referencing a Consul
	// Connect Proxy.
	ConnectProxyPrefix = "connect-proxy"

	// ConnectNativePrefix is the prefix used for fields referencing a Connect
	// Native Task.
	ConnectNativePrefix = "connect-native"

	// ConnectIngressPrefix is the prefix used for fields referencing a Consul
	// Connect Ingress Gateway Proxy.
	ConnectIngressPrefix = "connect-ingress"

	// ConnectTerminatingPrefix is the prefix used for fields referencing a Consul
	// Connect Terminating Gateway Proxy.
	ConnectTerminatingPrefix = "connect-terminating"

	// ConnectMeshPrefix is the prefix used for fields referencing a Consul Connect
	// Mesh Gateway Proxy.
	ConnectMeshPrefix = "connect-mesh"
)

// ValidateConnectProxyService checks that the service that is being
// proxied by this task exists in the task group and contains
// valid Connect config.
func ValidateConnectProxyService(serviceName string, tgServices []*Service) error {
	found := false
	names := make([]string, 0, len(tgServices))
	for _, svc := range tgServices {
		// Only services that define a Connect sidecar are candidates.
		if svc.Connect == nil || svc.Connect.SidecarService == nil {
			continue
		}

		if svc.Name == serviceName {
			found = true
			break
		}

		// Build up list of mismatched Connect service names for error
		// reporting.
		names = append(names, svc.Name)
	}

	if !found {
		if len(names) == 0 {
			return fmt.Errorf("No Connect services in task group with Connect proxy (%q)", serviceName)
		} else {
			return fmt.Errorf("Connect proxy service name (%q) not found in Connect services from task group: %s", serviceName, names)
		}
	}

	return nil
}

const (
	// TemplateChangeModeNoop marks that no action should be taken if the
	// template is re-rendered
	TemplateChangeModeNoop = "noop"

	// TemplateChangeModeSignal marks that the task should be signaled if the
	// template is re-rendered
	TemplateChangeModeSignal = "signal"

	// TemplateChangeModeRestart marks that the task should be restarted if the
	// template is re-rendered
	TemplateChangeModeRestart = "restart"

	// TemplateChangeModeScript marks that the task should trigger a script if
	// the template is re-rendered
	TemplateChangeModeScript = "script"
)

var (
	// TemplateChangeModeInvalidError is the error for when an invalid change
	// mode is given
	TemplateChangeModeInvalidError = errors.New("Invalid change mode. Must be one of the following: noop, signal, script, restart")
)

// Template represents a template configuration to be rendered for a given task
type Template struct {
	// SourcePath is the path to the template to be rendered
	SourcePath string

	// DestPath is the path to where the template should be rendered
	DestPath string

	// EmbeddedTmpl store the raw template. This is useful for smaller templates
	// where they are embedded in the job file rather than sent as an artifact
	EmbeddedTmpl string

	// ChangeMode indicates what should be done if the template is re-rendered
	ChangeMode string

	// ChangeSignal is the signal that should be sent if the change mode
	// requires it.
	ChangeSignal string

	// ChangeScript is the configuration of the script. It's required if
	// ChangeMode is set to script.
	ChangeScript *ChangeScript

	// Splay is used to avoid coordinated restarts of processes by applying a
	// random wait between 0 and the given splay value before signalling the
	// application of a change
	Splay time.Duration

	// Perms is the permission the file should be written out with.
	Perms string
	// User and group that should own the file.
	Uid *int
	Gid *int

	// LeftDelim and RightDelim are optional configurations to control what
	// delimiter is utilized when parsing the template.
	LeftDelim  string
	RightDelim string

	// Envvars enables exposing the template as environment variables
	// instead of as a file. The template must be of the form:
	//
	//	VAR_NAME_1={{ key service/my-key }}
	//	VAR_NAME_2=raw string and {{ env "attr.kernel.name" }}
	//
	// Lines will be split on the initial "=" with the first part being the
	// key name and the second part the value.
	// Empty lines and lines starting with # will be ignored, but to avoid
	// escaping issues #s within lines will not be treated as comments.
	Envvars bool

	// VaultGrace is the grace duration between lease renewal and reacquiring a
	// secret. If the lease of a secret is less than the grace, a new secret is
	// acquired.
	// COMPAT(0.12) VaultGrace has been ignored by Vault since Vault v0.5.
	VaultGrace time.Duration

	// WaitConfig is used to override the global WaitConfig on a per-template basis
	Wait *WaitConfig

	// ErrMissingKey is used to control how the template behaves when attempting
	// to index a struct or map key that does not exist.
	ErrMissingKey bool
}

// DefaultTemplate returns a default template.
func DefaultTemplate() *Template {
	return &Template{
		ChangeMode: TemplateChangeModeRestart,
		Splay:      5 * time.Second,
		Perms:      "0644",
	}
}

// Copy returns a deep copy of the template, or nil for a nil receiver.
func (t *Template) Copy() *Template {
	if t == nil {
		return nil
	}
	nt := new(Template)
	*nt = *t

	// Pointer fields need their own copies; both Copy helpers are nil-safe.
	nt.ChangeScript = t.ChangeScript.Copy()
	nt.Wait = t.Wait.Copy()

	return nt
}

// Canonicalize normalizes the template; signal names are upper-cased so that
// user input like "sighup" matches the canonical "SIGHUP" form.
func (t *Template) Canonicalize() {
	if t.ChangeSignal != "" {
		t.ChangeSignal = strings.ToUpper(t.ChangeSignal)
	}
}

// Validate checks the template configuration and returns an aggregate of all
// validation errors, or nil if the template is valid.
func (t *Template) Validate() error {
	var mErr multierror.Error

	// Verify we have something to render
	if t.SourcePath == "" && t.EmbeddedTmpl == "" {
		_ = multierror.Append(&mErr, fmt.Errorf("Must specify a source path or have an embedded template"))
	}

	// Verify we can render somewhere
	if t.DestPath == "" {
		_ = multierror.Append(&mErr, fmt.Errorf("Must specify a destination for the template"))
	}

	// Verify the destination doesn't escape
	escaped, err := escapingfs.PathEscapesAllocViaRelative("task", t.DestPath)
	if err != nil {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err))
	} else if escaped {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory"))
	}

	// Verify a proper change mode
	switch t.ChangeMode {
	case TemplateChangeModeNoop, TemplateChangeModeRestart:
	case TemplateChangeModeSignal:
		if t.ChangeSignal == "" {
			_ = multierror.Append(&mErr, fmt.Errorf("Must specify signal value when change mode is signal"))
		}
		if t.Envvars {
			_ = multierror.Append(&mErr, fmt.Errorf("cannot use signals with env var templates"))
		}
	case TemplateChangeModeScript:
		if t.ChangeScript == nil {
			_ = multierror.Append(&mErr, fmt.Errorf("must specify change script configuration value when change mode is script"))
		}

		// ChangeScript.Validate is nil-safe, so this is fine even when the
		// missing-config error above was just recorded.
		if err = t.ChangeScript.Validate(); err != nil {
			_ = multierror.Append(&mErr, err)
		}
	default:
		_ = multierror.Append(&mErr, TemplateChangeModeInvalidError)
	}

	// Verify the splay is positive
	if t.Splay < 0 {
		_ = multierror.Append(&mErr, fmt.Errorf("Must specify positive splay value"))
	}

	// Verify the permissions parse as an octal file mode (up to 12 bits to
	// allow setuid/setgid/sticky bits).
	if t.Perms != "" {
		if _, err := strconv.ParseUint(t.Perms, 8, 12); err != nil {
			_ = multierror.Append(&mErr, fmt.Errorf("Failed to parse %q as octal: %v", t.Perms, err))
		}
	}

	if err = t.Wait.Validate(); err != nil {
		_ = multierror.Append(&mErr, err)
	}

	return mErr.ErrorOrNil()
}

// Warnings returns deprecation warnings for the template (and its nested
// templates' warnings) as a multierror, or nil if there are none.
func (t *Template) Warnings() error {
	var mErr multierror.Error

	// Deprecation notice for vault_grace
	if t.VaultGrace != 0 {
		mErr.Errors = append(mErr.Errors, fmt.Errorf("VaultGrace has been deprecated as of Nomad 0.11 and ignored since Vault 0.5. Please remove VaultGrace / vault_grace from template stanza."))
	}

	return mErr.ErrorOrNil()
}

// DiffID fulfills the DiffableWithID interface.
7922 func (t *Template) DiffID() string { 7923 return t.DestPath 7924 } 7925 7926 // ChangeScript holds the configuration for the script that is executed if 7927 // change mode is set to script 7928 type ChangeScript struct { 7929 // Command is the full path to the script 7930 Command string 7931 // Args is a slice of arguments passed to the script 7932 Args []string 7933 // Timeout is the amount of seconds we wait for the script to finish 7934 Timeout time.Duration 7935 // FailOnError indicates whether a task should fail in case script execution 7936 // fails or log script failure and don't interrupt the task 7937 FailOnError bool 7938 } 7939 7940 func (cs *ChangeScript) Copy() *ChangeScript { 7941 if cs == nil { 7942 return nil 7943 } 7944 7945 ncs := new(ChangeScript) 7946 *ncs = *cs 7947 7948 // args is a slice! 7949 ncs.Args = slices.Clone(cs.Args) 7950 7951 return ncs 7952 } 7953 7954 // Validate makes sure all the required fields of ChangeScript are present 7955 func (cs *ChangeScript) Validate() error { 7956 if cs == nil { 7957 return nil 7958 } 7959 7960 if cs.Command == "" { 7961 return fmt.Errorf("must specify script path value when change mode is script") 7962 } 7963 7964 return nil 7965 } 7966 7967 // WaitConfig is the Min/Max duration used by the Consul Template Watcher. Consul 7968 // Template relies on pointer based business logic. This struct uses pointers so 7969 // that we tell the different between zero values and unset values. 7970 type WaitConfig struct { 7971 Min *time.Duration 7972 Max *time.Duration 7973 } 7974 7975 // Copy returns a deep copy of this configuration. 
7976 func (wc *WaitConfig) Copy() *WaitConfig { 7977 if wc == nil { 7978 return nil 7979 } 7980 7981 nwc := new(WaitConfig) 7982 7983 if wc.Min != nil { 7984 nwc.Min = wc.Min 7985 } 7986 7987 if wc.Max != nil { 7988 nwc.Max = wc.Max 7989 } 7990 7991 return nwc 7992 } 7993 7994 func (wc *WaitConfig) Equal(o *WaitConfig) bool { 7995 if wc.Min == nil && o.Min != nil { 7996 return false 7997 } 7998 7999 if wc.Max == nil && o.Max != nil { 8000 return false 8001 } 8002 8003 if wc.Min != nil && (o.Min == nil || *wc.Min != *o.Min) { 8004 return false 8005 } 8006 8007 if wc.Max != nil && (o.Max == nil || *wc.Max != *o.Max) { 8008 return false 8009 } 8010 8011 return true 8012 } 8013 8014 // Validate that the min is not greater than the max 8015 func (wc *WaitConfig) Validate() error { 8016 if wc == nil { 8017 return nil 8018 } 8019 8020 // If either one is nil, they aren't comparable, so they can't be invalid. 8021 if wc.Min == nil || wc.Max == nil { 8022 return nil 8023 } 8024 8025 if *wc.Min > *wc.Max { 8026 return fmt.Errorf("wait min %s is greater than max %s", wc.Min, wc.Max) 8027 } 8028 8029 return nil 8030 } 8031 8032 // AllocStateField records a single event that changes the state of the whole allocation 8033 type AllocStateField uint8 8034 8035 const ( 8036 AllocStateFieldClientStatus AllocStateField = iota 8037 ) 8038 8039 type AllocState struct { 8040 Field AllocStateField 8041 Value string 8042 Time time.Time 8043 } 8044 8045 // TaskHandle is optional handle to a task propogated to the servers for use 8046 // by remote tasks. Since remote tasks are not implicitly lost when the node 8047 // they are assigned to is down, their state is migrated to the replacement 8048 // allocation. 8049 // 8050 // Minimal set of fields from plugins/drivers/task_handle.go:TaskHandle 8051 type TaskHandle struct { 8052 // Version of driver state. Used by the driver to gracefully handle 8053 // plugin upgrades. 
8054 Version int 8055 8056 // Driver-specific state containing a handle to the remote task. 8057 DriverState []byte 8058 } 8059 8060 func (h *TaskHandle) Copy() *TaskHandle { 8061 if h == nil { 8062 return nil 8063 } 8064 8065 newTH := TaskHandle{ 8066 Version: h.Version, 8067 DriverState: make([]byte, len(h.DriverState)), 8068 } 8069 copy(newTH.DriverState, h.DriverState) 8070 return &newTH 8071 } 8072 8073 // Set of possible states for a task. 8074 const ( 8075 TaskStatePending = "pending" // The task is waiting to be run. 8076 TaskStateRunning = "running" // The task is currently running. 8077 TaskStateDead = "dead" // Terminal state of task. 8078 ) 8079 8080 // TaskState tracks the current state of a task and events that caused state 8081 // transitions. 8082 type TaskState struct { 8083 // The current state of the task. 8084 State string 8085 8086 // Failed marks a task as having failed 8087 Failed bool 8088 8089 // Restarts is the number of times the task has restarted 8090 Restarts uint64 8091 8092 // LastRestart is the time the task last restarted. It is updated each time the 8093 // task restarts 8094 LastRestart time.Time 8095 8096 // StartedAt is the time the task is started. It is updated each time the 8097 // task starts 8098 StartedAt time.Time 8099 8100 // FinishedAt is the time at which the task transitioned to dead and will 8101 // not be started again. 8102 FinishedAt time.Time 8103 8104 // Series of task events that transition the state of the task. 8105 Events []*TaskEvent 8106 8107 // Experimental - TaskHandle is based on drivers.TaskHandle and used 8108 // by remote task drivers to migrate task handles between allocations. 8109 TaskHandle *TaskHandle 8110 } 8111 8112 // NewTaskState returns a TaskState initialized in the Pending state. 8113 func NewTaskState() *TaskState { 8114 return &TaskState{ 8115 State: TaskStatePending, 8116 } 8117 } 8118 8119 // Canonicalize ensures the TaskState has a State set. It should default to 8120 // Pending. 
func (ts *TaskState) Canonicalize() {
	if ts.State == "" {
		ts.State = TaskStatePending
	}
}

// Copy returns a deep copy of the TaskState (events and handle included), or
// nil for a nil receiver.
func (ts *TaskState) Copy() *TaskState {
	if ts == nil {
		return nil
	}
	newTS := new(TaskState)
	*newTS = *ts

	if ts.Events != nil {
		newTS.Events = make([]*TaskEvent, len(ts.Events))
		for i, e := range ts.Events {
			newTS.Events[i] = e.Copy()
		}
	}

	newTS.TaskHandle = ts.TaskHandle.Copy()
	return newTS
}

// Successful returns whether a task finished successfully. Only meaningful
// for batch allocations or ephemeral (non-sidecar) lifecycle tasks part of a
// service or system allocation.
func (ts *TaskState) Successful() bool {
	return ts.State == TaskStateDead && !ts.Failed
}

const (
	// TaskSetupFailure indicates that the task could not be started due to a
	// setup failure.
	TaskSetupFailure = "Setup Failure"

	// TaskDriverFailure indicates that the task could not be started due to a
	// failure in the driver. TaskDriverFailure is considered Recoverable.
	TaskDriverFailure = "Driver Failure"

	// TaskReceived signals that the task has been pulled by the client at the
	// given timestamp.
	TaskReceived = "Received"

	// TaskFailedValidation indicates the task was invalid and as such was not run.
	// TaskFailedValidation is not considered Recoverable.
	TaskFailedValidation = "Failed Validation"

	// TaskStarted signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// TaskTerminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// TaskKilling indicates a kill signal has been sent to the task.
	TaskKilling = "Killing"

	// TaskKilled indicates a user has killed the task.
	TaskKilled = "Killed"

	// TaskRestarting indicates that task terminated and is being restarted.
	TaskRestarting = "Restarting"

	// TaskNotRestarting indicates that the task has failed and is not being
	// restarted because it has exceeded its restart policy.
	TaskNotRestarting = "Not Restarting"

	// TaskRestartSignal indicates that the task has been signaled to be
	// restarted
	TaskRestartSignal = "Restart Signaled"

	// TaskSignaling indicates that the task is being signalled.
	TaskSignaling = "Signaling"

	// TaskDownloadingArtifacts means the task is downloading the artifacts
	// specified in the task.
	TaskDownloadingArtifacts = "Downloading Artifacts"

	// TaskArtifactDownloadFailed indicates that downloading the artifacts
	// failed.
	TaskArtifactDownloadFailed = "Failed Artifact Download"

	// TaskBuildingTaskDir indicates that the task directory/chroot is being
	// built.
	TaskBuildingTaskDir = "Building Task Directory"

	// TaskSetup indicates the task runner is setting up the task environment
	TaskSetup = "Task Setup"

	// TaskDiskExceeded indicates that one of the tasks in a taskgroup has
	// exceeded the requested disk resources.
	TaskDiskExceeded = "Disk Resources Exceeded"

	// TaskSiblingFailed indicates that a sibling task in the task group has
	// failed.
	TaskSiblingFailed = "Sibling Task Failed"

	// TaskDriverMessage is an informational event message emitted by
	// drivers such as when they're performing a long running action like
	// downloading an image.
	TaskDriverMessage = "Driver"

	// TaskLeaderDead indicates that the leader task within the task group has
	// finished.
	TaskLeaderDead = "Leader Task Dead"

	// TaskMainDead indicates that the main tasks have died.
	TaskMainDead = "Main Tasks Dead"

	// TaskHookFailed indicates that one of the hooks for a task failed.
	TaskHookFailed = "Task hook failed"

	// TaskHookMessage indicates that one of the hooks for a task emitted a
	// message.
	TaskHookMessage = "Task hook message"

	// TaskRestoreFailed indicates Nomad was unable to reattach to a
	// restored task.
	TaskRestoreFailed = "Failed Restoring Task"

	// TaskPluginUnhealthy indicates that a plugin managed by Nomad became unhealthy
	TaskPluginUnhealthy = "Plugin became unhealthy"

	// TaskPluginHealthy indicates that a plugin managed by Nomad became healthy
	TaskPluginHealthy = "Plugin became healthy"

	// TaskClientReconnected indicates that the client running the task
	// reconnected.
	TaskClientReconnected = "Reconnected"

	// TaskWaitingShuttingDownDelay indicates that the task is waiting for
	// shutdown delay before being TaskKilled
	TaskWaitingShuttingDownDelay = "Waiting for shutdown delay"
)

// TaskEvent is an event that affects the state of a task and contains meta-data
// appropriate to the events type.
type TaskEvent struct {
	Type string
	Time int64 // Unix Nanosecond timestamp

	Message string // A possible message explaining the termination of the task.

	// DisplayMessage is a human friendly message about the event
	DisplayMessage string

	// Details is a map with annotated info about the event
	Details map[string]string

	// DEPRECATION NOTICE: The following fields are deprecated and will be removed
	// in a future release. Field values are available in the Details map.

	// FailsTask marks whether this event fails the task.
	// Deprecated, use Details["fails_task"] to access this.
	FailsTask bool

	// Restart fields.
	// Deprecated, use Details["restart_reason"] to access this.
	RestartReason string

	// Setup Failure fields.
	// Deprecated, use Details["setup_error"] to access this.
	SetupError string

	// Driver Failure fields.
	// Deprecated, use Details["driver_error"] to access this.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.

	// Deprecated, use Details["exit_code"] to access this.
	ExitCode int // The exit code of the task.

	// Deprecated, use Details["signal"] to access this.
	Signal int // The signal that terminated the task.

	// Killing fields
	// Deprecated, use Details["kill_timeout"] to access this.
	KillTimeout time.Duration

	// Task Killed Fields.
	// Deprecated, use Details["kill_error"] to access this.
	KillError string // Error killing the task.

	// KillReason is the reason the task was killed
	// Deprecated, use Details["kill_reason"] to access this.
	KillReason string

	// TaskRestarting fields.
	// Deprecated, use Details["start_delay"] to access this.
	StartDelay int64 // The sleep period before restarting the task in unix nanoseconds.

	// Artifact Download fields
	// Deprecated, use Details["download_error"] to access this.
	DownloadError string // Error downloading artifacts

	// Validation fields
	// Deprecated, use Details["validation_error"] to access this.
	ValidationError string // Validation error

	// The maximum allowed task disk size.
	// Deprecated, use Details["disk_limit"] to access this.
	DiskLimit int64

	// Name of the sibling task that caused termination of the task that
	// the TaskEvent refers to.
	// Deprecated, use Details["failed_sibling"] to access this.
	FailedSibling string

	// VaultError is the error from token renewal
	// Deprecated, use Details["vault_renewal_error"] to access this.
	VaultError string

	// TaskSignalReason indicates the reason the task is being signalled.
	// Deprecated, use Details["task_signal_reason"] to access this.
	TaskSignalReason string

	// TaskSignal is the signal that was sent to the task
	// Deprecated, use Details["task_signal"] to access this.
	TaskSignal string

	// DriverMessage indicates a driver action being taken.
	// Deprecated, use Details["driver_message"] to access this.
	DriverMessage string

	// GenericSource is the source of a message.
	// Deprecated, is redundant with event type.
	GenericSource string
}

// PopulateEventDisplayMessage sets DisplayMessage to a human-friendly
// description derived from the event type and its (deprecated) detail
// fields. It is a no-op if DisplayMessage is already set.
func (e *TaskEvent) PopulateEventDisplayMessage() {
	// Build up the description based on the event type.
	if e == nil { //TODO(preetha) needs investigation alloc_runner's Run method sends a nil event when sigterming nomad. Why?
		return
	}

	if e.DisplayMessage != "" {
		return
	}

	var desc string
	switch e.Type {
	case TaskSetup:
		desc = e.Message
	case TaskStarted:
		desc = "Task started by client"
	case TaskReceived:
		desc = "Task received by client"
	case TaskFailedValidation:
		if e.ValidationError != "" {
			desc = e.ValidationError
		} else {
			desc = "Validation of task failed"
		}
	case TaskSetupFailure:
		if e.SetupError != "" {
			desc = e.SetupError
		} else {
			desc = "Task setup failed"
		}
	case TaskDriverFailure:
		if e.DriverError != "" {
			desc = e.DriverError
		} else {
			desc = "Failed to start task"
		}
	case TaskDownloadingArtifacts:
		desc = "Client is downloading artifacts"
	case TaskArtifactDownloadFailed:
		if e.DownloadError != "" {
			desc = e.DownloadError
		} else {
			desc = "Failed to download artifacts"
		}
	case TaskKilling:
		if e.KillReason != "" {
			desc = e.KillReason
		} else if e.KillTimeout != 0 {
			desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", e.KillTimeout)
		} else {
			desc = "Sent interrupt"
		}
	case TaskKilled:
		if e.KillError != "" {
			desc = e.KillError
		} else {
			desc = "Task successfully killed"
		}
	case TaskTerminated:
		var parts []string
		parts = append(parts, fmt.Sprintf("Exit Code: %d", e.ExitCode))

		if e.Signal != 0 {
			parts = append(parts, fmt.Sprintf("Signal: %d", e.Signal))
		}

		if e.Message != "" {
			parts = append(parts, fmt.Sprintf("Exit Message: %q", e.Message))
		}
		desc = strings.Join(parts, ", ")
	case TaskRestarting:
		in := fmt.Sprintf("Task restarting in %v", time.Duration(e.StartDelay))
		// Only surface the reason when it is something other than the
		// ordinary within-policy restart.
		if e.RestartReason != "" && e.RestartReason != ReasonWithinPolicy {
			desc = fmt.Sprintf("%s - %s", e.RestartReason, in)
		} else {
			desc = in
		}
	case TaskNotRestarting:
		if e.RestartReason != "" {
			desc = e.RestartReason
		} else {
			desc = "Task exceeded restart policy"
		}
	case TaskSiblingFailed:
		if e.FailedSibling != "" {
			desc = fmt.Sprintf("Task's sibling %q failed", e.FailedSibling)
		} else {
			desc = "Task's sibling failed"
		}
	case TaskSignaling:
		sig := e.TaskSignal
		reason := e.TaskSignalReason

		if sig == "" && reason == "" {
			desc = "Task being sent a signal"
		} else if sig == "" {
			desc = reason
		} else if reason == "" {
			desc = fmt.Sprintf("Task being sent signal %v", sig)
		} else {
			desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason)
		}
	case TaskRestartSignal:
		if e.RestartReason != "" {
			desc = e.RestartReason
		} else {
			desc = "Task signaled to restart"
		}
	case TaskDriverMessage:
		desc = e.DriverMessage
	case TaskLeaderDead:
		desc = "Leader Task in Group dead"
	case TaskMainDead:
		desc = "Main tasks in the group died"
	case TaskClientReconnected:
		desc = "Client reconnected"
	default:
		desc = e.Message
	}

	e.DisplayMessage = desc
}

// GoString returns a short time/type description of the event; empty for a
// nil receiver.
func (e *TaskEvent) GoString() string {
	if e == nil {
		return ""
	}
	return fmt.Sprintf("%v - %v", e.Time, e.Type)
}

// SetDisplayMessage sets the display message of TaskEvent
func (e *TaskEvent) SetDisplayMessage(msg string) *TaskEvent {
	e.DisplayMessage = msg
	return e
}

// SetMessage sets the message of TaskEvent
func (e *TaskEvent) SetMessage(msg string) *TaskEvent {
	e.Message = msg
	e.Details["message"] = msg
	return e
}

// Copy returns a shallow copy of the event.
// NOTE(review): the Details map is shared between original and copy —
// presumably events are treated as immutable after construction; confirm
// before mutating Details on a copied event.
func (e *TaskEvent) Copy() *TaskEvent {
	if e == nil {
		return nil
	}
	copy := new(TaskEvent)
	*copy = *e
	return copy
}

// NewTaskEvent returns a new event of the given type, stamped with the
// current time and an empty (non-nil) Details map. The setters below rely on
// Details being non-nil.
func NewTaskEvent(event string) *TaskEvent {
	return &TaskEvent{
		Type:    event,
		Time:    time.Now().UnixNano(),
		Details: make(map[string]string),
	}
}

// SetSetupError is used to store an error that occurred while setting up the
// task
func (e *TaskEvent) SetSetupError(err error) *TaskEvent {
	if err != nil {
		e.SetupError = err.Error()
		e.Details["setup_error"] = err.Error()
	}
	return e
}

// SetFailsTask marks the event as one that fails the whole task.
func (e *TaskEvent) SetFailsTask() *TaskEvent {
	e.FailsTask = true
	e.Details["fails_task"] = "true"
	return e
}

// SetDriverError records a driver error on the event; no-op for a nil error.
func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
	if err != nil {
		e.DriverError = err.Error()
		e.Details["driver_error"] = err.Error()
	}
	return e
}

// SetExitCode records the task's exit code.
func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
	e.ExitCode = c
	e.Details["exit_code"] = fmt.Sprintf("%d", c)
	return e
}

// SetSignal records the numeric signal that terminated the task.
func (e *TaskEvent) SetSignal(s int) *TaskEvent {
	e.Signal = s
	e.Details["signal"] = fmt.Sprintf("%d", s)
	return e
}

// SetSignalText records the signal by name (Details only; the deprecated
// numeric Signal field is not set).
func (e *TaskEvent) SetSignalText(s string) *TaskEvent {
	e.Details["signal"] = s
	return e
}

// SetExitMessage records the exit error's text as the event message; no-op
// for a nil error.
func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
	if err != nil {
		e.Message = err.Error()
		e.Details["exit_message"] = err.Error()
	}
	return e
}

// SetKillError records an error that occurred while killing the task; no-op
// for a nil error.
func (e *TaskEvent) SetKillError(err error) *TaskEvent {
	if err != nil {
		e.KillError = err.Error()
		e.Details["kill_error"] = err.Error()
	}
	return e
}

// SetKillReason records why the task was killed.
func (e *TaskEvent) SetKillReason(r string) *TaskEvent {
	e.KillReason = r
	e.Details["kill_reason"] = r
	return e
}

// SetRestartDelay records the delay before the task restarts, stored in
// nanoseconds.
func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent {
	e.StartDelay = int64(delay)
	e.Details["start_delay"] = fmt.Sprintf("%d", delay)
	return e
}

// SetRestartReason records why the task is being restarted.
func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent {
	e.RestartReason = reason
	e.Details["restart_reason"] = reason
	return e
}

// SetTaskSignalReason records why the task is being signalled.
func (e *TaskEvent) SetTaskSignalReason(r string) *TaskEvent {
	e.TaskSignalReason = r
	e.Details["task_signal_reason"] = r
	return e
}

// SetTaskSignal records the signal (by its String form) sent to the task.
func (e *TaskEvent) SetTaskSignal(s os.Signal) *TaskEvent {
	e.TaskSignal = s.String()
	e.Details["task_signal"] = s.String()
	return e
}

// SetDownloadError records an artifact download error; no-op for a nil error.
func (e *TaskEvent) SetDownloadError(err error) *TaskEvent {
	if err != nil {
		e.DownloadError = err.Error()
		e.Details["download_error"] = err.Error()
	}
	return e
}

// SetValidationError records a validation error; no-op for a nil error.
func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
	if err != nil {
		e.ValidationError = err.Error()
		e.Details["validation_error"] = err.Error()
	}
	return e
}

// SetKillTimeout records the effective kill timeout, clamped to maxTimeout.
func (e *TaskEvent) SetKillTimeout(timeout, maxTimeout time.Duration) *TaskEvent {
	actual := helper.Min(timeout, maxTimeout)
	e.KillTimeout = actual
	e.Details["kill_timeout"] = actual.String()
	return e
}

// SetDiskLimit records the maximum allowed task disk size.
func (e *TaskEvent) SetDiskLimit(limit int64) *TaskEvent {
	e.DiskLimit = limit
	e.Details["disk_limit"] = fmt.Sprintf("%d", limit)
	return e
}

// SetFailedSibling records the name of the sibling task whose failure caused
// this event.
func (e *TaskEvent) SetFailedSibling(sibling string) *TaskEvent {
	e.FailedSibling = sibling
	e.Details["failed_sibling"] = sibling
	return e
}

// SetVaultRenewalError records a Vault token renewal error; no-op for a nil
// error.
func (e *TaskEvent) SetVaultRenewalError(err error) *TaskEvent {
	if err != nil {
		e.VaultError = err.Error()
		e.Details["vault_renewal_error"] = err.Error()
	}
	return e
}

// SetDriverMessage records an informational message from the driver.
func (e *TaskEvent) SetDriverMessage(m string) *TaskEvent {
	e.DriverMessage = m
	e.Details["driver_message"] = m
	return e
}

// SetOOMKilled records (in Details only) whether the task was OOM-killed.
func (e *TaskEvent) SetOOMKilled(oom bool) *TaskEvent {
	e.Details["oom_killed"] = strconv.FormatBool(oom)
	return e
}

// TaskArtifact is an artifact to download before running the task.
type TaskArtifact struct {
	// GetterSource is the source to download an artifact using go-getter
	GetterSource string

	// GetterOptions are options to use when downloading the artifact using
	// go-getter.
	GetterOptions map[string]string

	// GetterHeaders are headers to use when downloading the artifact using
	// go-getter.
	GetterHeaders map[string]string

	// GetterMode is the go-getter.ClientMode for fetching resources.
	// Defaults to "any" but can be set to "file" or "dir".
	GetterMode string

	// RelativeDest is the download destination given relative to the task's
	// directory.
	RelativeDest string
}

// Copy returns a deep copy of the artifact (maps cloned), or nil for a nil
// receiver.
func (ta *TaskArtifact) Copy() *TaskArtifact {
	if ta == nil {
		return nil
	}
	return &TaskArtifact{
		GetterSource:  ta.GetterSource,
		GetterOptions: maps.Clone(ta.GetterOptions),
		GetterHeaders: maps.Clone(ta.GetterHeaders),
		GetterMode:    ta.GetterMode,
		RelativeDest:  ta.RelativeDest,
	}
}

// GoString returns a verbose field dump of the artifact.
func (ta *TaskArtifact) GoString() string {
	return fmt.Sprintf("%+v", ta)
}

// DiffID fulfills the DiffableWithID interface.
8691 func (ta *TaskArtifact) DiffID() string { 8692 return ta.RelativeDest 8693 } 8694 8695 // hashStringMap appends a deterministic hash of m onto h. 8696 func hashStringMap(h hash.Hash, m map[string]string) { 8697 keys := make([]string, 0, len(m)) 8698 for k := range m { 8699 keys = append(keys, k) 8700 } 8701 sort.Strings(keys) 8702 for _, k := range keys { 8703 _, _ = h.Write([]byte(k)) 8704 _, _ = h.Write([]byte(m[k])) 8705 } 8706 } 8707 8708 // Hash creates a unique identifier for a TaskArtifact as the same GetterSource 8709 // may be specified multiple times with different destinations. 8710 func (ta *TaskArtifact) Hash() string { 8711 h, err := blake2b.New256(nil) 8712 if err != nil { 8713 panic(err) 8714 } 8715 8716 _, _ = h.Write([]byte(ta.GetterSource)) 8717 8718 hashStringMap(h, ta.GetterOptions) 8719 hashStringMap(h, ta.GetterHeaders) 8720 8721 _, _ = h.Write([]byte(ta.GetterMode)) 8722 _, _ = h.Write([]byte(ta.RelativeDest)) 8723 return base64.RawStdEncoding.EncodeToString(h.Sum(nil)) 8724 } 8725 8726 func (ta *TaskArtifact) Validate() error { 8727 // Verify the source 8728 var mErr multierror.Error 8729 if ta.GetterSource == "" { 8730 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 8731 } 8732 8733 switch ta.GetterMode { 8734 case "": 8735 // Default to any 8736 ta.GetterMode = GetterModeAny 8737 case GetterModeAny, GetterModeFile, GetterModeDir: 8738 // Ok 8739 default: 8740 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid artifact mode %q; must be one of: %s, %s, %s", 8741 ta.GetterMode, GetterModeAny, GetterModeFile, GetterModeDir)) 8742 } 8743 8744 escaped, err := escapingfs.PathEscapesAllocViaRelative("task", ta.RelativeDest) 8745 if err != nil { 8746 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid destination path: %v", err)) 8747 } else if escaped { 8748 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes allocation directory")) 8749 } 8750 8751 if err := ta.validateChecksum(); err != nil { 
8752 mErr.Errors = append(mErr.Errors, err) 8753 } 8754 8755 return mErr.ErrorOrNil() 8756 } 8757 8758 func (ta *TaskArtifact) validateChecksum() error { 8759 check, ok := ta.GetterOptions["checksum"] 8760 if !ok { 8761 return nil 8762 } 8763 8764 // Job struct validation occurs before interpolation resolution can be effective. 8765 // Skip checking if checksum contain variable reference, and artifacts fetching will 8766 // eventually fail, if checksum is indeed invalid. 8767 if args.ContainsEnv(check) { 8768 return nil 8769 } 8770 8771 check = strings.TrimSpace(check) 8772 if check == "" { 8773 return fmt.Errorf("checksum value cannot be empty") 8774 } 8775 8776 parts := strings.Split(check, ":") 8777 if l := len(parts); l != 2 { 8778 return fmt.Errorf(`checksum must be given as "type:value"; got %q`, check) 8779 } 8780 8781 checksumVal := parts[1] 8782 checksumBytes, err := hex.DecodeString(checksumVal) 8783 if err != nil { 8784 return fmt.Errorf("invalid checksum: %v", err) 8785 } 8786 8787 checksumType := parts[0] 8788 expectedLength := 0 8789 switch checksumType { 8790 case "md5": 8791 expectedLength = md5.Size 8792 case "sha1": 8793 expectedLength = sha1.Size 8794 case "sha256": 8795 expectedLength = sha256.Size 8796 case "sha512": 8797 expectedLength = sha512.Size 8798 default: 8799 return fmt.Errorf("unsupported checksum type: %s", checksumType) 8800 } 8801 8802 if len(checksumBytes) != expectedLength { 8803 return fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal) 8804 } 8805 8806 return nil 8807 } 8808 8809 const ( 8810 ConstraintDistinctProperty = "distinct_property" 8811 ConstraintDistinctHosts = "distinct_hosts" 8812 ConstraintRegex = "regexp" 8813 ConstraintVersion = "version" 8814 ConstraintSemver = "semver" 8815 ConstraintSetContains = "set_contains" 8816 ConstraintSetContainsAll = "set_contains_all" 8817 ConstraintSetContainsAny = "set_contains_any" 8818 ConstraintAttributeIsSet = "is_set" 8819 ConstraintAttributeIsNotSet = 
"is_not_set" 8820 ) 8821 8822 // A Constraint is used to restrict placement options. 8823 type Constraint struct { 8824 LTarget string // Left-hand target 8825 RTarget string // Right-hand target 8826 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 8827 } 8828 8829 // Equal checks if two constraints are equal. 8830 func (c *Constraint) Equal(o *Constraint) bool { 8831 return c == o || 8832 c.LTarget == o.LTarget && 8833 c.RTarget == o.RTarget && 8834 c.Operand == o.Operand 8835 } 8836 8837 func (c *Constraint) Copy() *Constraint { 8838 if c == nil { 8839 return nil 8840 } 8841 return &Constraint{ 8842 LTarget: c.LTarget, 8843 RTarget: c.RTarget, 8844 Operand: c.Operand, 8845 } 8846 } 8847 8848 func (c *Constraint) String() string { 8849 return fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 8850 } 8851 8852 func (c *Constraint) Validate() error { 8853 var mErr multierror.Error 8854 if c.Operand == "" { 8855 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 8856 } 8857 8858 // requireLtarget specifies whether the constraint requires an LTarget to be 8859 // provided. 
8860 requireLtarget := true 8861 8862 // Perform additional validation based on operand 8863 switch c.Operand { 8864 case ConstraintDistinctHosts: 8865 requireLtarget = false 8866 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 8867 if c.RTarget == "" { 8868 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains constraint requires an RTarget")) 8869 } 8870 case ConstraintRegex: 8871 if _, err := regexp.Compile(c.RTarget); err != nil { 8872 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 8873 } 8874 case ConstraintVersion: 8875 if _, err := version.NewConstraint(c.RTarget); err != nil { 8876 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 8877 } 8878 case ConstraintSemver: 8879 if _, err := semver.NewConstraint(c.RTarget); err != nil { 8880 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver constraint is invalid: %v", err)) 8881 } 8882 case ConstraintDistinctProperty: 8883 // If a count is set, make sure it is convertible to a uint64 8884 if c.RTarget != "" { 8885 count, err := strconv.ParseUint(c.RTarget, 10, 64) 8886 if err != nil { 8887 mErr.Errors = append(mErr.Errors, fmt.Errorf("Failed to convert RTarget %q to uint64: %v", c.RTarget, err)) 8888 } else if count < 1 { 8889 mErr.Errors = append(mErr.Errors, fmt.Errorf("Distinct Property must have an allowed count of 1 or greater: %d < 1", count)) 8890 } 8891 } 8892 case ConstraintAttributeIsSet, ConstraintAttributeIsNotSet: 8893 if c.RTarget != "" { 8894 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q does not support an RTarget", c.Operand)) 8895 } 8896 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 8897 if c.RTarget == "" { 8898 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", c.Operand)) 8899 } 8900 default: 8901 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown constraint type %q", c.Operand)) 8902 } 8903 8904 // Ensure we have an 
LTarget for the constraints that need one 8905 if requireLtarget && c.LTarget == "" { 8906 mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required by constraint")) 8907 } 8908 8909 return mErr.ErrorOrNil() 8910 } 8911 8912 type Constraints []*Constraint 8913 8914 // Equal compares Constraints as a set 8915 func (xs *Constraints) Equal(ys *Constraints) bool { 8916 if xs == ys { 8917 return true 8918 } 8919 if xs == nil || ys == nil { 8920 return false 8921 } 8922 if len(*xs) != len(*ys) { 8923 return false 8924 } 8925 SETEQUALS: 8926 for _, x := range *xs { 8927 for _, y := range *ys { 8928 if x.Equal(y) { 8929 continue SETEQUALS 8930 } 8931 } 8932 return false 8933 } 8934 return true 8935 } 8936 8937 // Affinity is used to score placement options based on a weight 8938 type Affinity struct { 8939 LTarget string // Left-hand target 8940 RTarget string // Right-hand target 8941 Operand string // Affinity operand (<=, <, =, !=, >, >=), set_contains_all, set_contains_any 8942 Weight int8 // Weight applied to nodes that match the affinity. Can be negative 8943 } 8944 8945 // Equal checks if two affinities are equal. 
8946 func (a *Affinity) Equal(o *Affinity) bool { 8947 return a == o || 8948 a.LTarget == o.LTarget && 8949 a.RTarget == o.RTarget && 8950 a.Operand == o.Operand && 8951 a.Weight == o.Weight 8952 } 8953 8954 func (a *Affinity) Copy() *Affinity { 8955 if a == nil { 8956 return nil 8957 } 8958 return &Affinity{ 8959 LTarget: a.LTarget, 8960 RTarget: a.RTarget, 8961 Operand: a.Operand, 8962 Weight: a.Weight, 8963 } 8964 } 8965 8966 func (a *Affinity) String() string { 8967 return fmt.Sprintf("%s %s %s %v", a.LTarget, a.Operand, a.RTarget, a.Weight) 8968 } 8969 8970 func (a *Affinity) Validate() error { 8971 var mErr multierror.Error 8972 if a.Operand == "" { 8973 mErr.Errors = append(mErr.Errors, errors.New("Missing affinity operand")) 8974 } 8975 8976 // Perform additional validation based on operand 8977 switch a.Operand { 8978 case ConstraintSetContainsAll, ConstraintSetContainsAny, ConstraintSetContains: 8979 if a.RTarget == "" { 8980 mErr.Errors = append(mErr.Errors, fmt.Errorf("Set contains operators require an RTarget")) 8981 } 8982 case ConstraintRegex: 8983 if _, err := regexp.Compile(a.RTarget); err != nil { 8984 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 8985 } 8986 case ConstraintVersion: 8987 if _, err := version.NewConstraint(a.RTarget); err != nil { 8988 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version affinity is invalid: %v", err)) 8989 } 8990 case ConstraintSemver: 8991 if _, err := semver.NewConstraint(a.RTarget); err != nil { 8992 mErr.Errors = append(mErr.Errors, fmt.Errorf("Semver affinity is invalid: %v", err)) 8993 } 8994 case "=", "==", "is", "!=", "not", "<", "<=", ">", ">=": 8995 if a.RTarget == "" { 8996 mErr.Errors = append(mErr.Errors, fmt.Errorf("Operator %q requires an RTarget", a.Operand)) 8997 } 8998 default: 8999 mErr.Errors = append(mErr.Errors, fmt.Errorf("Unknown affinity operator %q", a.Operand)) 9000 } 9001 9002 // Ensure we have an LTarget 9003 if a.LTarget == "" { 9004 
mErr.Errors = append(mErr.Errors, fmt.Errorf("No LTarget provided but is required")) 9005 } 9006 9007 // Ensure that weight is between -100 and 100, and not zero 9008 if a.Weight == 0 { 9009 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight cannot be zero")) 9010 } 9011 9012 if a.Weight > 100 || a.Weight < -100 { 9013 mErr.Errors = append(mErr.Errors, fmt.Errorf("Affinity weight must be within the range [-100,100]")) 9014 } 9015 9016 return mErr.ErrorOrNil() 9017 } 9018 9019 // Spread is used to specify desired distribution of allocations according to weight 9020 type Spread struct { 9021 // Attribute is the node attribute used as the spread criteria 9022 Attribute string 9023 9024 // Weight is the relative weight of this spread, useful when there are multiple 9025 // spread and affinities 9026 Weight int8 9027 9028 // SpreadTarget is used to describe desired percentages for each attribute value 9029 SpreadTarget []*SpreadTarget 9030 9031 // Memoized string representation 9032 str string 9033 } 9034 9035 type Affinities []*Affinity 9036 9037 // Equal compares Affinities as a set 9038 func (xs *Affinities) Equal(ys *Affinities) bool { 9039 if xs == ys { 9040 return true 9041 } 9042 if xs == nil || ys == nil { 9043 return false 9044 } 9045 if len(*xs) != len(*ys) { 9046 return false 9047 } 9048 SETEQUALS: 9049 for _, x := range *xs { 9050 for _, y := range *ys { 9051 if x.Equal(y) { 9052 continue SETEQUALS 9053 } 9054 } 9055 return false 9056 } 9057 return true 9058 } 9059 9060 func (s *Spread) Copy() *Spread { 9061 if s == nil { 9062 return nil 9063 } 9064 ns := new(Spread) 9065 *ns = *s 9066 9067 ns.SpreadTarget = CopySliceSpreadTarget(s.SpreadTarget) 9068 return ns 9069 } 9070 9071 func (s *Spread) String() string { 9072 if s.str != "" { 9073 return s.str 9074 } 9075 s.str = fmt.Sprintf("%s %s %v", s.Attribute, s.SpreadTarget, s.Weight) 9076 return s.str 9077 } 9078 9079 func (s *Spread) Validate() error { 9080 var mErr multierror.Error 9081 if 
s.Attribute == "" { 9082 mErr.Errors = append(mErr.Errors, errors.New("Missing spread attribute")) 9083 } 9084 if s.Weight <= 0 || s.Weight > 100 { 9085 mErr.Errors = append(mErr.Errors, errors.New("Spread stanza must have a positive weight from 0 to 100")) 9086 } 9087 seen := make(map[string]struct{}) 9088 sumPercent := uint32(0) 9089 9090 for _, target := range s.SpreadTarget { 9091 // Make sure there are no duplicates 9092 _, ok := seen[target.Value] 9093 if !ok { 9094 seen[target.Value] = struct{}{} 9095 } else { 9096 mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target value %q already defined", target.Value)) 9097 } 9098 if target.Percent > 100 { 9099 mErr.Errors = append(mErr.Errors, fmt.Errorf("Spread target percentage for value %q must be between 0 and 100", target.Value)) 9100 } 9101 sumPercent += uint32(target.Percent) 9102 } 9103 if sumPercent > 100 { 9104 mErr.Errors = append(mErr.Errors, fmt.Errorf("Sum of spread target percentages must not be greater than 100%%; got %d%%", sumPercent)) 9105 } 9106 return mErr.ErrorOrNil() 9107 } 9108 9109 // SpreadTarget is used to specify desired percentages for each attribute value 9110 type SpreadTarget struct { 9111 // Value is a single attribute value, like "dc1" 9112 Value string 9113 9114 // Percent is the desired percentage of allocs 9115 Percent uint8 9116 9117 // Memoized string representation 9118 str string 9119 } 9120 9121 func (s *SpreadTarget) Copy() *SpreadTarget { 9122 if s == nil { 9123 return nil 9124 } 9125 9126 ns := new(SpreadTarget) 9127 *ns = *s 9128 return ns 9129 } 9130 9131 func (s *SpreadTarget) String() string { 9132 if s.str != "" { 9133 return s.str 9134 } 9135 s.str = fmt.Sprintf("%q %v%%", s.Value, s.Percent) 9136 return s.str 9137 } 9138 9139 // EphemeralDisk is an ephemeral disk object 9140 type EphemeralDisk struct { 9141 // Sticky indicates whether the allocation is sticky to a node 9142 Sticky bool 9143 9144 // SizeMB is the size of the local disk 9145 SizeMB int 9146 9147 
// Migrate determines if Nomad client should migrate the allocation dir for 9148 // sticky allocations 9149 Migrate bool 9150 } 9151 9152 // DefaultEphemeralDisk returns a EphemeralDisk with default configurations 9153 func DefaultEphemeralDisk() *EphemeralDisk { 9154 return &EphemeralDisk{ 9155 SizeMB: 300, 9156 } 9157 } 9158 9159 // Validate validates EphemeralDisk 9160 func (d *EphemeralDisk) Validate() error { 9161 if d.SizeMB < 10 { 9162 return fmt.Errorf("minimum DiskMB value is 10; got %d", d.SizeMB) 9163 } 9164 return nil 9165 } 9166 9167 // Copy copies the EphemeralDisk struct and returns a new one 9168 func (d *EphemeralDisk) Copy() *EphemeralDisk { 9169 ld := new(EphemeralDisk) 9170 *ld = *d 9171 return ld 9172 } 9173 9174 var ( 9175 // VaultUnrecoverableError matches unrecoverable errors returned by a Vault 9176 // server 9177 VaultUnrecoverableError = regexp.MustCompile(`Code:\s+40(0|3|4)`) 9178 ) 9179 9180 const ( 9181 // VaultChangeModeNoop takes no action when a new token is retrieved. 9182 VaultChangeModeNoop = "noop" 9183 9184 // VaultChangeModeSignal signals the task when a new token is retrieved. 9185 VaultChangeModeSignal = "signal" 9186 9187 // VaultChangeModeRestart restarts the task when a new token is retrieved. 9188 VaultChangeModeRestart = "restart" 9189 ) 9190 9191 // Vault stores the set of permissions a task needs access to from Vault. 9192 type Vault struct { 9193 // Policies is the set of policies that the task needs access to 9194 Policies []string 9195 9196 // Namespace is the vault namespace that should be used. 9197 Namespace string 9198 9199 // Env marks whether the Vault Token should be exposed as an environment 9200 // variable 9201 Env bool 9202 9203 // ChangeMode is used to configure the task's behavior when the Vault 9204 // token changes because the original token could not be renewed in time. 9205 ChangeMode string 9206 9207 // ChangeSignal is the signal sent to the task when a new token is 9208 // retrieved. 
This is only valid when using the signal change mode. 9209 ChangeSignal string 9210 } 9211 9212 func DefaultVaultBlock() *Vault { 9213 return &Vault{ 9214 Env: true, 9215 ChangeMode: VaultChangeModeRestart, 9216 } 9217 } 9218 9219 // Copy returns a copy of this Vault block. 9220 func (v *Vault) Copy() *Vault { 9221 if v == nil { 9222 return nil 9223 } 9224 9225 nv := new(Vault) 9226 *nv = *v 9227 return nv 9228 } 9229 9230 func (v *Vault) Canonicalize() { 9231 if v.ChangeSignal != "" { 9232 v.ChangeSignal = strings.ToUpper(v.ChangeSignal) 9233 } 9234 9235 if v.ChangeMode == "" { 9236 v.ChangeMode = VaultChangeModeRestart 9237 } 9238 } 9239 9240 // Validate returns if the Vault block is valid. 9241 func (v *Vault) Validate() error { 9242 if v == nil { 9243 return nil 9244 } 9245 9246 var mErr multierror.Error 9247 if len(v.Policies) == 0 { 9248 _ = multierror.Append(&mErr, fmt.Errorf("Policy list cannot be empty")) 9249 } 9250 9251 for _, p := range v.Policies { 9252 if p == "root" { 9253 _ = multierror.Append(&mErr, fmt.Errorf("Can not specify \"root\" policy")) 9254 } 9255 } 9256 9257 switch v.ChangeMode { 9258 case VaultChangeModeSignal: 9259 if v.ChangeSignal == "" { 9260 _ = multierror.Append(&mErr, fmt.Errorf("Signal must be specified when using change mode %q", VaultChangeModeSignal)) 9261 } 9262 case VaultChangeModeNoop, VaultChangeModeRestart: 9263 default: 9264 _ = multierror.Append(&mErr, fmt.Errorf("Unknown change mode %q", v.ChangeMode)) 9265 } 9266 9267 return mErr.ErrorOrNil() 9268 } 9269 9270 const ( 9271 // DeploymentStatuses are the various states a deployment can be be in 9272 DeploymentStatusRunning = "running" 9273 DeploymentStatusPaused = "paused" 9274 DeploymentStatusFailed = "failed" 9275 DeploymentStatusSuccessful = "successful" 9276 DeploymentStatusCancelled = "cancelled" 9277 DeploymentStatusInitializing = "initializing" 9278 DeploymentStatusPending = "pending" 9279 DeploymentStatusBlocked = "blocked" 9280 DeploymentStatusUnblocking = 
"unblocking" 9281 9282 // TODO Statuses and Descriptions do not match 1:1 and we sometimes use the Description as a status flag 9283 9284 // DeploymentStatusDescriptions are the various descriptions of the states a 9285 // deployment can be in. 9286 DeploymentStatusDescriptionRunning = "Deployment is running" 9287 DeploymentStatusDescriptionRunningNeedsPromotion = "Deployment is running but requires manual promotion" 9288 DeploymentStatusDescriptionRunningAutoPromotion = "Deployment is running pending automatic promotion" 9289 DeploymentStatusDescriptionPaused = "Deployment is paused" 9290 DeploymentStatusDescriptionSuccessful = "Deployment completed successfully" 9291 DeploymentStatusDescriptionStoppedJob = "Cancelled because job is stopped" 9292 DeploymentStatusDescriptionNewerJob = "Cancelled due to newer version of job" 9293 DeploymentStatusDescriptionFailedAllocations = "Failed due to unhealthy allocations" 9294 DeploymentStatusDescriptionProgressDeadline = "Failed due to progress deadline" 9295 DeploymentStatusDescriptionFailedByUser = "Deployment marked as failed" 9296 9297 // used only in multiregion deployments 9298 DeploymentStatusDescriptionFailedByPeer = "Failed because of an error in peer region" 9299 DeploymentStatusDescriptionBlocked = "Deployment is complete but waiting for peer region" 9300 DeploymentStatusDescriptionUnblocking = "Deployment is unblocking remaining regions" 9301 DeploymentStatusDescriptionPendingForPeer = "Deployment is pending, waiting for peer region" 9302 ) 9303 9304 // DeploymentStatusDescriptionRollback is used to get the status description of 9305 // a deployment when rolling back to an older job. 
9306 func DeploymentStatusDescriptionRollback(baseDescription string, jobVersion uint64) string { 9307 return fmt.Sprintf("%s - rolling back to job version %d", baseDescription, jobVersion) 9308 } 9309 9310 // DeploymentStatusDescriptionRollbackNoop is used to get the status description of 9311 // a deployment when rolling back is not possible because it has the same specification 9312 func DeploymentStatusDescriptionRollbackNoop(baseDescription string, jobVersion uint64) string { 9313 return fmt.Sprintf("%s - not rolling back to stable job version %d as current job has same specification", baseDescription, jobVersion) 9314 } 9315 9316 // DeploymentStatusDescriptionNoRollbackTarget is used to get the status description of 9317 // a deployment when there is no target to rollback to but autorevert is desired. 9318 func DeploymentStatusDescriptionNoRollbackTarget(baseDescription string) string { 9319 return fmt.Sprintf("%s - no stable job version to auto revert to", baseDescription) 9320 } 9321 9322 // Deployment is the object that represents a job deployment which is used to 9323 // transition a job between versions. 9324 type Deployment struct { 9325 // ID is a generated UUID for the deployment 9326 ID string 9327 9328 // Namespace is the namespace the deployment is created in 9329 Namespace string 9330 9331 // JobID is the job the deployment is created for 9332 JobID string 9333 9334 // JobVersion is the version of the job at which the deployment is tracking 9335 JobVersion uint64 9336 9337 // JobModifyIndex is the ModifyIndex of the job which the deployment is 9338 // tracking. 9339 JobModifyIndex uint64 9340 9341 // JobSpecModifyIndex is the JobModifyIndex of the job which the 9342 // deployment is tracking. 9343 JobSpecModifyIndex uint64 9344 9345 // JobCreateIndex is the create index of the job which the deployment is 9346 // tracking. 
It is needed so that if the job gets stopped and reran we can 9347 // present the correct list of deployments for the job and not old ones. 9348 JobCreateIndex uint64 9349 9350 // Multiregion specifies if deployment is part of multiregion deployment 9351 IsMultiregion bool 9352 9353 // TaskGroups is the set of task groups effected by the deployment and their 9354 // current deployment status. 9355 TaskGroups map[string]*DeploymentState 9356 9357 // The status of the deployment 9358 Status string 9359 9360 // StatusDescription allows a human readable description of the deployment 9361 // status. 9362 StatusDescription string 9363 9364 // EvalPriority tracks the priority of the evaluation which lead to the 9365 // creation of this Deployment object. Any additional evaluations created 9366 // as a result of this deployment can therefore inherit this value, which 9367 // is not guaranteed to be that of the job priority parameter. 9368 EvalPriority int 9369 9370 CreateIndex uint64 9371 ModifyIndex uint64 9372 } 9373 9374 // NewDeployment creates a new deployment given the job. 
9375 func NewDeployment(job *Job, evalPriority int) *Deployment { 9376 return &Deployment{ 9377 ID: uuid.Generate(), 9378 Namespace: job.Namespace, 9379 JobID: job.ID, 9380 JobVersion: job.Version, 9381 JobModifyIndex: job.ModifyIndex, 9382 JobSpecModifyIndex: job.JobModifyIndex, 9383 JobCreateIndex: job.CreateIndex, 9384 IsMultiregion: job.IsMultiregion(), 9385 Status: DeploymentStatusRunning, 9386 StatusDescription: DeploymentStatusDescriptionRunning, 9387 TaskGroups: make(map[string]*DeploymentState, len(job.TaskGroups)), 9388 EvalPriority: evalPriority, 9389 } 9390 } 9391 9392 func (d *Deployment) Copy() *Deployment { 9393 if d == nil { 9394 return nil 9395 } 9396 9397 c := &Deployment{} 9398 *c = *d 9399 9400 c.TaskGroups = nil 9401 if l := len(d.TaskGroups); d.TaskGroups != nil { 9402 c.TaskGroups = make(map[string]*DeploymentState, l) 9403 for tg, s := range d.TaskGroups { 9404 c.TaskGroups[tg] = s.Copy() 9405 } 9406 } 9407 9408 return c 9409 } 9410 9411 // Active returns whether the deployment is active or terminal. 9412 func (d *Deployment) Active() bool { 9413 switch d.Status { 9414 case DeploymentStatusRunning, DeploymentStatusPaused, DeploymentStatusBlocked, 9415 DeploymentStatusUnblocking, DeploymentStatusInitializing, DeploymentStatusPending: 9416 return true 9417 default: 9418 return false 9419 } 9420 } 9421 9422 // GetID is a helper for getting the ID when the object may be nil 9423 func (d *Deployment) GetID() string { 9424 if d == nil { 9425 return "" 9426 } 9427 return d.ID 9428 } 9429 9430 // GetCreateIndex implements the CreateIndexGetter interface, required for 9431 // pagination. 
9432 func (d *Deployment) GetCreateIndex() uint64 { 9433 if d == nil { 9434 return 0 9435 } 9436 return d.CreateIndex 9437 } 9438 9439 // HasPlacedCanaries returns whether the deployment has placed canaries 9440 func (d *Deployment) HasPlacedCanaries() bool { 9441 if d == nil || len(d.TaskGroups) == 0 { 9442 return false 9443 } 9444 for _, group := range d.TaskGroups { 9445 if len(group.PlacedCanaries) != 0 { 9446 return true 9447 } 9448 } 9449 return false 9450 } 9451 9452 // RequiresPromotion returns whether the deployment requires promotion to 9453 // continue 9454 func (d *Deployment) RequiresPromotion() bool { 9455 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 9456 return false 9457 } 9458 for _, group := range d.TaskGroups { 9459 if group.DesiredCanaries > 0 && !group.Promoted { 9460 return true 9461 } 9462 } 9463 return false 9464 } 9465 9466 // HasAutoPromote determines if all taskgroups are marked auto_promote 9467 func (d *Deployment) HasAutoPromote() bool { 9468 if d == nil || len(d.TaskGroups) == 0 || d.Status != DeploymentStatusRunning { 9469 return false 9470 } 9471 for _, group := range d.TaskGroups { 9472 if group.DesiredCanaries > 0 && !group.AutoPromote { 9473 return false 9474 } 9475 } 9476 return true 9477 } 9478 9479 func (d *Deployment) GoString() string { 9480 base := fmt.Sprintf("Deployment ID %q for job %q has status %q (%v):", d.ID, d.JobID, d.Status, d.StatusDescription) 9481 for group, state := range d.TaskGroups { 9482 base += fmt.Sprintf("\nTask Group %q has state:\n%#v", group, state) 9483 } 9484 return base 9485 } 9486 9487 // DeploymentState tracks the state of a deployment for a given task group. 
9488 type DeploymentState struct { 9489 // AutoRevert marks whether the task group has indicated the job should be 9490 // reverted on failure 9491 AutoRevert bool 9492 9493 // AutoPromote marks promotion triggered automatically by healthy canaries 9494 // copied from TaskGroup UpdateStrategy in scheduler.reconcile 9495 AutoPromote bool 9496 9497 // ProgressDeadline is the deadline by which an allocation must transition 9498 // to healthy before the deployment is considered failed. This value is set 9499 // by the jobspec `update.progress_deadline` field. 9500 ProgressDeadline time.Duration 9501 9502 // RequireProgressBy is the time by which an allocation must transition to 9503 // healthy before the deployment is considered failed. This value is reset 9504 // to "now" + ProgressDeadline when an allocation updates the deployment. 9505 RequireProgressBy time.Time 9506 9507 // Promoted marks whether the canaries have been promoted 9508 Promoted bool 9509 9510 // PlacedCanaries is the set of placed canary allocations 9511 PlacedCanaries []string 9512 9513 // DesiredCanaries is the number of canaries that should be created. 9514 DesiredCanaries int 9515 9516 // DesiredTotal is the total number of allocations that should be created as 9517 // part of the deployment. 9518 DesiredTotal int 9519 9520 // PlacedAllocs is the number of allocations that have been placed 9521 PlacedAllocs int 9522 9523 // HealthyAllocs is the number of allocations that have been marked healthy. 9524 HealthyAllocs int 9525 9526 // UnhealthyAllocs are allocations that have been marked as unhealthy. 
9527 UnhealthyAllocs int 9528 } 9529 9530 func (d *DeploymentState) GoString() string { 9531 base := fmt.Sprintf("\tDesired Total: %d", d.DesiredTotal) 9532 base += fmt.Sprintf("\n\tDesired Canaries: %d", d.DesiredCanaries) 9533 base += fmt.Sprintf("\n\tPlaced Canaries: %#v", d.PlacedCanaries) 9534 base += fmt.Sprintf("\n\tPromoted: %v", d.Promoted) 9535 base += fmt.Sprintf("\n\tPlaced: %d", d.PlacedAllocs) 9536 base += fmt.Sprintf("\n\tHealthy: %d", d.HealthyAllocs) 9537 base += fmt.Sprintf("\n\tUnhealthy: %d", d.UnhealthyAllocs) 9538 base += fmt.Sprintf("\n\tAutoRevert: %v", d.AutoRevert) 9539 base += fmt.Sprintf("\n\tAutoPromote: %v", d.AutoPromote) 9540 return base 9541 } 9542 9543 func (d *DeploymentState) Copy() *DeploymentState { 9544 c := &DeploymentState{} 9545 *c = *d 9546 c.PlacedCanaries = slices.Clone(d.PlacedCanaries) 9547 return c 9548 } 9549 9550 // DeploymentStatusUpdate is used to update the status of a given deployment 9551 type DeploymentStatusUpdate struct { 9552 // DeploymentID is the ID of the deployment to update 9553 DeploymentID string 9554 9555 // Status is the new status of the deployment. 9556 Status string 9557 9558 // StatusDescription is the new status description of the deployment. 
9559 StatusDescription string 9560 } 9561 9562 // RescheduleTracker encapsulates previous reschedule events 9563 type RescheduleTracker struct { 9564 Events []*RescheduleEvent 9565 } 9566 9567 func (rt *RescheduleTracker) Copy() *RescheduleTracker { 9568 if rt == nil { 9569 return nil 9570 } 9571 nt := &RescheduleTracker{} 9572 *nt = *rt 9573 rescheduleEvents := make([]*RescheduleEvent, 0, len(rt.Events)) 9574 for _, tracker := range rt.Events { 9575 rescheduleEvents = append(rescheduleEvents, tracker.Copy()) 9576 } 9577 nt.Events = rescheduleEvents 9578 return nt 9579 } 9580 9581 // RescheduleEvent is used to keep track of previous attempts at rescheduling an allocation 9582 type RescheduleEvent struct { 9583 // RescheduleTime is the timestamp of a reschedule attempt 9584 RescheduleTime int64 9585 9586 // PrevAllocID is the ID of the previous allocation being restarted 9587 PrevAllocID string 9588 9589 // PrevNodeID is the node ID of the previous allocation 9590 PrevNodeID string 9591 9592 // Delay is the reschedule delay associated with the attempt 9593 Delay time.Duration 9594 } 9595 9596 func NewRescheduleEvent(rescheduleTime int64, prevAllocID string, prevNodeID string, delay time.Duration) *RescheduleEvent { 9597 return &RescheduleEvent{RescheduleTime: rescheduleTime, 9598 PrevAllocID: prevAllocID, 9599 PrevNodeID: prevNodeID, 9600 Delay: delay} 9601 } 9602 9603 func (re *RescheduleEvent) Copy() *RescheduleEvent { 9604 if re == nil { 9605 return nil 9606 } 9607 copy := new(RescheduleEvent) 9608 *copy = *re 9609 return copy 9610 } 9611 9612 // DesiredTransition is used to mark an allocation as having a desired state 9613 // transition. This information can be used by the scheduler to make the 9614 // correct decision. 9615 type DesiredTransition struct { 9616 // Migrate is used to indicate that this allocation should be stopped and 9617 // migrated to another node. 
9618 Migrate *bool 9619 9620 // Reschedule is used to indicate that this allocation is eligible to be 9621 // rescheduled. Most allocations are automatically eligible for 9622 // rescheduling, so this field is only required when an allocation is not 9623 // automatically eligible. An example is an allocation that is part of a 9624 // deployment. 9625 Reschedule *bool 9626 9627 // ForceReschedule is used to indicate that this allocation must be rescheduled. 9628 // This field is only used when operators want to force a placement even if 9629 // a failed allocation is not eligible to be rescheduled 9630 ForceReschedule *bool 9631 9632 // NoShutdownDelay, if set to true, will override the group and 9633 // task shutdown_delay configuration and ignore the delay for any 9634 // allocations stopped as a result of this Deregister call. 9635 NoShutdownDelay *bool 9636 } 9637 9638 // Merge merges the two desired transitions, preferring the values from the 9639 // passed in object. 9640 func (d *DesiredTransition) Merge(o *DesiredTransition) { 9641 if o.Migrate != nil { 9642 d.Migrate = o.Migrate 9643 } 9644 9645 if o.Reschedule != nil { 9646 d.Reschedule = o.Reschedule 9647 } 9648 9649 if o.ForceReschedule != nil { 9650 d.ForceReschedule = o.ForceReschedule 9651 } 9652 9653 if o.NoShutdownDelay != nil { 9654 d.NoShutdownDelay = o.NoShutdownDelay 9655 } 9656 } 9657 9658 // ShouldMigrate returns whether the transition object dictates a migration. 9659 func (d *DesiredTransition) ShouldMigrate() bool { 9660 return d.Migrate != nil && *d.Migrate 9661 } 9662 9663 // ShouldReschedule returns whether the transition object dictates a 9664 // rescheduling. 9665 func (d *DesiredTransition) ShouldReschedule() bool { 9666 return d.Reschedule != nil && *d.Reschedule 9667 } 9668 9669 // ShouldForceReschedule returns whether the transition object dictates a 9670 // forced rescheduling. 
// ShouldForceReschedule returns whether the transition object dictates a
// forced rescheduling.
func (d *DesiredTransition) ShouldForceReschedule() bool {
	if d == nil {
		return false
	}
	return d.ForceReschedule != nil && *d.ForceReschedule
}

// ShouldIgnoreShutdownDelay returns whether the transition object dictates
// that shutdown skip any shutdown delays.
func (d *DesiredTransition) ShouldIgnoreShutdownDelay() bool {
	if d == nil {
		return false
	}
	return d.NoShutdownDelay != nil && *d.NoShutdownDelay
}

// Desired statuses the server can assign to an allocation.
const (
	AllocDesiredStatusRun   = "run"   // Allocation should run
	AllocDesiredStatusStop  = "stop"  // Allocation should stop
	AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted
)

// Statuses an allocation reports from the client as it runs on a node.
const (
	AllocClientStatusPending  = "pending"
	AllocClientStatusRunning  = "running"
	AllocClientStatusComplete = "complete"
	AllocClientStatusFailed   = "failed"
	AllocClientStatusLost     = "lost"
	AllocClientStatusUnknown  = "unknown"
)

// Allocation is used to allocate the placement of a task group to a node.
type Allocation struct {
	// msgpack omit empty fields during serialization
	_struct bool `codec:",omitempty"` // nolint: structcheck

	// ID of the allocation (UUID)
	ID string

	// Namespace is the namespace the allocation is created in
	Namespace string

	// ID of the evaluation that generated this allocation
	EvalID string

	// Name is a logical name of the allocation.
	Name string

	// NodeID is the node this is being placed on
	NodeID string

	// NodeName is the name of the node this is being placed on.
	NodeName string

	// Job is the parent job of the task group being allocated.
	// This is copied at allocation time to avoid issues if the job
	// definition is updated.
	JobID string
	Job   *Job

	// TaskGroup is the name of the task group that should be run
	TaskGroup string

	// COMPAT(0.11): Remove in 0.11
	// Resources is the total set of resources allocated as part
	// of this allocation of the task group. Dynamic ports will be set by
	// the scheduler.
	Resources *Resources

	// SharedResources are the resources that are shared by all the tasks in an
	// allocation
	// Deprecated: use AllocatedResources.Shared instead.
	// Keep field to allow us to handle upgrade paths from old versions
	SharedResources *Resources

	// TaskResources is the set of resources allocated to each
	// task. These should sum to the total Resources. Dynamic ports will be
	// set by the scheduler.
	// Deprecated: use AllocatedResources.Tasks instead.
	// Keep field to allow us to handle upgrade paths from old versions
	TaskResources map[string]*Resources

	// AllocatedResources is the total resources allocated for the task group.
	AllocatedResources *AllocatedResources

	// Metrics associated with this allocation
	Metrics *AllocMetric

	// Desired Status of the allocation on the client
	DesiredStatus string

	// DesiredDescription is meant to provide more human useful information
	DesiredDescription string

	// DesiredTransition is used to indicate that a state transition
	// is desired for a given reason.
	DesiredTransition DesiredTransition

	// Status of the allocation on the client
	ClientStatus string

	// ClientDescription is meant to provide more human useful information
	ClientDescription string

	// TaskStates stores the state of each task,
	TaskStates map[string]*TaskState

	// AllocStates track meta data associated with changes to the state of the whole allocation, like becoming lost
	AllocStates []*AllocState

	// PreviousAllocation is the allocation that this allocation is replacing
	PreviousAllocation string

	// NextAllocation is the allocation that this allocation is being replaced by
	NextAllocation string

	// DeploymentID identifies an allocation as being created from a
	// particular deployment
	DeploymentID string

	// DeploymentStatus captures the status of the allocation as part of the
	// given deployment
	DeploymentStatus *AllocDeploymentStatus

	// RescheduleTracker captures details of previous reschedule attempts of the allocation
	RescheduleTracker *RescheduleTracker

	// NetworkStatus captures networking details of an allocation known at runtime
	NetworkStatus *AllocNetworkStatus

	// FollowupEvalID captures a follow up evaluation created to handle a failed allocation
	// that can be rescheduled in the future
	FollowupEvalID string

	// PreemptedAllocations captures IDs of any allocations that were preempted
	// in order to place this allocation
	PreemptedAllocations []string

	// PreemptedByAllocation tracks the alloc ID of the allocation that caused this allocation
	// to stop running because it got preempted
	PreemptedByAllocation string

	// SignedIdentities is a map of task names to signed identity/capability
	// claim tokens for those tasks. If needed, it is populated in the plan
	// applier. Excluded from JSON serialization.
	SignedIdentities map[string]string `json:"-"`

	// SigningKeyID is the key used to sign the SignedIdentities field.
	SigningKeyID string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64

	// AllocModifyIndex is not updated when the client updates allocations. This
	// lets the client pull only the allocs updated by the server.
	AllocModifyIndex uint64

	// CreateTime is the time the allocation has finished scheduling and been
	// verified by the plan applier.
	CreateTime int64

	// ModifyTime is the time the allocation was last updated.
	ModifyTime int64
}

// GetID implements the IDGetter interface, required for pagination.
func (a *Allocation) GetID() string {
	if a == nil {
		return ""
	}
	return a.ID
}

// GetNamespace implements the NamespaceGetter interface, required for
// pagination and filtering namespaces in endpoints that support glob namespace
// requests using tokens with limited access.
func (a *Allocation) GetNamespace() string {
	if a == nil {
		return ""
	}
	return a.Namespace
}

// GetCreateIndex implements the CreateIndexGetter interface, required for
// pagination.
func (a *Allocation) GetCreateIndex() uint64 {
	if a == nil {
		return 0
	}
	return a.CreateIndex
}

// ConsulNamespace returns the Consul namespace of the task group associated
// with this allocation.
//
// NOTE(review): unlike the getters above, this performs no nil checks; it
// assumes a.Job is set and a.TaskGroup names an existing group — a missing
// group would panic here. Confirm all callers guarantee this.
func (a *Allocation) ConsulNamespace() string {
	return a.Job.LookupTaskGroup(a.TaskGroup).Consul.GetNamespace()
}

// JobNamespacedID returns the (JobID, Namespace) pair identifying this
// allocation's job.
func (a *Allocation) JobNamespacedID() NamespacedID {
	return NewNamespacedID(a.JobID, a.Namespace)
}
// Index returns the index of the allocation. If the allocation is from a task
// group with count greater than 1, there will be multiple allocations for it.
// The index is parsed from the trailing "[N]" of the allocation Name; a name
// too short to contain an index yields 0, as does an unparsable index
// (strconv.Atoi error is deliberately ignored).
func (a *Allocation) Index() uint {
	l := len(a.Name)
	prefix := len(a.JobID) + len(a.TaskGroup) + 2
	if l <= 3 || l <= prefix {
		return uint(0)
	}

	strNum := a.Name[prefix : len(a.Name)-1]
	num, _ := strconv.Atoi(strNum)
	return uint(num)
}

// Copy provides a copy of the allocation and deep copies the job
func (a *Allocation) Copy() *Allocation {
	return a.copyImpl(true)
}

// CopySkipJob provides a copy of the allocation but doesn't deep copy the job
func (a *Allocation) CopySkipJob() *Allocation {
	return a.copyImpl(false)
}

// Canonicalize Allocation to ensure fields are initialized to the expectations
// of this version of Nomad. Should be called when restoring persisted
// Allocations or receiving Allocations from Nomad agents potentially on an
// older version of Nomad.
func (a *Allocation) Canonicalize() {
	// Upgrade path: synthesize AllocatedResources from the deprecated
	// TaskResources/SharedResources fields when it is absent.
	if a.AllocatedResources == nil && a.TaskResources != nil {
		ar := AllocatedResources{}

		tasks := make(map[string]*AllocatedTaskResources, len(a.TaskResources))
		for name, tr := range a.TaskResources {
			atr := AllocatedTaskResources{}
			atr.Cpu.CpuShares = int64(tr.CPU)
			atr.Memory.MemoryMB = int64(tr.MemoryMB)
			atr.Networks = tr.Networks.Copy()

			tasks[name] = &atr
		}
		ar.Tasks = tasks

		if a.SharedResources != nil {
			ar.Shared.DiskMB = int64(a.SharedResources.DiskMB)
			ar.Shared.Networks = a.SharedResources.Networks.Copy()
		}

		a.AllocatedResources = &ar
	}

	a.Job.Canonicalize()
}

// copyImpl is the shared implementation behind Copy and CopySkipJob. When job
// is true the embedded Job is deep copied as well; all other reference fields
// (resources, metrics, task states, reschedule tracker, preempted alloc IDs)
// are always deep copied so the result shares no mutable state with a.
func (a *Allocation) copyImpl(job bool) *Allocation {
	if a == nil {
		return nil
	}
	na := new(Allocation)
	*na = *a

	if job {
		na.Job = na.Job.Copy()
	}

	na.AllocatedResources = na.AllocatedResources.Copy()
	na.Resources = na.Resources.Copy()
	na.SharedResources = na.SharedResources.Copy()

	if a.TaskResources != nil {
		tr := make(map[string]*Resources, len(na.TaskResources))
		for task, resource := range na.TaskResources {
			tr[task] = resource.Copy()
		}
		na.TaskResources = tr
	}

	na.Metrics = na.Metrics.Copy()
	na.DeploymentStatus = na.DeploymentStatus.Copy()

	if a.TaskStates != nil {
		ts := make(map[string]*TaskState, len(na.TaskStates))
		for task, state := range na.TaskStates {
			ts[task] = state.Copy()
		}
		na.TaskStates = ts
	}

	na.RescheduleTracker = a.RescheduleTracker.Copy()
	na.PreemptedAllocations = slices.Clone(a.PreemptedAllocations)
	return na
}

// TerminalStatus returns if the desired or actual status is terminal and
// will no longer transition.
func (a *Allocation) TerminalStatus() bool {
	// First check the desired state and if that isn't terminal, check client
	// state.
	return a.ServerTerminalStatus() || a.ClientTerminalStatus()
}

// ServerTerminalStatus returns true if the desired state of the allocation is terminal
func (a *Allocation) ServerTerminalStatus() bool {
	switch a.DesiredStatus {
	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
		return true
	default:
		return false
	}
}

// ClientTerminalStatus returns if the client status is terminal and will no longer transition
func (a *Allocation) ClientTerminalStatus() bool {
	switch a.ClientStatus {
	case AllocClientStatusComplete, AllocClientStatusFailed, AllocClientStatusLost:
		return true
	default:
		return false
	}
}

// ShouldReschedule returns if the allocation is eligible to be rescheduled according
// to its status and ReschedulePolicy given its failure time
func (a *Allocation) ShouldReschedule(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
	// First check the desired state
	switch a.DesiredStatus {
	case AllocDesiredStatusStop, AllocDesiredStatusEvict:
		return false
	default:
	}
	// Only client-failed allocations are candidates for rescheduling.
	switch a.ClientStatus {
	case AllocClientStatusFailed:
		return a.RescheduleEligible(reschedulePolicy, failTime)
	default:
		return false
	}
}

// RescheduleEligible returns if the allocation is eligible to be rescheduled according
// to its ReschedulePolicy and the current state of its reschedule trackers
func (a *Allocation) RescheduleEligible(reschedulePolicy *ReschedulePolicy, failTime time.Time) bool {
	if reschedulePolicy == nil {
		return false
	}
	attempts := reschedulePolicy.Attempts
	enabled := attempts > 0 || reschedulePolicy.Unlimited
	if !enabled {
		return false
	}
	if reschedulePolicy.Unlimited {
		return true
	}
	// Early return true if there are no attempts yet and the number of allowed attempts is > 0
	if (a.RescheduleTracker == nil || len(a.RescheduleTracker.Events) == 0) && attempts > 0 {
		return true
	}
	attempted, _ := a.rescheduleInfo(reschedulePolicy, failTime)
	return attempted < attempts
}

// rescheduleInfo counts how many reschedule attempts fall within the policy's
// interval ending at failTime, returning (attempted, allowed attempts).
func (a *Allocation) rescheduleInfo(reschedulePolicy *ReschedulePolicy, failTime time.Time) (int, int) {
	if reschedulePolicy == nil {
		return 0, 0
	}
	attempts := reschedulePolicy.Attempts
	interval := reschedulePolicy.Interval

	attempted := 0
	if a.RescheduleTracker != nil && attempts > 0 {
		// Walk events newest-first, counting those inside the interval.
		for j := len(a.RescheduleTracker.Events) - 1; j >= 0; j-- {
			lastAttempt := a.RescheduleTracker.Events[j].RescheduleTime
			timeDiff := failTime.UTC().UnixNano() - lastAttempt
			if timeDiff < interval.Nanoseconds() {
				attempted += 1
			}
		}
	}
	return attempted, attempts
}

// RescheduleInfo returns the used and allowed reschedule attempts based on
// the allocation's own policy and last event time.
func (a *Allocation) RescheduleInfo() (int, int) {
	return a.rescheduleInfo(a.ReschedulePolicy(), a.LastEventTime())
}
// LastEventTime is the time of the last task event in the allocation.
// It is used to determine allocation failure time. If the FinishedAt field
// is not set, the alloc's modify time is used
func (a *Allocation) LastEventTime() time.Time {
	var lastEventTime time.Time
	if a.TaskStates != nil {
		for _, s := range a.TaskStates {
			if lastEventTime.IsZero() || s.FinishedAt.After(lastEventTime) {
				lastEventTime = s.FinishedAt
			}
		}
	}

	if lastEventTime.IsZero() {
		return time.Unix(0, a.ModifyTime).UTC()
	}
	return lastEventTime
}

// ReschedulePolicy returns the reschedule policy based on the task group
func (a *Allocation) ReschedulePolicy() *ReschedulePolicy {
	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil {
		return nil
	}
	return tg.ReschedulePolicy
}

// MigrateStrategy returns the migrate strategy based on the task group
func (a *Allocation) MigrateStrategy() *MigrateStrategy {
	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil {
		return nil
	}
	return tg.Migrate
}

// NextRescheduleTime returns a time on or after which the allocation is eligible to be rescheduled,
// and whether the next reschedule time is within policy's interval if the policy doesn't allow unlimited reschedules
func (a *Allocation) NextRescheduleTime() (time.Time, bool) {
	failTime := a.LastEventTime()
	reschedulePolicy := a.ReschedulePolicy()
	// Rescheduling only applies to client-failed, non-stopped allocations
	// that have a policy and a known failure time.
	if a.DesiredStatus == AllocDesiredStatusStop || a.ClientStatus != AllocClientStatusFailed || failTime.IsZero() || reschedulePolicy == nil {
		return time.Time{}, false
	}

	return a.nextRescheduleTime(failTime, reschedulePolicy)
}

// nextRescheduleTime computes failTime + NextDelay and whether that point is
// still eligible under the policy (always eligible when Unlimited).
func (a *Allocation) nextRescheduleTime(failTime time.Time, reschedulePolicy *ReschedulePolicy) (time.Time, bool) {
	nextDelay := a.NextDelay()
	nextRescheduleTime := failTime.Add(nextDelay)
	rescheduleEligible := reschedulePolicy.Unlimited || (reschedulePolicy.Attempts > 0 && a.RescheduleTracker == nil)
	if reschedulePolicy.Attempts > 0 && a.RescheduleTracker != nil && a.RescheduleTracker.Events != nil {
		// Check for eligibility based on the interval if max attempts is set
		attempted, attempts := a.rescheduleInfo(reschedulePolicy, failTime)
		rescheduleEligible = attempted < attempts && nextDelay < reschedulePolicy.Interval
	}
	return nextRescheduleTime, rescheduleEligible
}

// NextRescheduleTimeByFailTime works like NextRescheduleTime but allows callers
// specify a failure time. Useful for things like determining whether to reschedule
// an alloc on a disconnected node.
func (a *Allocation) NextRescheduleTimeByFailTime(failTime time.Time) (time.Time, bool) {
	reschedulePolicy := a.ReschedulePolicy()
	if reschedulePolicy == nil {
		return time.Time{}, false
	}

	return a.nextRescheduleTime(failTime, reschedulePolicy)
}

// ShouldClientStop tests an alloc for StopAfterClientDisconnect configuration
func (a *Allocation) ShouldClientStop() bool {
	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil ||
		tg.StopAfterClientDisconnect == nil ||
		*tg.StopAfterClientDisconnect == 0*time.Nanosecond {
		return false
	}
	return true
}

// WaitClientStop uses the reschedule delay mechanism to block rescheduling until
// StopAfterClientDisconnect's block interval passes
func (a *Allocation) WaitClientStop() time.Time {
	tg := a.Job.LookupTaskGroup(a.TaskGroup)

	// An alloc can only be marked lost once, so use the first lost transition
	var t time.Time
	for _, s := range a.AllocStates {
		if s.Field == AllocStateFieldClientStatus &&
			s.Value == AllocClientStatusLost {
			t = s.Time
			break
		}
	}

	// On the first pass, the alloc hasn't been marked lost yet, and so we start
	// counting from now
	if t.IsZero() {
		t = time.Now().UTC()
	}

	// Find the max kill timeout. Note: the loop variable t (a task) shadows
	// the timestamp t above inside this loop only.
	kill := DefaultKillTimeout
	for _, t := range tg.Tasks {
		if t.KillTimeout > kill {
			kill = t.KillTimeout
		}
	}

	return t.Add(*tg.StopAfterClientDisconnect + kill)
}

// DisconnectTimeout uses the MaxClientDisconnect to compute when the allocation
// should transition to lost.
//
// NOTE(review): no nil check on the looked-up task group here, unlike
// Expired; a missing group would panic — confirm callers guarantee the
// group exists.
func (a *Allocation) DisconnectTimeout(now time.Time) time.Time {
	if a == nil || a.Job == nil {
		return now
	}

	tg := a.Job.LookupTaskGroup(a.TaskGroup)

	timeout := tg.MaxClientDisconnect

	if timeout == nil {
		return now
	}

	return now.Add(*timeout)
}

// SupportsDisconnectedClients determines whether both the server and the task group
// are configured to allow the allocation to reconnect after network connectivity
// has been lost and then restored.
func (a *Allocation) SupportsDisconnectedClients(serverSupportsDisconnectedClients bool) bool {
	if !serverSupportsDisconnectedClients {
		return false
	}

	if a.Job != nil {
		tg := a.Job.LookupTaskGroup(a.TaskGroup)
		if tg != nil {
			return tg.MaxClientDisconnect != nil
		}
	}

	return false
}
10212 func (a *Allocation) NextDelay() time.Duration { 10213 policy := a.ReschedulePolicy() 10214 // Can be nil if the task group was updated to remove its reschedule policy 10215 if policy == nil { 10216 return 0 10217 } 10218 delayDur := policy.Delay 10219 if a.RescheduleTracker == nil || a.RescheduleTracker.Events == nil || len(a.RescheduleTracker.Events) == 0 { 10220 return delayDur 10221 } 10222 events := a.RescheduleTracker.Events 10223 switch policy.DelayFunction { 10224 case "exponential": 10225 delayDur = a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1].Delay * 2 10226 case "fibonacci": 10227 if len(events) >= 2 { 10228 fibN1Delay := events[len(events)-1].Delay 10229 fibN2Delay := events[len(events)-2].Delay 10230 // Handle reset of delay ceiling which should cause 10231 // a new series to start 10232 if fibN2Delay == policy.MaxDelay && fibN1Delay == policy.Delay { 10233 delayDur = fibN1Delay 10234 } else { 10235 delayDur = fibN1Delay + fibN2Delay 10236 } 10237 } 10238 default: 10239 return delayDur 10240 } 10241 if policy.MaxDelay > 0 && delayDur > policy.MaxDelay { 10242 delayDur = policy.MaxDelay 10243 // check if delay needs to be reset 10244 10245 lastRescheduleEvent := a.RescheduleTracker.Events[len(a.RescheduleTracker.Events)-1] 10246 timeDiff := a.LastEventTime().UTC().UnixNano() - lastRescheduleEvent.RescheduleTime 10247 if timeDiff > delayDur.Nanoseconds() { 10248 delayDur = policy.Delay 10249 } 10250 10251 } 10252 10253 return delayDur 10254 } 10255 10256 // Terminated returns if the allocation is in a terminal state on a client. 
10257 func (a *Allocation) Terminated() bool { 10258 if a.ClientStatus == AllocClientStatusFailed || 10259 a.ClientStatus == AllocClientStatusComplete || 10260 a.ClientStatus == AllocClientStatusLost { 10261 return true 10262 } 10263 return false 10264 } 10265 10266 // SetStop updates the allocation in place to a DesiredStatus stop, with the ClientStatus 10267 func (a *Allocation) SetStop(clientStatus, clientDesc string) { 10268 a.DesiredStatus = AllocDesiredStatusStop 10269 a.ClientStatus = clientStatus 10270 a.ClientDescription = clientDesc 10271 a.AppendState(AllocStateFieldClientStatus, clientStatus) 10272 } 10273 10274 // AppendState creates and appends an AllocState entry recording the time of the state 10275 // transition. Used to mark the transition to lost 10276 func (a *Allocation) AppendState(field AllocStateField, value string) { 10277 a.AllocStates = append(a.AllocStates, &AllocState{ 10278 Field: field, 10279 Value: value, 10280 Time: time.Now().UTC(), 10281 }) 10282 } 10283 10284 // RanSuccessfully returns whether the client has ran the allocation and all 10285 // tasks finished successfully. Critically this function returns whether the 10286 // allocation has ran to completion and not just that the alloc has converged to 10287 // its desired state. That is to say that a batch allocation must have finished 10288 // with exit code 0 on all task groups. This doesn't really have meaning on a 10289 // non-batch allocation because a service and system allocation should not 10290 // finish. 10291 func (a *Allocation) RanSuccessfully() bool { 10292 // Handle the case the client hasn't started the allocation. 
10293 if len(a.TaskStates) == 0 { 10294 return false 10295 } 10296 10297 // Check to see if all the tasks finished successfully in the allocation 10298 allSuccess := true 10299 for _, state := range a.TaskStates { 10300 allSuccess = allSuccess && state.Successful() 10301 } 10302 10303 return allSuccess 10304 } 10305 10306 // ShouldMigrate returns if the allocation needs data migration 10307 func (a *Allocation) ShouldMigrate() bool { 10308 if a.PreviousAllocation == "" { 10309 return false 10310 } 10311 10312 if a.DesiredStatus == AllocDesiredStatusStop || a.DesiredStatus == AllocDesiredStatusEvict { 10313 return false 10314 } 10315 10316 tg := a.Job.LookupTaskGroup(a.TaskGroup) 10317 10318 // if the task group is nil or the ephemeral disk block isn't present then 10319 // we won't migrate 10320 if tg == nil || tg.EphemeralDisk == nil { 10321 return false 10322 } 10323 10324 // We won't migrate any data is the user hasn't enabled migration or the 10325 // disk is not marked as sticky 10326 if !tg.EphemeralDisk.Migrate || !tg.EphemeralDisk.Sticky { 10327 return false 10328 } 10329 10330 return true 10331 } 10332 10333 // SetEventDisplayMessages populates the display message if its not already set, 10334 // a temporary fix to handle old allocations that don't have it. 10335 // This method will be removed in a future release. 10336 func (a *Allocation) SetEventDisplayMessages() { 10337 setDisplayMsg(a.TaskStates) 10338 } 10339 10340 // ComparableResources returns the resources on the allocation 10341 // handling upgrade paths. 
After 0.11 calls to this should be replaced with: 10342 // alloc.AllocatedResources.Comparable() 10343 // 10344 // COMPAT(0.11): Remove in 0.11 10345 func (a *Allocation) ComparableResources() *ComparableResources { 10346 // Alloc already has 0.9+ behavior 10347 if a.AllocatedResources != nil { 10348 return a.AllocatedResources.Comparable() 10349 } 10350 10351 var resources *Resources 10352 if a.Resources != nil { 10353 resources = a.Resources 10354 } else if a.TaskResources != nil { 10355 resources = new(Resources) 10356 resources.Add(a.SharedResources) 10357 for _, taskResource := range a.TaskResources { 10358 resources.Add(taskResource) 10359 } 10360 } 10361 10362 // Upgrade path 10363 return &ComparableResources{ 10364 Flattened: AllocatedTaskResources{ 10365 Cpu: AllocatedCpuResources{ 10366 CpuShares: int64(resources.CPU), 10367 }, 10368 Memory: AllocatedMemoryResources{ 10369 MemoryMB: int64(resources.MemoryMB), 10370 MemoryMaxMB: int64(resources.MemoryMaxMB), 10371 }, 10372 Networks: resources.Networks, 10373 }, 10374 Shared: AllocatedSharedResources{ 10375 DiskMB: int64(resources.DiskMB), 10376 }, 10377 } 10378 } 10379 10380 // LookupTask by name from the Allocation. Returns nil if the Job is not set, the 10381 // TaskGroup does not exist, or the task name cannot be found. 
// LookupTask by name from the Allocation. Returns nil if the Job is not set, the
// TaskGroup does not exist, or the task name cannot be found.
func (a *Allocation) LookupTask(name string) *Task {
	if a.Job == nil {
		return nil
	}

	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil {
		return nil
	}

	return tg.LookupTask(name)
}

// Stub returns a list stub for the allocation. The optional fields argument
// controls which heavyweight fields are kept: Resources copies in
// AllocatedResources, and TaskStates=false strips the task states.
func (a *Allocation) Stub(fields *AllocStubFields) *AllocListStub {
	s := &AllocListStub{
		ID:                    a.ID,
		EvalID:                a.EvalID,
		Name:                  a.Name,
		Namespace:             a.Namespace,
		NodeID:                a.NodeID,
		NodeName:              a.NodeName,
		JobID:                 a.JobID,
		JobType:               a.Job.Type,
		JobVersion:            a.Job.Version,
		TaskGroup:             a.TaskGroup,
		DesiredStatus:         a.DesiredStatus,
		DesiredDescription:    a.DesiredDescription,
		ClientStatus:          a.ClientStatus,
		ClientDescription:     a.ClientDescription,
		DesiredTransition:     a.DesiredTransition,
		TaskStates:            a.TaskStates,
		DeploymentStatus:      a.DeploymentStatus,
		FollowupEvalID:        a.FollowupEvalID,
		RescheduleTracker:     a.RescheduleTracker,
		PreemptedAllocations:  a.PreemptedAllocations,
		PreemptedByAllocation: a.PreemptedByAllocation,
		CreateIndex:           a.CreateIndex,
		ModifyIndex:           a.ModifyIndex,
		CreateTime:            a.CreateTime,
		ModifyTime:            a.ModifyTime,
	}

	if fields != nil {
		if fields.Resources {
			s.AllocatedResources = a.AllocatedResources
		}
		if !fields.TaskStates {
			s.TaskStates = nil
		}
	}

	return s
}
// AllocationDiff converts an Allocation type to an AllocationDiff type.
// If, at any time, modifications are made to AllocationDiff so that an
// Allocation can no longer be safely converted to AllocationDiff,
// this method should be changed accordingly.
func (a *Allocation) AllocationDiff() *AllocationDiff {
	return (*AllocationDiff)(a)
}

// Expired determines whether an allocation has exceeded its MaxClientDisconnect
// duration relative to the passed time stamp.
func (a *Allocation) Expired(now time.Time) bool {
	if a == nil || a.Job == nil {
		return false
	}

	// If alloc is not Unknown it cannot be expired.
	if a.ClientStatus != AllocClientStatusUnknown {
		return false
	}

	lastUnknown := a.LastUnknown()
	if lastUnknown.IsZero() {
		return false
	}

	tg := a.Job.LookupTaskGroup(a.TaskGroup)
	if tg == nil {
		return false
	}

	if tg.MaxClientDisconnect == nil {
		return false
	}

	// Expired at or after the disconnect window closes (inclusive).
	expiry := lastUnknown.Add(*tg.MaxClientDisconnect)
	return now.UTC().After(expiry) || now.UTC().Equal(expiry)
}

// LastUnknown returns the timestamp for the last time the allocation
// transitioned into the unknown client status.
func (a *Allocation) LastUnknown() time.Time {
	var lastUnknown time.Time

	for _, s := range a.AllocStates {
		if s.Field == AllocStateFieldClientStatus &&
			s.Value == AllocClientStatusUnknown {
			if lastUnknown.IsZero() || lastUnknown.Before(s.Time) {
				lastUnknown = s.Time
			}
		}
	}

	return lastUnknown.UTC()
}

// NeedsToReconnect returns true if the last known ClientStatus value is
// "unknown" and so the allocation did not reconnect yet.
func (a *Allocation) NeedsToReconnect() bool {
	disconnected := false

	// AllocStates are appended to the list and we only need the latest
	// ClientStatus transition, so traverse from the end until we find one.
	for i := len(a.AllocStates) - 1; i >= 0; i-- {
		s := a.AllocStates[i]
		if s.Field != AllocStateFieldClientStatus {
			continue
		}

		disconnected = s.Value == AllocClientStatusUnknown
		break
	}

	return disconnected
}

// ToIdentityClaims returns the workload identity JWT claims for this
// allocation. When job has a parent, the parent's ID is used as JobID.
func (a *Allocation) ToIdentityClaims(job *Job) *IdentityClaims {
	now := jwt.NewNumericDate(time.Now().UTC())
	claims := &IdentityClaims{
		Namespace:    a.Namespace,
		JobID:        a.JobID,
		AllocationID: a.ID,
		RegisteredClaims: jwt.RegisteredClaims{
			// TODO: in Nomad 1.5.0 we'll have a refresh loop to
			// prevent allocation identities from expiring before the
			// allocation is terminal. Once that's implemented, add an
			// ExpiresAt here ExpiresAt: &jwt.NumericDate{},
			NotBefore: now,
			IssuedAt:  now,
		},
	}
	if job != nil && job.ParentID != "" {
		claims.JobID = job.ParentID
	}
	return claims
}

// ToTaskIdentityClaims builds identity claims for a single task within this
// allocation by stamping the task name onto the allocation claims.
func (a *Allocation) ToTaskIdentityClaims(job *Job, taskName string) *IdentityClaims {
	claims := a.ToIdentityClaims(job)
	if claims != nil {
		claims.TaskName = taskName
	}
	return claims
}

// IdentityClaims are the input to a JWT identifying a workload. It
// should never be serialized to msgpack unsigned.
type IdentityClaims struct {
	Namespace    string `json:"nomad_namespace"`
	JobID        string `json:"nomad_job_id"`
	AllocationID string `json:"nomad_allocation_id"`
	TaskName     string `json:"nomad_task"`

	jwt.RegisteredClaims
}
// If you need a method defined on the allocation, consider defining it on
// AllocationDiff so the conversion between the two stays safe.
type AllocationDiff Allocation

// AllocListStub is used to return a subset of alloc information
type AllocListStub struct {
	ID                    string
	EvalID                string
	Name                  string
	Namespace             string
	NodeID                string
	NodeName              string
	JobID                 string
	JobType               string
	JobVersion            uint64
	TaskGroup             string
	AllocatedResources    *AllocatedResources `json:",omitempty"`
	DesiredStatus         string
	DesiredDescription    string
	ClientStatus          string
	ClientDescription     string
	DesiredTransition     DesiredTransition
	TaskStates            map[string]*TaskState
	DeploymentStatus      *AllocDeploymentStatus
	FollowupEvalID        string
	RescheduleTracker     *RescheduleTracker
	PreemptedAllocations  []string
	PreemptedByAllocation string
	CreateIndex           uint64
	ModifyIndex           uint64
	CreateTime            int64
	ModifyTime            int64
}

// SetEventDisplayMessages populates the display message if its not already
// set, a temporary fix to handle old allocations that don't have it. This
// method will be removed in a future release.
func (a *AllocListStub) SetEventDisplayMessages() {
	setDisplayMsg(a.TaskStates)
}

// setDisplayMsg fills in the human-readable display message for every task
// event in the given task states.
func setDisplayMsg(taskStates map[string]*TaskState) {
	for _, taskState := range taskStates {
		for _, event := range taskState.Events {
			event.PopulateEventDisplayMessage()
		}
	}
}

// AllocStubFields defines which fields are included in the AllocListStub.
type AllocStubFields struct {
	// Resources includes resource-related fields if true.
	Resources bool

	// TaskStates removes the TaskStates field if false (default is to
	// include TaskStates).
	TaskStates bool
}

// NewAllocStubFields returns the default field selection for alloc stubs.
func NewAllocStubFields() *AllocStubFields {
	return &AllocStubFields{
		// Maintain backward compatibility by retaining task states by
		// default.
		TaskStates: true,
	}
}

// AllocMetric is used to track various metrics while attempting
// to make an allocation. These are used to debug a job, or to better
// understand the pressure within the system.
type AllocMetric struct {
	// NodesEvaluated is the number of nodes that were evaluated
	NodesEvaluated int

	// NodesFiltered is the number of nodes filtered due to a constraint
	NodesFiltered int

	// NodesAvailable is the number of nodes available for evaluation per DC.
	NodesAvailable map[string]int

	// ClassFiltered is the number of nodes filtered by class
	ClassFiltered map[string]int

	// ConstraintFiltered is the number of failures caused by constraint
	ConstraintFiltered map[string]int

	// NodesExhausted is the number of nodes skipped due to being
	// exhausted of at least one resource
	NodesExhausted int

	// ClassExhausted is the number of nodes exhausted by class
	ClassExhausted map[string]int

	// DimensionExhausted provides the count by dimension or reason
	DimensionExhausted map[string]int

	// QuotaExhausted provides the exhausted dimensions
	QuotaExhausted []string

	// ResourcesExhausted provides the amount of resources exhausted by task
	// during the allocation placement
	ResourcesExhausted map[string]*Resources

	// Scores is the scores of the final few nodes remaining
	// for placement. The top score is typically selected.
	// Deprecated: Replaced by ScoreMetaData in Nomad 0.9
	Scores map[string]float64

	// ScoreMetaData is a slice of top scoring nodes displayed in the CLI
	ScoreMetaData []*NodeScoreMeta

	// nodeScoreMeta is used to keep scores for a single node id. It is cleared out after
	// we receive normalized score during the last step of the scoring stack.
	nodeScoreMeta *NodeScoreMeta

	// topScores is used to maintain a heap of the top K nodes with
	// the highest normalized score
	topScores *kheap.ScoreHeap

	// AllocationTime is a measure of how long the allocation
	// attempt took. This can affect performance and SLAs.
	AllocationTime time.Duration

	// CoalescedFailures indicates the number of other
	// allocations that were coalesced into this failed allocation.
	// This is to prevent creating many failed allocations for a
	// single task group.
	CoalescedFailures int
}

// Copy returns a deep copy of the metric; maps and slices are cloned while
// the unexported scoring scratch state is shared via the struct copy.
func (a *AllocMetric) Copy() *AllocMetric {
	if a == nil {
		return nil
	}
	na := new(AllocMetric)
	*na = *a
	na.NodesAvailable = maps.Clone(na.NodesAvailable)
	na.ClassFiltered = maps.Clone(na.ClassFiltered)
	na.ConstraintFiltered = maps.Clone(na.ConstraintFiltered)
	na.ClassExhausted = maps.Clone(na.ClassExhausted)
	na.DimensionExhausted = maps.Clone(na.DimensionExhausted)
	na.QuotaExhausted = slices.Clone(na.QuotaExhausted)
	na.Scores = maps.Clone(na.Scores)
	na.ScoreMetaData = CopySliceNodeScoreMeta(na.ScoreMetaData)
	return na
}

// EvaluateNode records that one more node was evaluated for placement.
func (a *AllocMetric) EvaluateNode() {
	a.NodesEvaluated += 1
}

// FilterNode records that a node was filtered out, bumping the per-class
// and per-constraint counters when that information is available.
func (a *AllocMetric) FilterNode(node *Node, constraint string) {
	a.NodesFiltered += 1
	if node != nil && node.NodeClass != "" {
		if a.ClassFiltered == nil {
			a.ClassFiltered = make(map[string]int)
		}
		a.ClassFiltered[node.NodeClass] += 1
	}
	if constraint != "" {
		if a.ConstraintFiltered == nil {
			a.ConstraintFiltered = make(map[string]int)
		}
		a.ConstraintFiltered[constraint] += 1
	}
}

// ExhaustedNode records that a node was skipped because a resource
// dimension was exhausted, bumping the per-class and per-dimension counters
// when that information is available.
func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) {
	a.NodesExhausted += 1
	if node != nil && node.NodeClass != "" {
		if a.ClassExhausted == nil {
			a.ClassExhausted = make(map[string]int)
		}
		a.ClassExhausted[node.NodeClass] += 1
	}
	if dimension != "" {
		if a.DimensionExhausted == nil {
			a.DimensionExhausted = make(map[string]int)
		}
		a.DimensionExhausted[dimension] += 1
	}
}

// ExhaustQuota appends the given exhausted quota dimensions to the metric.
func (a *AllocMetric) ExhaustQuota(dimensions []string) {
	if a.QuotaExhausted == nil {
		a.QuotaExhausted = make([]string, 0, len(dimensions))
	}

	a.QuotaExhausted = append(a.QuotaExhausted, dimensions...)
}

// ExhaustResources updates the amount of resources exhausted for the
// allocation because of the given task group.
10746 func (a *AllocMetric) ExhaustResources(tg *TaskGroup) { 10747 if a.DimensionExhausted == nil { 10748 return 10749 } 10750 10751 if a.ResourcesExhausted == nil { 10752 a.ResourcesExhausted = make(map[string]*Resources) 10753 } 10754 10755 for _, t := range tg.Tasks { 10756 exhaustedResources := a.ResourcesExhausted[t.Name] 10757 if exhaustedResources == nil { 10758 exhaustedResources = &Resources{} 10759 } 10760 10761 if a.DimensionExhausted["memory"] > 0 { 10762 exhaustedResources.MemoryMB += t.Resources.MemoryMB 10763 } 10764 10765 if a.DimensionExhausted["cpu"] > 0 { 10766 exhaustedResources.CPU += t.Resources.CPU 10767 } 10768 10769 a.ResourcesExhausted[t.Name] = exhaustedResources 10770 } 10771 } 10772 10773 // ScoreNode is used to gather top K scoring nodes in a heap 10774 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 10775 // Create nodeScoreMeta lazily if its the first time or if its a new node 10776 if a.nodeScoreMeta == nil || a.nodeScoreMeta.NodeID != node.ID { 10777 a.nodeScoreMeta = &NodeScoreMeta{ 10778 NodeID: node.ID, 10779 Scores: make(map[string]float64), 10780 } 10781 } 10782 if name == NormScorerName { 10783 a.nodeScoreMeta.NormScore = score 10784 // Once we have the normalized score we can push to the heap 10785 // that tracks top K by normalized score 10786 10787 // Create the heap if its not there already 10788 if a.topScores == nil { 10789 a.topScores = kheap.NewScoreHeap(MaxRetainedNodeScores) 10790 } 10791 heap.Push(a.topScores, a.nodeScoreMeta) 10792 10793 // Clear out this entry because its now in the heap 10794 a.nodeScoreMeta = nil 10795 } else { 10796 a.nodeScoreMeta.Scores[name] = score 10797 } 10798 } 10799 10800 // PopulateScoreMetaData populates a map of scorer to scoring metadata 10801 // The map is populated by popping elements from a heap of top K scores 10802 // maintained per scorer 10803 func (a *AllocMetric) PopulateScoreMetaData() { 10804 if a.topScores == nil { 10805 return 10806 } 10807 
10808 if a.ScoreMetaData == nil { 10809 a.ScoreMetaData = make([]*NodeScoreMeta, a.topScores.Len()) 10810 } 10811 heapItems := a.topScores.GetItemsReverse() 10812 for i, item := range heapItems { 10813 a.ScoreMetaData[i] = item.(*NodeScoreMeta) 10814 } 10815 } 10816 10817 // MaxNormScore returns the ScoreMetaData entry with the highest normalized 10818 // score. 10819 func (a *AllocMetric) MaxNormScore() *NodeScoreMeta { 10820 if a == nil || len(a.ScoreMetaData) == 0 { 10821 return nil 10822 } 10823 return a.ScoreMetaData[0] 10824 } 10825 10826 // NodeScoreMeta captures scoring meta data derived from 10827 // different scoring factors. 10828 type NodeScoreMeta struct { 10829 NodeID string 10830 Scores map[string]float64 10831 NormScore float64 10832 } 10833 10834 func (s *NodeScoreMeta) Copy() *NodeScoreMeta { 10835 if s == nil { 10836 return nil 10837 } 10838 ns := new(NodeScoreMeta) 10839 *ns = *s 10840 return ns 10841 } 10842 10843 func (s *NodeScoreMeta) String() string { 10844 return fmt.Sprintf("%s %f %v", s.NodeID, s.NormScore, s.Scores) 10845 } 10846 10847 func (s *NodeScoreMeta) Score() float64 { 10848 return s.NormScore 10849 } 10850 10851 func (s *NodeScoreMeta) Data() interface{} { 10852 return s 10853 } 10854 10855 // AllocNetworkStatus captures the status of an allocation's network during runtime. 10856 // Depending on the network mode, an allocation's address may need to be known to other 10857 // systems in Nomad such as service registration. 10858 type AllocNetworkStatus struct { 10859 InterfaceName string 10860 Address string 10861 DNS *DNSConfig 10862 } 10863 10864 func (a *AllocNetworkStatus) Copy() *AllocNetworkStatus { 10865 if a == nil { 10866 return nil 10867 } 10868 return &AllocNetworkStatus{ 10869 InterfaceName: a.InterfaceName, 10870 Address: a.Address, 10871 DNS: a.DNS.Copy(), 10872 } 10873 } 10874 10875 // NetworkStatus is an interface satisfied by alloc runner, for acquiring the 10876 // network status of an allocation. 
type NetworkStatus interface {
	NetworkStatus() *AllocNetworkStatus
}

// AllocDeploymentStatus captures the status of the allocation as part of the
// deployment. This can include things like if the allocation has been marked as
// healthy.
type AllocDeploymentStatus struct {
	// Healthy marks whether the allocation has been marked healthy or unhealthy
	// as part of a deployment. It can be unset if it has neither been marked
	// healthy or unhealthy.
	Healthy *bool

	// Timestamp is the time at which the health status was set.
	Timestamp time.Time

	// Canary marks whether the allocation is a canary or not. A canary that has
	// been promoted will have this field set to false.
	Canary bool

	// ModifyIndex is the raft index in which the deployment status was last
	// changed.
	ModifyIndex uint64
}

// HasHealth returns true if the allocation has its health set.
func (a *AllocDeploymentStatus) HasHealth() bool {
	return a != nil && a.Healthy != nil
}

// IsHealthy returns if the allocation is marked as healthy as part of a
// deployment
func (a *AllocDeploymentStatus) IsHealthy() bool {
	if a == nil {
		return false
	}

	return a.Healthy != nil && *a.Healthy
}

// IsUnhealthy returns if the allocation is marked as unhealthy as part of a
// deployment
func (a *AllocDeploymentStatus) IsUnhealthy() bool {
	if a == nil {
		return false
	}

	return a.Healthy != nil && !*a.Healthy
}

// IsCanary returns if the allocation is marked as a canary
func (a *AllocDeploymentStatus) IsCanary() bool {
	if a == nil {
		return false
	}

	return a.Canary
}

// Copy returns a deep copy of the deployment status, cloning the Healthy
// pointer so the copy can be mutated independently.
func (a *AllocDeploymentStatus) Copy() *AllocDeploymentStatus {
	if a == nil {
		return nil
	}

	c := new(AllocDeploymentStatus)
	*c = *a

	if a.Healthy != nil {
		c.Healthy = pointer.Of(*a.Healthy)
	}

	return c
}

// Evaluation status values.
const (
	EvalStatusBlocked   = "blocked"
	EvalStatusPending   = "pending"
	EvalStatusComplete  = "complete"
	EvalStatusFailed    = "failed"
	EvalStatusCancelled = "canceled"
)

// Evaluation trigger reasons, recorded in Evaluation.TriggeredBy.
const (
	EvalTriggerJobRegister          = "job-register"
	EvalTriggerJobDeregister        = "job-deregister"
	EvalTriggerPeriodicJob          = "periodic-job"
	EvalTriggerNodeDrain            = "node-drain"
	EvalTriggerNodeUpdate           = "node-update"
	EvalTriggerAllocStop            = "alloc-stop"
	EvalTriggerScheduled            = "scheduled"
	EvalTriggerRollingUpdate        = "rolling-update"
	EvalTriggerDeploymentWatcher    = "deployment-watcher"
	EvalTriggerFailedFollowUp       = "failed-follow-up"
	EvalTriggerMaxPlans             = "max-plan-attempts"
	EvalTriggerRetryFailedAlloc     = "alloc-failure"
	EvalTriggerQueuedAllocs         = "queued-allocs"
	EvalTriggerPreemption           = "preemption"
	EvalTriggerScaling              = "job-scaling"
	EvalTriggerMaxDisconnectTimeout = "max-disconnect-timeout"
	EvalTriggerReconnect            = "reconnect"
)

const (
	// CoreJobEvalGC is used for the garbage collection of evaluations
	// and allocations. We periodically scan evaluations in a terminal state,
	// in which all the corresponding allocations are also terminal. We
	// delete these out of the system to bound the state.
	CoreJobEvalGC = "eval-gc"

	// CoreJobNodeGC is used for the garbage collection of failed nodes.
	// We periodically scan nodes in a terminal state, and if they have no
	// corresponding allocations we delete these out of the system.
	CoreJobNodeGC = "node-gc"

	// CoreJobJobGC is used for the garbage collection of eligible jobs. We
	// periodically scan garbage collectible jobs and check if both their
	// evaluations and allocations are terminal. If so, we delete these out of
	// the system.
	CoreJobJobGC = "job-gc"

	// CoreJobDeploymentGC is used for the garbage collection of eligible
	// deployments. We periodically scan garbage collectible deployments and
	// check if they are terminal. If so, we delete these out of the system.
	CoreJobDeploymentGC = "deployment-gc"

	// CoreJobCSIVolumeClaimGC is used for the garbage collection of CSI
	// volume claims. We periodically scan volumes to see if no allocs are
	// claiming them. If so, we unclaim the volume.
	CoreJobCSIVolumeClaimGC = "csi-volume-claim-gc"

	// CoreJobCSIPluginGC is used for the garbage collection of CSI plugins.
	// We periodically scan plugins to see if they have no associated volumes
	// or allocs running them. If so, we delete the plugin.
	CoreJobCSIPluginGC = "csi-plugin-gc"

	// CoreJobOneTimeTokenGC is used for the garbage collection of one-time
	// tokens. We periodically scan for expired tokens and delete them.
	CoreJobOneTimeTokenGC = "one-time-token-gc"

	// CoreJobLocalTokenExpiredGC is used for the garbage collection of
	// expired local ACL tokens. We periodically scan for expired tokens and
	// delete them.
	CoreJobLocalTokenExpiredGC = "local-token-expired-gc"

	// CoreJobGlobalTokenExpiredGC is used for the garbage collection of
	// expired global ACL tokens. We periodically scan for expired tokens and
	// delete them.
	CoreJobGlobalTokenExpiredGC = "global-token-expired-gc"

	// CoreJobRootKeyRotateOrGC is used for periodic key rotation and
	// garbage collection of unused encryption keys.
	CoreJobRootKeyRotateOrGC = "root-key-rotate-gc"

	// CoreJobVariablesRekey is used to fully rotate the encryption keys for
	// variables by decrypting all variables and re-encrypting them with the
	// active key
	CoreJobVariablesRekey = "variables-rekey"

	// CoreJobForceGC is used to force garbage collection of all GCable objects.
	CoreJobForceGC = "force-gc"
)

// Evaluation is used anytime we need to apply business logic as a result
// of a change to our desired state (job specification) or the emergent state
// (registered nodes). When the inputs change, we need to "evaluate" them,
// potentially taking action (allocation of work) or doing nothing if the state
// of the world does not require it.
type Evaluation struct {
	// msgpack omit empty fields during serialization
	_struct bool `codec:",omitempty"` // nolint: structcheck

	// ID is a randomly generated UUID used for this evaluation. This
	// is assigned upon the creation of the evaluation.
	ID string

	// Namespace is the namespace the evaluation is created in
	Namespace string

	// Priority is used to control scheduling importance and if this job
	// can preempt other jobs.
	Priority int

	// Type is used to control which schedulers are available to handle
	// this evaluation.
	Type string

	// TriggeredBy is used to give some insight into why this Eval
	// was created. (Job change, node failure, alloc failure, etc).
	TriggeredBy string

	// JobID is the job this evaluation is scoped to. Evaluations cannot
	// be run in parallel for a given JobID, so we serialize on this.
	JobID string

	// JobModifyIndex is the modify index of the job at the time
	// the evaluation was created
	JobModifyIndex uint64

	// NodeID is the node that was affected triggering the evaluation.
	NodeID string

	// NodeModifyIndex is the modify index of the node at the time
	// the evaluation was created
	NodeModifyIndex uint64

	// DeploymentID is the ID of the deployment that triggered the evaluation.
	DeploymentID string

	// Status of the evaluation
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Wait is a minimum wait time for running the eval. This is used to
	// support a rolling upgrade in versions prior to 0.7.0
	// Deprecated
	Wait time.Duration

	// WaitUntil is the time when this eval should be run. This is used to
	// supported delayed rescheduling of failed allocations, and delayed
	// stopping of allocations that are configured with max_client_disconnect.
	WaitUntil time.Time

	// NextEval is the evaluation ID for the eval created to do a followup.
	// This is used to support rolling upgrades and failed-follow-up evals, where
	// we need a chain of evaluations.
	NextEval string

	// PreviousEval is the evaluation ID for the eval creating this one to do a followup.
	// This is used to support rolling upgrades and failed-follow-up evals, where
	// we need a chain of evaluations.
	PreviousEval string

	// BlockedEval is the evaluation ID for a created blocked eval. A
	// blocked eval will be created if all allocations could not be placed due
	// to constraints or lacking resources.
	BlockedEval string

	// RelatedEvals is a list of all the evaluations that are related (next,
	// previous, or blocked) to this one. It may be nil if not requested.
	RelatedEvals []*EvaluationStub

	// FailedTGAllocs are task groups which have allocations that could not be
	// made, but the metrics are persisted so that the user can use the feedback
	// to determine the cause.
	FailedTGAllocs map[string]*AllocMetric

	// ClassEligibility tracks computed node classes that have been explicitly
	// marked as eligible or ineligible.
	ClassEligibility map[string]bool

	// QuotaLimitReached marks whether a quota limit was reached for the
	// evaluation.
	QuotaLimitReached string

	// EscapedComputedClass marks whether the job has constraints that are not
	// captured by computed node classes.
	EscapedComputedClass bool

	// AnnotatePlan triggers the scheduler to provide additional annotations
	// during the evaluation. This should not be set during normal operations.
	AnnotatePlan bool

	// QueuedAllocations is the number of unplaced allocations at the time the
	// evaluation was processed. The map is keyed by Task Group names.
	QueuedAllocations map[string]int

	// LeaderACL provides the ACL token used when issuing RPCs back to the
	// leader. This will be a valid management token as long as the leader is
	// active. This should not ever be exposed via the API.
	LeaderACL string

	// SnapshotIndex is the Raft index of the snapshot used to process the
	// evaluation. The index will either be set when it has gone through the
	// scheduler or if a blocked evaluation is being created. The index is set
	// in this case so we can determine if an early unblocking is required since
	// capacity has changed since the evaluation was created. This can result in
	// the SnapshotIndex being less than the CreateIndex.
	SnapshotIndex uint64

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64

	CreateTime int64
	ModifyTime int64
}

// EvaluationStub is a subset of Evaluation returned by list endpoints.
type EvaluationStub struct {
	ID                string
	Namespace         string
	Priority          int
	Type              string
	TriggeredBy       string
	JobID             string
	NodeID            string
	DeploymentID      string
	Status            string
	StatusDescription string
	WaitUntil         time.Time
	NextEval          string
	PreviousEval      string
	BlockedEval       string
	CreateIndex       uint64
	ModifyIndex       uint64
	CreateTime        int64
	ModifyTime        int64
}

// GetID implements the IDGetter interface, required for pagination.
func (e *Evaluation) GetID() string {
	if e == nil {
		return ""
	}
	return e.ID
}

// GetNamespace implements the NamespaceGetter interface, required for pagination.
func (e *Evaluation) GetNamespace() string {
	if e == nil {
		return ""
	}
	return e.Namespace
}

// GetCreateIndex implements the CreateIndexGetter interface, required for
// pagination.
11205 func (e *Evaluation) GetCreateIndex() uint64 { 11206 if e == nil { 11207 return 0 11208 } 11209 return e.CreateIndex 11210 } 11211 11212 // TerminalStatus returns if the current status is terminal and 11213 // will no longer transition. 11214 func (e *Evaluation) TerminalStatus() bool { 11215 switch e.Status { 11216 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 11217 return true 11218 default: 11219 return false 11220 } 11221 } 11222 11223 func (e *Evaluation) GoString() string { 11224 return fmt.Sprintf("<Eval %q JobID: %q Namespace: %q>", e.ID, e.JobID, e.Namespace) 11225 } 11226 11227 func (e *Evaluation) RelatedIDs() []string { 11228 if e == nil { 11229 return nil 11230 } 11231 11232 ids := []string{e.NextEval, e.PreviousEval, e.BlockedEval} 11233 related := make([]string, 0, len(ids)) 11234 11235 for _, id := range ids { 11236 if id != "" { 11237 related = append(related, id) 11238 } 11239 } 11240 11241 return related 11242 } 11243 11244 func (e *Evaluation) Stub() *EvaluationStub { 11245 if e == nil { 11246 return nil 11247 } 11248 11249 return &EvaluationStub{ 11250 ID: e.ID, 11251 Namespace: e.Namespace, 11252 Priority: e.Priority, 11253 Type: e.Type, 11254 TriggeredBy: e.TriggeredBy, 11255 JobID: e.JobID, 11256 NodeID: e.NodeID, 11257 DeploymentID: e.DeploymentID, 11258 Status: e.Status, 11259 StatusDescription: e.StatusDescription, 11260 WaitUntil: e.WaitUntil, 11261 NextEval: e.NextEval, 11262 PreviousEval: e.PreviousEval, 11263 BlockedEval: e.BlockedEval, 11264 CreateIndex: e.CreateIndex, 11265 ModifyIndex: e.ModifyIndex, 11266 CreateTime: e.CreateTime, 11267 ModifyTime: e.ModifyTime, 11268 } 11269 } 11270 11271 func (e *Evaluation) Copy() *Evaluation { 11272 if e == nil { 11273 return nil 11274 } 11275 ne := new(Evaluation) 11276 *ne = *e 11277 11278 // Copy ClassEligibility 11279 if e.ClassEligibility != nil { 11280 classes := make(map[string]bool, len(e.ClassEligibility)) 11281 for class, elig := range e.ClassEligibility { 
11282 classes[class] = elig 11283 } 11284 ne.ClassEligibility = classes 11285 } 11286 11287 // Copy FailedTGAllocs 11288 if e.FailedTGAllocs != nil { 11289 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 11290 for tg, metric := range e.FailedTGAllocs { 11291 failedTGs[tg] = metric.Copy() 11292 } 11293 ne.FailedTGAllocs = failedTGs 11294 } 11295 11296 // Copy queued allocations 11297 if e.QueuedAllocations != nil { 11298 queuedAllocations := make(map[string]int, len(e.QueuedAllocations)) 11299 for tg, num := range e.QueuedAllocations { 11300 queuedAllocations[tg] = num 11301 } 11302 ne.QueuedAllocations = queuedAllocations 11303 } 11304 11305 return ne 11306 } 11307 11308 // ShouldEnqueue checks if a given evaluation should be enqueued into the 11309 // eval_broker 11310 func (e *Evaluation) ShouldEnqueue() bool { 11311 switch e.Status { 11312 case EvalStatusPending: 11313 return true 11314 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 11315 return false 11316 default: 11317 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 11318 } 11319 } 11320 11321 // ShouldBlock checks if a given evaluation should be entered into the blocked 11322 // eval tracker. 
11323 func (e *Evaluation) ShouldBlock() bool { 11324 switch e.Status { 11325 case EvalStatusBlocked: 11326 return true 11327 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 11328 return false 11329 default: 11330 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 11331 } 11332 } 11333 11334 // MakePlan is used to make a plan from the given evaluation 11335 // for a given Job 11336 func (e *Evaluation) MakePlan(j *Job) *Plan { 11337 p := &Plan{ 11338 EvalID: e.ID, 11339 Priority: e.Priority, 11340 Job: j, 11341 NodeUpdate: make(map[string][]*Allocation), 11342 NodeAllocation: make(map[string][]*Allocation), 11343 NodePreemptions: make(map[string][]*Allocation), 11344 } 11345 if j != nil { 11346 p.AllAtOnce = j.AllAtOnce 11347 } 11348 return p 11349 } 11350 11351 // NextRollingEval creates an evaluation to followup this eval for rolling updates 11352 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 11353 now := time.Now().UTC().UnixNano() 11354 return &Evaluation{ 11355 ID: uuid.Generate(), 11356 Namespace: e.Namespace, 11357 Priority: e.Priority, 11358 Type: e.Type, 11359 TriggeredBy: EvalTriggerRollingUpdate, 11360 JobID: e.JobID, 11361 JobModifyIndex: e.JobModifyIndex, 11362 Status: EvalStatusPending, 11363 Wait: wait, 11364 PreviousEval: e.ID, 11365 CreateTime: now, 11366 ModifyTime: now, 11367 } 11368 } 11369 11370 // CreateBlockedEval creates a blocked evaluation to followup this eval to place any 11371 // failed allocations. It takes the classes marked explicitly eligible or 11372 // ineligible, whether the job has escaped computed node classes and whether the 11373 // quota limit was reached. 
11374 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, 11375 escaped bool, quotaReached string, failedTGAllocs map[string]*AllocMetric) *Evaluation { 11376 now := time.Now().UTC().UnixNano() 11377 return &Evaluation{ 11378 ID: uuid.Generate(), 11379 Namespace: e.Namespace, 11380 Priority: e.Priority, 11381 Type: e.Type, 11382 TriggeredBy: EvalTriggerQueuedAllocs, 11383 JobID: e.JobID, 11384 JobModifyIndex: e.JobModifyIndex, 11385 Status: EvalStatusBlocked, 11386 PreviousEval: e.ID, 11387 FailedTGAllocs: failedTGAllocs, 11388 ClassEligibility: classEligibility, 11389 EscapedComputedClass: escaped, 11390 QuotaLimitReached: quotaReached, 11391 CreateTime: now, 11392 ModifyTime: now, 11393 } 11394 } 11395 11396 // CreateFailedFollowUpEval creates a follow up evaluation when the current one 11397 // has been marked as failed because it has hit the delivery limit and will not 11398 // be retried by the eval_broker. Callers should copy the created eval's ID to 11399 // into the old eval's NextEval field. 11400 func (e *Evaluation) CreateFailedFollowUpEval(wait time.Duration) *Evaluation { 11401 now := time.Now().UTC().UnixNano() 11402 return &Evaluation{ 11403 ID: uuid.Generate(), 11404 Namespace: e.Namespace, 11405 Priority: e.Priority, 11406 Type: e.Type, 11407 TriggeredBy: EvalTriggerFailedFollowUp, 11408 JobID: e.JobID, 11409 JobModifyIndex: e.JobModifyIndex, 11410 Status: EvalStatusPending, 11411 Wait: wait, 11412 PreviousEval: e.ID, 11413 CreateTime: now, 11414 ModifyTime: now, 11415 } 11416 } 11417 11418 // UpdateModifyTime takes into account that clocks on different servers may be 11419 // slightly out of sync. Even in case of a leader change, this method will 11420 // guarantee that ModifyTime will always be after CreateTime. 
11421 func (e *Evaluation) UpdateModifyTime() { 11422 now := time.Now().UTC().UnixNano() 11423 if now <= e.CreateTime { 11424 e.ModifyTime = e.CreateTime + 1 11425 } else { 11426 e.ModifyTime = now 11427 } 11428 } 11429 11430 // Plan is used to submit a commit plan for task allocations. These 11431 // are submitted to the leader which verifies that resources have 11432 // not been overcommitted before admitting the plan. 11433 type Plan struct { 11434 // msgpack omit empty fields during serialization 11435 _struct bool `codec:",omitempty"` // nolint: structcheck 11436 11437 // EvalID is the evaluation ID this plan is associated with 11438 EvalID string 11439 11440 // EvalToken is used to prevent a split-brain processing of 11441 // an evaluation. There should only be a single scheduler running 11442 // an Eval at a time, but this could be violated after a leadership 11443 // transition. This unique token is used to reject plans that are 11444 // being submitted from a different leader. 11445 EvalToken string 11446 11447 // Priority is the priority of the upstream job 11448 Priority int 11449 11450 // AllAtOnce is used to control if incremental scheduling of task groups 11451 // is allowed or if we must do a gang scheduling of the entire job. 11452 // If this is false, a plan may be partially applied. Otherwise, the 11453 // entire plan must be able to make progress. 11454 AllAtOnce bool 11455 11456 // Job is the parent job of all the allocations in the Plan. 11457 // Since a Plan only involves a single Job, we can reduce the size 11458 // of the plan by only including it once. 11459 Job *Job 11460 11461 // NodeUpdate contains all the allocations to be stopped or evicted for 11462 // each node. 11463 NodeUpdate map[string][]*Allocation 11464 11465 // NodeAllocation contains all the allocations for each node. 11466 // The evicts must be considered prior to the allocations. 
11467 NodeAllocation map[string][]*Allocation 11468 11469 // Annotations contains annotations by the scheduler to be used by operators 11470 // to understand the decisions made by the scheduler. 11471 Annotations *PlanAnnotations 11472 11473 // Deployment is the deployment created or updated by the scheduler that 11474 // should be applied by the planner. 11475 Deployment *Deployment 11476 11477 // DeploymentUpdates is a set of status updates to apply to the given 11478 // deployments. This allows the scheduler to cancel any unneeded deployment 11479 // because the job is stopped or the update block is removed. 11480 DeploymentUpdates []*DeploymentStatusUpdate 11481 11482 // NodePreemptions is a map from node id to a set of allocations from other 11483 // lower priority jobs that are preempted. Preempted allocations are marked 11484 // as evicted. 11485 NodePreemptions map[string][]*Allocation 11486 11487 // SnapshotIndex is the Raft index of the snapshot used to create the 11488 // Plan. The leader will wait to evaluate the plan until its StateStore 11489 // has reached at least this index. 
11490 SnapshotIndex uint64 11491 } 11492 11493 func (p *Plan) GoString() string { 11494 out := fmt.Sprintf("(eval %s", p.EvalID[:8]) 11495 if p.Job != nil { 11496 out += fmt.Sprintf(", job %s", p.Job.ID) 11497 } 11498 if p.Deployment != nil { 11499 out += fmt.Sprintf(", deploy %s", p.Deployment.ID[:8]) 11500 } 11501 if len(p.NodeUpdate) > 0 { 11502 out += ", NodeUpdates: " 11503 for node, allocs := range p.NodeUpdate { 11504 out += fmt.Sprintf("(node[%s]", node[:8]) 11505 for _, alloc := range allocs { 11506 out += fmt.Sprintf(" (%s stop/evict)", alloc.ID[:8]) 11507 } 11508 out += ")" 11509 } 11510 } 11511 if len(p.NodeAllocation) > 0 { 11512 out += ", NodeAllocations: " 11513 for node, allocs := range p.NodeAllocation { 11514 out += fmt.Sprintf("(node[%s]", node[:8]) 11515 for _, alloc := range allocs { 11516 out += fmt.Sprintf(" (%s %s %s)", 11517 alloc.ID[:8], alloc.Name, alloc.DesiredStatus, 11518 ) 11519 } 11520 out += ")" 11521 } 11522 } 11523 if len(p.NodePreemptions) > 0 { 11524 out += ", NodePreemptions: " 11525 for node, allocs := range p.NodePreemptions { 11526 out += fmt.Sprintf("(node[%s]", node[:8]) 11527 for _, alloc := range allocs { 11528 out += fmt.Sprintf(" (%s %s %s)", 11529 alloc.ID[:8], alloc.Name, alloc.DesiredStatus, 11530 ) 11531 } 11532 out += ")" 11533 } 11534 } 11535 if len(p.DeploymentUpdates) > 0 { 11536 out += ", DeploymentUpdates: " 11537 for _, dupdate := range p.DeploymentUpdates { 11538 out += fmt.Sprintf("(%s %s)", 11539 dupdate.DeploymentID[:8], dupdate.Status) 11540 } 11541 } 11542 if p.Annotations != nil { 11543 out += ", Annotations: " 11544 for tg, updates := range p.Annotations.DesiredTGUpdates { 11545 out += fmt.Sprintf("(update[%s] %v)", tg, updates) 11546 } 11547 for _, preempted := range p.Annotations.PreemptedAllocs { 11548 out += fmt.Sprintf("(preempt %s)", preempted.ID[:8]) 11549 } 11550 } 11551 11552 out += ")" 11553 return out 11554 } 11555 11556 // AppendStoppedAlloc marks an allocation to be stopped. 
The clientStatus of the 11557 // allocation may be optionally set by passing in a non-empty value. 11558 func (p *Plan) AppendStoppedAlloc(alloc *Allocation, desiredDesc, clientStatus, followupEvalID string) { 11559 newAlloc := new(Allocation) 11560 *newAlloc = *alloc 11561 11562 // If the job is not set in the plan we are deregistering a job so we 11563 // extract the job from the allocation. 11564 if p.Job == nil && newAlloc.Job != nil { 11565 p.Job = newAlloc.Job 11566 } 11567 11568 // Normalize the job 11569 newAlloc.Job = nil 11570 11571 // Strip the resources as it can be rebuilt. 11572 newAlloc.Resources = nil 11573 11574 newAlloc.DesiredStatus = AllocDesiredStatusStop 11575 newAlloc.DesiredDescription = desiredDesc 11576 11577 if clientStatus != "" { 11578 newAlloc.ClientStatus = clientStatus 11579 } 11580 11581 newAlloc.AppendState(AllocStateFieldClientStatus, clientStatus) 11582 11583 if followupEvalID != "" { 11584 newAlloc.FollowupEvalID = followupEvalID 11585 } 11586 11587 node := alloc.NodeID 11588 existing := p.NodeUpdate[node] 11589 p.NodeUpdate[node] = append(existing, newAlloc) 11590 } 11591 11592 // AppendPreemptedAlloc is used to append an allocation that's being preempted to the plan. 
11593 // To minimize the size of the plan, this only sets a minimal set of fields in the allocation 11594 func (p *Plan) AppendPreemptedAlloc(alloc *Allocation, preemptingAllocID string) { 11595 newAlloc := &Allocation{} 11596 newAlloc.ID = alloc.ID 11597 newAlloc.JobID = alloc.JobID 11598 newAlloc.Namespace = alloc.Namespace 11599 newAlloc.DesiredStatus = AllocDesiredStatusEvict 11600 newAlloc.PreemptedByAllocation = preemptingAllocID 11601 11602 desiredDesc := fmt.Sprintf("Preempted by alloc ID %v", preemptingAllocID) 11603 newAlloc.DesiredDescription = desiredDesc 11604 11605 // TaskResources are needed by the plan applier to check if allocations fit 11606 // after removing preempted allocations 11607 if alloc.AllocatedResources != nil { 11608 newAlloc.AllocatedResources = alloc.AllocatedResources 11609 } else { 11610 // COMPAT Remove in version 0.11 11611 newAlloc.TaskResources = alloc.TaskResources 11612 newAlloc.SharedResources = alloc.SharedResources 11613 } 11614 11615 // Append this alloc to slice for this node 11616 node := alloc.NodeID 11617 existing := p.NodePreemptions[node] 11618 p.NodePreemptions[node] = append(existing, newAlloc) 11619 } 11620 11621 // AppendUnknownAlloc marks an allocation as unknown. 11622 func (p *Plan) AppendUnknownAlloc(alloc *Allocation) { 11623 // Strip the resources as they can be rebuilt. 11624 alloc.Resources = nil 11625 11626 existing := p.NodeAllocation[alloc.NodeID] 11627 p.NodeAllocation[alloc.NodeID] = append(existing, alloc) 11628 } 11629 11630 func (p *Plan) PopUpdate(alloc *Allocation) { 11631 existing := p.NodeUpdate[alloc.NodeID] 11632 n := len(existing) 11633 if n > 0 && existing[n-1].ID == alloc.ID { 11634 existing = existing[:n-1] 11635 if len(existing) > 0 { 11636 p.NodeUpdate[alloc.NodeID] = existing 11637 } else { 11638 delete(p.NodeUpdate, alloc.NodeID) 11639 } 11640 } 11641 } 11642 11643 // AppendAlloc appends the alloc to the plan allocations. 
11644 // Uses the passed job if explicitly passed, otherwise 11645 // it is assumed the alloc will use the plan Job version. 11646 func (p *Plan) AppendAlloc(alloc *Allocation, job *Job) { 11647 node := alloc.NodeID 11648 existing := p.NodeAllocation[node] 11649 11650 alloc.Job = job 11651 11652 p.NodeAllocation[node] = append(existing, alloc) 11653 } 11654 11655 // IsNoOp checks if this plan would do nothing 11656 func (p *Plan) IsNoOp() bool { 11657 return len(p.NodeUpdate) == 0 && 11658 len(p.NodeAllocation) == 0 && 11659 p.Deployment == nil && 11660 len(p.DeploymentUpdates) == 0 11661 } 11662 11663 // NormalizeAllocations normalizes allocations to remove fields that can 11664 // be fetched from the MemDB instead of sending over the wire 11665 func (p *Plan) NormalizeAllocations() { 11666 for _, allocs := range p.NodeUpdate { 11667 for i, alloc := range allocs { 11668 allocs[i] = &Allocation{ 11669 ID: alloc.ID, 11670 DesiredDescription: alloc.DesiredDescription, 11671 ClientStatus: alloc.ClientStatus, 11672 FollowupEvalID: alloc.FollowupEvalID, 11673 } 11674 } 11675 } 11676 11677 for _, allocs := range p.NodePreemptions { 11678 for i, alloc := range allocs { 11679 allocs[i] = &Allocation{ 11680 ID: alloc.ID, 11681 PreemptedByAllocation: alloc.PreemptedByAllocation, 11682 } 11683 } 11684 } 11685 } 11686 11687 // PlanResult is the result of a plan submitted to the leader. 11688 type PlanResult struct { 11689 // NodeUpdate contains all the evictions and stops that were committed. 11690 NodeUpdate map[string][]*Allocation 11691 11692 // NodeAllocation contains all the allocations that were committed. 11693 NodeAllocation map[string][]*Allocation 11694 11695 // Deployment is the deployment that was committed. 11696 Deployment *Deployment 11697 11698 // DeploymentUpdates is the set of deployment updates that were committed. 
11699 DeploymentUpdates []*DeploymentStatusUpdate 11700 11701 // NodePreemptions is a map from node id to a set of allocations from other 11702 // lower priority jobs that are preempted. Preempted allocations are marked 11703 // as stopped. 11704 NodePreemptions map[string][]*Allocation 11705 11706 // RejectedNodes are nodes the scheduler worker has rejected placements for 11707 // and should be considered for ineligibility by the plan applier to avoid 11708 // retrying them repeatedly. 11709 RejectedNodes []string 11710 11711 // IneligibleNodes are nodes the plan applier has repeatedly rejected 11712 // placements for and should therefore be considered ineligible by workers 11713 // to avoid retrying them repeatedly. 11714 IneligibleNodes []string 11715 11716 // RefreshIndex is the index the worker should refresh state up to. 11717 // This allows all evictions and allocations to be materialized. 11718 // If any allocations were rejected due to stale data (node state, 11719 // over committed) this can be used to force a worker refresh. 11720 RefreshIndex uint64 11721 11722 // AllocIndex is the Raft index in which the evictions and 11723 // allocations took place. This is used for the write index. 11724 AllocIndex uint64 11725 } 11726 11727 // IsNoOp checks if this plan result would do nothing 11728 func (p *PlanResult) IsNoOp() bool { 11729 return len(p.IneligibleNodes) == 0 && len(p.NodeUpdate) == 0 && 11730 len(p.NodeAllocation) == 0 && len(p.DeploymentUpdates) == 0 && 11731 p.Deployment == nil 11732 } 11733 11734 // FullCommit is used to check if all the allocations in a plan 11735 // were committed as part of the result. Returns if there was 11736 // a match, and the number of expected and actual allocations. 
// FullCommit compares the allocations the plan requested per node against the
// allocations this result actually committed.
func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) {
	expected := 0
	actual := 0
	for name, allocList := range plan.NodeAllocation {
		// Tally requested vs. committed counts for this node.
		didAlloc := p.NodeAllocation[name]
		expected += len(allocList)
		actual += len(didAlloc)
	}
	return actual == expected, expected, actual
}

// PlanAnnotations holds annotations made by the scheduler to give further debug
// information to operators.
type PlanAnnotations struct {
	// DesiredTGUpdates is the set of desired updates per task group.
	DesiredTGUpdates map[string]*DesiredUpdates

	// PreemptedAllocs is the set of allocations to be preempted to make the placement successful.
	PreemptedAllocs []*AllocListStub
}

// DesiredUpdates is the set of changes the scheduler would like to make given
// sufficient resources and cluster capacity.
type DesiredUpdates struct {
	Ignore            uint64
	Place             uint64
	Migrate           uint64
	Stop              uint64
	InPlaceUpdate     uint64
	DestructiveUpdate uint64
	Canary            uint64
	Preemptions       uint64
}

// GoString returns a compact summary of the desired updates for debugging.
func (d *DesiredUpdates) GoString() string {
	return fmt.Sprintf("(place %d) (inplace %d) (destructive %d) (stop %d) (migrate %d) (ignore %d) (canary %d)",
		d.Place, d.InPlaceUpdate, d.DestructiveUpdate, d.Stop, d.Migrate, d.Ignore, d.Canary)
}

// MsgpackHandle is a shared handle for encoding/decoding of structs
var MsgpackHandle = func() *codec.MsgpackHandle {
	h := &codec.MsgpackHandle{}
	h.RawToString = true

	// maintain binary format from time prior to upgrading latest ugorji
	h.BasicHandle.TimeNotBuiltin = true

	// Sets the default type for decoding a map into a nil interface{}.
	// This is necessary in particular because we store the driver configs as a
	// nil interface{}.
	h.MapType = reflect.TypeOf(map[string]interface{}(nil))

	// only review struct codec tags
	h.TypeInfos = codec.NewTypeInfos([]string{"codec"})

	return h
}()

// Decode is used to decode a MsgPack encoded object
func Decode(buf []byte, out interface{}) error {
	return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out)
}

// Encode is used to encode a MsgPack object with type prefix. The first byte
// of the returned buffer is the MessageType.
func Encode(t MessageType, msg interface{}) ([]byte, error) {
	var buf bytes.Buffer
	buf.WriteByte(uint8(t))
	err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg)
	return buf.Bytes(), err
}

// KeyringResponse is a unified key response and can be used for install,
// remove, use, as well as listing key queries.
type KeyringResponse struct {
	Messages map[string]string
	Keys     map[string]int
	NumNodes int
}

// KeyringRequest is request objects for serf key operations.
type KeyringRequest struct {
	Key string
}

// RecoverableError wraps an error and marks whether it is recoverable and could
// be retried or it is fatal.
type RecoverableError struct {
	Err         string
	Recoverable bool
}

// NewRecoverableError is used to wrap an error and mark it as recoverable or
// not. Returns nil when given a nil error.
func NewRecoverableError(e error, recoverable bool) error {
	if e == nil {
		return nil
	}

	return &RecoverableError{
		Err:         e.Error(),
		Recoverable: recoverable,
	}
}

// WrapRecoverable wraps an existing error in a new RecoverableError with a new
// message. If the error was recoverable before the returned error is as well;
// otherwise it is unrecoverable.
11844 func WrapRecoverable(msg string, err error) error { 11845 return &RecoverableError{Err: msg, Recoverable: IsRecoverable(err)} 11846 } 11847 11848 func (r *RecoverableError) Error() string { 11849 return r.Err 11850 } 11851 11852 func (r *RecoverableError) IsRecoverable() bool { 11853 return r.Recoverable 11854 } 11855 11856 func (r *RecoverableError) IsUnrecoverable() bool { 11857 return !r.Recoverable 11858 } 11859 11860 // Recoverable is an interface for errors to implement to indicate whether or 11861 // not they are fatal or recoverable. 11862 type Recoverable interface { 11863 error 11864 IsRecoverable() bool 11865 } 11866 11867 // IsRecoverable returns true if error is a RecoverableError with 11868 // Recoverable=true. Otherwise false is returned. 11869 func IsRecoverable(e error) bool { 11870 if re, ok := e.(Recoverable); ok { 11871 return re.IsRecoverable() 11872 } 11873 return false 11874 } 11875 11876 // WrappedServerError wraps an error and satisfies 11877 // both the Recoverable and the ServerSideError interfaces 11878 type WrappedServerError struct { 11879 Err error 11880 } 11881 11882 // NewWrappedServerError is used to create a wrapped server side error 11883 func NewWrappedServerError(e error) error { 11884 return &WrappedServerError{ 11885 Err: e, 11886 } 11887 } 11888 11889 func (r *WrappedServerError) IsRecoverable() bool { 11890 return IsRecoverable(r.Err) 11891 } 11892 11893 func (r *WrappedServerError) Error() string { 11894 return r.Err.Error() 11895 } 11896 11897 func (r *WrappedServerError) IsServerSide() bool { 11898 return true 11899 } 11900 11901 // ServerSideError is an interface for errors to implement to indicate 11902 // errors occurring after the request makes it to a server 11903 type ServerSideError interface { 11904 error 11905 IsServerSide() bool 11906 } 11907 11908 // IsServerSide returns true if error is a wrapped 11909 // server side error 11910 func IsServerSide(e error) bool { 11911 if se, ok := e.(ServerSideError); 
ok { 11912 return se.IsServerSide() 11913 } 11914 return false 11915 } 11916 11917 // ACLPolicy is used to represent an ACL policy 11918 type ACLPolicy struct { 11919 Name string // Unique name 11920 Description string // Human readable 11921 Rules string // HCL or JSON format 11922 RulesJSON *acl.Policy // Generated from Rules on read 11923 JobACL *JobACL 11924 Hash []byte 11925 11926 CreateIndex uint64 11927 ModifyIndex uint64 11928 } 11929 11930 // JobACL represents an ACL policy's attachment to a job, group, or task. 11931 type JobACL struct { 11932 Namespace string // namespace of the job 11933 JobID string // ID of the job 11934 Group string // ID of the group 11935 Task string // ID of the task 11936 } 11937 11938 // SetHash is used to compute and set the hash of the ACL policy 11939 func (a *ACLPolicy) SetHash() []byte { 11940 // Initialize a 256bit Blake2 hash (32 bytes) 11941 hash, err := blake2b.New256(nil) 11942 if err != nil { 11943 panic(err) 11944 } 11945 11946 // Write all the user set fields 11947 _, _ = hash.Write([]byte(a.Name)) 11948 _, _ = hash.Write([]byte(a.Description)) 11949 _, _ = hash.Write([]byte(a.Rules)) 11950 11951 if a.JobACL != nil { 11952 _, _ = hash.Write([]byte(a.JobACL.Namespace)) 11953 _, _ = hash.Write([]byte(a.JobACL.JobID)) 11954 _, _ = hash.Write([]byte(a.JobACL.Group)) 11955 _, _ = hash.Write([]byte(a.JobACL.Task)) 11956 } 11957 11958 // Finalize the hash 11959 hashVal := hash.Sum(nil) 11960 11961 // Set and return the hash 11962 a.Hash = hashVal 11963 return hashVal 11964 } 11965 11966 func (a *ACLPolicy) Stub() *ACLPolicyListStub { 11967 return &ACLPolicyListStub{ 11968 Name: a.Name, 11969 Description: a.Description, 11970 Hash: a.Hash, 11971 CreateIndex: a.CreateIndex, 11972 ModifyIndex: a.ModifyIndex, 11973 } 11974 } 11975 11976 func (a *ACLPolicy) Validate() error { 11977 var mErr multierror.Error 11978 if !validPolicyName.MatchString(a.Name) { 11979 err := fmt.Errorf("invalid name '%s'", a.Name) 11980 mErr.Errors = 
append(mErr.Errors, err) 11981 } 11982 if _, err := acl.Parse(a.Rules); err != nil { 11983 err = fmt.Errorf("failed to parse rules: %v", err) 11984 mErr.Errors = append(mErr.Errors, err) 11985 } 11986 if len(a.Description) > maxPolicyDescriptionLength { 11987 err := fmt.Errorf("description longer than %d", maxPolicyDescriptionLength) 11988 mErr.Errors = append(mErr.Errors, err) 11989 } 11990 if a.JobACL != nil { 11991 if a.JobACL.JobID != "" && a.JobACL.Namespace == "" { 11992 err := fmt.Errorf("namespace must be set to set job ID") 11993 mErr.Errors = append(mErr.Errors, err) 11994 } 11995 if a.JobACL.Group != "" && a.JobACL.JobID == "" { 11996 err := fmt.Errorf("job ID must be set to set group") 11997 mErr.Errors = append(mErr.Errors, err) 11998 } 11999 if a.JobACL.Task != "" && a.JobACL.Group == "" { 12000 err := fmt.Errorf("group must be set to set task") 12001 mErr.Errors = append(mErr.Errors, err) 12002 } 12003 } 12004 12005 return mErr.ErrorOrNil() 12006 } 12007 12008 // ACLPolicyListStub is used to for listing ACL policies 12009 type ACLPolicyListStub struct { 12010 Name string 12011 Description string 12012 Hash []byte 12013 CreateIndex uint64 12014 ModifyIndex uint64 12015 } 12016 12017 // ACLPolicyListRequest is used to request a list of policies 12018 type ACLPolicyListRequest struct { 12019 QueryOptions 12020 } 12021 12022 // ACLPolicySpecificRequest is used to query a specific policy 12023 type ACLPolicySpecificRequest struct { 12024 Name string 12025 QueryOptions 12026 } 12027 12028 // ACLPolicySetRequest is used to query a set of policies 12029 type ACLPolicySetRequest struct { 12030 Names []string 12031 QueryOptions 12032 } 12033 12034 // ACLPolicyListResponse is used for a list request 12035 type ACLPolicyListResponse struct { 12036 Policies []*ACLPolicyListStub 12037 QueryMeta 12038 } 12039 12040 // SingleACLPolicyResponse is used to return a single policy 12041 type SingleACLPolicyResponse struct { 12042 Policy *ACLPolicy 12043 QueryMeta 12044 } 

// ACLPolicySetResponse is used to return a set of policies
type ACLPolicySetResponse struct {
	Policies map[string]*ACLPolicy
	QueryMeta
}

// ACLPolicyDeleteRequest is used to delete a set of policies
type ACLPolicyDeleteRequest struct {
	Names []string
	WriteRequest
}

// ACLPolicyUpsertRequest is used to upsert a set of policies
type ACLPolicyUpsertRequest struct {
	Policies []*ACLPolicy
	WriteRequest
}

// ACLToken represents a client token which is used to Authenticate
type ACLToken struct {
	AccessorID string   // Public Accessor ID (UUID)
	SecretID   string   // Secret ID, private (UUID)
	Name       string   // Human friendly name
	Type       string   // Client or Management
	Policies   []string // Policies this token ties to

	// Roles represents the ACL roles that this token is tied to. The token
	// will inherit the permissions of all policies detailed within the role.
	Roles []*ACLTokenRoleLink

	Global     bool // Global or Region local
	Hash       []byte
	CreateTime time.Time // Time of creation

	// ExpirationTime represents the point after which a token should be
	// considered revoked and is eligible for destruction. This time should
	// always use UTC to account for multi-region global tokens. It is a
	// pointer, so we can store nil, rather than the zero value of time.Time.
	ExpirationTime *time.Time

	// ExpirationTTL is a convenience field for helping set ExpirationTime to a
	// value of CreateTime+ExpirationTTL. This can only be set during token
	// creation. This is a string version of a time.Duration like "2m".
	ExpirationTTL time.Duration

	CreateIndex uint64
	ModifyIndex uint64
}

// GetID implements the IDGetter interface, required for pagination.
12096 func (a *ACLToken) GetID() string { 12097 if a == nil { 12098 return "" 12099 } 12100 return a.AccessorID 12101 } 12102 12103 // GetCreateIndex implements the CreateIndexGetter interface, required for 12104 // pagination. 12105 func (a *ACLToken) GetCreateIndex() uint64 { 12106 if a == nil { 12107 return 0 12108 } 12109 return a.CreateIndex 12110 } 12111 12112 func (a *ACLToken) Copy() *ACLToken { 12113 c := new(ACLToken) 12114 *c = *a 12115 12116 c.Policies = make([]string, len(a.Policies)) 12117 copy(c.Policies, a.Policies) 12118 12119 c.Hash = make([]byte, len(a.Hash)) 12120 copy(c.Hash, a.Hash) 12121 12122 c.Roles = make([]*ACLTokenRoleLink, len(a.Roles)) 12123 copy(c.Roles, a.Roles) 12124 12125 return c 12126 } 12127 12128 var ( 12129 // AnonymousACLToken is used no SecretID is provided, and the 12130 // request is made anonymously. 12131 AnonymousACLToken = &ACLToken{ 12132 AccessorID: "anonymous", 12133 Name: "Anonymous Token", 12134 Type: ACLClientToken, 12135 Policies: []string{"anonymous"}, 12136 Global: false, 12137 } 12138 12139 // LeaderACLToken is used to represent a leader's own token; this object 12140 // never gets used except on the leader 12141 LeaderACLToken = &ACLToken{ 12142 AccessorID: "leader", 12143 Name: "Leader Token", 12144 Type: ACLManagementToken, 12145 } 12146 ) 12147 12148 type ACLTokenListStub struct { 12149 AccessorID string 12150 Name string 12151 Type string 12152 Policies []string 12153 Roles []*ACLTokenRoleLink 12154 Global bool 12155 Hash []byte 12156 CreateTime time.Time 12157 ExpirationTime *time.Time 12158 CreateIndex uint64 12159 ModifyIndex uint64 12160 } 12161 12162 // SetHash is used to compute and set the hash of the ACL token. It only hashes 12163 // fields which can be updated, and as such, does not hash fields such as 12164 // ExpirationTime. 
12165 func (a *ACLToken) SetHash() []byte { 12166 // Initialize a 256bit Blake2 hash (32 bytes) 12167 hash, err := blake2b.New256(nil) 12168 if err != nil { 12169 panic(err) 12170 } 12171 12172 // Write all the user set fields 12173 _, _ = hash.Write([]byte(a.Name)) 12174 _, _ = hash.Write([]byte(a.Type)) 12175 for _, policyName := range a.Policies { 12176 _, _ = hash.Write([]byte(policyName)) 12177 } 12178 if a.Global { 12179 _, _ = hash.Write([]byte("global")) 12180 } else { 12181 _, _ = hash.Write([]byte("local")) 12182 } 12183 12184 // Iterate the ACL role links and hash the ID. The ID is immutable and the 12185 // canonical way to reference a role. The name can be modified by 12186 // operators, but won't impact the ACL token resolution. 12187 for _, roleLink := range a.Roles { 12188 _, _ = hash.Write([]byte(roleLink.ID)) 12189 } 12190 12191 // Finalize the hash 12192 hashVal := hash.Sum(nil) 12193 12194 // Set and return the hash 12195 a.Hash = hashVal 12196 return hashVal 12197 } 12198 12199 func (a *ACLToken) Stub() *ACLTokenListStub { 12200 return &ACLTokenListStub{ 12201 AccessorID: a.AccessorID, 12202 Name: a.Name, 12203 Type: a.Type, 12204 Policies: a.Policies, 12205 Roles: a.Roles, 12206 Global: a.Global, 12207 Hash: a.Hash, 12208 CreateTime: a.CreateTime, 12209 ExpirationTime: a.ExpirationTime, 12210 CreateIndex: a.CreateIndex, 12211 ModifyIndex: a.ModifyIndex, 12212 } 12213 } 12214 12215 // ACLTokenListRequest is used to request a list of tokens 12216 type ACLTokenListRequest struct { 12217 GlobalOnly bool 12218 QueryOptions 12219 } 12220 12221 // ACLTokenSpecificRequest is used to query a specific token 12222 type ACLTokenSpecificRequest struct { 12223 AccessorID string 12224 QueryOptions 12225 } 12226 12227 // ACLTokenSetRequest is used to query a set of tokens 12228 type ACLTokenSetRequest struct { 12229 AccessorIDS []string 12230 QueryOptions 12231 } 12232 12233 // ACLTokenListResponse is used for a list request 12234 type ACLTokenListResponse 
struct { 12235 Tokens []*ACLTokenListStub 12236 QueryMeta 12237 } 12238 12239 // SingleACLTokenResponse is used to return a single token 12240 type SingleACLTokenResponse struct { 12241 Token *ACLToken 12242 QueryMeta 12243 } 12244 12245 // ACLTokenSetResponse is used to return a set of token 12246 type ACLTokenSetResponse struct { 12247 Tokens map[string]*ACLToken // Keyed by Accessor ID 12248 QueryMeta 12249 } 12250 12251 // ResolveACLTokenRequest is used to resolve a specific token 12252 type ResolveACLTokenRequest struct { 12253 SecretID string 12254 QueryOptions 12255 } 12256 12257 // ResolveACLTokenResponse is used to resolve a single token 12258 type ResolveACLTokenResponse struct { 12259 Token *ACLToken 12260 QueryMeta 12261 } 12262 12263 // ACLTokenDeleteRequest is used to delete a set of tokens 12264 type ACLTokenDeleteRequest struct { 12265 AccessorIDs []string 12266 WriteRequest 12267 } 12268 12269 // ACLTokenBootstrapRequest is used to bootstrap ACLs 12270 type ACLTokenBootstrapRequest struct { 12271 Token *ACLToken // Not client specifiable 12272 ResetIndex uint64 // Reset index is used to clear the bootstrap token 12273 BootstrapSecret string 12274 WriteRequest 12275 } 12276 12277 // ACLTokenUpsertRequest is used to upsert a set of tokens 12278 type ACLTokenUpsertRequest struct { 12279 Tokens []*ACLToken 12280 WriteRequest 12281 } 12282 12283 // ACLTokenUpsertResponse is used to return from an ACLTokenUpsertRequest 12284 type ACLTokenUpsertResponse struct { 12285 Tokens []*ACLToken 12286 WriteMeta 12287 } 12288 12289 // OneTimeToken is used to log into the web UI using a token provided by the 12290 // command line. 
12291 type OneTimeToken struct { 12292 OneTimeSecretID string 12293 AccessorID string 12294 ExpiresAt time.Time 12295 CreateIndex uint64 12296 ModifyIndex uint64 12297 } 12298 12299 // OneTimeTokenUpsertRequest is the request for a UpsertOneTimeToken RPC 12300 type OneTimeTokenUpsertRequest struct { 12301 WriteRequest 12302 } 12303 12304 // OneTimeTokenUpsertResponse is the response to a UpsertOneTimeToken RPC. 12305 type OneTimeTokenUpsertResponse struct { 12306 OneTimeToken *OneTimeToken 12307 WriteMeta 12308 } 12309 12310 // OneTimeTokenExchangeRequest is a request to swap the one-time token with 12311 // the backing ACL token 12312 type OneTimeTokenExchangeRequest struct { 12313 OneTimeSecretID string 12314 WriteRequest 12315 } 12316 12317 // OneTimeTokenExchangeResponse is the response to swapping the one-time token 12318 // with the backing ACL token 12319 type OneTimeTokenExchangeResponse struct { 12320 Token *ACLToken 12321 WriteMeta 12322 } 12323 12324 // OneTimeTokenDeleteRequest is a request to delete a group of one-time tokens 12325 type OneTimeTokenDeleteRequest struct { 12326 AccessorIDs []string 12327 WriteRequest 12328 } 12329 12330 // OneTimeTokenExpireRequest is a request to delete all expired one-time tokens 12331 type OneTimeTokenExpireRequest struct { 12332 Timestamp time.Time 12333 WriteRequest 12334 } 12335 12336 // RpcError is used for serializing errors with a potential error code 12337 type RpcError struct { 12338 Message string 12339 Code *int64 12340 } 12341 12342 func NewRpcError(err error, code *int64) *RpcError { 12343 return &RpcError{ 12344 Message: err.Error(), 12345 Code: code, 12346 } 12347 } 12348 12349 func (r *RpcError) Error() string { 12350 return r.Message 12351 }