// Source: github.com/dkerwin/nomad@v0.3.3-0.20160525181927-74554135514b/nomad/structs/structs.go

package structs

import (
	"bytes"
	"crypto/md5"
	"crypto/sha1"
	"crypto/sha256"
	"crypto/sha512"
	"encoding/hex"
	"errors"
	"fmt"
	"io"
	"path/filepath"
	"reflect"
	"regexp"
	"strconv"
	"strings"
	"time"

	"github.com/gorhill/cronexpr"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/go-version"
	"github.com/hashicorp/nomad/helper/args"
	"github.com/mitchellh/copystructure"
	"github.com/ugorji/go/codec"

	hcodec "github.com/hashicorp/go-msgpack/codec"
)

// Package-level error values; callers can compare against them directly.
var (
	ErrNoLeader     = fmt.Errorf("No cluster leader")
	ErrNoRegionPath = fmt.Errorf("No path to region")
)

// MessageType is a one-byte tag naming a kind of request. The values are
// assigned sequentially from zero by the const block below, so the order of
// that block must not change.
type MessageType uint8

const (
	NodeRegisterRequestType MessageType = iota
	NodeDeregisterRequestType
	NodeUpdateStatusRequestType
	NodeUpdateDrainRequestType
	JobRegisterRequestType
	JobDeregisterRequestType
	EvalUpdateRequestType
	EvalDeleteRequestType
	AllocUpdateRequestType
	AllocClientUpdateRequestType
)

const (
	// IgnoreUnknownTypeFlag is set along with a MessageType
	// to indicate that the message type can be safely ignored
	// if it is not recognized. This is for future proofing, so
	// that new commands can be added in a way that won't cause
	// old servers to crash when the FSM attempts to process them.
	IgnoreUnknownTypeFlag MessageType = 128
)

// RPCInfo is used to describe common information about a query. Both
// QueryOptions and WriteRequest below provide these three methods.
type RPCInfo interface {
	RequestRegion() string
	IsRead() bool
	AllowStaleRead() bool
}

// QueryOptions is used to specify various flags for read queries
type QueryOptions struct {
	// The target region for this query
	Region string

	// If set, wait until query exceeds given index. Must be provided
	// with MaxQueryTime.
	MinQueryIndex uint64

	// Provided with MinQueryIndex to wait for change.
	MaxQueryTime time.Duration

	// If set, any follower can service the request. Results
	// may be arbitrarily stale.
	AllowStale bool

	// If set, used as prefix for resource list searches
	Prefix string
}

// RequestRegion returns the Region field of the query options.
func (q QueryOptions) RequestRegion() string {
	return q.Region
}

// IsRead always returns true: QueryOptions only applies to reads.
func (q QueryOptions) IsRead() bool {
	return true
}

// AllowStaleRead returns the AllowStale field of the query options.
func (q QueryOptions) AllowStaleRead() bool {
	return q.AllowStale
}

// WriteRequest carries the options common to write requests; it is embedded
// in the request structs of this file that modify state.
type WriteRequest struct {
	// The target region for this write
	Region string
}

// RequestRegion returns the Region field of the write request.
func (w WriteRequest) RequestRegion() string {
	// The target region for this request
	return w.Region
}

// IsRead always returns false: WriteRequest only applies to writes.
func (w WriteRequest) IsRead() bool {
	return false
}

// AllowStaleRead always returns false for a write request.
func (w WriteRequest) AllowStaleRead() bool {
	return false
}

// QueryMeta allows a query response to include potentially
// useful metadata about a query
type QueryMeta struct {
	// This is the index associated with the read
	Index uint64

	// If AllowStale is used, this is time elapsed since
	// last contact between the follower and leader. This
	// can be used to gauge staleness.
	LastContact time.Duration

	// Used to indicate if there is a known leader node
	KnownLeader bool
}

// WriteMeta allows a write response to include potentially
// useful metadata about the write
type WriteMeta struct {
	// This is the index associated with the write
	Index uint64
}

// NodeRegisterRequest is used for Node.Register endpoint
// to register a node as being a schedulable entity.
type NodeRegisterRequest struct {
	Node *Node
	WriteRequest
}

// NodeDeregisterRequest is used for Node.Deregister endpoint
// to deregister a node as being a schedulable entity.
type NodeDeregisterRequest struct {
	NodeID string
	WriteRequest
}

// NodeUpdateStatusRequest is used for Node.UpdateStatus endpoint
// to update the status of a node.
type NodeUpdateStatusRequest struct {
	NodeID string
	Status string
	WriteRequest
}

// NodeUpdateDrainRequest is used for updating the drain status
type NodeUpdateDrainRequest struct {
	NodeID string
	Drain  bool
	WriteRequest
}

// NodeEvaluateRequest is used to re-evaluate the node
type NodeEvaluateRequest struct {
	NodeID string
	WriteRequest
}

// NodeSpecificRequest is used when we just need to specify a target node
type NodeSpecificRequest struct {
	NodeID string
	QueryOptions
}

// JobRegisterRequest is used for Job.Register endpoint
// to register a job as being a schedulable entity.
type JobRegisterRequest struct {
	Job *Job
	WriteRequest
}

// JobDeregisterRequest is used for Job.Deregister endpoint
// to deregister a job as being a schedulable entity.
type JobDeregisterRequest struct {
	JobID string
	WriteRequest
}

// JobEvaluateRequest is used when we just need to re-evaluate a target job
type JobEvaluateRequest struct {
	JobID string
	WriteRequest
}

// JobSpecificRequest is used when we just need to specify a target job
type JobSpecificRequest struct {
	JobID string
	QueryOptions
}

// JobListRequest is used to parameterize a list request
type JobListRequest struct {
	QueryOptions
}

// JobPlanRequest is used for the Job.Plan endpoint to trigger a dry-run
// evaluation of the Job.
type JobPlanRequest struct {
	Job  *Job
	Diff bool // Toggles an annotated diff
	WriteRequest
}

// NodeListRequest is used to parameterize a list request
type NodeListRequest struct {
	QueryOptions
}

// EvalUpdateRequest is used for upserting evaluations.
type EvalUpdateRequest struct {
	Evals     []*Evaluation
	EvalToken string
	WriteRequest
}

// EvalDeleteRequest is used for deleting an evaluation.
type EvalDeleteRequest struct {
	Evals  []string
	Allocs []string
	WriteRequest
}

// EvalSpecificRequest is used when we just need to specify a target evaluation
type EvalSpecificRequest struct {
	EvalID string
	QueryOptions
}

// EvalAckRequest is used to Ack/Nack a specific evaluation
type EvalAckRequest struct {
	EvalID string
	Token  string
	WriteRequest
}

// EvalDequeueRequest is used when we want to dequeue an evaluation
type EvalDequeueRequest struct {
	Schedulers []string
	Timeout    time.Duration
	WriteRequest
}

// EvalListRequest is used to list the evaluations
type EvalListRequest struct {
	QueryOptions
}

// PlanRequest is used to submit an allocation plan to the leader
type PlanRequest struct {
	Plan *Plan
	WriteRequest
}

// AllocUpdateRequest is used to submit changes to allocations, either
// to cause evictions or to assign new allocations. Both can be done
// within a single transaction
type AllocUpdateRequest struct {
	// Alloc is the list of new allocations to assign
	Alloc []*Allocation

	// Job is the shared parent job of the allocations.
	// It is pulled out since it is common to reduce payload size.
	Job *Job

	WriteRequest
}

// AllocListRequest is used to request a list of allocations
type AllocListRequest struct {
	QueryOptions
}

// AllocSpecificRequest is used to query a specific allocation
type AllocSpecificRequest struct {
	AllocID string
	QueryOptions
}

// AllocsGetRequest is used to query a set of allocations
type AllocsGetRequest struct {
	AllocIDs []string
	QueryOptions
}

// PeriodicForceRequest is used to force a specific periodic job.
type PeriodicForceRequest struct {
	JobID string
	WriteRequest
}

// GenericRequest is used to request where no
// specific information is needed.
type GenericRequest struct {
	QueryOptions
}

// GenericResponse is used to respond to a request where no
// specific response information is needed.
type GenericResponse struct {
	WriteMeta
}

// Keys of the VersionResponse.Versions map.
// NOTE(review): inferred from the adjacent VersionResponse type — confirm
// against the Status.Version endpoint.
const (
	ProtocolVersion = "protocol"
	APIMajorVersion = "api.major"
	APIMinorVersion = "api.minor"
)

// VersionResponse is used for the Status.Version response
type VersionResponse struct {
	Build    string
	Versions map[string]int
	QueryMeta
}

// JobRegisterResponse is used to respond to a job registration
type JobRegisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// JobDeregisterResponse is used to respond to a job deregistration
type JobDeregisterResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	JobModifyIndex  uint64
	QueryMeta
}

// NodeUpdateResponse is used to respond to a node update
type NodeUpdateResponse struct {
	HeartbeatTTL    time.Duration
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeDrainUpdateResponse is used to respond to a node drain update
type NodeDrainUpdateResponse struct {
	EvalIDs         []string
	EvalCreateIndex uint64
	NodeModifyIndex uint64
	QueryMeta
}

// NodeAllocsResponse is used to return allocs for a single node
type NodeAllocsResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// NodeClientAllocsResponse is used to return allocs meta data for a single node
type NodeClientAllocsResponse struct {
	Allocs map[string]uint64
	QueryMeta
}

// SingleNodeResponse is used to return a single node
type SingleNodeResponse struct {
	Node *Node
	QueryMeta
}

// NodeListResponse is used for a list request
type NodeListResponse struct {
	Nodes []*NodeListStub
	QueryMeta
}

// SingleJobResponse is used to return a single job
type SingleJobResponse struct {
	Job *Job
	QueryMeta
}

// JobListResponse is used for a list request
type JobListResponse struct {
	Jobs []*JobListStub
	QueryMeta
}

// JobPlanResponse is used to respond to a job plan request
type JobPlanResponse struct {
	// Annotations stores annotations explaining decisions the scheduler made.
	Annotations *PlanAnnotations

	// JobModifyIndex is the modification index of the job. The value can be
	// used when running `nomad run` to ensure that the Job wasn't modified
	// since the last plan. If the job is being created, the value is zero.
	JobModifyIndex uint64

	// CreatedEvals is the set of evaluations created by the scheduler. The
	// reasons for this can be rolling-updates or blocked evals.
	CreatedEvals []*Evaluation

	// Diff contains the diff of the job and annotations on whether the change
	// causes an in-place update or create/destroy
	Diff *JobDiff

	WriteMeta
}

// SingleAllocResponse is used to return a single allocation
type SingleAllocResponse struct {
	Alloc *Allocation
	QueryMeta
}

// AllocsGetResponse is used to return a set of allocations
type AllocsGetResponse struct {
	Allocs []*Allocation
	QueryMeta
}

// JobAllocationsResponse is used to return the allocations for a job
type JobAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// JobEvaluationsResponse is used to return the evaluations for a job
type JobEvaluationsResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// SingleEvalResponse is used to return a single evaluation
type SingleEvalResponse struct {
	Eval *Evaluation
	QueryMeta
}

// EvalDequeueResponse is used to return from a dequeue
type EvalDequeueResponse struct {
	Eval  *Evaluation
	Token string
	QueryMeta
}

// PlanResponse is used to return from a PlanRequest
type PlanResponse struct {
	Result *PlanResult
	WriteMeta
}

// AllocListResponse is used for a list request
type AllocListResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// EvalListResponse is used for a list request
type EvalListResponse struct {
	Evaluations []*Evaluation
	QueryMeta
}

// EvalAllocationsResponse is used to return the allocations for an evaluation
type EvalAllocationsResponse struct {
	Allocations []*AllocListStub
	QueryMeta
}

// PeriodicForceResponse is used to respond to a periodic job force launch
type PeriodicForceResponse struct {
	EvalID          string
	EvalCreateIndex uint64
	WriteMeta
}

// The node status values accepted by ValidNodeStatus.
const (
	NodeStatusInit  = "initializing"
	NodeStatusReady = "ready"
	NodeStatusDown  = "down"
)

// ShouldDrainNode checks if a given node status should trigger an
// evaluation. Some states don't require any further action.
//
// It returns true only for NodeStatusDown and panics on any status other
// than the three NodeStatus* constants, so callers should check untrusted
// input with ValidNodeStatus first.
func ShouldDrainNode(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady:
		return false
	case NodeStatusDown:
		return true
	default:
		panic(fmt.Sprintf("unhandled node status %s", status))
	}
}

// ValidNodeStatus is used to check if a node status is valid, i.e. equal to
// one of the NodeStatus* constants.
func ValidNodeStatus(status string) bool {
	switch status {
	case NodeStatusInit, NodeStatusReady, NodeStatusDown:
		return true
	default:
		return false
	}
}

// Node is a representation of a schedulable client node
type Node struct {
	// ID is a unique identifier for the node. It can be constructed
	// by doing a concatenation of the Name and Datacenter as a simple
	// approach. Alternatively a UUID may be used.
	ID string

	// Datacenter for this node
	Datacenter string

	// Node name
	Name string

	// HTTPAddr is the address on which the Nomad client is listening for http
	// requests
	HTTPAddr string

	// Attributes is an arbitrary set of key/value
	// data that can be used for constraints. Examples
	// include "kernel.name=linux", "arch=386", "driver.docker=1",
	// "docker.runtime=1.8.3"
	Attributes map[string]string

	// Resources is the available resources on the client.
	// For example 'cpu=2' 'memory=2048'
	Resources *Resources

	// Reserved is the set of resources that are reserved,
	// and should be subtracted from the total resources for
	// the purposes of scheduling. This may be to provide certain
	// high-watermark tolerances or because of external schedulers
	// consuming resources.
	Reserved *Resources

	// Links are used to 'link' this client to external
	// systems. For example 'consul=foo.dc1' 'aws=i-83212'
	// 'ami=ami-123'
	Links map[string]string

	// Meta is used to associate arbitrary metadata with this
	// client. This is opaque to Nomad.
	Meta map[string]string

	// NodeClass is an opaque identifier used to group nodes
	// together for the purpose of determining scheduling pressure.
	NodeClass string

	// ComputedClass is a unique id that identifies nodes with a common set of
	// attributes and capabilities.
	ComputedClass string

	// Drain is controlled by the servers, and not the client.
	// If true, no jobs will be scheduled to this node, and existing
	// allocations will be drained.
	Drain bool

	// Status of this node
	Status string

	// StatusDescription is meant to provide more human useful information
	StatusDescription string

	// Raft Indexes
	CreateIndex uint64
	ModifyIndex uint64
}

// Copy returns a copy of the node, or nil when the receiver is nil. Scalar
// fields are copied by value; the Attributes, Links and Meta maps go through
// CopyMapStringString and Resources/Reserved through Resources.Copy, so the
// result does not share those values with the receiver.
func (n *Node) Copy() *Node {
	if n == nil {
		return nil
	}
	nn := new(Node)
	*nn = *n
	nn.Attributes = CopyMapStringString(nn.Attributes)
	nn.Resources = nn.Resources.Copy()
	nn.Reserved = nn.Reserved.Copy()
	nn.Links = CopyMapStringString(nn.Links)
	nn.Meta = CopyMapStringString(nn.Meta)
	return nn
}

// TerminalStatus returns if the current status is terminal and
// will no longer transition.
602 func (n *Node) TerminalStatus() bool { 603 switch n.Status { 604 case NodeStatusDown: 605 return true 606 default: 607 return false 608 } 609 } 610 611 // Stub returns a summarized version of the node 612 func (n *Node) Stub() *NodeListStub { 613 return &NodeListStub{ 614 ID: n.ID, 615 Datacenter: n.Datacenter, 616 Name: n.Name, 617 NodeClass: n.NodeClass, 618 Drain: n.Drain, 619 Status: n.Status, 620 StatusDescription: n.StatusDescription, 621 CreateIndex: n.CreateIndex, 622 ModifyIndex: n.ModifyIndex, 623 } 624 } 625 626 // NodeListStub is used to return a subset of job information 627 // for the job list 628 type NodeListStub struct { 629 ID string 630 Datacenter string 631 Name string 632 NodeClass string 633 Drain bool 634 Status string 635 StatusDescription string 636 CreateIndex uint64 637 ModifyIndex uint64 638 } 639 640 // Resources is used to define the resources available 641 // on a client 642 type Resources struct { 643 CPU int 644 MemoryMB int `mapstructure:"memory"` 645 DiskMB int `mapstructure:"disk"` 646 IOPS int 647 Networks []*NetworkResource 648 } 649 650 // DefaultResources returns the minimum resources a task can use and be valid. 651 func DefaultResources() *Resources { 652 return &Resources{ 653 CPU: 100, 654 MemoryMB: 10, 655 DiskMB: 300, 656 IOPS: 0, 657 } 658 } 659 660 // Merge merges this resource with another resource. 661 func (r *Resources) Merge(other *Resources) { 662 if other.CPU != 0 { 663 r.CPU = other.CPU 664 } 665 if other.MemoryMB != 0 { 666 r.MemoryMB = other.MemoryMB 667 } 668 if other.DiskMB != 0 { 669 r.DiskMB = other.DiskMB 670 } 671 if other.IOPS != 0 { 672 r.IOPS = other.IOPS 673 } 674 if len(other.Networks) != 0 { 675 r.Networks = other.Networks 676 } 677 } 678 679 // MeetsMinResources returns an error if the resources specified are less than 680 // the minimum allowed. 
681 func (r *Resources) MeetsMinResources() error { 682 var mErr multierror.Error 683 if r.CPU < 20 { 684 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum CPU value is 20; got %d", r.CPU)) 685 } 686 if r.MemoryMB < 10 { 687 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MemoryMB value is 10; got %d", r.MemoryMB)) 688 } 689 if r.DiskMB < 10 { 690 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum DiskMB value is 10; got %d", r.DiskMB)) 691 } 692 if r.IOPS < 0 { 693 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum IOPS value is 0; got %d", r.IOPS)) 694 } 695 for i, n := range r.Networks { 696 if err := n.MeetsMinResources(); err != nil { 697 mErr.Errors = append(mErr.Errors, fmt.Errorf("network resource at index %d failed: %v", i, err)) 698 } 699 } 700 701 return mErr.ErrorOrNil() 702 } 703 704 // Copy returns a deep copy of the resources 705 func (r *Resources) Copy() *Resources { 706 if r == nil { 707 return nil 708 } 709 newR := new(Resources) 710 *newR = *r 711 if r.Networks != nil { 712 n := len(r.Networks) 713 newR.Networks = make([]*NetworkResource, n) 714 for i := 0; i < n; i++ { 715 newR.Networks[i] = r.Networks[i].Copy() 716 } 717 } 718 return newR 719 } 720 721 // NetIndex finds the matching net index using device name 722 func (r *Resources) NetIndex(n *NetworkResource) int { 723 for idx, net := range r.Networks { 724 if net.Device == n.Device { 725 return idx 726 } 727 } 728 return -1 729 } 730 731 // Superset checks if one set of resources is a superset 732 // of another. This ignores network resources, and the NetworkIndex 733 // should be used for that. 
734 func (r *Resources) Superset(other *Resources) (bool, string) { 735 if r.CPU < other.CPU { 736 return false, "cpu exhausted" 737 } 738 if r.MemoryMB < other.MemoryMB { 739 return false, "memory exhausted" 740 } 741 if r.DiskMB < other.DiskMB { 742 return false, "disk exhausted" 743 } 744 if r.IOPS < other.IOPS { 745 return false, "iops exhausted" 746 } 747 return true, "" 748 } 749 750 // Add adds the resources of the delta to this, potentially 751 // returning an error if not possible. 752 func (r *Resources) Add(delta *Resources) error { 753 if delta == nil { 754 return nil 755 } 756 r.CPU += delta.CPU 757 r.MemoryMB += delta.MemoryMB 758 r.DiskMB += delta.DiskMB 759 r.IOPS += delta.IOPS 760 761 for _, n := range delta.Networks { 762 // Find the matching interface by IP or CIDR 763 idx := r.NetIndex(n) 764 if idx == -1 { 765 r.Networks = append(r.Networks, n.Copy()) 766 } else { 767 r.Networks[idx].Add(n) 768 } 769 } 770 return nil 771 } 772 773 func (r *Resources) GoString() string { 774 return fmt.Sprintf("*%#v", *r) 775 } 776 777 type Port struct { 778 Label string 779 Value int `mapstructure:"static"` 780 } 781 782 // NetworkResource is used to represent available network 783 // resources 784 type NetworkResource struct { 785 Device string // Name of the device 786 CIDR string // CIDR block of addresses 787 IP string // IP address 788 MBits int // Throughput 789 ReservedPorts []Port // Reserved ports 790 DynamicPorts []Port // Dynamically assigned ports 791 } 792 793 // MeetsMinResources returns an error if the resources specified are less than 794 // the minimum allowed. 
795 func (n *NetworkResource) MeetsMinResources() error { 796 var mErr multierror.Error 797 if n.MBits < 1 { 798 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum MBits value is 1; got %d", n.MBits)) 799 } 800 return mErr.ErrorOrNil() 801 } 802 803 // Copy returns a deep copy of the network resource 804 func (n *NetworkResource) Copy() *NetworkResource { 805 if n == nil { 806 return nil 807 } 808 newR := new(NetworkResource) 809 *newR = *n 810 if n.ReservedPorts != nil { 811 newR.ReservedPorts = make([]Port, len(n.ReservedPorts)) 812 copy(newR.ReservedPorts, n.ReservedPorts) 813 } 814 if n.DynamicPorts != nil { 815 newR.DynamicPorts = make([]Port, len(n.DynamicPorts)) 816 copy(newR.DynamicPorts, n.DynamicPorts) 817 } 818 return newR 819 } 820 821 // Add adds the resources of the delta to this, potentially 822 // returning an error if not possible. 823 func (n *NetworkResource) Add(delta *NetworkResource) { 824 if len(delta.ReservedPorts) > 0 { 825 n.ReservedPorts = append(n.ReservedPorts, delta.ReservedPorts...) 826 } 827 n.MBits += delta.MBits 828 n.DynamicPorts = append(n.DynamicPorts, delta.DynamicPorts...) 829 } 830 831 func (n *NetworkResource) GoString() string { 832 return fmt.Sprintf("*%#v", *n) 833 } 834 835 func (n *NetworkResource) MapLabelToValues(port_map map[string]int) map[string]int { 836 labelValues := make(map[string]int) 837 ports := append(n.ReservedPorts, n.DynamicPorts...) 838 for _, port := range ports { 839 if mapping, ok := port_map[port.Label]; ok { 840 labelValues[port.Label] = mapping 841 } else { 842 labelValues[port.Label] = port.Value 843 } 844 } 845 return labelValues 846 } 847 848 const ( 849 // JobTypeNomad is reserved for internal system tasks and is 850 // always handled by the CoreScheduler. 
851 JobTypeCore = "_core" 852 JobTypeService = "service" 853 JobTypeBatch = "batch" 854 JobTypeSystem = "system" 855 ) 856 857 const ( 858 JobStatusPending = "pending" // Pending means the job is waiting on scheduling 859 JobStatusRunning = "running" // Running means the job has non-terminal allocations 860 JobStatusDead = "dead" // Dead means all evaluation's and allocations are terminal 861 ) 862 863 const ( 864 // JobMinPriority is the minimum allowed priority 865 JobMinPriority = 1 866 867 // JobDefaultPriority is the default priority if not 868 // not specified. 869 JobDefaultPriority = 50 870 871 // JobMaxPriority is the maximum allowed priority 872 JobMaxPriority = 100 873 874 // Ensure CoreJobPriority is higher than any user 875 // specified job so that it gets priority. This is important 876 // for the system to remain healthy. 877 CoreJobPriority = JobMaxPriority * 2 878 ) 879 880 // Job is the scope of a scheduling request to Nomad. It is the largest 881 // scoped object, and is a named collection of task groups. Each task group 882 // is further composed of tasks. A task group (TG) is the unit of scheduling 883 // however. 884 type Job struct { 885 // Region is the Nomad region that handles scheduling this job 886 Region string 887 888 // ID is a unique identifier for the job per region. It can be 889 // specified hierarchically like LineOfBiz/OrgName/Team/Project 890 ID string 891 892 // ParentID is the unique identifier of the job that spawned this job. 893 ParentID string 894 895 // Name is the logical name of the job used to refer to it. This is unique 896 // per region, but not unique globally. 897 Name string 898 899 // Type is used to control various behaviors about the job. Most jobs 900 // are service jobs, meaning they are expected to be long lived. 901 // Some jobs are batch oriented meaning they run and then terminate. 902 // This can be extended in the future to support custom schedulers. 
903 Type string 904 905 // Priority is used to control scheduling importance and if this job 906 // can preempt other jobs. 907 Priority int 908 909 // AllAtOnce is used to control if incremental scheduling of task groups 910 // is allowed or if we must do a gang scheduling of the entire job. This 911 // can slow down larger jobs if resources are not available. 912 AllAtOnce bool `mapstructure:"all_at_once"` 913 914 // Datacenters contains all the datacenters this job is allowed to span 915 Datacenters []string 916 917 // Constraints can be specified at a job level and apply to 918 // all the task groups and tasks. 919 Constraints []*Constraint 920 921 // TaskGroups are the collections of task groups that this job needs 922 // to run. Each task group is an atomic unit of scheduling and placement. 923 TaskGroups []*TaskGroup 924 925 // Update is used to control the update strategy 926 Update UpdateStrategy 927 928 // Periodic is used to define the interval the job is run at. 929 Periodic *PeriodicConfig 930 931 // Meta is used to associate arbitrary metadata with this 932 // job. This is opaque to Nomad. 933 Meta map[string]string 934 935 // Job status 936 Status string 937 938 // StatusDescription is meant to provide more human useful information 939 StatusDescription string 940 941 // Raft Indexes 942 CreateIndex uint64 943 ModifyIndex uint64 944 JobModifyIndex uint64 945 } 946 947 // InitFields is used to initialize fields in the Job. This should be called 948 // when registering a Job. 949 func (j *Job) InitFields() { 950 for _, tg := range j.TaskGroups { 951 tg.InitFields(j) 952 } 953 } 954 955 // Copy returns a deep copy of the Job. It is expected that callers use recover. 956 // This job can panic if the deep copy failed as it uses reflection. 
957 func (j *Job) Copy() *Job { 958 if j == nil { 959 return nil 960 } 961 nj := new(Job) 962 *nj = *j 963 nj.Datacenters = CopySliceString(nj.Datacenters) 964 nj.Constraints = CopySliceConstraints(nj.Constraints) 965 966 if j.TaskGroups != nil { 967 tgs := make([]*TaskGroup, len(nj.TaskGroups)) 968 for i, tg := range nj.TaskGroups { 969 tgs[i] = tg.Copy() 970 } 971 nj.TaskGroups = tgs 972 } 973 974 nj.Periodic = nj.Periodic.Copy() 975 nj.Meta = CopyMapStringString(nj.Meta) 976 return nj 977 } 978 979 // Validate is used to sanity check a job input 980 func (j *Job) Validate() error { 981 var mErr multierror.Error 982 if j.Region == "" { 983 mErr.Errors = append(mErr.Errors, errors.New("Missing job region")) 984 } 985 if j.ID == "" { 986 mErr.Errors = append(mErr.Errors, errors.New("Missing job ID")) 987 } else if strings.Contains(j.ID, " ") { 988 mErr.Errors = append(mErr.Errors, errors.New("Job ID contains a space")) 989 } 990 if j.Name == "" { 991 mErr.Errors = append(mErr.Errors, errors.New("Missing job name")) 992 } 993 if j.Type == "" { 994 mErr.Errors = append(mErr.Errors, errors.New("Missing job type")) 995 } 996 if j.Priority < JobMinPriority || j.Priority > JobMaxPriority { 997 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job priority must be between [%d, %d]", JobMinPriority, JobMaxPriority)) 998 } 999 if len(j.Datacenters) == 0 { 1000 mErr.Errors = append(mErr.Errors, errors.New("Missing job datacenters")) 1001 } 1002 if len(j.TaskGroups) == 0 { 1003 mErr.Errors = append(mErr.Errors, errors.New("Missing job task groups")) 1004 } 1005 for idx, constr := range j.Constraints { 1006 if err := constr.Validate(); err != nil { 1007 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1008 mErr.Errors = append(mErr.Errors, outer) 1009 } 1010 } 1011 1012 // Check for duplicate task groups 1013 taskGroups := make(map[string]int) 1014 for idx, tg := range j.TaskGroups { 1015 if tg.Name == "" { 1016 mErr.Errors = append(mErr.Errors, 
fmt.Errorf("Job task group %d missing name", idx+1)) 1017 } else if existing, ok := taskGroups[tg.Name]; ok { 1018 mErr.Errors = append(mErr.Errors, fmt.Errorf("Job task group %d redefines '%s' from group %d", idx+1, tg.Name, existing+1)) 1019 } else { 1020 taskGroups[tg.Name] = idx 1021 } 1022 1023 if j.Type == "system" && tg.Count != 1 { 1024 mErr.Errors = append(mErr.Errors, 1025 fmt.Errorf("Job task group %d has count %d. Only count of 1 is supported with system scheduler", 1026 idx+1, tg.Count)) 1027 } 1028 } 1029 1030 // Validate the task group 1031 for idx, tg := range j.TaskGroups { 1032 if err := tg.Validate(); err != nil { 1033 outer := fmt.Errorf("Task group %d validation failed: %s", idx+1, err) 1034 mErr.Errors = append(mErr.Errors, outer) 1035 } 1036 } 1037 1038 // Validate periodic is only used with batch jobs. 1039 if j.IsPeriodic() { 1040 if j.Type != JobTypeBatch { 1041 mErr.Errors = append(mErr.Errors, 1042 fmt.Errorf("Periodic can only be used with %q scheduler", JobTypeBatch)) 1043 } 1044 1045 if err := j.Periodic.Validate(); err != nil { 1046 mErr.Errors = append(mErr.Errors, err) 1047 } 1048 } 1049 1050 return mErr.ErrorOrNil() 1051 } 1052 1053 // LookupTaskGroup finds a task group by name 1054 func (j *Job) LookupTaskGroup(name string) *TaskGroup { 1055 for _, tg := range j.TaskGroups { 1056 if tg.Name == name { 1057 return tg 1058 } 1059 } 1060 return nil 1061 } 1062 1063 // Stub is used to return a summary of the job 1064 func (j *Job) Stub() *JobListStub { 1065 return &JobListStub{ 1066 ID: j.ID, 1067 ParentID: j.ParentID, 1068 Name: j.Name, 1069 Type: j.Type, 1070 Priority: j.Priority, 1071 Status: j.Status, 1072 StatusDescription: j.StatusDescription, 1073 CreateIndex: j.CreateIndex, 1074 ModifyIndex: j.ModifyIndex, 1075 } 1076 } 1077 1078 // IsPeriodic returns whether a job is periodic. 
1079 func (j *Job) IsPeriodic() bool { 1080 return j.Periodic != nil 1081 } 1082 1083 // JobListStub is used to return a subset of job information 1084 // for the job list 1085 type JobListStub struct { 1086 ID string 1087 ParentID string 1088 Name string 1089 Type string 1090 Priority int 1091 Status string 1092 StatusDescription string 1093 CreateIndex uint64 1094 ModifyIndex uint64 1095 } 1096 1097 // UpdateStrategy is used to modify how updates are done 1098 type UpdateStrategy struct { 1099 // Stagger is the amount of time between the updates 1100 Stagger time.Duration 1101 1102 // MaxParallel is how many updates can be done in parallel 1103 MaxParallel int `mapstructure:"max_parallel"` 1104 } 1105 1106 // Rolling returns if a rolling strategy should be used 1107 func (u *UpdateStrategy) Rolling() bool { 1108 return u.Stagger > 0 && u.MaxParallel > 0 1109 } 1110 1111 const ( 1112 // PeriodicSpecCron is used for a cron spec. 1113 PeriodicSpecCron = "cron" 1114 1115 // PeriodicSpecTest is only used by unit tests. It is a sorted, comma 1116 // separated list of unix timestamps at which to launch. 1117 PeriodicSpecTest = "_internal_test" 1118 ) 1119 1120 // Periodic defines the interval a job should be run at. 1121 type PeriodicConfig struct { 1122 // Enabled determines if the job should be run periodically. 1123 Enabled bool 1124 1125 // Spec specifies the interval the job should be run as. It is parsed based 1126 // on the SpecType. 1127 Spec string 1128 1129 // SpecType defines the format of the spec. 1130 SpecType string 1131 1132 // ProhibitOverlap enforces that spawned jobs do not run in parallel. 
1133 ProhibitOverlap bool `mapstructure:"prohibit_overlap"` 1134 } 1135 1136 func (p *PeriodicConfig) Copy() *PeriodicConfig { 1137 if p == nil { 1138 return nil 1139 } 1140 np := new(PeriodicConfig) 1141 *np = *p 1142 return np 1143 } 1144 1145 func (p *PeriodicConfig) Validate() error { 1146 if !p.Enabled { 1147 return nil 1148 } 1149 1150 if p.Spec == "" { 1151 return fmt.Errorf("Must specify a spec") 1152 } 1153 1154 switch p.SpecType { 1155 case PeriodicSpecCron: 1156 // Validate the cron spec 1157 if _, err := cronexpr.Parse(p.Spec); err != nil { 1158 return fmt.Errorf("Invalid cron spec %q: %v", p.Spec, err) 1159 } 1160 case PeriodicSpecTest: 1161 // No-op 1162 default: 1163 return fmt.Errorf("Unknown periodic specification type %q", p.SpecType) 1164 } 1165 1166 return nil 1167 } 1168 1169 // Next returns the closest time instant matching the spec that is after the 1170 // passed time. If no matching instance exists, the zero value of time.Time is 1171 // returned. The `time.Location` of the returned value matches that of the 1172 // passed time. 1173 func (p *PeriodicConfig) Next(fromTime time.Time) time.Time { 1174 switch p.SpecType { 1175 case PeriodicSpecCron: 1176 if e, err := cronexpr.Parse(p.Spec); err == nil { 1177 return e.Next(fromTime) 1178 } 1179 case PeriodicSpecTest: 1180 split := strings.Split(p.Spec, ",") 1181 if len(split) == 1 && split[0] == "" { 1182 return time.Time{} 1183 } 1184 1185 // Parse the times 1186 times := make([]time.Time, len(split)) 1187 for i, s := range split { 1188 unix, err := strconv.Atoi(s) 1189 if err != nil { 1190 return time.Time{} 1191 } 1192 1193 times[i] = time.Unix(int64(unix), 0) 1194 } 1195 1196 // Find the next match 1197 for _, next := range times { 1198 if fromTime.Before(next) { 1199 return next 1200 } 1201 } 1202 } 1203 1204 return time.Time{} 1205 } 1206 1207 const ( 1208 // PeriodicLaunchSuffix is the string appended to the periodic jobs ID 1209 // when launching derived instances of it. 
1210 PeriodicLaunchSuffix = "/periodic-" 1211 ) 1212 1213 // PeriodicLaunch tracks the last launch time of a periodic job. 1214 type PeriodicLaunch struct { 1215 ID string // ID of the periodic job. 1216 Launch time.Time // The last launch time. 1217 1218 // Raft Indexes 1219 CreateIndex uint64 1220 ModifyIndex uint64 1221 } 1222 1223 var ( 1224 defaultServiceJobRestartPolicy = RestartPolicy{ 1225 Delay: 15 * time.Second, 1226 Attempts: 2, 1227 Interval: 1 * time.Minute, 1228 Mode: RestartPolicyModeDelay, 1229 } 1230 defaultBatchJobRestartPolicy = RestartPolicy{ 1231 Delay: 15 * time.Second, 1232 Attempts: 15, 1233 Interval: 7 * 24 * time.Hour, 1234 Mode: RestartPolicyModeDelay, 1235 } 1236 ) 1237 1238 const ( 1239 // RestartPolicyModeDelay causes an artificial delay till the next interval is 1240 // reached when the specified attempts have been reached in the interval. 1241 RestartPolicyModeDelay = "delay" 1242 1243 // RestartPolicyModeFail causes a job to fail if the specified number of 1244 // attempts are reached within an interval. 1245 RestartPolicyModeFail = "fail" 1246 ) 1247 1248 // RestartPolicy configures how Tasks are restarted when they crash or fail. 1249 type RestartPolicy struct { 1250 // Attempts is the number of restart that will occur in an interval. 1251 Attempts int 1252 1253 // Interval is a duration in which we can limit the number of restarts 1254 // within. 1255 Interval time.Duration 1256 1257 // Delay is the time between a failure and a restart. 1258 Delay time.Duration 1259 1260 // Mode controls what happens when the task restarts more than attempt times 1261 // in an interval. 
1262 Mode string 1263 } 1264 1265 func (r *RestartPolicy) Copy() *RestartPolicy { 1266 if r == nil { 1267 return nil 1268 } 1269 nrp := new(RestartPolicy) 1270 *nrp = *r 1271 return nrp 1272 } 1273 1274 func (r *RestartPolicy) Validate() error { 1275 switch r.Mode { 1276 case RestartPolicyModeDelay, RestartPolicyModeFail: 1277 default: 1278 return fmt.Errorf("Unsupported restart mode: %q", r.Mode) 1279 } 1280 1281 // Check for ambiguous/confusing settings 1282 if r.Attempts == 0 && r.Mode != RestartPolicyModeFail { 1283 return fmt.Errorf("Restart policy %q with %d attempts is ambiguous", r.Mode, r.Attempts) 1284 } 1285 1286 if r.Interval == 0 { 1287 return nil 1288 } 1289 if time.Duration(r.Attempts)*r.Delay > r.Interval { 1290 return fmt.Errorf("Nomad can't restart the TaskGroup %v times in an interval of %v with a delay of %v", r.Attempts, r.Interval, r.Delay) 1291 } 1292 return nil 1293 } 1294 1295 func NewRestartPolicy(jobType string) *RestartPolicy { 1296 switch jobType { 1297 case JobTypeService, JobTypeSystem: 1298 rp := defaultServiceJobRestartPolicy 1299 return &rp 1300 case JobTypeBatch: 1301 rp := defaultBatchJobRestartPolicy 1302 return &rp 1303 } 1304 return nil 1305 } 1306 1307 // TaskGroup is an atomic unit of placement. Each task group belongs to 1308 // a job and may contain any number of tasks. A task group support running 1309 // in many replicas using the same configuration.. 1310 type TaskGroup struct { 1311 // Name of the task group 1312 Name string 1313 1314 // Count is the number of replicas of this task group that should 1315 // be scheduled. 1316 Count int 1317 1318 // Constraints can be specified at a task group level and apply to 1319 // all the tasks contained. 
1320 Constraints []*Constraint 1321 1322 //RestartPolicy of a TaskGroup 1323 RestartPolicy *RestartPolicy 1324 1325 // Tasks are the collection of tasks that this task group needs to run 1326 Tasks []*Task 1327 1328 // Meta is used to associate arbitrary metadata with this 1329 // task group. This is opaque to Nomad. 1330 Meta map[string]string 1331 } 1332 1333 func (tg *TaskGroup) Copy() *TaskGroup { 1334 if tg == nil { 1335 return nil 1336 } 1337 ntg := new(TaskGroup) 1338 *ntg = *tg 1339 ntg.Constraints = CopySliceConstraints(ntg.Constraints) 1340 1341 ntg.RestartPolicy = ntg.RestartPolicy.Copy() 1342 1343 if tg.Tasks != nil { 1344 tasks := make([]*Task, len(ntg.Tasks)) 1345 for i, t := range ntg.Tasks { 1346 tasks[i] = t.Copy() 1347 } 1348 ntg.Tasks = tasks 1349 } 1350 1351 ntg.Meta = CopyMapStringString(ntg.Meta) 1352 return ntg 1353 } 1354 1355 // InitFields is used to initialize fields in the TaskGroup. 1356 func (tg *TaskGroup) InitFields(job *Job) { 1357 // Set the default restart policy. 
1358 if tg.RestartPolicy == nil { 1359 tg.RestartPolicy = NewRestartPolicy(job.Type) 1360 } 1361 1362 for _, task := range tg.Tasks { 1363 task.InitFields(job, tg) 1364 } 1365 } 1366 1367 // Validate is used to sanity check a task group 1368 func (tg *TaskGroup) Validate() error { 1369 var mErr multierror.Error 1370 if tg.Name == "" { 1371 mErr.Errors = append(mErr.Errors, errors.New("Missing task group name")) 1372 } 1373 if tg.Count < 0 { 1374 mErr.Errors = append(mErr.Errors, errors.New("Task group count can't be negative")) 1375 } 1376 if len(tg.Tasks) == 0 { 1377 mErr.Errors = append(mErr.Errors, errors.New("Missing tasks for task group")) 1378 } 1379 for idx, constr := range tg.Constraints { 1380 if err := constr.Validate(); err != nil { 1381 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1382 mErr.Errors = append(mErr.Errors, outer) 1383 } 1384 } 1385 1386 if tg.RestartPolicy != nil { 1387 if err := tg.RestartPolicy.Validate(); err != nil { 1388 mErr.Errors = append(mErr.Errors, err) 1389 } 1390 } else { 1391 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task Group %v should have a restart policy", tg.Name)) 1392 } 1393 1394 // Check for duplicate tasks 1395 tasks := make(map[string]int) 1396 for idx, task := range tg.Tasks { 1397 if task.Name == "" { 1398 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d missing name", idx+1)) 1399 } else if existing, ok := tasks[task.Name]; ok { 1400 mErr.Errors = append(mErr.Errors, fmt.Errorf("Task %d redefines '%s' from task %d", idx+1, task.Name, existing+1)) 1401 } else { 1402 tasks[task.Name] = idx 1403 } 1404 } 1405 1406 // Validate the tasks 1407 for idx, task := range tg.Tasks { 1408 if err := task.Validate(); err != nil { 1409 outer := fmt.Errorf("Task %d validation failed: %s", idx+1, err) 1410 mErr.Errors = append(mErr.Errors, outer) 1411 } 1412 } 1413 return mErr.ErrorOrNil() 1414 } 1415 1416 // LookupTask finds a task by name 1417 func (tg *TaskGroup) LookupTask(name string) *Task 
{ 1418 for _, t := range tg.Tasks { 1419 if t.Name == name { 1420 return t 1421 } 1422 } 1423 return nil 1424 } 1425 1426 func (tg *TaskGroup) GoString() string { 1427 return fmt.Sprintf("*%#v", *tg) 1428 } 1429 1430 const ( 1431 ServiceCheckHTTP = "http" 1432 ServiceCheckTCP = "tcp" 1433 ServiceCheckDocker = "docker" 1434 ServiceCheckScript = "script" 1435 ) 1436 1437 // The ServiceCheck data model represents the consul health check that 1438 // Nomad registers for a Task 1439 type ServiceCheck struct { 1440 Name string // Name of the check, defaults to id 1441 Type string // Type of the check - tcp, http, docker and script 1442 Command string // Command is the command to run for script checks 1443 Args []string // Args is a list of argumes for script checks 1444 Path string // path of the health check url for http type check 1445 Protocol string // Protocol to use if check is http, defaults to http 1446 Interval time.Duration // Interval of the check 1447 Timeout time.Duration // Timeout of the response from the check before consul fails the check 1448 } 1449 1450 func (sc *ServiceCheck) Copy() *ServiceCheck { 1451 if sc == nil { 1452 return nil 1453 } 1454 nsc := new(ServiceCheck) 1455 *nsc = *sc 1456 return nsc 1457 } 1458 1459 func (sc *ServiceCheck) Validate() error { 1460 t := strings.ToLower(sc.Type) 1461 if t != ServiceCheckTCP && t != ServiceCheckHTTP && t != ServiceCheckScript { 1462 return fmt.Errorf("service check must be either http, tcp or script type") 1463 } 1464 if sc.Type == ServiceCheckHTTP && sc.Path == "" { 1465 return fmt.Errorf("service checks of http type must have a valid http path") 1466 } 1467 1468 if sc.Type == ServiceCheckScript && sc.Command == "" { 1469 return fmt.Errorf("service checks of script type must have a valid script path") 1470 } 1471 1472 if sc.Interval <= 0 { 1473 return fmt.Errorf("service checks must have positive time intervals") 1474 } 1475 return nil 1476 } 1477 1478 // RequiresPort returns whether the service check 
requires the task has a port. 1479 func (sc *ServiceCheck) RequiresPort() bool { 1480 switch sc.Type { 1481 case ServiceCheckHTTP, ServiceCheckTCP: 1482 return true 1483 default: 1484 return false 1485 } 1486 } 1487 1488 func (sc *ServiceCheck) Hash(serviceID string) string { 1489 h := sha1.New() 1490 io.WriteString(h, serviceID) 1491 io.WriteString(h, sc.Name) 1492 io.WriteString(h, sc.Type) 1493 io.WriteString(h, sc.Command) 1494 io.WriteString(h, strings.Join(sc.Args, "")) 1495 io.WriteString(h, sc.Path) 1496 io.WriteString(h, sc.Protocol) 1497 io.WriteString(h, sc.Interval.String()) 1498 io.WriteString(h, sc.Timeout.String()) 1499 return fmt.Sprintf("%x", h.Sum(nil)) 1500 } 1501 1502 const ( 1503 NomadConsulPrefix = "nomad-registered-service" 1504 ) 1505 1506 var ( 1507 AgentServicePrefix = fmt.Sprintf("%s-%s", NomadConsulPrefix, "agent") 1508 ) 1509 1510 // The Service model represents a Consul service definition 1511 type Service struct { 1512 Name string // Name of the service, defaults to id 1513 Tags []string // List of tags for the service 1514 PortLabel string `mapstructure:"port"` // port for the service 1515 Checks []*ServiceCheck // List of checks associated with the service 1516 } 1517 1518 func (s *Service) Copy() *Service { 1519 if s == nil { 1520 return nil 1521 } 1522 ns := new(Service) 1523 *ns = *s 1524 ns.Tags = CopySliceString(ns.Tags) 1525 1526 if s.Checks != nil { 1527 checks := make([]*ServiceCheck, len(ns.Checks)) 1528 for i, c := range ns.Checks { 1529 checks[i] = c.Copy() 1530 } 1531 ns.Checks = checks 1532 } 1533 1534 return ns 1535 } 1536 1537 // InitFields interpolates values of Job, Task Group and Task in the Service 1538 // Name. This also generates check names, service id and check ids. 
1539 func (s *Service) InitFields(job string, taskGroup string, task string) { 1540 s.Name = args.ReplaceEnv(s.Name, map[string]string{ 1541 "JOB": job, 1542 "TASKGROUP": taskGroup, 1543 "TASK": task, 1544 "BASE": fmt.Sprintf("%s-%s-%s", job, taskGroup, task), 1545 }, 1546 ) 1547 1548 for _, check := range s.Checks { 1549 if check.Name == "" { 1550 check.Name = fmt.Sprintf("service: %q check", s.Name) 1551 } 1552 } 1553 } 1554 1555 func (s *Service) ID(identifier string) string { 1556 return fmt.Sprintf("%s-%s-%s", NomadConsulPrefix, identifier, s.Hash()) 1557 } 1558 1559 // Validate checks if the Check definition is valid 1560 func (s *Service) Validate() error { 1561 var mErr multierror.Error 1562 1563 // Ensure the service name is valid per RFC-952 §1 1564 // (https://tools.ietf.org/html/rfc952), RFC-1123 §2.1 1565 // (https://tools.ietf.org/html/rfc1123), and RFC-2782 1566 // (https://tools.ietf.org/html/rfc2782). 1567 re := regexp.MustCompile(`^(?i:[a-z0-9]|[a-z0-9][a-z0-9\-]{0,61}[a-z0-9])$`) 1568 if !re.MatchString(s.Name) { 1569 mErr.Errors = append(mErr.Errors, fmt.Errorf("service name must be valid per RFC 1123 and can contain only alphanumeric characters or dashes and must be less than 63 characters long: %q", s.Name)) 1570 } 1571 1572 for _, c := range s.Checks { 1573 if s.PortLabel == "" && c.RequiresPort() { 1574 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is not valid since service %q doesn't have port", c.Name, s.Name)) 1575 continue 1576 } 1577 if err := c.Validate(); err != nil { 1578 mErr.Errors = append(mErr.Errors, err) 1579 } 1580 } 1581 return mErr.ErrorOrNil() 1582 } 1583 1584 // Hash calculates the hash of the check based on it's content and the service 1585 // which owns it 1586 func (s *Service) Hash() string { 1587 h := sha1.New() 1588 io.WriteString(h, s.Name) 1589 io.WriteString(h, strings.Join(s.Tags, "")) 1590 io.WriteString(h, s.PortLabel) 1591 return fmt.Sprintf("%x", h.Sum(nil)) 1592 } 1593 1594 const ( 1595 // 
DefaultKillTimeout is the default timeout between signaling a task it 1596 // will be killed and killing it. 1597 DefaultKillTimeout = 5 * time.Second 1598 ) 1599 1600 // LogConfig provides configuration for log rotation 1601 type LogConfig struct { 1602 MaxFiles int `mapstructure:"max_files"` 1603 MaxFileSizeMB int `mapstructure:"max_file_size"` 1604 } 1605 1606 func DefaultLogConfig() *LogConfig { 1607 return &LogConfig{ 1608 MaxFiles: 10, 1609 MaxFileSizeMB: 10, 1610 } 1611 } 1612 1613 // Validate returns an error if the log config specified are less than 1614 // the minimum allowed. 1615 func (l *LogConfig) Validate() error { 1616 var mErr multierror.Error 1617 if l.MaxFiles < 1 { 1618 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum number of files is 1; got %d", l.MaxFiles)) 1619 } 1620 if l.MaxFileSizeMB < 1 { 1621 mErr.Errors = append(mErr.Errors, fmt.Errorf("minimum file size is 1MB; got %d", l.MaxFileSizeMB)) 1622 } 1623 return mErr.ErrorOrNil() 1624 } 1625 1626 // Task is a single process typically that is executed as part of a task group. 1627 type Task struct { 1628 // Name of the task 1629 Name string 1630 1631 // Driver is used to control which driver is used 1632 Driver string 1633 1634 // User is used to determine which user will run the task. It defaults to 1635 // the same user the Nomad client is being run as. 1636 User string 1637 1638 // Config is provided to the driver to initialize 1639 Config map[string]interface{} 1640 1641 // Map of environment variables to be used by the driver 1642 Env map[string]string 1643 1644 // List of service definitions exposed by the Task 1645 Services []*Service 1646 1647 // Constraints can be specified at a task level and apply only to 1648 // the particular task. 1649 Constraints []*Constraint 1650 1651 // Resources is the resources needed by this task 1652 Resources *Resources 1653 1654 // Meta is used to associate arbitrary metadata with this 1655 // task. This is opaque to Nomad. 
1656 Meta map[string]string 1657 1658 // KillTimeout is the time between signaling a task that it will be 1659 // killed and killing it. 1660 KillTimeout time.Duration `mapstructure:"kill_timeout"` 1661 1662 // LogConfig provides configuration for log rotation 1663 LogConfig *LogConfig `mapstructure:"logs"` 1664 1665 // Artifacts is a list of artifacts to download and extract before running 1666 // the task. 1667 Artifacts []*TaskArtifact 1668 } 1669 1670 func (t *Task) Copy() *Task { 1671 if t == nil { 1672 return nil 1673 } 1674 nt := new(Task) 1675 *nt = *t 1676 nt.Env = CopyMapStringString(nt.Env) 1677 1678 if t.Services != nil { 1679 services := make([]*Service, len(nt.Services)) 1680 for i, s := range nt.Services { 1681 services[i] = s.Copy() 1682 } 1683 nt.Services = services 1684 } 1685 1686 nt.Constraints = CopySliceConstraints(nt.Constraints) 1687 1688 nt.Resources = nt.Resources.Copy() 1689 nt.Meta = CopyMapStringString(nt.Meta) 1690 1691 if t.Artifacts != nil { 1692 artifacts := make([]*TaskArtifact, 0, len(t.Artifacts)) 1693 for _, a := range nt.Artifacts { 1694 artifacts = append(artifacts, a.Copy()) 1695 } 1696 nt.Artifacts = artifacts 1697 } 1698 1699 if i, err := copystructure.Copy(nt.Config); err != nil { 1700 nt.Config = i.(map[string]interface{}) 1701 } 1702 1703 return nt 1704 } 1705 1706 // InitFields initializes fields in the task. 1707 func (t *Task) InitFields(job *Job, tg *TaskGroup) { 1708 t.InitServiceFields(job.Name, tg.Name) 1709 1710 // Set the default timeout if it is not specified. 1711 if t.KillTimeout == 0 { 1712 t.KillTimeout = DefaultKillTimeout 1713 } 1714 } 1715 1716 // InitServiceFields interpolates values of Job, Task Group 1717 // and Tasks in all the service Names of a Task. This also generates the service 1718 // id, check id and check names. 
1719 func (t *Task) InitServiceFields(job string, taskGroup string) { 1720 for _, service := range t.Services { 1721 service.InitFields(job, taskGroup, t.Name) 1722 } 1723 } 1724 1725 func (t *Task) GoString() string { 1726 return fmt.Sprintf("*%#v", *t) 1727 } 1728 1729 func (t *Task) FindHostAndPortFor(portLabel string) (string, int) { 1730 for _, network := range t.Resources.Networks { 1731 if p, ok := network.MapLabelToValues(nil)[portLabel]; ok { 1732 return network.IP, p 1733 } 1734 } 1735 return "", 0 1736 } 1737 1738 // Validate is used to sanity check a task 1739 func (t *Task) Validate() error { 1740 var mErr multierror.Error 1741 if t.Name == "" { 1742 mErr.Errors = append(mErr.Errors, errors.New("Missing task name")) 1743 } 1744 if t.Driver == "" { 1745 mErr.Errors = append(mErr.Errors, errors.New("Missing task driver")) 1746 } 1747 if t.KillTimeout.Nanoseconds() < 0 { 1748 mErr.Errors = append(mErr.Errors, errors.New("KillTimeout must be a positive value")) 1749 } 1750 1751 // Validate the resources. 
1752 if t.Resources == nil { 1753 mErr.Errors = append(mErr.Errors, errors.New("Missing task resources")) 1754 } else if err := t.Resources.MeetsMinResources(); err != nil { 1755 mErr.Errors = append(mErr.Errors, err) 1756 } 1757 1758 // Validate the log config 1759 if t.LogConfig == nil { 1760 mErr.Errors = append(mErr.Errors, errors.New("Missing Log Config")) 1761 } else if err := t.LogConfig.Validate(); err != nil { 1762 mErr.Errors = append(mErr.Errors, err) 1763 } 1764 1765 for idx, constr := range t.Constraints { 1766 if err := constr.Validate(); err != nil { 1767 outer := fmt.Errorf("Constraint %d validation failed: %s", idx+1, err) 1768 mErr.Errors = append(mErr.Errors, outer) 1769 } 1770 } 1771 1772 // Validate Services 1773 if err := validateServices(t); err != nil { 1774 mErr.Errors = append(mErr.Errors, err) 1775 } 1776 1777 if t.LogConfig != nil && t.Resources != nil { 1778 logUsage := (t.LogConfig.MaxFiles * t.LogConfig.MaxFileSizeMB) 1779 if t.Resources.DiskMB <= logUsage { 1780 mErr.Errors = append(mErr.Errors, 1781 fmt.Errorf("log storage (%d MB) must be less than requested disk capacity (%d MB)", 1782 logUsage, t.Resources.DiskMB)) 1783 } 1784 } 1785 1786 for idx, artifact := range t.Artifacts { 1787 if err := artifact.Validate(); err != nil { 1788 outer := fmt.Errorf("Artifact %d validation failed: %v", idx+1, err) 1789 mErr.Errors = append(mErr.Errors, outer) 1790 } 1791 } 1792 1793 // If the driver is java or qemu ensure that they have specified an 1794 // artifact. 1795 if (t.Driver == "qemu" || t.Driver == "java") && len(t.Artifacts) == 0 { 1796 err := fmt.Errorf("must specify at least one artifact when using %q driver", t.Driver) 1797 mErr.Errors = append(mErr.Errors, err) 1798 } 1799 1800 return mErr.ErrorOrNil() 1801 } 1802 1803 // validateServices takes a task and validates the services within it are valid 1804 // and reference ports that exist. 
1805 func validateServices(t *Task) error { 1806 var mErr multierror.Error 1807 1808 // Ensure that services don't ask for non-existent ports and their names are 1809 // unique. 1810 servicePorts := make(map[string][]string) 1811 knownServices := make(map[string]struct{}) 1812 for i, service := range t.Services { 1813 if err := service.Validate(); err != nil { 1814 outer := fmt.Errorf("service %d validation failed: %s", i, err) 1815 mErr.Errors = append(mErr.Errors, outer) 1816 } 1817 if _, ok := knownServices[service.Name]; ok { 1818 mErr.Errors = append(mErr.Errors, fmt.Errorf("service %q is duplicate", service.Name)) 1819 } 1820 knownServices[service.Name] = struct{}{} 1821 1822 if service.PortLabel != "" { 1823 servicePorts[service.PortLabel] = append(servicePorts[service.PortLabel], service.Name) 1824 } 1825 1826 // Ensure that check names are unique. 1827 knownChecks := make(map[string]struct{}) 1828 for _, check := range service.Checks { 1829 if _, ok := knownChecks[check.Name]; ok { 1830 mErr.Errors = append(mErr.Errors, fmt.Errorf("check %q is duplicate", check.Name)) 1831 } 1832 knownChecks[check.Name] = struct{}{} 1833 } 1834 } 1835 1836 // Get the set of port labels. 1837 portLabels := make(map[string]struct{}) 1838 if t.Resources != nil { 1839 for _, network := range t.Resources.Networks { 1840 ports := network.MapLabelToValues(nil) 1841 for portLabel, _ := range ports { 1842 portLabels[portLabel] = struct{}{} 1843 } 1844 } 1845 } 1846 1847 // Ensure all ports referenced in services exist. 1848 for servicePort, services := range servicePorts { 1849 _, ok := portLabels[servicePort] 1850 if !ok { 1851 joined := strings.Join(services, ", ") 1852 err := fmt.Errorf("port label %q referenced by services %v does not exist", servicePort, joined) 1853 mErr.Errors = append(mErr.Errors, err) 1854 } 1855 } 1856 return mErr.ErrorOrNil() 1857 } 1858 1859 // Set of possible states for a task. 
const (
	TaskStatePending = "pending" // The task is waiting to be run.
	TaskStateRunning = "running" // The task is currently running.
	TaskStateDead    = "dead"    // Terminal state of task.
)

// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	// The current state of the task.
	State string

	// Series of task events that transition the state of the task.
	Events []*TaskEvent
}

// Copy returns a copy of the task state with every event copied as well, or
// nil when ts is nil.
func (ts *TaskState) Copy() *TaskState {
	if ts == nil {
		return nil
	}

	dup := &TaskState{State: ts.State}
	if ts.Events == nil {
		return dup
	}

	dup.Events = make([]*TaskEvent, 0, len(ts.Events))
	for _, ev := range ts.Events {
		dup.Events = append(dup.Events, ev.Copy())
	}
	return dup
}

// Failed returns whether the task is dead and its most recent event marks a
// failure.
func (ts *TaskState) Failed() bool {
	count := len(ts.Events)
	if count == 0 || ts.State != TaskStateDead {
		return false
	}

	last := ts.Events[count-1].Type
	return last == TaskNotRestarting ||
		last == TaskArtifactDownloadFailed ||
		last == TaskFailedValidation
}

// Successful returns whether the task is dead and its most recent event is a
// termination with a zero exit code.
func (ts *TaskState) Successful() bool {
	count := len(ts.Events)
	if count == 0 || ts.State != TaskStateDead {
		return false
	}

	last := ts.Events[count-1]
	return last.Type == TaskTerminated && last.ExitCode == 0
}

const (
	// TaskDriverFailure indicates that the task could not be started due to a
	// failure in the driver.
	TaskDriverFailure = "Driver Failure"

	// TaskReceived signals that the task has been pulled by the client at the
	// given timestamp.
	TaskReceived = "Received"

	// TaskFailedValidation indicates the task was invalid and as such was not
	// run.
	TaskFailedValidation = "Failed Validation"

	// TaskStarted signals that the task was started and its timestamp can be
	// used to determine the running length of the task.
	TaskStarted = "Started"

	// TaskTerminated indicates that the task was started and exited.
	TaskTerminated = "Terminated"

	// TaskKilled indicates a user has killed the task.
	TaskKilled = "Killed"

	// TaskRestarting indicates that task terminated and is being restarted.
	TaskRestarting = "Restarting"

	// TaskNotRestarting indicates that the task has failed and is not being
	// restarted because it has exceeded its restart policy.
	TaskNotRestarting = "Not Restarting"

	// TaskDownloadingArtifacts means the task is downloading the artifacts
	// specified in the task.
	TaskDownloadingArtifacts = "Downloading Artifacts"

	// TaskArtifactDownloadFailed indicates that downloading the artifacts
	// failed.
	TaskArtifactDownloadFailed = "Failed Artifact Download"
)

// TaskEvent is an event that affects the state of a task and contains
// meta-data appropriate to the event's type.
type TaskEvent struct {
	Type string
	Time int64 // Unix Nanosecond timestamp

	// Restart fields.
	RestartReason string

	// Driver Failure fields.
	DriverError string // A driver error occurred while starting the task.

	// Task Terminated Fields.
	ExitCode int    // The exit code of the task.
	Signal   int    // The signal that terminated the task.
	Message  string // A possible message explaining the termination of the task.

	// Task Killed Fields.
	KillError string // Error killing the task.

	// TaskRestarting fields.
	StartDelay int64 // The sleep period before restarting the task in unix nanoseconds.

	// Artifact Download fields
	DownloadError string // Error downloading artifacts

	// Validation fields
	ValidationError string // Validation error
}

// GoString renders the event as "<type> at <time>".
func (te *TaskEvent) GoString() string {
	return fmt.Sprintf("%v at %v", te.Type, te.Time)
}

// Copy returns a copy of the event, or nil when te is nil.
func (te *TaskEvent) Copy() *TaskEvent {
	if te == nil {
		return nil
	}
	dup := *te
	return &dup
}

// NewTaskEvent returns an event of the given type stamped with the current
// time in unix nanoseconds.
func NewTaskEvent(event string) *TaskEvent {
	ev := new(TaskEvent)
	ev.Type = event
	ev.Time = time.Now().UnixNano()
	return ev
}

// SetDriverError records the driver error message, if any, and returns the
// event to allow chaining.
func (e *TaskEvent) SetDriverError(err error) *TaskEvent {
	if err == nil {
		return e
	}
	e.DriverError = err.Error()
	return e
}

// SetExitCode records the exit code and returns the event.
func (e *TaskEvent) SetExitCode(c int) *TaskEvent {
	e.ExitCode = c
	return e
}

// SetSignal records the terminating signal and returns the event.
func (e *TaskEvent) SetSignal(s int) *TaskEvent {
	e.Signal = s
	return e
}

// SetExitMessage records the termination message, if any, and returns the
// event.
func (e *TaskEvent) SetExitMessage(err error) *TaskEvent {
	if err == nil {
		return e
	}
	e.Message = err.Error()
	return e
}

// SetKillError records the kill error message, if any, and returns the event.
func (e *TaskEvent) SetKillError(err error) *TaskEvent {
	if err == nil {
		return e
	}
	e.KillError = err.Error()
	return e
}

// SetRestartDelay records the delay before the restart in nanoseconds and
// returns the event.
func (e *TaskEvent) SetRestartDelay(delay time.Duration) *TaskEvent {
	e.StartDelay = int64(delay)
	return e
}

// SetRestartReason records the reason for the restart and returns the event.
func (e *TaskEvent) SetRestartReason(reason string) *TaskEvent {
	e.RestartReason = reason
	return e
}

// SetDownloadError records the artifact download error message, if any, and
// returns the event.
func (e *TaskEvent) SetDownloadError(err error) *TaskEvent {
	if err == nil {
		return e
	}
	e.DownloadError = err.Error()
	return e
}

// SetValidationError records the validation error message, if any, and
// returns the event.
func (e *TaskEvent) SetValidationError(err error) *TaskEvent {
	if err == nil {
		return e
	}
	e.ValidationError = err.Error()
	return e
}

// TaskArtifact is an artifact to download before running the task.
2067 type TaskArtifact struct { 2068 // GetterSource is the source to download an artifact using go-getter 2069 GetterSource string `mapstructure:"source"` 2070 2071 // GetterOptions are options to use when downloading the artifact using 2072 // go-getter. 2073 GetterOptions map[string]string `mapstructure:"options"` 2074 2075 // RelativeDest is the download destination given relative to the task's 2076 // directory. 2077 RelativeDest string `mapstructure:"destination"` 2078 } 2079 2080 func (ta *TaskArtifact) Copy() *TaskArtifact { 2081 if ta == nil { 2082 return nil 2083 } 2084 nta := new(TaskArtifact) 2085 *nta = *ta 2086 nta.GetterOptions = CopyMapStringString(ta.GetterOptions) 2087 return nta 2088 } 2089 2090 func (ta *TaskArtifact) GoString() string { 2091 return fmt.Sprintf("%+v", ta) 2092 } 2093 2094 func (ta *TaskArtifact) Validate() error { 2095 // Verify the source 2096 var mErr multierror.Error 2097 if ta.GetterSource == "" { 2098 mErr.Errors = append(mErr.Errors, fmt.Errorf("source must be specified")) 2099 } 2100 2101 // Verify the destination doesn't escape the tasks directory 2102 alloc, err := filepath.Abs(filepath.Join("/", "foo/", "bar/")) 2103 if err != nil { 2104 mErr.Errors = append(mErr.Errors, err) 2105 return mErr.ErrorOrNil() 2106 } 2107 abs, err := filepath.Abs(filepath.Join(alloc, ta.RelativeDest)) 2108 if err != nil { 2109 mErr.Errors = append(mErr.Errors, err) 2110 return mErr.ErrorOrNil() 2111 } 2112 rel, err := filepath.Rel(alloc, abs) 2113 if err != nil { 2114 mErr.Errors = append(mErr.Errors, err) 2115 return mErr.ErrorOrNil() 2116 } 2117 if strings.HasPrefix(rel, "..") { 2118 mErr.Errors = append(mErr.Errors, fmt.Errorf("destination escapes task's directory")) 2119 } 2120 2121 // Verify the checksum 2122 if check, ok := ta.GetterOptions["checksum"]; ok { 2123 check = strings.TrimSpace(check) 2124 if check == "" { 2125 mErr.Errors = append(mErr.Errors, fmt.Errorf("checksum value can not be empty")) 2126 return mErr.ErrorOrNil() 
2127 } 2128 2129 parts := strings.Split(check, ":") 2130 if l := len(parts); l != 2 { 2131 mErr.Errors = append(mErr.Errors, fmt.Errorf(`checksum must be given as "type:value"; got %q`, check)) 2132 return mErr.ErrorOrNil() 2133 } 2134 2135 checksumVal := parts[1] 2136 checksumBytes, err := hex.DecodeString(checksumVal) 2137 if err != nil { 2138 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid checksum: %v", err)) 2139 return mErr.ErrorOrNil() 2140 } 2141 2142 checksumType := parts[0] 2143 expectedLength := 0 2144 switch checksumType { 2145 case "md5": 2146 expectedLength = md5.Size 2147 case "sha1": 2148 expectedLength = sha1.Size 2149 case "sha256": 2150 expectedLength = sha256.Size 2151 case "sha512": 2152 expectedLength = sha512.Size 2153 default: 2154 mErr.Errors = append(mErr.Errors, fmt.Errorf("unsupported checksum type: %s", checksumType)) 2155 return mErr.ErrorOrNil() 2156 } 2157 2158 if len(checksumBytes) != expectedLength { 2159 mErr.Errors = append(mErr.Errors, fmt.Errorf("invalid %s checksum: %v", checksumType, checksumVal)) 2160 return mErr.ErrorOrNil() 2161 } 2162 } 2163 2164 return mErr.ErrorOrNil() 2165 } 2166 2167 const ( 2168 ConstraintDistinctHosts = "distinct_hosts" 2169 ConstraintRegex = "regexp" 2170 ConstraintVersion = "version" 2171 ) 2172 2173 // Constraints are used to restrict placement options. 
2174 type Constraint struct { 2175 LTarget string // Left-hand target 2176 RTarget string // Right-hand target 2177 Operand string // Constraint operand (<=, <, =, !=, >, >=), contains, near 2178 str string // Memoized string 2179 } 2180 2181 func (c *Constraint) Copy() *Constraint { 2182 if c == nil { 2183 return nil 2184 } 2185 nc := new(Constraint) 2186 *nc = *c 2187 return nc 2188 } 2189 2190 func (c *Constraint) String() string { 2191 if c.str != "" { 2192 return c.str 2193 } 2194 c.str = fmt.Sprintf("%s %s %s", c.LTarget, c.Operand, c.RTarget) 2195 return c.str 2196 } 2197 2198 func (c *Constraint) Validate() error { 2199 var mErr multierror.Error 2200 if c.Operand == "" { 2201 mErr.Errors = append(mErr.Errors, errors.New("Missing constraint operand")) 2202 } 2203 2204 // Perform additional validation based on operand 2205 switch c.Operand { 2206 case ConstraintRegex: 2207 if _, err := regexp.Compile(c.RTarget); err != nil { 2208 mErr.Errors = append(mErr.Errors, fmt.Errorf("Regular expression failed to compile: %v", err)) 2209 } 2210 case ConstraintVersion: 2211 if _, err := version.NewConstraint(c.RTarget); err != nil { 2212 mErr.Errors = append(mErr.Errors, fmt.Errorf("Version constraint is invalid: %v", err)) 2213 } 2214 } 2215 return mErr.ErrorOrNil() 2216 } 2217 2218 const ( 2219 AllocDesiredStatusRun = "run" // Allocation should run 2220 AllocDesiredStatusStop = "stop" // Allocation should stop 2221 AllocDesiredStatusEvict = "evict" // Allocation should stop, and was evicted 2222 AllocDesiredStatusFailed = "failed" // Allocation failed to be done 2223 ) 2224 2225 const ( 2226 AllocClientStatusPending = "pending" 2227 AllocClientStatusRunning = "running" 2228 AllocClientStatusComplete = "complete" 2229 AllocClientStatusFailed = "failed" 2230 ) 2231 2232 // Allocation is used to allocate the placement of a task group to a node. 
2233 type Allocation struct { 2234 // ID of the allocation (UUID) 2235 ID string 2236 2237 // ID of the evaluation that generated this allocation 2238 EvalID string 2239 2240 // Name is a logical name of the allocation. 2241 Name string 2242 2243 // NodeID is the node this is being placed on 2244 NodeID string 2245 2246 // Job is the parent job of the task group being allocated. 2247 // This is copied at allocation time to avoid issues if the job 2248 // definition is updated. 2249 JobID string 2250 Job *Job 2251 2252 // TaskGroup is the name of the task group that should be run 2253 TaskGroup string 2254 2255 // Resources is the total set of resources allocated as part 2256 // of this allocation of the task group. 2257 Resources *Resources 2258 2259 // TaskResources is the set of resources allocated to each 2260 // task. These should sum to the total Resources. 2261 TaskResources map[string]*Resources 2262 2263 // Services is a map of service names to service ids 2264 Services map[string]string 2265 2266 // Metrics associated with this allocation 2267 Metrics *AllocMetric 2268 2269 // Desired Status of the allocation on the client 2270 DesiredStatus string 2271 2272 // DesiredStatusDescription is meant to provide more human useful information 2273 DesiredDescription string 2274 2275 // Status of the allocation on the client 2276 ClientStatus string 2277 2278 // ClientStatusDescription is meant to provide more human useful information 2279 ClientDescription string 2280 2281 // TaskStates stores the state of each task, 2282 TaskStates map[string]*TaskState 2283 2284 // Raft Indexes 2285 CreateIndex uint64 2286 ModifyIndex uint64 2287 2288 // AllocModifyIndex is not updated when the client updates allocations. This 2289 // lets the client pull only the allocs updated by the server. 2290 AllocModifyIndex uint64 2291 2292 // CreateTime is the time the allocation has finished scheduling and been 2293 // verified by the plan applier. 
2294 CreateTime int64 2295 } 2296 2297 func (a *Allocation) Copy() *Allocation { 2298 if a == nil { 2299 return nil 2300 } 2301 na := new(Allocation) 2302 *na = *a 2303 2304 na.Job = na.Job.Copy() 2305 na.Resources = na.Resources.Copy() 2306 2307 if a.TaskResources != nil { 2308 tr := make(map[string]*Resources, len(na.TaskResources)) 2309 for task, resource := range na.TaskResources { 2310 tr[task] = resource.Copy() 2311 } 2312 na.TaskResources = tr 2313 } 2314 2315 if a.Services != nil { 2316 s := make(map[string]string, len(na.Services)) 2317 for service, id := range na.Services { 2318 s[service] = id 2319 } 2320 na.Services = s 2321 } 2322 2323 na.Metrics = na.Metrics.Copy() 2324 2325 if a.TaskStates != nil { 2326 ts := make(map[string]*TaskState, len(na.TaskStates)) 2327 for task, state := range na.TaskStates { 2328 ts[task] = state.Copy() 2329 } 2330 na.TaskStates = ts 2331 } 2332 return na 2333 } 2334 2335 // TerminalStatus returns if the desired or actual status is terminal and 2336 // will no longer transition. 2337 func (a *Allocation) TerminalStatus() bool { 2338 // First check the desired state and if that isn't terminal, check client 2339 // state. 2340 switch a.DesiredStatus { 2341 case AllocDesiredStatusStop, AllocDesiredStatusEvict, AllocDesiredStatusFailed: 2342 return true 2343 default: 2344 } 2345 2346 switch a.ClientStatus { 2347 case AllocClientStatusComplete, AllocClientStatusFailed: 2348 return true 2349 default: 2350 return false 2351 } 2352 } 2353 2354 // RanSuccessfully returns whether the client has ran the allocation and all 2355 // tasks finished successfully 2356 func (a *Allocation) RanSuccessfully() bool { 2357 // Handle the case the client hasn't started the allocation. 
2358 if len(a.TaskStates) == 0 { 2359 return false 2360 } 2361 2362 // Check to see if all the tasks finised successfully in the allocation 2363 allSuccess := true 2364 for _, state := range a.TaskStates { 2365 allSuccess = allSuccess && state.Successful() 2366 } 2367 2368 return allSuccess 2369 } 2370 2371 // Stub returns a list stub for the allocation 2372 func (a *Allocation) Stub() *AllocListStub { 2373 return &AllocListStub{ 2374 ID: a.ID, 2375 EvalID: a.EvalID, 2376 Name: a.Name, 2377 NodeID: a.NodeID, 2378 JobID: a.JobID, 2379 TaskGroup: a.TaskGroup, 2380 DesiredStatus: a.DesiredStatus, 2381 DesiredDescription: a.DesiredDescription, 2382 ClientStatus: a.ClientStatus, 2383 ClientDescription: a.ClientDescription, 2384 TaskStates: a.TaskStates, 2385 CreateIndex: a.CreateIndex, 2386 ModifyIndex: a.ModifyIndex, 2387 CreateTime: a.CreateTime, 2388 } 2389 } 2390 2391 // PopulateServiceIDs generates the service IDs for all the service definitions 2392 // in that Allocation 2393 func (a *Allocation) PopulateServiceIDs(tg *TaskGroup) { 2394 // Retain the old services, and re-initialize. We may be removing 2395 // services, so we cannot update the existing map. 2396 previous := a.Services 2397 a.Services = make(map[string]string) 2398 2399 for _, task := range tg.Tasks { 2400 for _, service := range task.Services { 2401 // Retain the service if an ID is already generated 2402 if id, ok := previous[service.Name]; ok { 2403 a.Services[service.Name] = id 2404 continue 2405 } 2406 2407 // If the service hasn't been generated an ID, we generate one. 2408 // We add a prefix to the Service ID so that we can know that this service 2409 // is managed by Nomad since Consul can also have service which are not 2410 // managed by Nomad 2411 a.Services[service.Name] = fmt.Sprintf("%s-%s", NomadConsulPrefix, GenerateUUID()) 2412 } 2413 } 2414 } 2415 2416 var ( 2417 // AllocationIndexRegex is a regular expression to find the allocation index. 
2418 AllocationIndexRegex = regexp.MustCompile(".+\\[(\\d+)\\]$") 2419 ) 2420 2421 // Index returns the index of the allocation. If the allocation is from a task 2422 // group with count greater than 1, there will be multiple allocations for it. 2423 func (a *Allocation) Index() int { 2424 matches := AllocationIndexRegex.FindStringSubmatch(a.Name) 2425 if len(matches) != 2 { 2426 return -1 2427 } 2428 2429 index, err := strconv.Atoi(matches[1]) 2430 if err != nil { 2431 return -1 2432 } 2433 2434 return index 2435 } 2436 2437 // AllocListStub is used to return a subset of alloc information 2438 type AllocListStub struct { 2439 ID string 2440 EvalID string 2441 Name string 2442 NodeID string 2443 JobID string 2444 TaskGroup string 2445 DesiredStatus string 2446 DesiredDescription string 2447 ClientStatus string 2448 ClientDescription string 2449 TaskStates map[string]*TaskState 2450 CreateIndex uint64 2451 ModifyIndex uint64 2452 CreateTime int64 2453 } 2454 2455 // AllocMetric is used to track various metrics while attempting 2456 // to make an allocation. These are used to debug a job, or to better 2457 // understand the pressure within the system. 2458 type AllocMetric struct { 2459 // NodesEvaluated is the number of nodes that were evaluated 2460 NodesEvaluated int 2461 2462 // NodesFiltered is the number of nodes filtered due to a constraint 2463 NodesFiltered int 2464 2465 // NodesAvailable is the number of nodes available for evaluation per DC. 
2466 NodesAvailable map[string]int 2467 2468 // ClassFiltered is the number of nodes filtered by class 2469 ClassFiltered map[string]int 2470 2471 // ConstraintFiltered is the number of failures caused by constraint 2472 ConstraintFiltered map[string]int 2473 2474 // NodesExhausted is the number of nodes skipped due to being 2475 // exhausted of at least one resource 2476 NodesExhausted int 2477 2478 // ClassExhausted is the number of nodes exhausted by class 2479 ClassExhausted map[string]int 2480 2481 // DimensionExhausted provides the count by dimension or reason 2482 DimensionExhausted map[string]int 2483 2484 // Scores is the scores of the final few nodes remaining 2485 // for placement. The top score is typically selected. 2486 Scores map[string]float64 2487 2488 // AllocationTime is a measure of how long the allocation 2489 // attempt took. This can affect performance and SLAs. 2490 AllocationTime time.Duration 2491 2492 // CoalescedFailures indicates the number of other 2493 // allocations that were coalesced into this failed allocation. 2494 // This is to prevent creating many failed allocations for a 2495 // single task group. 
2496 CoalescedFailures int 2497 } 2498 2499 func (a *AllocMetric) Copy() *AllocMetric { 2500 if a == nil { 2501 return nil 2502 } 2503 na := new(AllocMetric) 2504 *na = *a 2505 na.NodesAvailable = CopyMapStringInt(na.NodesAvailable) 2506 na.ClassFiltered = CopyMapStringInt(na.ClassFiltered) 2507 na.ConstraintFiltered = CopyMapStringInt(na.ConstraintFiltered) 2508 na.ClassExhausted = CopyMapStringInt(na.ClassExhausted) 2509 na.DimensionExhausted = CopyMapStringInt(na.DimensionExhausted) 2510 na.Scores = CopyMapStringFloat64(na.Scores) 2511 return na 2512 } 2513 2514 func (a *AllocMetric) EvaluateNode() { 2515 a.NodesEvaluated += 1 2516 } 2517 2518 func (a *AllocMetric) FilterNode(node *Node, constraint string) { 2519 a.NodesFiltered += 1 2520 if node != nil && node.NodeClass != "" { 2521 if a.ClassFiltered == nil { 2522 a.ClassFiltered = make(map[string]int) 2523 } 2524 a.ClassFiltered[node.NodeClass] += 1 2525 } 2526 if constraint != "" { 2527 if a.ConstraintFiltered == nil { 2528 a.ConstraintFiltered = make(map[string]int) 2529 } 2530 a.ConstraintFiltered[constraint] += 1 2531 } 2532 } 2533 2534 func (a *AllocMetric) ExhaustedNode(node *Node, dimension string) { 2535 a.NodesExhausted += 1 2536 if node != nil && node.NodeClass != "" { 2537 if a.ClassExhausted == nil { 2538 a.ClassExhausted = make(map[string]int) 2539 } 2540 a.ClassExhausted[node.NodeClass] += 1 2541 } 2542 if dimension != "" { 2543 if a.DimensionExhausted == nil { 2544 a.DimensionExhausted = make(map[string]int) 2545 } 2546 a.DimensionExhausted[dimension] += 1 2547 } 2548 } 2549 2550 func (a *AllocMetric) ScoreNode(node *Node, name string, score float64) { 2551 if a.Scores == nil { 2552 a.Scores = make(map[string]float64) 2553 } 2554 key := fmt.Sprintf("%s.%s", node.ID, name) 2555 a.Scores[key] = score 2556 } 2557 2558 const ( 2559 EvalStatusBlocked = "blocked" 2560 EvalStatusPending = "pending" 2561 EvalStatusComplete = "complete" 2562 EvalStatusFailed = "failed" 2563 EvalStatusCancelled = 
"canceled" 2564 ) 2565 2566 const ( 2567 EvalTriggerJobRegister = "job-register" 2568 EvalTriggerJobDeregister = "job-deregister" 2569 EvalTriggerPeriodicJob = "periodic-job" 2570 EvalTriggerNodeUpdate = "node-update" 2571 EvalTriggerScheduled = "scheduled" 2572 EvalTriggerRollingUpdate = "rolling-update" 2573 EvalTriggerMaxPlans = "max-plan-attempts" 2574 ) 2575 2576 const ( 2577 // CoreJobEvalGC is used for the garbage collection of evaluations 2578 // and allocations. We periodically scan evaluations in a terminal state, 2579 // in which all the corresponding allocations are also terminal. We 2580 // delete these out of the system to bound the state. 2581 CoreJobEvalGC = "eval-gc" 2582 2583 // CoreJobNodeGC is used for the garbage collection of failed nodes. 2584 // We periodically scan nodes in a terminal state, and if they have no 2585 // corresponding allocations we delete these out of the system. 2586 CoreJobNodeGC = "node-gc" 2587 2588 // CoreJobJobGC is used for the garbage collection of eligible jobs. We 2589 // periodically scan garbage collectible jobs and check if both their 2590 // evaluations and allocations are terminal. If so, we delete these out of 2591 // the system. 2592 CoreJobJobGC = "job-gc" 2593 2594 // CoreJobForceGC is used to force garbage collection of all GCable objects. 2595 CoreJobForceGC = "force-gc" 2596 ) 2597 2598 // Evaluation is used anytime we need to apply business logic as a result 2599 // of a change to our desired state (job specification) or the emergent state 2600 // (registered nodes). When the inputs change, we need to "evaluate" them, 2601 // potentially taking action (allocation of work) or doing nothing if the state 2602 // of the world does not require it. 2603 type Evaluation struct { 2604 // ID is a randonly generated UUID used for this evaluation. This 2605 // is assigned upon the creation of the evaluation. 
2606 ID string 2607 2608 // Priority is used to control scheduling importance and if this job 2609 // can preempt other jobs. 2610 Priority int 2611 2612 // Type is used to control which schedulers are available to handle 2613 // this evaluation. 2614 Type string 2615 2616 // TriggeredBy is used to give some insight into why this Eval 2617 // was created. (Job change, node failure, alloc failure, etc). 2618 TriggeredBy string 2619 2620 // JobID is the job this evaluation is scoped to. Evaluations cannot 2621 // be run in parallel for a given JobID, so we serialize on this. 2622 JobID string 2623 2624 // JobModifyIndex is the modify index of the job at the time 2625 // the evaluation was created 2626 JobModifyIndex uint64 2627 2628 // NodeID is the node that was affected triggering the evaluation. 2629 NodeID string 2630 2631 // NodeModifyIndex is the modify index of the node at the time 2632 // the evaluation was created 2633 NodeModifyIndex uint64 2634 2635 // Status of the evaluation 2636 Status string 2637 2638 // StatusDescription is meant to provide more human useful information 2639 StatusDescription string 2640 2641 // Wait is a minimum wait time for running the eval. This is used to 2642 // support a rolling upgrade. 2643 Wait time.Duration 2644 2645 // NextEval is the evaluation ID for the eval created to do a followup. 2646 // This is used to support rolling upgrades, where we need a chain of evaluations. 2647 NextEval string 2648 2649 // PreviousEval is the evaluation ID for the eval creating this one to do a followup. 2650 // This is used to support rolling upgrades, where we need a chain of evaluations. 2651 PreviousEval string 2652 2653 // BlockedEval is the evaluation ID for a created blocked eval. A 2654 // blocked eval will be created if all allocations could not be placed due 2655 // to constraints or lacking resources. 
2656 BlockedEval string 2657 2658 // FailedTGAllocs are task groups which have allocations that could not be 2659 // made, but the metrics are persisted so that the user can use the feedback 2660 // to determine the cause. 2661 FailedTGAllocs map[string]*AllocMetric 2662 2663 // ClassEligibility tracks computed node classes that have been explicitly 2664 // marked as eligible or ineligible. 2665 ClassEligibility map[string]bool 2666 2667 // EscapedComputedClass marks whether the job has constraints that are not 2668 // captured by computed node classes. 2669 EscapedComputedClass bool 2670 2671 // AnnotatePlan triggers the scheduler to provide additional annotations 2672 // during the evaluation. This should not be set during normal operations. 2673 AnnotatePlan bool 2674 2675 // SnapshotIndex is the Raft index of the snapshot used to process the 2676 // evaluation. As such it will only be set once it has gone through the 2677 // scheduler. 2678 SnapshotIndex uint64 2679 2680 // Raft Indexes 2681 CreateIndex uint64 2682 ModifyIndex uint64 2683 } 2684 2685 // TerminalStatus returns if the current status is terminal and 2686 // will no longer transition. 
2687 func (e *Evaluation) TerminalStatus() bool { 2688 switch e.Status { 2689 case EvalStatusComplete, EvalStatusFailed, EvalStatusCancelled: 2690 return true 2691 default: 2692 return false 2693 } 2694 } 2695 2696 func (e *Evaluation) GoString() string { 2697 return fmt.Sprintf("<Eval '%s' JobID: '%s'>", e.ID, e.JobID) 2698 } 2699 2700 func (e *Evaluation) Copy() *Evaluation { 2701 if e == nil { 2702 return nil 2703 } 2704 ne := new(Evaluation) 2705 *ne = *e 2706 2707 // Copy ClassEligibility 2708 if e.ClassEligibility != nil { 2709 classes := make(map[string]bool, len(e.ClassEligibility)) 2710 for class, elig := range e.ClassEligibility { 2711 classes[class] = elig 2712 } 2713 ne.ClassEligibility = classes 2714 } 2715 2716 // Copy FailedTGAllocs 2717 if e.FailedTGAllocs != nil { 2718 failedTGs := make(map[string]*AllocMetric, len(e.FailedTGAllocs)) 2719 for tg, metric := range e.FailedTGAllocs { 2720 failedTGs[tg] = metric.Copy() 2721 } 2722 ne.FailedTGAllocs = failedTGs 2723 } 2724 2725 return ne 2726 } 2727 2728 // ShouldEnqueue checks if a given evaluation should be enqueued into the 2729 // eval_broker 2730 func (e *Evaluation) ShouldEnqueue() bool { 2731 switch e.Status { 2732 case EvalStatusPending: 2733 return true 2734 case EvalStatusComplete, EvalStatusFailed, EvalStatusBlocked, EvalStatusCancelled: 2735 return false 2736 default: 2737 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 2738 } 2739 } 2740 2741 // ShouldBlock checks if a given evaluation should be entered into the blocked 2742 // eval tracker. 
2743 func (e *Evaluation) ShouldBlock() bool { 2744 switch e.Status { 2745 case EvalStatusBlocked: 2746 return true 2747 case EvalStatusComplete, EvalStatusFailed, EvalStatusPending, EvalStatusCancelled: 2748 return false 2749 default: 2750 panic(fmt.Sprintf("unhandled evaluation (%s) status %s", e.ID, e.Status)) 2751 } 2752 } 2753 2754 // MakePlan is used to make a plan from the given evaluation 2755 // for a given Job 2756 func (e *Evaluation) MakePlan(j *Job) *Plan { 2757 p := &Plan{ 2758 EvalID: e.ID, 2759 Priority: e.Priority, 2760 Job: j, 2761 NodeUpdate: make(map[string][]*Allocation), 2762 NodeAllocation: make(map[string][]*Allocation), 2763 } 2764 if j != nil { 2765 p.AllAtOnce = j.AllAtOnce 2766 } 2767 return p 2768 } 2769 2770 // NextRollingEval creates an evaluation to followup this eval for rolling updates 2771 func (e *Evaluation) NextRollingEval(wait time.Duration) *Evaluation { 2772 return &Evaluation{ 2773 ID: GenerateUUID(), 2774 Priority: e.Priority, 2775 Type: e.Type, 2776 TriggeredBy: EvalTriggerRollingUpdate, 2777 JobID: e.JobID, 2778 JobModifyIndex: e.JobModifyIndex, 2779 Status: EvalStatusPending, 2780 Wait: wait, 2781 PreviousEval: e.ID, 2782 } 2783 } 2784 2785 // CreateBlockedEval creates a blocked evaluation to followup this eval to place any 2786 // failed allocations. It takes the classes marked explicitly eligible or 2787 // ineligible and whether the job has escaped computed node classes. 2788 func (e *Evaluation) CreateBlockedEval(classEligibility map[string]bool, escaped bool) *Evaluation { 2789 return &Evaluation{ 2790 ID: GenerateUUID(), 2791 Priority: e.Priority, 2792 Type: e.Type, 2793 TriggeredBy: e.TriggeredBy, 2794 JobID: e.JobID, 2795 JobModifyIndex: e.JobModifyIndex, 2796 Status: EvalStatusBlocked, 2797 PreviousEval: e.ID, 2798 ClassEligibility: classEligibility, 2799 EscapedComputedClass: escaped, 2800 } 2801 } 2802 2803 // Plan is used to submit a commit plan for task allocations. 
These 2804 // are submitted to the leader which verifies that resources have 2805 // not been overcommitted before admiting the plan. 2806 type Plan struct { 2807 // EvalID is the evaluation ID this plan is associated with 2808 EvalID string 2809 2810 // EvalToken is used to prevent a split-brain processing of 2811 // an evaluation. There should only be a single scheduler running 2812 // an Eval at a time, but this could be violated after a leadership 2813 // transition. This unique token is used to reject plans that are 2814 // being submitted from a different leader. 2815 EvalToken string 2816 2817 // Priority is the priority of the upstream job 2818 Priority int 2819 2820 // AllAtOnce is used to control if incremental scheduling of task groups 2821 // is allowed or if we must do a gang scheduling of the entire job. 2822 // If this is false, a plan may be partially applied. Otherwise, the 2823 // entire plan must be able to make progress. 2824 AllAtOnce bool 2825 2826 // Job is the parent job of all the allocations in the Plan. 2827 // Since a Plan only involves a single Job, we can reduce the size 2828 // of the plan by only including it once. 2829 Job *Job 2830 2831 // NodeUpdate contains all the allocations for each node. For each node, 2832 // this is a list of the allocations to update to either stop or evict. 2833 NodeUpdate map[string][]*Allocation 2834 2835 // NodeAllocation contains all the allocations for each node. 2836 // The evicts must be considered prior to the allocations. 2837 NodeAllocation map[string][]*Allocation 2838 2839 // Annotations contains annotations by the scheduler to be used by operators 2840 // to understand the decisions made by the scheduler. 
2841 Annotations *PlanAnnotations 2842 } 2843 2844 func (p *Plan) AppendUpdate(alloc *Allocation, status, desc string) { 2845 newAlloc := new(Allocation) 2846 *newAlloc = *alloc 2847 2848 // If the job is not set in the plan we are deregistering a job so we 2849 // extract the job from the allocation. 2850 if p.Job == nil && newAlloc.Job != nil { 2851 p.Job = newAlloc.Job 2852 } 2853 2854 // Normalize the job 2855 newAlloc.Job = nil 2856 2857 // Strip the resources as it can be rebuilt. 2858 newAlloc.Resources = nil 2859 2860 newAlloc.DesiredStatus = status 2861 newAlloc.DesiredDescription = desc 2862 node := alloc.NodeID 2863 existing := p.NodeUpdate[node] 2864 p.NodeUpdate[node] = append(existing, newAlloc) 2865 } 2866 2867 func (p *Plan) PopUpdate(alloc *Allocation) { 2868 existing := p.NodeUpdate[alloc.NodeID] 2869 n := len(existing) 2870 if n > 0 && existing[n-1].ID == alloc.ID { 2871 existing = existing[:n-1] 2872 if len(existing) > 0 { 2873 p.NodeUpdate[alloc.NodeID] = existing 2874 } else { 2875 delete(p.NodeUpdate, alloc.NodeID) 2876 } 2877 } 2878 } 2879 2880 func (p *Plan) AppendAlloc(alloc *Allocation) { 2881 node := alloc.NodeID 2882 existing := p.NodeAllocation[node] 2883 p.NodeAllocation[node] = append(existing, alloc) 2884 } 2885 2886 // IsNoOp checks if this plan would do nothing 2887 func (p *Plan) IsNoOp() bool { 2888 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 2889 } 2890 2891 // PlanResult is the result of a plan submitted to the leader. 2892 type PlanResult struct { 2893 // NodeUpdate contains all the updates that were committed. 2894 NodeUpdate map[string][]*Allocation 2895 2896 // NodeAllocation contains all the allocations that were committed. 2897 NodeAllocation map[string][]*Allocation 2898 2899 // RefreshIndex is the index the worker should refresh state up to. 2900 // This allows all evictions and allocations to be materialized. 
2901 // If any allocations were rejected due to stale data (node state, 2902 // over committed) this can be used to force a worker refresh. 2903 RefreshIndex uint64 2904 2905 // AllocIndex is the Raft index in which the evictions and 2906 // allocations took place. This is used for the write index. 2907 AllocIndex uint64 2908 } 2909 2910 // IsNoOp checks if this plan result would do nothing 2911 func (p *PlanResult) IsNoOp() bool { 2912 return len(p.NodeUpdate) == 0 && len(p.NodeAllocation) == 0 2913 } 2914 2915 // FullCommit is used to check if all the allocations in a plan 2916 // were committed as part of the result. Returns if there was 2917 // a match, and the number of expected and actual allocations. 2918 func (p *PlanResult) FullCommit(plan *Plan) (bool, int, int) { 2919 expected := 0 2920 actual := 0 2921 for name, allocList := range plan.NodeAllocation { 2922 didAlloc, _ := p.NodeAllocation[name] 2923 expected += len(allocList) 2924 actual += len(didAlloc) 2925 } 2926 return actual == expected, expected, actual 2927 } 2928 2929 // PlanAnnotations holds annotations made by the scheduler to give further debug 2930 // information to operators. 2931 type PlanAnnotations struct { 2932 // DesiredTGUpdates is the set of desired updates per task group. 2933 DesiredTGUpdates map[string]*DesiredUpdates 2934 } 2935 2936 // DesiredUpdates is the set of changes the scheduler would like to make given 2937 // sufficient resources and cluster capacity. 2938 type DesiredUpdates struct { 2939 Ignore uint64 2940 Place uint64 2941 Migrate uint64 2942 Stop uint64 2943 InPlaceUpdate uint64 2944 DestructiveUpdate uint64 2945 } 2946 2947 // msgpackHandle is a shared handle for encoding/decoding of structs 2948 var MsgpackHandle = func() *codec.MsgpackHandle { 2949 h := &codec.MsgpackHandle{RawToString: true} 2950 2951 // Sets the default type for decoding a map into a nil interface{}. 
2952 // This is necessary in particular because we store the driver configs as a 2953 // nil interface{}. 2954 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 2955 return h 2956 }() 2957 2958 var HashiMsgpackHandle = func() *hcodec.MsgpackHandle { 2959 h := &hcodec.MsgpackHandle{RawToString: true} 2960 2961 // Sets the default type for decoding a map into a nil interface{}. 2962 // This is necessary in particular because we store the driver configs as a 2963 // nil interface{}. 2964 h.MapType = reflect.TypeOf(map[string]interface{}(nil)) 2965 return h 2966 }() 2967 2968 // Decode is used to decode a MsgPack encoded object 2969 func Decode(buf []byte, out interface{}) error { 2970 return codec.NewDecoder(bytes.NewReader(buf), MsgpackHandle).Decode(out) 2971 } 2972 2973 // Encode is used to encode a MsgPack object with type prefix 2974 func Encode(t MessageType, msg interface{}) ([]byte, error) { 2975 var buf bytes.Buffer 2976 buf.WriteByte(uint8(t)) 2977 err := codec.NewEncoder(&buf, MsgpackHandle).Encode(msg) 2978 return buf.Bytes(), err 2979 }