github.com/hashicorp/nomad/api@v0.0.0-20240306165712-3193ac204f65/tasks.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package api 5 6 import ( 7 "fmt" 8 "path" 9 "path/filepath" 10 "strings" 11 "time" 12 ) 13 14 type ReconcileOption = string 15 16 const ( 17 // RestartPolicyModeDelay causes an artificial delay till the next interval is 18 // reached when the specified attempts have been reached in the interval. 19 RestartPolicyModeDelay = "delay" 20 21 // RestartPolicyModeFail causes a job to fail if the specified number of 22 // attempts are reached within an interval. 23 RestartPolicyModeFail = "fail" 24 25 // ReconcileOption is used to specify the behavior of the reconciliation process 26 // between the original allocations and the replacements when a previously 27 // disconnected client comes back online. 28 ReconcileOptionKeepOriginal = "keep_original" 29 ReconcileOptionKeepReplacement = "keep_replacement" 30 ReconcileOptionBestScore = "best_score" 31 ReconcileOptionLongestRunning = "longest_running" 32 ) 33 34 // MemoryStats holds memory usage related stats 35 type MemoryStats struct { 36 RSS uint64 37 Cache uint64 38 Swap uint64 39 Usage uint64 40 MaxUsage uint64 41 KernelUsage uint64 42 KernelMaxUsage uint64 43 Measured []string 44 } 45 46 // CpuStats holds cpu usage related stats 47 type CpuStats struct { 48 SystemMode float64 49 UserMode float64 50 TotalTicks float64 51 ThrottledPeriods uint64 52 ThrottledTime uint64 53 Percent float64 54 Measured []string 55 } 56 57 // ResourceUsage holds information related to cpu and memory stats 58 type ResourceUsage struct { 59 MemoryStats *MemoryStats 60 CpuStats *CpuStats 61 DeviceStats []*DeviceGroupStats 62 } 63 64 // TaskResourceUsage holds aggregated resource usage of all processes in a Task 65 // and the resource usage of the individual pids 66 type TaskResourceUsage struct { 67 ResourceUsage *ResourceUsage 68 Timestamp int64 69 Pids map[string]*ResourceUsage 70 } 71 
72 // AllocResourceUsage holds the aggregated task resource usage of the 73 // allocation. 74 type AllocResourceUsage struct { 75 ResourceUsage *ResourceUsage 76 Tasks map[string]*TaskResourceUsage 77 Timestamp int64 78 } 79 80 // AllocCheckStatus contains the current status of a nomad service discovery check. 81 type AllocCheckStatus struct { 82 ID string 83 Check string 84 Group string 85 Mode string 86 Output string 87 Service string 88 Task string 89 Status string 90 StatusCode int 91 Timestamp int64 92 } 93 94 // AllocCheckStatuses holds the set of nomad service discovery checks within 95 // the allocation (including group and task level service checks). 96 type AllocCheckStatuses map[string]AllocCheckStatus 97 98 // RestartPolicy defines how the Nomad client restarts 99 // tasks in a taskgroup when they fail 100 type RestartPolicy struct { 101 Interval *time.Duration `hcl:"interval,optional"` 102 Attempts *int `hcl:"attempts,optional"` 103 Delay *time.Duration `hcl:"delay,optional"` 104 Mode *string `hcl:"mode,optional"` 105 RenderTemplates *bool `mapstructure:"render_templates" hcl:"render_templates,optional"` 106 } 107 108 func (r *RestartPolicy) Merge(rp *RestartPolicy) { 109 if rp.Interval != nil { 110 r.Interval = rp.Interval 111 } 112 if rp.Attempts != nil { 113 r.Attempts = rp.Attempts 114 } 115 if rp.Delay != nil { 116 r.Delay = rp.Delay 117 } 118 if rp.Mode != nil { 119 r.Mode = rp.Mode 120 } 121 if rp.RenderTemplates != nil { 122 r.RenderTemplates = rp.RenderTemplates 123 } 124 } 125 126 // Disconnect strategy defines how both clients and server should behave in case of 127 // disconnection between them. 128 type DisconnectStrategy struct { 129 // Defines for how long the server will consider the unresponsive node as 130 // disconnected but alive instead of lost. 131 LostAfter *time.Duration `mapstructure:"lost_after" hcl:"lost_after,optional"` 132 133 // Defines for how long a disconnected client will keep its allocations running. 
134 StopOnClientAfter *time.Duration `mapstructure:"stop_on_client_after" hcl:"stop_on_client_after,optional"` 135 136 // A boolean field used to define if the allocations should be replaced while 137 // it's considered disconnected. 138 Replace *bool `mapstructure:"replace" hcl:"replace,optional"` 139 140 // Once the disconnected node starts reporting again, it will define which 141 // instances to keep: the original allocations, the replacement, the one 142 // running on the node with the best score as it is currently implemented, 143 // or the allocation that has been running continuously the longest. 144 Reconcile *ReconcileOption `mapstructure:"reconcile" hcl:"reconcile,optional"` 145 } 146 147 func (ds *DisconnectStrategy) Canonicalize() { 148 if ds.Replace == nil { 149 ds.Replace = pointerOf(true) 150 } 151 152 if ds.Reconcile == nil { 153 ds.Reconcile = pointerOf(ReconcileOptionBestScore) 154 } 155 } 156 157 // Reschedule configures how Tasks are rescheduled when they crash or fail. 158 type ReschedulePolicy struct { 159 // Attempts limits the number of rescheduling attempts that can occur in an interval. 160 Attempts *int `mapstructure:"attempts" hcl:"attempts,optional"` 161 162 // Interval is a duration in which we can limit the number of reschedule attempts. 163 Interval *time.Duration `mapstructure:"interval" hcl:"interval,optional"` 164 165 // Delay is a minimum duration to wait between reschedule attempts. 166 // The delay function determines how much subsequent reschedule attempts are delayed by. 167 Delay *time.Duration `mapstructure:"delay" hcl:"delay,optional"` 168 169 // DelayFunction determines how the delay progressively changes on subsequent reschedule 170 // attempts. Valid values are "exponential", "constant", and "fibonacci". 171 DelayFunction *string `mapstructure:"delay_function" hcl:"delay_function,optional"` 172 173 // MaxDelay is an upper bound on the delay. 
174 MaxDelay *time.Duration `mapstructure:"max_delay" hcl:"max_delay,optional"` 175 176 // Unlimited allows rescheduling attempts until they succeed 177 Unlimited *bool `mapstructure:"unlimited" hcl:"unlimited,optional"` 178 } 179 180 func (r *ReschedulePolicy) Merge(rp *ReschedulePolicy) { 181 if rp == nil { 182 return 183 } 184 if rp.Interval != nil { 185 r.Interval = rp.Interval 186 } 187 if rp.Attempts != nil { 188 r.Attempts = rp.Attempts 189 } 190 if rp.Delay != nil { 191 r.Delay = rp.Delay 192 } 193 if rp.DelayFunction != nil { 194 r.DelayFunction = rp.DelayFunction 195 } 196 if rp.MaxDelay != nil { 197 r.MaxDelay = rp.MaxDelay 198 } 199 if rp.Unlimited != nil { 200 r.Unlimited = rp.Unlimited 201 } 202 } 203 204 func (r *ReschedulePolicy) Canonicalize(jobType string) { 205 dp := NewDefaultReschedulePolicy(jobType) 206 if r.Interval == nil { 207 r.Interval = dp.Interval 208 } 209 if r.Attempts == nil { 210 r.Attempts = dp.Attempts 211 } 212 if r.Delay == nil { 213 r.Delay = dp.Delay 214 } 215 if r.DelayFunction == nil { 216 r.DelayFunction = dp.DelayFunction 217 } 218 if r.MaxDelay == nil { 219 r.MaxDelay = dp.MaxDelay 220 } 221 if r.Unlimited == nil { 222 r.Unlimited = dp.Unlimited 223 } 224 } 225 226 // Affinity is used to serialize task group affinities 227 type Affinity struct { 228 LTarget string `hcl:"attribute,optional"` // Left-hand target 229 RTarget string `hcl:"value,optional"` // Right-hand target 230 Operand string `hcl:"operator,optional"` // Constraint operand (<=, <, =, !=, >, >=), set_contains_all, set_contains_any 231 Weight *int8 `hcl:"weight,optional"` // Weight applied to nodes that match the affinity. 
Can be negative 232 } 233 234 func NewAffinity(lTarget string, operand string, rTarget string, weight int8) *Affinity { 235 return &Affinity{ 236 LTarget: lTarget, 237 RTarget: rTarget, 238 Operand: operand, 239 Weight: pointerOf(weight), 240 } 241 } 242 243 func (a *Affinity) Canonicalize() { 244 if a.Weight == nil { 245 a.Weight = pointerOf(int8(50)) 246 } 247 } 248 249 func NewDefaultDisconnectStrategy() *DisconnectStrategy { 250 return &DisconnectStrategy{ 251 LostAfter: pointerOf(0 * time.Minute), 252 Replace: pointerOf(true), 253 Reconcile: pointerOf(ReconcileOptionBestScore), 254 } 255 } 256 257 func NewDefaultReschedulePolicy(jobType string) *ReschedulePolicy { 258 var dp *ReschedulePolicy 259 switch jobType { 260 case "service": 261 // This needs to be in sync with DefaultServiceJobReschedulePolicy 262 // in nomad/structs/structs.go 263 dp = &ReschedulePolicy{ 264 Delay: pointerOf(30 * time.Second), 265 DelayFunction: pointerOf("exponential"), 266 MaxDelay: pointerOf(1 * time.Hour), 267 Unlimited: pointerOf(true), 268 269 Attempts: pointerOf(0), 270 Interval: pointerOf(time.Duration(0)), 271 } 272 case "batch": 273 // This needs to be in sync with DefaultBatchJobReschedulePolicy 274 // in nomad/structs/structs.go 275 dp = &ReschedulePolicy{ 276 Attempts: pointerOf(1), 277 Interval: pointerOf(24 * time.Hour), 278 Delay: pointerOf(5 * time.Second), 279 DelayFunction: pointerOf("constant"), 280 281 MaxDelay: pointerOf(time.Duration(0)), 282 Unlimited: pointerOf(false), 283 } 284 285 case "system": 286 dp = &ReschedulePolicy{ 287 Attempts: pointerOf(0), 288 Interval: pointerOf(time.Duration(0)), 289 Delay: pointerOf(time.Duration(0)), 290 DelayFunction: pointerOf(""), 291 MaxDelay: pointerOf(time.Duration(0)), 292 Unlimited: pointerOf(false), 293 } 294 295 default: 296 // GH-7203: it is possible an unknown job type is passed to this 297 // function and we need to ensure a non-nil object is returned so that 298 // the canonicalization runs without panicking. 
299 dp = &ReschedulePolicy{ 300 Attempts: pointerOf(0), 301 Interval: pointerOf(time.Duration(0)), 302 Delay: pointerOf(time.Duration(0)), 303 DelayFunction: pointerOf(""), 304 MaxDelay: pointerOf(time.Duration(0)), 305 Unlimited: pointerOf(false), 306 } 307 } 308 return dp 309 } 310 311 func (r *ReschedulePolicy) Copy() *ReschedulePolicy { 312 if r == nil { 313 return nil 314 } 315 nrp := new(ReschedulePolicy) 316 *nrp = *r 317 return nrp 318 } 319 320 func (p *ReschedulePolicy) String() string { 321 if p == nil { 322 return "" 323 } 324 if *p.Unlimited { 325 return fmt.Sprintf("unlimited with %v delay, max_delay = %v", *p.DelayFunction, *p.MaxDelay) 326 } 327 return fmt.Sprintf("%v in %v with %v delay, max_delay = %v", *p.Attempts, *p.Interval, *p.DelayFunction, *p.MaxDelay) 328 } 329 330 // Spread is used to serialize task group allocation spread preferences 331 type Spread struct { 332 Attribute string `hcl:"attribute,optional"` 333 Weight *int8 `hcl:"weight,optional"` 334 SpreadTarget []*SpreadTarget `hcl:"target,block"` 335 } 336 337 // SpreadTarget is used to serialize target allocation spread percentages 338 type SpreadTarget struct { 339 Value string `hcl:",label"` 340 Percent uint8 `hcl:"percent,optional"` 341 } 342 343 func NewSpreadTarget(value string, percent uint8) *SpreadTarget { 344 return &SpreadTarget{ 345 Value: value, 346 Percent: percent, 347 } 348 } 349 350 func NewSpread(attribute string, weight int8, spreadTargets []*SpreadTarget) *Spread { 351 return &Spread{ 352 Attribute: attribute, 353 Weight: pointerOf(weight), 354 SpreadTarget: spreadTargets, 355 } 356 } 357 358 func (s *Spread) Canonicalize() { 359 if s.Weight == nil { 360 s.Weight = pointerOf(int8(50)) 361 } 362 } 363 364 // EphemeralDisk is an ephemeral disk object 365 type EphemeralDisk struct { 366 Sticky *bool `hcl:"sticky,optional"` 367 Migrate *bool `hcl:"migrate,optional"` 368 SizeMB *int `mapstructure:"size" hcl:"size,optional"` 369 } 370 371 func DefaultEphemeralDisk() 
*EphemeralDisk { 372 return &EphemeralDisk{ 373 Sticky: pointerOf(false), 374 Migrate: pointerOf(false), 375 SizeMB: pointerOf(300), 376 } 377 } 378 379 func (e *EphemeralDisk) Canonicalize() { 380 if e.Sticky == nil { 381 e.Sticky = pointerOf(false) 382 } 383 if e.Migrate == nil { 384 e.Migrate = pointerOf(false) 385 } 386 if e.SizeMB == nil { 387 e.SizeMB = pointerOf(300) 388 } 389 } 390 391 // MigrateStrategy describes how allocations for a task group should be 392 // migrated between nodes (eg when draining). 393 type MigrateStrategy struct { 394 MaxParallel *int `mapstructure:"max_parallel" hcl:"max_parallel,optional"` 395 HealthCheck *string `mapstructure:"health_check" hcl:"health_check,optional"` 396 MinHealthyTime *time.Duration `mapstructure:"min_healthy_time" hcl:"min_healthy_time,optional"` 397 HealthyDeadline *time.Duration `mapstructure:"healthy_deadline" hcl:"healthy_deadline,optional"` 398 } 399 400 func DefaultMigrateStrategy() *MigrateStrategy { 401 return &MigrateStrategy{ 402 MaxParallel: pointerOf(1), 403 HealthCheck: pointerOf("checks"), 404 MinHealthyTime: pointerOf(10 * time.Second), 405 HealthyDeadline: pointerOf(5 * time.Minute), 406 } 407 } 408 409 func (m *MigrateStrategy) Canonicalize() { 410 if m == nil { 411 return 412 } 413 defaults := DefaultMigrateStrategy() 414 if m.MaxParallel == nil { 415 m.MaxParallel = defaults.MaxParallel 416 } 417 if m.HealthCheck == nil { 418 m.HealthCheck = defaults.HealthCheck 419 } 420 if m.MinHealthyTime == nil { 421 m.MinHealthyTime = defaults.MinHealthyTime 422 } 423 if m.HealthyDeadline == nil { 424 m.HealthyDeadline = defaults.HealthyDeadline 425 } 426 } 427 428 func (m *MigrateStrategy) Merge(o *MigrateStrategy) { 429 if o.MaxParallel != nil { 430 m.MaxParallel = o.MaxParallel 431 } 432 if o.HealthCheck != nil { 433 m.HealthCheck = o.HealthCheck 434 } 435 if o.MinHealthyTime != nil { 436 m.MinHealthyTime = o.MinHealthyTime 437 } 438 if o.HealthyDeadline != nil { 439 m.HealthyDeadline = 
o.HealthyDeadline 440 } 441 } 442 443 func (m *MigrateStrategy) Copy() *MigrateStrategy { 444 if m == nil { 445 return nil 446 } 447 nm := new(MigrateStrategy) 448 *nm = *m 449 return nm 450 } 451 452 // VolumeRequest is a representation of a storage volume that a TaskGroup wishes to use. 453 type VolumeRequest struct { 454 Name string `hcl:"name,label"` 455 Type string `hcl:"type,optional"` 456 Source string `hcl:"source,optional"` 457 ReadOnly bool `hcl:"read_only,optional"` 458 AccessMode string `hcl:"access_mode,optional"` 459 AttachmentMode string `hcl:"attachment_mode,optional"` 460 MountOptions *CSIMountOptions `hcl:"mount_options,block"` 461 PerAlloc bool `hcl:"per_alloc,optional"` 462 ExtraKeysHCL []string `hcl1:",unusedKeys,optional" json:"-"` 463 } 464 465 const ( 466 VolumeMountPropagationPrivate = "private" 467 VolumeMountPropagationHostToTask = "host-to-task" 468 VolumeMountPropagationBidirectional = "bidirectional" 469 ) 470 471 // VolumeMount represents the relationship between a destination path in a task 472 // and the task group volume that should be mounted there. 473 type VolumeMount struct { 474 Volume *string `hcl:"volume,optional"` 475 Destination *string `hcl:"destination,optional"` 476 ReadOnly *bool `mapstructure:"read_only" hcl:"read_only,optional"` 477 PropagationMode *string `mapstructure:"propagation_mode" hcl:"propagation_mode,optional"` 478 SELinuxLabel *string `mapstructure:"selinux_label" hcl:"selinux_label,optional"` 479 } 480 481 func (vm *VolumeMount) Canonicalize() { 482 if vm.PropagationMode == nil { 483 vm.PropagationMode = pointerOf(VolumeMountPropagationPrivate) 484 } 485 486 if vm.ReadOnly == nil { 487 vm.ReadOnly = pointerOf(false) 488 } 489 490 if vm.SELinuxLabel == nil { 491 vm.SELinuxLabel = pointerOf("") 492 } 493 } 494 495 // TaskGroup is the unit of scheduling. 
496 type TaskGroup struct { 497 Name *string `hcl:"name,label"` 498 Count *int `hcl:"count,optional"` 499 Constraints []*Constraint `hcl:"constraint,block"` 500 Affinities []*Affinity `hcl:"affinity,block"` 501 Tasks []*Task `hcl:"task,block"` 502 Spreads []*Spread `hcl:"spread,block"` 503 Volumes map[string]*VolumeRequest `hcl:"volume,block"` 504 RestartPolicy *RestartPolicy `hcl:"restart,block"` 505 Disconnect *DisconnectStrategy `hcl:"disconnect,block"` 506 ReschedulePolicy *ReschedulePolicy `hcl:"reschedule,block"` 507 EphemeralDisk *EphemeralDisk `hcl:"ephemeral_disk,block"` 508 Update *UpdateStrategy `hcl:"update,block"` 509 Migrate *MigrateStrategy `hcl:"migrate,block"` 510 Networks []*NetworkResource `hcl:"network,block"` 511 Meta map[string]string `hcl:"meta,block"` 512 Services []*Service `hcl:"service,block"` 513 ShutdownDelay *time.Duration `mapstructure:"shutdown_delay" hcl:"shutdown_delay,optional"` 514 // Deprecated: StopAfterClientDisconnect is deprecated in Nomad 1.8. Use Disconnect.StopOnClientAfter instead. 515 StopAfterClientDisconnect *time.Duration `mapstructure:"stop_after_client_disconnect" hcl:"stop_after_client_disconnect,optional"` 516 // To be deprecated after 1.8.0 infavour of Disconnect.LostAfter 517 MaxClientDisconnect *time.Duration `mapstructure:"max_client_disconnect" hcl:"max_client_disconnect,optional"` 518 Scaling *ScalingPolicy `hcl:"scaling,block"` 519 Consul *Consul `hcl:"consul,block"` 520 // To be deprecated after 1.8.0 infavour of Disconnect.Replace 521 PreventRescheduleOnLost *bool `hcl:"prevent_reschedule_on_lost,optional"` 522 } 523 524 // NewTaskGroup creates a new TaskGroup. 
525 func NewTaskGroup(name string, count int) *TaskGroup { 526 return &TaskGroup{ 527 Name: pointerOf(name), 528 Count: pointerOf(count), 529 } 530 } 531 532 // Canonicalize sets defaults and merges settings that should be inherited from the job 533 func (g *TaskGroup) Canonicalize(job *Job) { 534 if g.Name == nil { 535 g.Name = pointerOf("") 536 } 537 538 if g.Count == nil { 539 if g.Scaling != nil && g.Scaling.Min != nil { 540 g.Count = pointerOf(int(*g.Scaling.Min)) 541 } else { 542 g.Count = pointerOf(1) 543 } 544 } 545 if g.Scaling != nil { 546 g.Scaling.Canonicalize(*g.Count) 547 } 548 if g.EphemeralDisk == nil { 549 g.EphemeralDisk = DefaultEphemeralDisk() 550 } else { 551 g.EphemeralDisk.Canonicalize() 552 } 553 554 // Merge job.consul onto group.consul 555 if g.Consul == nil { 556 g.Consul = new(Consul) 557 } 558 g.Consul.MergeNamespace(job.ConsulNamespace) 559 g.Consul.Canonicalize() 560 561 // Merge the update policy from the job 562 if ju, tu := job.Update != nil, g.Update != nil; ju && tu { 563 // Merge the jobs and task groups definition of the update strategy 564 jc := job.Update.Copy() 565 jc.Merge(g.Update) 566 g.Update = jc 567 } else if ju && !job.Update.Empty() { 568 // Inherit the jobs as long as it is non-empty. 
569 jc := job.Update.Copy() 570 g.Update = jc 571 } 572 573 if g.Update != nil { 574 g.Update.Canonicalize() 575 } 576 577 // Merge the reschedule policy from the job 578 if jr, tr := job.Reschedule != nil, g.ReschedulePolicy != nil; jr && tr { 579 jobReschedule := job.Reschedule.Copy() 580 jobReschedule.Merge(g.ReschedulePolicy) 581 g.ReschedulePolicy = jobReschedule 582 } else if jr { 583 jobReschedule := job.Reschedule.Copy() 584 g.ReschedulePolicy = jobReschedule 585 } 586 // Only use default reschedule policy for non system jobs 587 if g.ReschedulePolicy == nil && *job.Type != "system" { 588 g.ReschedulePolicy = NewDefaultReschedulePolicy(*job.Type) 589 } 590 if g.ReschedulePolicy != nil { 591 g.ReschedulePolicy.Canonicalize(*job.Type) 592 } 593 594 // Merge the migrate strategy from the job 595 if jm, tm := job.Migrate != nil, g.Migrate != nil; jm && tm { 596 jobMigrate := job.Migrate.Copy() 597 jobMigrate.Merge(g.Migrate) 598 g.Migrate = jobMigrate 599 } else if jm { 600 jobMigrate := job.Migrate.Copy() 601 g.Migrate = jobMigrate 602 } 603 604 // Merge with default reschedule policy 605 if g.Migrate == nil && *job.Type == "service" { 606 g.Migrate = &MigrateStrategy{} 607 } 608 if g.Migrate != nil { 609 g.Migrate.Canonicalize() 610 } 611 612 var defaultRestartPolicy *RestartPolicy 613 switch *job.Type { 614 case "service", "system": 615 defaultRestartPolicy = defaultServiceJobRestartPolicy() 616 default: 617 defaultRestartPolicy = defaultBatchJobRestartPolicy() 618 } 619 620 if g.RestartPolicy != nil { 621 defaultRestartPolicy.Merge(g.RestartPolicy) 622 } 623 g.RestartPolicy = defaultRestartPolicy 624 625 for _, t := range g.Tasks { 626 t.Canonicalize(g, job) 627 } 628 629 for _, spread := range g.Spreads { 630 spread.Canonicalize() 631 } 632 for _, a := range g.Affinities { 633 a.Canonicalize() 634 } 635 for _, n := range g.Networks { 636 n.Canonicalize() 637 } 638 for _, s := range g.Services { 639 s.Canonicalize(nil, g, job) 640 } 641 642 if 
g.PreventRescheduleOnLost == nil { 643 g.PreventRescheduleOnLost = pointerOf(false) 644 } 645 646 if g.Disconnect != nil { 647 g.Disconnect.Canonicalize() 648 } 649 } 650 651 // These needs to be in sync with DefaultServiceJobRestartPolicy in 652 // in nomad/structs/structs.go 653 func defaultServiceJobRestartPolicy() *RestartPolicy { 654 return &RestartPolicy{ 655 Delay: pointerOf(15 * time.Second), 656 Attempts: pointerOf(2), 657 Interval: pointerOf(30 * time.Minute), 658 Mode: pointerOf(RestartPolicyModeFail), 659 RenderTemplates: pointerOf(false), 660 } 661 } 662 663 // These needs to be in sync with DefaultBatchJobRestartPolicy in 664 // in nomad/structs/structs.go 665 func defaultBatchJobRestartPolicy() *RestartPolicy { 666 return &RestartPolicy{ 667 Delay: pointerOf(15 * time.Second), 668 Attempts: pointerOf(3), 669 Interval: pointerOf(24 * time.Hour), 670 Mode: pointerOf(RestartPolicyModeFail), 671 RenderTemplates: pointerOf(false), 672 } 673 } 674 675 // Constrain is used to add a constraint to a task group. 676 func (g *TaskGroup) Constrain(c *Constraint) *TaskGroup { 677 g.Constraints = append(g.Constraints, c) 678 return g 679 } 680 681 // AddMeta is used to add a meta k/v pair to a task group 682 func (g *TaskGroup) SetMeta(key, val string) *TaskGroup { 683 if g.Meta == nil { 684 g.Meta = make(map[string]string) 685 } 686 g.Meta[key] = val 687 return g 688 } 689 690 // AddTask is used to add a new task to a task group. 691 func (g *TaskGroup) AddTask(t *Task) *TaskGroup { 692 g.Tasks = append(g.Tasks, t) 693 return g 694 } 695 696 // AddAffinity is used to add a new affinity to a task group. 
697 func (g *TaskGroup) AddAffinity(a *Affinity) *TaskGroup { 698 g.Affinities = append(g.Affinities, a) 699 return g 700 } 701 702 // RequireDisk adds a ephemeral disk to the task group 703 func (g *TaskGroup) RequireDisk(disk *EphemeralDisk) *TaskGroup { 704 g.EphemeralDisk = disk 705 return g 706 } 707 708 // AddSpread is used to add a new spread preference to a task group. 709 func (g *TaskGroup) AddSpread(s *Spread) *TaskGroup { 710 g.Spreads = append(g.Spreads, s) 711 return g 712 } 713 714 // LogConfig provides configuration for log rotation 715 type LogConfig struct { 716 MaxFiles *int `mapstructure:"max_files" hcl:"max_files,optional"` 717 MaxFileSizeMB *int `mapstructure:"max_file_size" hcl:"max_file_size,optional"` 718 719 // COMPAT(1.6.0): Enabled had to be swapped for Disabled to fix a backwards 720 // compatibility bug when restoring pre-1.5.4 jobs. Remove in 1.6.0 721 Enabled *bool `mapstructure:"enabled" hcl:"enabled,optional"` 722 723 Disabled *bool `mapstructure:"disabled" hcl:"disabled,optional"` 724 } 725 726 func DefaultLogConfig() *LogConfig { 727 return &LogConfig{ 728 MaxFiles: pointerOf(10), 729 MaxFileSizeMB: pointerOf(10), 730 Disabled: pointerOf(false), 731 } 732 } 733 734 func (l *LogConfig) Canonicalize() { 735 if l.MaxFiles == nil { 736 l.MaxFiles = pointerOf(10) 737 } 738 if l.MaxFileSizeMB == nil { 739 l.MaxFileSizeMB = pointerOf(10) 740 } 741 if l.Disabled == nil { 742 l.Disabled = pointerOf(false) 743 } 744 } 745 746 // DispatchPayloadConfig configures how a task gets its input from a job dispatch 747 type DispatchPayloadConfig struct { 748 File string `hcl:"file,optional"` 749 } 750 751 const ( 752 TaskLifecycleHookPrestart = "prestart" 753 TaskLifecycleHookPoststart = "poststart" 754 TaskLifecycleHookPoststop = "poststop" 755 ) 756 757 type TaskLifecycle struct { 758 Hook string `mapstructure:"hook" hcl:"hook,optional"` 759 Sidecar bool `mapstructure:"sidecar" hcl:"sidecar,optional"` 760 } 761 762 // Determine if lifecycle has 
user-input values 763 func (l *TaskLifecycle) Empty() bool { 764 return l == nil || (l.Hook == "") 765 } 766 767 // Task is a single process in a task group. 768 type Task struct { 769 Name string `hcl:"name,label"` 770 Driver string `hcl:"driver,optional"` 771 User string `hcl:"user,optional"` 772 Lifecycle *TaskLifecycle `hcl:"lifecycle,block"` 773 Config map[string]interface{} `hcl:"config,block"` 774 Constraints []*Constraint `hcl:"constraint,block"` 775 Affinities []*Affinity `hcl:"affinity,block"` 776 Env map[string]string `hcl:"env,block"` 777 Services []*Service `hcl:"service,block"` 778 Resources *Resources `hcl:"resources,block"` 779 RestartPolicy *RestartPolicy `hcl:"restart,block"` 780 Meta map[string]string `hcl:"meta,block"` 781 KillTimeout *time.Duration `mapstructure:"kill_timeout" hcl:"kill_timeout,optional"` 782 LogConfig *LogConfig `mapstructure:"logs" hcl:"logs,block"` 783 Artifacts []*TaskArtifact `hcl:"artifact,block"` 784 Vault *Vault `hcl:"vault,block"` 785 Consul *Consul `hcl:"consul,block"` 786 Templates []*Template `hcl:"template,block"` 787 DispatchPayload *DispatchPayloadConfig `hcl:"dispatch_payload,block"` 788 VolumeMounts []*VolumeMount `hcl:"volume_mount,block"` 789 CSIPluginConfig *TaskCSIPluginConfig `mapstructure:"csi_plugin" json:",omitempty" hcl:"csi_plugin,block"` 790 Leader bool `hcl:"leader,optional"` 791 ShutdownDelay time.Duration `mapstructure:"shutdown_delay" hcl:"shutdown_delay,optional"` 792 KillSignal string `mapstructure:"kill_signal" hcl:"kill_signal,optional"` 793 Kind string `hcl:"kind,optional"` 794 ScalingPolicies []*ScalingPolicy `hcl:"scaling,block"` 795 796 // Identity is the default Nomad Workload Identity and will be added to 797 // Identities with the name "default" 798 Identity *WorkloadIdentity 799 800 // Workload Identities 801 Identities []*WorkloadIdentity `hcl:"identity,block"` 802 803 Actions []*Action `hcl:"action,block"` 804 } 805 806 func (t *Task) Canonicalize(tg *TaskGroup, job *Job) { 807 if 
t.Resources == nil { 808 t.Resources = &Resources{} 809 } 810 t.Resources.Canonicalize() 811 812 if t.KillTimeout == nil { 813 t.KillTimeout = pointerOf(5 * time.Second) 814 } 815 if t.LogConfig == nil { 816 t.LogConfig = DefaultLogConfig() 817 } else { 818 t.LogConfig.Canonicalize() 819 } 820 for _, artifact := range t.Artifacts { 821 artifact.Canonicalize() 822 } 823 if t.Vault != nil { 824 t.Vault.Canonicalize() 825 } 826 if t.Consul != nil { 827 t.Consul.Canonicalize() 828 } 829 for _, tmpl := range t.Templates { 830 tmpl.Canonicalize() 831 } 832 for _, s := range t.Services { 833 s.Canonicalize(t, tg, job) 834 } 835 for _, a := range t.Affinities { 836 a.Canonicalize() 837 } 838 for _, vm := range t.VolumeMounts { 839 vm.Canonicalize() 840 } 841 if t.Lifecycle.Empty() { 842 t.Lifecycle = nil 843 } 844 if t.CSIPluginConfig != nil { 845 t.CSIPluginConfig.Canonicalize() 846 } 847 if t.RestartPolicy == nil { 848 t.RestartPolicy = tg.RestartPolicy 849 } else { 850 tgrp := &RestartPolicy{} 851 *tgrp = *tg.RestartPolicy 852 tgrp.Merge(t.RestartPolicy) 853 t.RestartPolicy = tgrp 854 } 855 } 856 857 // TaskArtifact is used to download artifacts before running a task. 
858 type TaskArtifact struct { 859 GetterSource *string `mapstructure:"source" hcl:"source,optional"` 860 GetterOptions map[string]string `mapstructure:"options" hcl:"options,block"` 861 GetterHeaders map[string]string `mapstructure:"headers" hcl:"headers,block"` 862 GetterMode *string `mapstructure:"mode" hcl:"mode,optional"` 863 RelativeDest *string `mapstructure:"destination" hcl:"destination,optional"` 864 } 865 866 func (a *TaskArtifact) Canonicalize() { 867 if a.GetterMode == nil { 868 a.GetterMode = pointerOf("any") 869 } 870 if a.GetterSource == nil { 871 // Shouldn't be possible, but we don't want to panic 872 a.GetterSource = pointerOf("") 873 } 874 if len(a.GetterOptions) == 0 { 875 a.GetterOptions = nil 876 } 877 if len(a.GetterHeaders) == 0 { 878 a.GetterHeaders = nil 879 } 880 if a.RelativeDest == nil { 881 switch *a.GetterMode { 882 case "file": 883 // File mode should default to local/filename 884 dest := *a.GetterSource 885 dest = path.Base(dest) 886 dest = filepath.Join("local", dest) 887 a.RelativeDest = &dest 888 default: 889 // Default to a directory 890 a.RelativeDest = pointerOf("local/") 891 } 892 } 893 } 894 895 // WaitConfig is the Min/Max duration to wait for the Consul cluster to reach a 896 // consistent state before attempting to render Templates. 
897 type WaitConfig struct { 898 Min *time.Duration `mapstructure:"min" hcl:"min"` 899 Max *time.Duration `mapstructure:"max" hcl:"max"` 900 } 901 902 func (wc *WaitConfig) Copy() *WaitConfig { 903 if wc == nil { 904 return nil 905 } 906 907 nwc := new(WaitConfig) 908 *nwc = *wc 909 910 return nwc 911 } 912 913 type ChangeScript struct { 914 Command *string `mapstructure:"command" hcl:"command"` 915 Args []string `mapstructure:"args" hcl:"args,optional"` 916 Timeout *time.Duration `mapstructure:"timeout" hcl:"timeout,optional"` 917 FailOnError *bool `mapstructure:"fail_on_error" hcl:"fail_on_error"` 918 } 919 920 func (ch *ChangeScript) Canonicalize() { 921 if ch.Command == nil { 922 ch.Command = pointerOf("") 923 } 924 if ch.Args == nil { 925 ch.Args = []string{} 926 } 927 if ch.Timeout == nil { 928 ch.Timeout = pointerOf(5 * time.Second) 929 } 930 if ch.FailOnError == nil { 931 ch.FailOnError = pointerOf(false) 932 } 933 } 934 935 type Template struct { 936 SourcePath *string `mapstructure:"source" hcl:"source,optional"` 937 DestPath *string `mapstructure:"destination" hcl:"destination,optional"` 938 EmbeddedTmpl *string `mapstructure:"data" hcl:"data,optional"` 939 ChangeMode *string `mapstructure:"change_mode" hcl:"change_mode,optional"` 940 ChangeScript *ChangeScript `mapstructure:"change_script" hcl:"change_script,block"` 941 ChangeSignal *string `mapstructure:"change_signal" hcl:"change_signal,optional"` 942 Splay *time.Duration `mapstructure:"splay" hcl:"splay,optional"` 943 Perms *string `mapstructure:"perms" hcl:"perms,optional"` 944 Uid *int `mapstructure:"uid" hcl:"uid,optional"` 945 Gid *int `mapstructure:"gid" hcl:"gid,optional"` 946 LeftDelim *string `mapstructure:"left_delimiter" hcl:"left_delimiter,optional"` 947 RightDelim *string `mapstructure:"right_delimiter" hcl:"right_delimiter,optional"` 948 Envvars *bool `mapstructure:"env" hcl:"env,optional"` 949 VaultGrace *time.Duration `mapstructure:"vault_grace" hcl:"vault_grace,optional"` 950 Wait 
*WaitConfig `mapstructure:"wait" hcl:"wait,block"` 951 ErrMissingKey *bool `mapstructure:"error_on_missing_key" hcl:"error_on_missing_key,optional"` 952 } 953 954 func (tmpl *Template) Canonicalize() { 955 if tmpl.SourcePath == nil { 956 tmpl.SourcePath = pointerOf("") 957 } 958 if tmpl.DestPath == nil { 959 tmpl.DestPath = pointerOf("") 960 } 961 if tmpl.EmbeddedTmpl == nil { 962 tmpl.EmbeddedTmpl = pointerOf("") 963 } 964 if tmpl.ChangeMode == nil { 965 tmpl.ChangeMode = pointerOf("restart") 966 } 967 if tmpl.ChangeSignal == nil { 968 if *tmpl.ChangeMode == "signal" { 969 tmpl.ChangeSignal = pointerOf("SIGHUP") 970 } else { 971 tmpl.ChangeSignal = pointerOf("") 972 } 973 } else { 974 sig := *tmpl.ChangeSignal 975 tmpl.ChangeSignal = pointerOf(strings.ToUpper(sig)) 976 } 977 if tmpl.ChangeScript != nil { 978 tmpl.ChangeScript.Canonicalize() 979 } 980 if tmpl.Splay == nil { 981 tmpl.Splay = pointerOf(5 * time.Second) 982 } 983 if tmpl.Perms == nil { 984 tmpl.Perms = pointerOf("0644") 985 } 986 if tmpl.LeftDelim == nil { 987 tmpl.LeftDelim = pointerOf("{{") 988 } 989 if tmpl.RightDelim == nil { 990 tmpl.RightDelim = pointerOf("}}") 991 } 992 if tmpl.Envvars == nil { 993 tmpl.Envvars = pointerOf(false) 994 } 995 if tmpl.ErrMissingKey == nil { 996 tmpl.ErrMissingKey = pointerOf(false) 997 } 998 //COMPAT(0.12) VaultGrace is deprecated and unused as of Vault 0.5 999 if tmpl.VaultGrace == nil { 1000 tmpl.VaultGrace = pointerOf(time.Duration(0)) 1001 } 1002 } 1003 1004 type Vault struct { 1005 Policies []string `hcl:"policies,optional"` 1006 Role string `hcl:"role,optional"` 1007 Namespace *string `mapstructure:"namespace" hcl:"namespace,optional"` 1008 Cluster string `hcl:"cluster,optional"` 1009 Env *bool `hcl:"env,optional"` 1010 DisableFile *bool `mapstructure:"disable_file" hcl:"disable_file,optional"` 1011 ChangeMode *string `mapstructure:"change_mode" hcl:"change_mode,optional"` 1012 ChangeSignal *string `mapstructure:"change_signal" hcl:"change_signal,optional"` 
1013 AllowTokenExpiration *bool `mapstructure:"allow_token_expiration" hcl:"allow_token_expiration,optional"` 1014 } 1015 1016 func (v *Vault) Canonicalize() { 1017 if v.Env == nil { 1018 v.Env = pointerOf(true) 1019 } 1020 if v.DisableFile == nil { 1021 v.DisableFile = pointerOf(false) 1022 } 1023 if v.Namespace == nil { 1024 v.Namespace = pointerOf("") 1025 } 1026 if v.Cluster == "" { 1027 v.Cluster = "default" 1028 } 1029 if v.ChangeMode == nil { 1030 v.ChangeMode = pointerOf("restart") 1031 } 1032 if v.ChangeSignal == nil { 1033 v.ChangeSignal = pointerOf("SIGHUP") 1034 } 1035 if v.AllowTokenExpiration == nil { 1036 v.AllowTokenExpiration = pointerOf(false) 1037 } 1038 } 1039 1040 // NewTask creates and initializes a new Task. 1041 func NewTask(name, driver string) *Task { 1042 return &Task{ 1043 Name: name, 1044 Driver: driver, 1045 } 1046 } 1047 1048 // Configure is used to configure a single k/v pair on 1049 // the task. 1050 func (t *Task) SetConfig(key string, val interface{}) *Task { 1051 if t.Config == nil { 1052 t.Config = make(map[string]interface{}) 1053 } 1054 t.Config[key] = val 1055 return t 1056 } 1057 1058 // SetMeta is used to add metadata k/v pairs to the task. 1059 func (t *Task) SetMeta(key, val string) *Task { 1060 if t.Meta == nil { 1061 t.Meta = make(map[string]string) 1062 } 1063 t.Meta[key] = val 1064 return t 1065 } 1066 1067 // Require is used to add resource requirements to a task. 1068 func (t *Task) Require(r *Resources) *Task { 1069 t.Resources = r 1070 return t 1071 } 1072 1073 // Constraint adds a new constraints to a single task. 1074 func (t *Task) Constrain(c *Constraint) *Task { 1075 t.Constraints = append(t.Constraints, c) 1076 return t 1077 } 1078 1079 // AddAffinity adds a new affinity to a single task. 
func (t *Task) AddAffinity(a *Affinity) *Task {
	// Append to the existing list and hand back the receiver so that calls
	// can be chained.
	t.Affinities = append(t.Affinities, a)
	return t
}

// SetLogConfig sets a log config to a task, replacing any previously set
// value. The task is returned so calls can be chained.
func (t *Task) SetLogConfig(l *LogConfig) *Task {
	t.LogConfig = l
	return t
}

// SetLifecycle is used to set lifecycle config to a task, replacing any
// previously set value. The task is returned so calls can be chained.
func (t *Task) SetLifecycle(l *TaskLifecycle) *Task {
	t.Lifecycle = l
	return t
}

// TaskState tracks the current state of a task and events that caused state
// transitions.
type TaskState struct {
	State       string
	Failed      bool
	Restarts    uint64
	LastRestart time.Time
	StartedAt   time.Time
	FinishedAt  time.Time
	Events      []*TaskEvent

	// Experimental - TaskHandle is based on drivers.TaskHandle and used
	// by remote task drivers to migrate task handles between allocations.
	TaskHandle *TaskHandle
}

// Experimental - TaskHandle is based on drivers.TaskHandle and used by remote
// task drivers to migrate task handles between allocations.
type TaskHandle struct {
	Version     int
	DriverState []byte
}

// Human-readable names of the kinds of task events.
// NOTE(review): presumably these are the values carried in TaskEvent.Type;
// confirm against the code that produces the events.
const (
	TaskSetup                  = "Task Setup"
	TaskSetupFailure           = "Setup Failure"
	TaskDriverFailure          = "Driver Failure"
	TaskDriverMessage          = "Driver"
	TaskReceived               = "Received"
	TaskFailedValidation       = "Failed Validation"
	TaskStarted                = "Started"
	TaskTerminated             = "Terminated"
	TaskKilling                = "Killing"
	TaskKilled                 = "Killed"
	TaskRestarting             = "Restarting"
	TaskNotRestarting          = "Not Restarting"
	TaskDownloadingArtifacts   = "Downloading Artifacts"
	TaskArtifactDownloadFailed = "Failed Artifact Download"
	TaskSiblingFailed          = "Sibling Task Failed"
	TaskSignaling              = "Signaling"
	TaskRestartSignal          = "Restart Signaled"
	TaskLeaderDead             = "Leader Task Dead"
	TaskBuildingTaskDir        = "Building Task Directory"
	TaskClientReconnected      = "Reconnected"
)

// TaskEvent is an event that affects the state of a task and contains meta-data
// appropriate to the event's type.
type TaskEvent struct {
	Type           string
	Time           int64
	DisplayMessage string
	Details        map[string]string
	Message        string
	// DEPRECATION NOTICE: The following fields are all deprecated. see TaskEvent struct in structs.go for details.
	FailsTask        bool
	RestartReason    string
	SetupError       string
	DriverError      string
	DriverMessage    string
	ExitCode         int
	Signal           int
	KillReason       string
	KillTimeout      time.Duration
	KillError        string
	StartDelay       int64
	DownloadError    string
	ValidationError  string
	DiskLimit        int64
	DiskSize         int64
	FailedSibling    string
	VaultError       string
	TaskSignalReason string
	TaskSignal       string
	GenericSource    string
}

// CSIPluginType is an enum string that encapsulates the valid options for a
// CSIPlugin block's Type. These modes will allow the plugin to be used in
// different ways by the client.
type CSIPluginType string

const (
	// CSIPluginTypeNode indicates that Nomad should only use the plugin for
	// performing Node RPCs against the provided plugin.
	CSIPluginTypeNode CSIPluginType = "node"

	// CSIPluginTypeController indicates that Nomad should only use the plugin for
	// performing Controller RPCs against the provided plugin.
	CSIPluginTypeController CSIPluginType = "controller"

	// CSIPluginTypeMonolith indicates that Nomad can use the provided plugin for
	// both controller and node rpcs.
	CSIPluginTypeMonolith CSIPluginType = "monolith"
)

// TaskCSIPluginConfig contains the data that is required to setup a task as a
// CSI plugin. This will be used by the csi_plugin_supervisor_hook to configure
// mounts for the plugin and initiate the connection to the plugin catalog.
type TaskCSIPluginConfig struct {
	// ID is the identifier of the plugin.
	// Ideally this should be the FQDN of the plugin.
	ID string `mapstructure:"id" hcl:"id,optional"`

	// CSIPluginType instructs Nomad on how to handle processing a plugin
	Type CSIPluginType `mapstructure:"type" hcl:"type,optional"`

	// MountDir is the directory (within its container) in which the plugin creates a
	// socket (called CSISocketName) for communication with Nomad. Default is /csi.
	MountDir string `mapstructure:"mount_dir" hcl:"mount_dir,optional"`

	// StagePublishBaseDir is the base directory (within its container) in which the plugin
	// mounts volumes being staged and bind mounts volumes being published.
	// e.g. staging_target_path = {StagePublishBaseDir}/staging/{volume-id}/{usage-mode}
	// e.g. target_path = {StagePublishBaseDir}/per-alloc/{alloc-id}/{volume-id}/{usage-mode}
	// Default is /local/csi.
	StagePublishBaseDir string `mapstructure:"stage_publish_base_dir" hcl:"stage_publish_base_dir,optional"`

	// HealthTimeout is the time after which the CSI plugin tasks will be killed
	// if the CSI Plugin is not healthy.
	HealthTimeout time.Duration `mapstructure:"health_timeout" hcl:"health_timeout,optional"`
}

// Canonicalize fills the unset fields of the plugin config with their
// defaults: MountDir "/csi", StagePublishBaseDir "/local/csi" and a
// HealthTimeout of 30 seconds. ID and Type are not modified, and values that
// are already set are kept.
func (t *TaskCSIPluginConfig) Canonicalize() {
	if t.MountDir == "" {
		t.MountDir = "/csi"
	}

	if t.StagePublishBaseDir == "" {
		// This is a path inside the plugin's container, not on the machine
		// running this code, so it is spelled with forward slashes instead of
		// being joined with the host's path separator.
		t.StagePublishBaseDir = "/local/csi"
	}

	if t.HealthTimeout == 0 {
		t.HealthTimeout = 30 * time.Second
	}
}

// WorkloadIdentity is the jobspec block which determines if and how a workload
// identity is exposed to tasks.
type WorkloadIdentity struct {
	Name         string        `hcl:"name,optional"`
	Audience     []string      `mapstructure:"aud" hcl:"aud,optional"`
	ChangeMode   string        `mapstructure:"change_mode" hcl:"change_mode,optional"`
	ChangeSignal string        `mapstructure:"change_signal" hcl:"change_signal,optional"`
	Env          bool          `hcl:"env,optional"`
	File         bool          `hcl:"file,optional"`
	ServiceName  string        `hcl:"service_name,optional"`
	TTL          time.Duration `mapstructure:"ttl" hcl:"ttl,optional"`
}

// Action is a labelled jobspec block made of a name (the block label), a
// command and the command's optional arguments.
type Action struct {
	Name    string   `hcl:"name,label"`
	Command string   `mapstructure:"command" hcl:"command"`
	Args    []string `mapstructure:"args" hcl:"args,optional"`
}