github.com/quite/nomad@v0.8.6/command/alloc_status.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "math" 6 "sort" 7 "strconv" 8 "strings" 9 "time" 10 11 humanize "github.com/dustin/go-humanize" 12 13 "github.com/hashicorp/nomad/api" 14 "github.com/hashicorp/nomad/api/contexts" 15 "github.com/hashicorp/nomad/client" 16 "github.com/posener/complete" 17 ) 18 19 type AllocStatusCommand struct { 20 Meta 21 } 22 23 func (c *AllocStatusCommand) Help() string { 24 helpText := ` 25 Usage: nomad alloc status [options] <allocation> 26 27 Display information about existing allocations and its tasks. This command can 28 be used to inspect the current status of an allocation, including its running 29 status, metadata, and verbose failure messages reported by internal 30 subsystems. 31 32 General Options: 33 34 ` + generalOptionsUsage() + ` 35 36 Alloc Status Options: 37 38 -short 39 Display short output. Shows only the most recent task event. 40 41 -stats 42 Display detailed resource usage statistics. 43 44 -verbose 45 Show full information. 46 47 -json 48 Output the allocation in its JSON format. 49 50 -t 51 Format and display allocation using a Go template. 52 ` 53 54 return strings.TrimSpace(helpText) 55 } 56 57 func (c *AllocStatusCommand) Synopsis() string { 58 return "Display allocation status information and metadata" 59 } 60 61 func (c *AllocStatusCommand) AutocompleteFlags() complete.Flags { 62 return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), 63 complete.Flags{ 64 "-short": complete.PredictNothing, 65 "-verbose": complete.PredictNothing, 66 "-json": complete.PredictNothing, 67 "-t": complete.PredictAnything, 68 }) 69 } 70 71 func (c *AllocStatusCommand) AutocompleteArgs() complete.Predictor { 72 return complete.PredictFunc(func(a complete.Args) []string { 73 client, err := c.Meta.Client() 74 if err != nil { 75 return nil 76 } 77 78 resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Allocs, nil) 79 if err != nil { 80 return []string{} 81 } 82 return resp.Matches[contexts.Allocs] 83 }) 84 } 85 86 func (c *AllocStatusCommand) Name() string { return "alloc status" } 87 88 func (c *AllocStatusCommand) Run(args []string) int { 89 var short, displayStats, verbose, json bool 90 var tmpl string 91 92 flags := c.Meta.FlagSet(c.Name(), FlagSetClient) 93 flags.Usage = func() { c.Ui.Output(c.Help()) } 94 flags.BoolVar(&short, "short", false, "") 95 flags.BoolVar(&verbose, "verbose", false, "") 96 flags.BoolVar(&displayStats, "stats", false, "") 97 flags.BoolVar(&json, "json", false, "") 98 flags.StringVar(&tmpl, "t", "", "") 99 100 if err := flags.Parse(args); err != nil { 101 return 1 102 } 103 104 // Check that we got exactly one allocation ID 105 args = flags.Args() 106 107 // Get the HTTP client 108 client, err := c.Meta.Client() 109 if err != nil { 110 c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) 111 return 1 112 } 113 114 // If args not specified but output format is specified, format and output the allocations data list 115 if len(args) == 0 && json || len(tmpl) > 0 { 116 allocs, _, err := client.Allocations().List(nil) 117 if err != nil { 118 c.Ui.Error(fmt.Sprintf("Error querying allocations: %v", err)) 119 return 1 120 } 121 122 out, err := Format(json, tmpl, allocs) 123 if err != nil { 124 c.Ui.Error(err.Error()) 125 return 1 126 } 127 128 c.Ui.Output(out) 129 return 0 130 } 131 132 if len(args) != 1 { 133 c.Ui.Error("This command takes one of the following argument conditions:") 134 c.Ui.Error(" * A single <allocation>") 135 c.Ui.Error(" * No arguments, with output format specified") 136 c.Ui.Error(commandErrorText(c)) 137 return 1 138 } 139 allocID := args[0] 140 141 // Truncate the id unless full length is requested 142 length := shortId 143 if verbose { 144 length = fullId 145 } 146 147 // Query the allocation info 148 if len(allocID) == 1 { 149 c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters.")) 150 return 1 151 } 152 153 allocID = sanitizeUUIDPrefix(allocID) 154 allocs, _, err := client.Allocations().PrefixList(allocID) 155 if err != nil { 156 c.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err)) 157 return 1 158 } 159 if len(allocs) == 0 { 160 c.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID)) 161 return 1 162 } 163 if len(allocs) > 1 { 164 out := formatAllocListStubs(allocs, verbose, length) 165 c.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", out)) 166 return 0 167 } 168 // Prefix lookup matched a single allocation 169 alloc, _, err := client.Allocations().Info(allocs[0].ID, nil) 170 if err != nil { 171 c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err)) 172 return 1 173 } 174 175 // If output format is specified, format and output the data 176 if json || len(tmpl) > 0 { 177 out, err := Format(json, tmpl, alloc) 178 if err != nil { 179 c.Ui.Error(err.Error()) 180 return 1 181 } 182 183 c.Ui.Output(out) 184 return 0 185 } 186 187 // Format the allocation data 188 output, err := formatAllocBasicInfo(alloc, client, length, verbose) 189 if err != nil { 190 c.Ui.Error(err.Error()) 191 return 1 192 } 193 c.Ui.Output(output) 194 195 if short { 196 c.shortTaskStatus(alloc) 197 } else { 198 var statsErr error 199 var stats *api.AllocResourceUsage 200 stats, statsErr = client.Allocations().Stats(alloc, nil) 201 if statsErr != nil { 202 c.Ui.Output("") 203 if statsErr != api.NodeDownErr { 204 c.Ui.Error(fmt.Sprintf("Couldn't retrieve stats: %v", statsErr)) 205 } else { 206 c.Ui.Output("Omitting resource statistics since the node is down.") 207 } 208 } 209 c.outputTaskDetails(alloc, stats, displayStats) 210 } 211 212 // Format the detailed status 213 if verbose { 214 c.Ui.Output(c.Colorize().Color("\n[bold]Placement Metrics[reset]")) 215 c.Ui.Output(formatAllocMetrics(alloc.Metrics, true, " ")) 216 } 217 218 return 0 219 } 220 221 func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength int, verbose bool) (string, error) { 222 var formattedCreateTime, formattedModifyTime string 223 224 if verbose { 225 formattedCreateTime = formatUnixNanoTime(alloc.CreateTime) 226 formattedModifyTime = formatUnixNanoTime(alloc.ModifyTime) 227 } else { 228 formattedCreateTime = prettyTimeDiff(time.Unix(0, alloc.CreateTime), time.Now()) 229 formattedModifyTime = prettyTimeDiff(time.Unix(0, alloc.ModifyTime), time.Now()) 230 } 231 232 basic := []string{ 233 fmt.Sprintf("ID|%s", limit(alloc.ID, uuidLength)), 234 fmt.Sprintf("Eval ID|%s", limit(alloc.EvalID, uuidLength)), 235 fmt.Sprintf("Name|%s", alloc.Name), 236 fmt.Sprintf("Node ID|%s", limit(alloc.NodeID, uuidLength)), 237 fmt.Sprintf("Job ID|%s", alloc.JobID), 238 fmt.Sprintf("Job Version|%d", getVersion(alloc.Job)), 239 fmt.Sprintf("Client Status|%s", alloc.ClientStatus), 240 fmt.Sprintf("Client Description|%s", alloc.ClientDescription), 241 fmt.Sprintf("Desired Status|%s", alloc.DesiredStatus), 242 fmt.Sprintf("Desired Description|%s", alloc.DesiredDescription), 243 fmt.Sprintf("Created|%s", formattedCreateTime), 244 fmt.Sprintf("Modified|%s", formattedModifyTime), 245 } 246 247 if alloc.DeploymentID != "" { 248 health := "unset" 249 canary := false 250 if alloc.DeploymentStatus != nil { 251 if alloc.DeploymentStatus.Healthy != nil { 252 if *alloc.DeploymentStatus.Healthy { 253 health = "healthy" 254 } else { 255 health = "unhealthy" 256 } 257 } 258 259 canary = alloc.DeploymentStatus.Canary 260 } 261 262 basic = append(basic, 263 fmt.Sprintf("Deployment ID|%s", limit(alloc.DeploymentID, uuidLength)), 264 fmt.Sprintf("Deployment Health|%s", health)) 265 if canary { 266 basic = append(basic, fmt.Sprintf("Canary|%v", true)) 267 } 268 } 269 270 if alloc.RescheduleTracker != nil && len(alloc.RescheduleTracker.Events) > 0 { 271 attempts, total := alloc.RescheduleInfo(time.Unix(0, alloc.ModifyTime)) 272 // Show this section only if the reschedule policy limits the number of attempts 273 if total > 0 { 274 reschedInfo := fmt.Sprintf("Reschedule Attempts|%d/%d", attempts, total) 275 basic = append(basic, reschedInfo) 276 } 277 } 278 if alloc.NextAllocation != "" { 279 basic = append(basic, 280 fmt.Sprintf("Replacement Alloc ID|%s", limit(alloc.NextAllocation, uuidLength))) 281 } 282 if alloc.FollowupEvalID != "" { 283 nextEvalTime := futureEvalTimePretty(alloc.FollowupEvalID, client) 284 if nextEvalTime != "" { 285 basic = append(basic, 286 fmt.Sprintf("Reschedule Eligibility|%s", nextEvalTime)) 287 } 288 } 289 290 if verbose { 291 basic = append(basic, 292 fmt.Sprintf("Evaluated Nodes|%d", alloc.Metrics.NodesEvaluated), 293 fmt.Sprintf("Filtered Nodes|%d", alloc.Metrics.NodesFiltered), 294 fmt.Sprintf("Exhausted Nodes|%d", alloc.Metrics.NodesExhausted), 295 fmt.Sprintf("Allocation Time|%s", alloc.Metrics.AllocationTime), 296 fmt.Sprintf("Failures|%d", alloc.Metrics.CoalescedFailures)) 297 } 298 299 return formatKV(basic), nil 300 } 301 302 // futureEvalTimePretty returns when the eval is eligible to reschedule 303 // relative to current time, based on the WaitUntil field 304 func futureEvalTimePretty(evalID string, client *api.Client) string { 305 evaluation, _, err := client.Evaluations().Info(evalID, nil) 306 // Eval time is not a critical output, 307 // don't return it on errors, if its not set or already in the past 308 if err != nil || evaluation.WaitUntil.IsZero() || time.Now().After(evaluation.WaitUntil) { 309 return "" 310 } 311 return prettyTimeDiff(evaluation.WaitUntil, time.Now()) 312 } 313 314 // outputTaskDetails prints task details for each task in the allocation, 315 // optionally printing verbose statistics if displayStats is set 316 func (c *AllocStatusCommand) outputTaskDetails(alloc *api.Allocation, stats *api.AllocResourceUsage, displayStats bool) { 317 for task := range c.sortedTaskStateIterator(alloc.TaskStates) { 318 state := alloc.TaskStates[task] 319 c.Ui.Output(c.Colorize().Color(fmt.Sprintf("\n[bold]Task %q is %q[reset]", task, state.State))) 320 c.outputTaskResources(alloc, task, stats, displayStats) 321 c.Ui.Output("") 322 c.outputTaskStatus(state) 323 } 324 } 325 326 func formatTaskTimes(t time.Time) string { 327 if t.IsZero() { 328 return "N/A" 329 } 330 331 return formatTime(t) 332 } 333 334 // outputTaskStatus prints out a list of the most recent events for the given 335 // task state. 336 func (c *AllocStatusCommand) outputTaskStatus(state *api.TaskState) { 337 basic := []string{ 338 fmt.Sprintf("Started At|%s", formatTaskTimes(state.StartedAt)), 339 fmt.Sprintf("Finished At|%s", formatTaskTimes(state.FinishedAt)), 340 fmt.Sprintf("Total Restarts|%d", state.Restarts), 341 fmt.Sprintf("Last Restart|%s", formatTaskTimes(state.LastRestart))} 342 343 c.Ui.Output("Task Events:") 344 c.Ui.Output(formatKV(basic)) 345 c.Ui.Output("") 346 347 c.Ui.Output("Recent Events:") 348 events := make([]string, len(state.Events)+1) 349 events[0] = "Time|Type|Description" 350 351 size := len(state.Events) 352 for i, event := range state.Events { 353 msg := event.DisplayMessage 354 if msg == "" { 355 msg = buildDisplayMessage(event) 356 } 357 formattedTime := formatUnixNanoTime(event.Time) 358 events[size-i] = fmt.Sprintf("%s|%s|%s", formattedTime, event.Type, msg) 359 // Reverse order so we are sorted by time 360 } 361 c.Ui.Output(formatList(events)) 362 } 363 364 func buildDisplayMessage(event *api.TaskEvent) string { 365 // Build up the description based on the event type. 366 var desc string 367 switch event.Type { 368 case api.TaskSetup: 369 desc = event.Message 370 case api.TaskStarted: 371 desc = "Task started by client" 372 case api.TaskReceived: 373 desc = "Task received by client" 374 case api.TaskFailedValidation: 375 if event.ValidationError != "" { 376 desc = event.ValidationError 377 } else { 378 desc = "Validation of task failed" 379 } 380 case api.TaskSetupFailure: 381 if event.SetupError != "" { 382 desc = event.SetupError 383 } else { 384 desc = "Task setup failed" 385 } 386 case api.TaskDriverFailure: 387 if event.DriverError != "" { 388 desc = event.DriverError 389 } else { 390 desc = "Failed to start task" 391 } 392 case api.TaskDownloadingArtifacts: 393 desc = "Client is downloading artifacts" 394 case api.TaskArtifactDownloadFailed: 395 if event.DownloadError != "" { 396 desc = event.DownloadError 397 } else { 398 desc = "Failed to download artifacts" 399 } 400 case api.TaskKilling: 401 if event.KillReason != "" { 402 desc = fmt.Sprintf("Killing task: %v", event.KillReason) 403 } else if event.KillTimeout != 0 { 404 desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout) 405 } else { 406 desc = "Sent interrupt" 407 } 408 case api.TaskKilled: 409 if event.KillError != "" { 410 desc = event.KillError 411 } else { 412 desc = "Task successfully killed" 413 } 414 case api.TaskTerminated: 415 var parts []string 416 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 417 418 if event.Signal != 0 { 419 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 420 } 421 422 if event.Message != "" { 423 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 424 } 425 desc = strings.Join(parts, ", ") 426 case api.TaskRestarting: 427 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 428 if event.RestartReason != "" && event.RestartReason != client.ReasonWithinPolicy { 429 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 430 } else { 431 desc = in 432 } 433 case api.TaskNotRestarting: 434 if event.RestartReason != "" { 435 desc = event.RestartReason 436 } else { 437 desc = "Task exceeded restart policy" 438 } 439 case api.TaskSiblingFailed: 440 if event.FailedSibling != "" { 441 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 442 } else { 443 desc = "Task's sibling failed" 444 } 445 case api.TaskSignaling: 446 sig := event.TaskSignal 447 reason := event.TaskSignalReason 448 449 if sig == "" && reason == "" { 450 desc = "Task being sent a signal" 451 } else if sig == "" { 452 desc = reason 453 } else if reason == "" { 454 desc = fmt.Sprintf("Task being sent signal %v", sig) 455 } else { 456 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 457 } 458 case api.TaskRestartSignal: 459 if event.RestartReason != "" { 460 desc = event.RestartReason 461 } else { 462 desc = "Task signaled to restart" 463 } 464 case api.TaskDriverMessage: 465 desc = event.DriverMessage 466 case api.TaskLeaderDead: 467 desc = "Leader Task in Group dead" 468 default: 469 desc = event.Message 470 } 471 472 return desc 473 } 474 475 // outputTaskResources prints the task resources for the passed task and if 476 // displayStats is set, verbose resource usage statistics 477 func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task string, stats *api.AllocResourceUsage, displayStats bool) { 478 resource, ok := alloc.TaskResources[task] 479 if !ok { 480 return 481 } 482 483 c.Ui.Output("Task Resources") 484 var addr []string 485 for _, nw := range resource.Networks { 486 ports := append(nw.DynamicPorts, nw.ReservedPorts...) 487 for _, port := range ports { 488 addr = append(addr, fmt.Sprintf("%v: %v:%v\n", port.Label, nw.IP, port.Value)) 489 } 490 } 491 var resourcesOutput []string 492 resourcesOutput = append(resourcesOutput, "CPU|Memory|Disk|IOPS|Addresses") 493 firstAddr := "" 494 if len(addr) > 0 { 495 firstAddr = addr[0] 496 } 497 498 // Display the rolled up stats. If possible prefer the live statistics 499 cpuUsage := strconv.Itoa(*resource.CPU) 500 memUsage := humanize.IBytes(uint64(*resource.MemoryMB * bytesPerMegabyte)) 501 if stats != nil { 502 if ru, ok := stats.Tasks[task]; ok && ru != nil && ru.ResourceUsage != nil { 503 if cs := ru.ResourceUsage.CpuStats; cs != nil { 504 cpuUsage = fmt.Sprintf("%v/%v", math.Floor(cs.TotalTicks), cpuUsage) 505 } 506 if ms := ru.ResourceUsage.MemoryStats; ms != nil { 507 memUsage = fmt.Sprintf("%v/%v", humanize.IBytes(ms.RSS), memUsage) 508 } 509 } 510 } 511 resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v MHz|%v|%v|%v|%v", 512 cpuUsage, 513 memUsage, 514 humanize.IBytes(uint64(*alloc.Resources.DiskMB*bytesPerMegabyte)), 515 *resource.IOPS, 516 firstAddr)) 517 for i := 1; i < len(addr); i++ { 518 resourcesOutput = append(resourcesOutput, fmt.Sprintf("||||%v", addr[i])) 519 } 520 c.Ui.Output(formatListWithSpaces(resourcesOutput)) 521 522 if stats != nil { 523 if ru, ok := stats.Tasks[task]; ok && ru != nil && displayStats && ru.ResourceUsage != nil { 524 c.Ui.Output("") 525 c.outputVerboseResourceUsage(task, ru.ResourceUsage) 526 } 527 } 528 } 529 530 // outputVerboseResourceUsage outputs the verbose resource usage for the passed 531 // task 532 func (c *AllocStatusCommand) outputVerboseResourceUsage(task string, resourceUsage *api.ResourceUsage) { 533 memoryStats := resourceUsage.MemoryStats 534 cpuStats := resourceUsage.CpuStats 535 if memoryStats != nil && len(memoryStats.Measured) > 0 { 536 c.Ui.Output("Memory Stats") 537 538 // Sort the measured stats 539 sort.Strings(memoryStats.Measured) 540 541 var measuredStats []string 542 for _, measured := range memoryStats.Measured { 543 switch measured { 544 case "RSS": 545 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.RSS)) 546 case "Cache": 547 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Cache)) 548 case "Swap": 549 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Swap)) 550 case "Max Usage": 551 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.MaxUsage)) 552 case "Kernel Usage": 553 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelUsage)) 554 case "Kernel Max Usage": 555 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelMaxUsage)) 556 } 557 } 558 559 out := make([]string, 2) 560 out[0] = strings.Join(memoryStats.Measured, "|") 561 out[1] = strings.Join(measuredStats, "|") 562 c.Ui.Output(formatList(out)) 563 c.Ui.Output("") 564 } 565 566 if cpuStats != nil && len(cpuStats.Measured) > 0 { 567 c.Ui.Output("CPU Stats") 568 569 // Sort the measured stats 570 sort.Strings(cpuStats.Measured) 571 572 var measuredStats []string 573 for _, measured := range cpuStats.Measured { 574 switch measured { 575 case "Percent": 576 percent := strconv.FormatFloat(cpuStats.Percent, 'f', 2, 64) 577 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 578 case "Throttled Periods": 579 measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledPeriods)) 580 case "Throttled Time": 581 measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledTime)) 582 case "User Mode": 583 percent := strconv.FormatFloat(cpuStats.UserMode, 'f', 2, 64) 584 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 585 case "System Mode": 586 percent := strconv.FormatFloat(cpuStats.SystemMode, 'f', 2, 64) 587 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 588 } 589 } 590 591 out := make([]string, 2) 592 out[0] = strings.Join(cpuStats.Measured, "|") 593 out[1] = strings.Join(measuredStats, "|") 594 c.Ui.Output(formatList(out)) 595 } 596 } 597 598 // shortTaskStatus prints out the current state of each task. 599 func (c *AllocStatusCommand) shortTaskStatus(alloc *api.Allocation) { 600 tasks := make([]string, 0, len(alloc.TaskStates)+1) 601 tasks = append(tasks, "Name|State|Last Event|Time") 602 for task := range c.sortedTaskStateIterator(alloc.TaskStates) { 603 state := alloc.TaskStates[task] 604 lastState := state.State 605 var lastEvent, lastTime string 606 607 l := len(state.Events) 608 if l != 0 { 609 last := state.Events[l-1] 610 lastEvent = last.Type 611 lastTime = formatUnixNanoTime(last.Time) 612 } 613 614 tasks = append(tasks, fmt.Sprintf("%s|%s|%s|%s", 615 task, lastState, lastEvent, lastTime)) 616 } 617 618 c.Ui.Output(c.Colorize().Color("\n[bold]Tasks[reset]")) 619 c.Ui.Output(formatList(tasks)) 620 } 621 622 // sortedTaskStateIterator is a helper that takes the task state map and returns a 623 // channel that returns the keys in a sorted order. 624 func (c *AllocStatusCommand) sortedTaskStateIterator(m map[string]*api.TaskState) <-chan string { 625 output := make(chan string, len(m)) 626 keys := make([]string, len(m)) 627 i := 0 628 for k := range m { 629 keys[i] = k 630 i++ 631 } 632 sort.Strings(keys) 633 634 for _, key := range keys { 635 output <- key 636 } 637 638 close(output) 639 return output 640 }