github.com/emate/nomad@v0.8.2-wo-binpacking/command/alloc_status.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "math" 6 "sort" 7 "strconv" 8 "strings" 9 "time" 10 11 humanize "github.com/dustin/go-humanize" 12 13 "github.com/hashicorp/nomad/api" 14 "github.com/hashicorp/nomad/api/contexts" 15 "github.com/hashicorp/nomad/client" 16 "github.com/posener/complete" 17 ) 18 19 type AllocStatusCommand struct { 20 Meta 21 } 22 23 func (c *AllocStatusCommand) Help() string { 24 helpText := ` 25 Usage: nomad alloc status [options] <allocation> 26 27 Display information about existing allocations and its tasks. This command can 28 be used to inspect the current status of an allocation, including its running 29 status, metadata, and verbose failure messages reported by internal 30 subsystems. 31 32 General Options: 33 34 ` + generalOptionsUsage() + ` 35 36 Alloc Status Options: 37 38 -short 39 Display short output. Shows only the most recent task event. 40 41 -stats 42 Display detailed resource usage statistics. 43 44 -verbose 45 Show full information. 46 47 -json 48 Output the allocation in its JSON format. 49 50 -t 51 Format and display allocation using a Go template. 52 ` 53 54 return strings.TrimSpace(helpText) 55 } 56 57 func (c *AllocStatusCommand) Synopsis() string { 58 return "Display allocation status information and metadata" 59 } 60 61 func (c *AllocStatusCommand) AutocompleteFlags() complete.Flags { 62 return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), 63 complete.Flags{ 64 "-short": complete.PredictNothing, 65 "-verbose": complete.PredictNothing, 66 "-json": complete.PredictNothing, 67 "-t": complete.PredictAnything, 68 }) 69 } 70 71 func (c *AllocStatusCommand) AutocompleteArgs() complete.Predictor { 72 return complete.PredictFunc(func(a complete.Args) []string { 73 client, err := c.Meta.Client() 74 if err != nil { 75 return nil 76 } 77 78 resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Allocs, nil) 79 if err != nil { 80 return []string{} 81 } 82 return resp.Matches[contexts.Allocs] 83 }) 84 } 85 86 func (c *AllocStatusCommand) Name() string { return "alloc status" } 87 88 func (c *AllocStatusCommand) Run(args []string) int { 89 var short, displayStats, verbose, json bool 90 var tmpl string 91 92 flags := c.Meta.FlagSet(c.Name(), FlagSetClient) 93 flags.Usage = func() { c.Ui.Output(c.Help()) } 94 flags.BoolVar(&short, "short", false, "") 95 flags.BoolVar(&verbose, "verbose", false, "") 96 flags.BoolVar(&displayStats, "stats", false, "") 97 flags.BoolVar(&json, "json", false, "") 98 flags.StringVar(&tmpl, "t", "", "") 99 100 if err := flags.Parse(args); err != nil { 101 return 1 102 } 103 104 // Check that we got exactly one allocation ID 105 args = flags.Args() 106 107 // Get the HTTP client 108 client, err := c.Meta.Client() 109 if err != nil { 110 c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) 111 return 1 112 } 113 114 // If args not specified but output format is specified, format and output the allocations data list 115 if len(args) == 0 && json || len(tmpl) > 0 { 116 allocs, _, err := client.Allocations().List(nil) 117 if err != nil { 118 c.Ui.Error(fmt.Sprintf("Error querying allocations: %v", err)) 119 return 1 120 } 121 122 out, err := Format(json, tmpl, allocs) 123 if err != nil { 124 c.Ui.Error(err.Error()) 125 return 1 126 } 127 128 c.Ui.Output(out) 129 return 0 130 } 131 132 if len(args) != 1 { 133 c.Ui.Error("This command takes one of the following argument conditions:") 134 c.Ui.Error(" * A single <allocation>") 135 c.Ui.Error(" * No arguments, with output format specified") 136 c.Ui.Error(commandErrorText(c)) 137 return 1 138 } 139 allocID := args[0] 140 141 // Truncate the id unless full length is requested 142 length := shortId 143 if verbose { 144 length = fullId 145 } 146 147 // Query the allocation info 148 if len(allocID) == 1 { 149 c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters.")) 150 return 1 151 } 152 153 allocID = sanitizeUUIDPrefix(allocID) 154 allocs, _, err := client.Allocations().PrefixList(allocID) 155 if err != nil { 156 c.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err)) 157 return 1 158 } 159 if len(allocs) == 0 { 160 c.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID)) 161 return 1 162 } 163 if len(allocs) > 1 { 164 out := formatAllocListStubs(allocs, verbose, length) 165 c.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", out)) 166 return 0 167 } 168 // Prefix lookup matched a single allocation 169 alloc, _, err := client.Allocations().Info(allocs[0].ID, nil) 170 if err != nil { 171 c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err)) 172 return 1 173 } 174 175 // If output format is specified, format and output the data 176 if json || len(tmpl) > 0 { 177 out, err := Format(json, tmpl, alloc) 178 if err != nil { 179 c.Ui.Error(err.Error()) 180 return 1 181 } 182 183 c.Ui.Output(out) 184 return 0 185 } 186 187 // Format the allocation data 188 output, err := formatAllocBasicInfo(alloc, client, length, verbose) 189 if err != nil { 190 c.Ui.Error(err.Error()) 191 return 1 192 } 193 c.Ui.Output(output) 194 195 if short { 196 c.shortTaskStatus(alloc) 197 } else { 198 var statsErr error 199 var stats *api.AllocResourceUsage 200 stats, statsErr = client.Allocations().Stats(alloc, nil) 201 if statsErr != nil { 202 c.Ui.Output("") 203 if statsErr != api.NodeDownErr { 204 c.Ui.Error(fmt.Sprintf("Couldn't retrieve stats: %v", statsErr)) 205 } else { 206 c.Ui.Output("Omitting resource statistics since the node is down.") 207 } 208 } 209 c.outputTaskDetails(alloc, stats, displayStats) 210 } 211 212 // Format the detailed status 213 if verbose { 214 c.Ui.Output(c.Colorize().Color("\n[bold]Placement Metrics[reset]")) 215 c.Ui.Output(formatAllocMetrics(alloc.Metrics, true, " ")) 216 } 217 218 return 0 219 } 220 221 func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength int, verbose bool) (string, error) { 222 var formattedCreateTime, formattedModifyTime string 223 224 if verbose { 225 formattedCreateTime = formatUnixNanoTime(alloc.CreateTime) 226 formattedModifyTime = formatUnixNanoTime(alloc.ModifyTime) 227 } else { 228 formattedCreateTime = prettyTimeDiff(time.Unix(0, alloc.CreateTime), time.Now()) 229 formattedModifyTime = prettyTimeDiff(time.Unix(0, alloc.ModifyTime), time.Now()) 230 } 231 232 basic := []string{ 233 fmt.Sprintf("ID|%s", limit(alloc.ID, uuidLength)), 234 fmt.Sprintf("Eval ID|%s", limit(alloc.EvalID, uuidLength)), 235 fmt.Sprintf("Name|%s", alloc.Name), 236 fmt.Sprintf("Node ID|%s", limit(alloc.NodeID, uuidLength)), 237 fmt.Sprintf("Job ID|%s", alloc.JobID), 238 fmt.Sprintf("Job Version|%d", getVersion(alloc.Job)), 239 fmt.Sprintf("Client Status|%s", alloc.ClientStatus), 240 fmt.Sprintf("Client Description|%s", alloc.ClientDescription), 241 fmt.Sprintf("Desired Status|%s", alloc.DesiredStatus), 242 fmt.Sprintf("Desired Description|%s", alloc.DesiredDescription), 243 fmt.Sprintf("Created|%s", formattedCreateTime), 244 fmt.Sprintf("Modified|%s", formattedModifyTime), 245 } 246 247 if alloc.DeploymentID != "" { 248 health := "unset" 249 if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil { 250 if *alloc.DeploymentStatus.Healthy { 251 health = "healthy" 252 } else { 253 health = "unhealthy" 254 } 255 } 256 257 basic = append(basic, 258 fmt.Sprintf("Deployment ID|%s", limit(alloc.DeploymentID, uuidLength)), 259 fmt.Sprintf("Deployment Health|%s", health)) 260 261 // Check if this allocation is a canary 262 deployment, _, err := client.Deployments().Info(alloc.DeploymentID, nil) 263 if err != nil { 264 return "", fmt.Errorf("Error querying deployment %q: %s", alloc.DeploymentID, err) 265 } 266 267 canary := false 268 if state, ok := deployment.TaskGroups[alloc.TaskGroup]; ok { 269 for _, id := range state.PlacedCanaries { 270 if id == alloc.ID { 271 canary = true 272 break 273 } 274 } 275 } 276 277 if canary { 278 basic = append(basic, fmt.Sprintf("Canary|%v", true)) 279 } 280 } 281 282 if alloc.RescheduleTracker != nil && len(alloc.RescheduleTracker.Events) > 0 { 283 attempts, total := alloc.RescheduleInfo(time.Unix(0, alloc.ModifyTime)) 284 // Show this section only if the reschedule policy limits the number of attempts 285 if total > 0 { 286 reschedInfo := fmt.Sprintf("Reschedule Attempts|%d/%d", attempts, total) 287 basic = append(basic, reschedInfo) 288 } 289 } 290 if alloc.NextAllocation != "" { 291 basic = append(basic, 292 fmt.Sprintf("Replacement Alloc ID|%s", limit(alloc.NextAllocation, uuidLength))) 293 } 294 if alloc.FollowupEvalID != "" { 295 nextEvalTime := futureEvalTimePretty(alloc.FollowupEvalID, client) 296 if nextEvalTime != "" { 297 basic = append(basic, 298 fmt.Sprintf("Reschedule Eligibility|%s", nextEvalTime)) 299 } 300 } 301 302 if verbose { 303 basic = append(basic, 304 fmt.Sprintf("Evaluated Nodes|%d", alloc.Metrics.NodesEvaluated), 305 fmt.Sprintf("Filtered Nodes|%d", alloc.Metrics.NodesFiltered), 306 fmt.Sprintf("Exhausted Nodes|%d", alloc.Metrics.NodesExhausted), 307 fmt.Sprintf("Allocation Time|%s", alloc.Metrics.AllocationTime), 308 fmt.Sprintf("Failures|%d", alloc.Metrics.CoalescedFailures)) 309 } 310 311 return formatKV(basic), nil 312 } 313 314 // futureEvalTimePretty returns when the eval is eligible to reschedule 315 // relative to current time, based on the WaitUntil field 316 func futureEvalTimePretty(evalID string, client *api.Client) string { 317 evaluation, _, err := client.Evaluations().Info(evalID, nil) 318 // Eval time is not a critical output, 319 // don't return it on errors, if its not set or already in the past 320 if err != nil || evaluation.WaitUntil.IsZero() || time.Now().After(evaluation.WaitUntil) { 321 return "" 322 } 323 return prettyTimeDiff(evaluation.WaitUntil, time.Now()) 324 } 325 326 // outputTaskDetails prints task details for each task in the allocation, 327 // optionally printing verbose statistics if displayStats is set 328 func (c *AllocStatusCommand) outputTaskDetails(alloc *api.Allocation, stats *api.AllocResourceUsage, displayStats bool) { 329 for task := range c.sortedTaskStateIterator(alloc.TaskStates) { 330 state := alloc.TaskStates[task] 331 c.Ui.Output(c.Colorize().Color(fmt.Sprintf("\n[bold]Task %q is %q[reset]", task, state.State))) 332 c.outputTaskResources(alloc, task, stats, displayStats) 333 c.Ui.Output("") 334 c.outputTaskStatus(state) 335 } 336 } 337 338 func formatTaskTimes(t time.Time) string { 339 if t.IsZero() { 340 return "N/A" 341 } 342 343 return formatTime(t) 344 } 345 346 // outputTaskStatus prints out a list of the most recent events for the given 347 // task state. 348 func (c *AllocStatusCommand) outputTaskStatus(state *api.TaskState) { 349 basic := []string{ 350 fmt.Sprintf("Started At|%s", formatTaskTimes(state.StartedAt)), 351 fmt.Sprintf("Finished At|%s", formatTaskTimes(state.FinishedAt)), 352 fmt.Sprintf("Total Restarts|%d", state.Restarts), 353 fmt.Sprintf("Last Restart|%s", formatTaskTimes(state.LastRestart))} 354 355 c.Ui.Output("Task Events:") 356 c.Ui.Output(formatKV(basic)) 357 c.Ui.Output("") 358 359 c.Ui.Output("Recent Events:") 360 events := make([]string, len(state.Events)+1) 361 events[0] = "Time|Type|Description" 362 363 size := len(state.Events) 364 for i, event := range state.Events { 365 msg := event.DisplayMessage 366 if msg == "" { 367 msg = buildDisplayMessage(event) 368 } 369 formattedTime := formatUnixNanoTime(event.Time) 370 events[size-i] = fmt.Sprintf("%s|%s|%s", formattedTime, event.Type, msg) 371 // Reverse order so we are sorted by time 372 } 373 c.Ui.Output(formatList(events)) 374 } 375 376 func buildDisplayMessage(event *api.TaskEvent) string { 377 // Build up the description based on the event type. 378 var desc string 379 switch event.Type { 380 case api.TaskSetup: 381 desc = event.Message 382 case api.TaskStarted: 383 desc = "Task started by client" 384 case api.TaskReceived: 385 desc = "Task received by client" 386 case api.TaskFailedValidation: 387 if event.ValidationError != "" { 388 desc = event.ValidationError 389 } else { 390 desc = "Validation of task failed" 391 } 392 case api.TaskSetupFailure: 393 if event.SetupError != "" { 394 desc = event.SetupError 395 } else { 396 desc = "Task setup failed" 397 } 398 case api.TaskDriverFailure: 399 if event.DriverError != "" { 400 desc = event.DriverError 401 } else { 402 desc = "Failed to start task" 403 } 404 case api.TaskDownloadingArtifacts: 405 desc = "Client is downloading artifacts" 406 case api.TaskArtifactDownloadFailed: 407 if event.DownloadError != "" { 408 desc = event.DownloadError 409 } else { 410 desc = "Failed to download artifacts" 411 } 412 case api.TaskKilling: 413 if event.KillReason != "" { 414 desc = fmt.Sprintf("Killing task: %v", event.KillReason) 415 } else if event.KillTimeout != 0 { 416 desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout) 417 } else { 418 desc = "Sent interrupt" 419 } 420 case api.TaskKilled: 421 if event.KillError != "" { 422 desc = event.KillError 423 } else { 424 desc = "Task successfully killed" 425 } 426 case api.TaskTerminated: 427 var parts []string 428 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 429 430 if event.Signal != 0 { 431 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 432 } 433 434 if event.Message != "" { 435 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 436 } 437 desc = strings.Join(parts, ", ") 438 case api.TaskRestarting: 439 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 440 if event.RestartReason != "" && event.RestartReason != client.ReasonWithinPolicy { 441 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 442 } else { 443 desc = in 444 } 445 case api.TaskNotRestarting: 446 if event.RestartReason != "" { 447 desc = event.RestartReason 448 } else { 449 desc = "Task exceeded restart policy" 450 } 451 case api.TaskSiblingFailed: 452 if event.FailedSibling != "" { 453 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 454 } else { 455 desc = "Task's sibling failed" 456 } 457 case api.TaskSignaling: 458 sig := event.TaskSignal 459 reason := event.TaskSignalReason 460 461 if sig == "" && reason == "" { 462 desc = "Task being sent a signal" 463 } else if sig == "" { 464 desc = reason 465 } else if reason == "" { 466 desc = fmt.Sprintf("Task being sent signal %v", sig) 467 } else { 468 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 469 } 470 case api.TaskRestartSignal: 471 if event.RestartReason != "" { 472 desc = event.RestartReason 473 } else { 474 desc = "Task signaled to restart" 475 } 476 case api.TaskDriverMessage: 477 desc = event.DriverMessage 478 case api.TaskLeaderDead: 479 desc = "Leader Task in Group dead" 480 default: 481 desc = event.Message 482 } 483 484 return desc 485 } 486 487 // outputTaskResources prints the task resources for the passed task and if 488 // displayStats is set, verbose resource usage statistics 489 func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task string, stats *api.AllocResourceUsage, displayStats bool) { 490 resource, ok := alloc.TaskResources[task] 491 if !ok { 492 return 493 } 494 495 c.Ui.Output("Task Resources") 496 var addr []string 497 for _, nw := range resource.Networks { 498 ports := append(nw.DynamicPorts, nw.ReservedPorts...) 499 for _, port := range ports { 500 addr = append(addr, fmt.Sprintf("%v: %v:%v\n", port.Label, nw.IP, port.Value)) 501 } 502 } 503 var resourcesOutput []string 504 resourcesOutput = append(resourcesOutput, "CPU|Memory|Disk|IOPS|Addresses") 505 firstAddr := "" 506 if len(addr) > 0 { 507 firstAddr = addr[0] 508 } 509 510 // Display the rolled up stats. If possible prefer the live statistics 511 cpuUsage := strconv.Itoa(*resource.CPU) 512 memUsage := humanize.IBytes(uint64(*resource.MemoryMB * bytesPerMegabyte)) 513 if stats != nil { 514 if ru, ok := stats.Tasks[task]; ok && ru != nil && ru.ResourceUsage != nil { 515 if cs := ru.ResourceUsage.CpuStats; cs != nil { 516 cpuUsage = fmt.Sprintf("%v/%v", math.Floor(cs.TotalTicks), cpuUsage) 517 } 518 if ms := ru.ResourceUsage.MemoryStats; ms != nil { 519 memUsage = fmt.Sprintf("%v/%v", humanize.IBytes(ms.RSS), memUsage) 520 } 521 } 522 } 523 resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v MHz|%v|%v|%v|%v", 524 cpuUsage, 525 memUsage, 526 humanize.IBytes(uint64(*alloc.Resources.DiskMB*bytesPerMegabyte)), 527 *resource.IOPS, 528 firstAddr)) 529 for i := 1; i < len(addr); i++ { 530 resourcesOutput = append(resourcesOutput, fmt.Sprintf("||||%v", addr[i])) 531 } 532 c.Ui.Output(formatListWithSpaces(resourcesOutput)) 533 534 if stats != nil { 535 if ru, ok := stats.Tasks[task]; ok && ru != nil && displayStats && ru.ResourceUsage != nil { 536 c.Ui.Output("") 537 c.outputVerboseResourceUsage(task, ru.ResourceUsage) 538 } 539 } 540 } 541 542 // outputVerboseResourceUsage outputs the verbose resource usage for the passed 543 // task 544 func (c *AllocStatusCommand) outputVerboseResourceUsage(task string, resourceUsage *api.ResourceUsage) { 545 memoryStats := resourceUsage.MemoryStats 546 cpuStats := resourceUsage.CpuStats 547 if memoryStats != nil && len(memoryStats.Measured) > 0 { 548 c.Ui.Output("Memory Stats") 549 550 // Sort the measured stats 551 sort.Strings(memoryStats.Measured) 552 553 var measuredStats []string 554 for _, measured := range memoryStats.Measured { 555 switch measured { 556 case "RSS": 557 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.RSS)) 558 case "Cache": 559 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Cache)) 560 case "Swap": 561 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Swap)) 562 case "Max Usage": 563 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.MaxUsage)) 564 case "Kernel Usage": 565 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelUsage)) 566 case "Kernel Max Usage": 567 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelMaxUsage)) 568 } 569 } 570 571 out := make([]string, 2) 572 out[0] = strings.Join(memoryStats.Measured, "|") 573 out[1] = strings.Join(measuredStats, "|") 574 c.Ui.Output(formatList(out)) 575 c.Ui.Output("") 576 } 577 578 if cpuStats != nil && len(cpuStats.Measured) > 0 { 579 c.Ui.Output("CPU Stats") 580 581 // Sort the measured stats 582 sort.Strings(cpuStats.Measured) 583 584 var measuredStats []string 585 for _, measured := range cpuStats.Measured { 586 switch measured { 587 case "Percent": 588 percent := strconv.FormatFloat(cpuStats.Percent, 'f', 2, 64) 589 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 590 case "Throttled Periods": 591 measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledPeriods)) 592 case "Throttled Time": 593 measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledTime)) 594 case "User Mode": 595 percent := strconv.FormatFloat(cpuStats.UserMode, 'f', 2, 64) 596 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 597 case "System Mode": 598 percent := strconv.FormatFloat(cpuStats.SystemMode, 'f', 2, 64) 599 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 600 } 601 } 602 603 out := make([]string, 2) 604 out[0] = strings.Join(cpuStats.Measured, "|") 605 out[1] = strings.Join(measuredStats, "|") 606 c.Ui.Output(formatList(out)) 607 } 608 } 609 610 // shortTaskStatus prints out the current state of each task. 611 func (c *AllocStatusCommand) shortTaskStatus(alloc *api.Allocation) { 612 tasks := make([]string, 0, len(alloc.TaskStates)+1) 613 tasks = append(tasks, "Name|State|Last Event|Time") 614 for task := range c.sortedTaskStateIterator(alloc.TaskStates) { 615 state := alloc.TaskStates[task] 616 lastState := state.State 617 var lastEvent, lastTime string 618 619 l := len(state.Events) 620 if l != 0 { 621 last := state.Events[l-1] 622 lastEvent = last.Type 623 lastTime = formatUnixNanoTime(last.Time) 624 } 625 626 tasks = append(tasks, fmt.Sprintf("%s|%s|%s|%s", 627 task, lastState, lastEvent, lastTime)) 628 } 629 630 c.Ui.Output(c.Colorize().Color("\n[bold]Tasks[reset]")) 631 c.Ui.Output(formatList(tasks)) 632 } 633 634 // sortedTaskStateIterator is a helper that takes the task state map and returns a 635 // channel that returns the keys in a sorted order. 636 func (c *AllocStatusCommand) sortedTaskStateIterator(m map[string]*api.TaskState) <-chan string { 637 output := make(chan string, len(m)) 638 keys := make([]string, len(m)) 639 i := 0 640 for k := range m { 641 keys[i] = k 642 i++ 643 } 644 sort.Strings(keys) 645 646 for _, key := range keys { 647 output <- key 648 } 649 650 close(output) 651 return output 652 }