github.com/djenriquez/nomad-1@v0.8.1/command/alloc_status.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "math" 6 "sort" 7 "strconv" 8 "strings" 9 "time" 10 11 humanize "github.com/dustin/go-humanize" 12 13 "github.com/hashicorp/nomad/api" 14 "github.com/hashicorp/nomad/api/contexts" 15 "github.com/hashicorp/nomad/client" 16 "github.com/posener/complete" 17 ) 18 19 type AllocStatusCommand struct { 20 Meta 21 } 22 23 func (c *AllocStatusCommand) Help() string { 24 helpText := ` 25 Usage: nomad alloc status [options] <allocation> 26 27 Display information about existing allocations and its tasks. This command can 28 be used to inspect the current status of an allocation, including its running 29 status, metadata, and verbose failure messages reported by internal 30 subsystems. 31 32 General Options: 33 34 ` + generalOptionsUsage() + ` 35 36 Alloc Status Options: 37 38 -short 39 Display short output. Shows only the most recent task event. 40 41 -stats 42 Display detailed resource usage statistics. 43 44 -verbose 45 Show full information. 46 47 -json 48 Output the allocation in its JSON format. 49 50 -t 51 Format and display allocation using a Go template. 52 ` 53 54 return strings.TrimSpace(helpText) 55 } 56 57 func (c *AllocStatusCommand) Synopsis() string { 58 return "Display allocation status information and metadata" 59 } 60 61 func (c *AllocStatusCommand) AutocompleteFlags() complete.Flags { 62 return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), 63 complete.Flags{ 64 "-short": complete.PredictNothing, 65 "-verbose": complete.PredictNothing, 66 "-json": complete.PredictNothing, 67 "-t": complete.PredictAnything, 68 }) 69 } 70 71 func (c *AllocStatusCommand) AutocompleteArgs() complete.Predictor { 72 return complete.PredictFunc(func(a complete.Args) []string { 73 client, err := c.Meta.Client() 74 if err != nil { 75 return nil 76 } 77 78 resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Allocs, nil) 79 if err != nil { 80 return []string{} 81 } 82 return resp.Matches[contexts.Allocs] 83 }) 84 } 85 86 func (c *AllocStatusCommand) Run(args []string) int { 87 var short, displayStats, verbose, json bool 88 var tmpl string 89 90 flags := c.Meta.FlagSet("alloc status", FlagSetClient) 91 flags.Usage = func() { c.Ui.Output(c.Help()) } 92 flags.BoolVar(&short, "short", false, "") 93 flags.BoolVar(&verbose, "verbose", false, "") 94 flags.BoolVar(&displayStats, "stats", false, "") 95 flags.BoolVar(&json, "json", false, "") 96 flags.StringVar(&tmpl, "t", "", "") 97 98 if err := flags.Parse(args); err != nil { 99 return 1 100 } 101 102 // Check that we got exactly one allocation ID 103 args = flags.Args() 104 105 // Get the HTTP client 106 client, err := c.Meta.Client() 107 if err != nil { 108 c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) 109 return 1 110 } 111 112 // If args not specified but output format is specified, format and output the allocations data list 113 if len(args) == 0 && json || len(tmpl) > 0 { 114 allocs, _, err := client.Allocations().List(nil) 115 if err != nil { 116 c.Ui.Error(fmt.Sprintf("Error querying allocations: %v", err)) 117 return 1 118 } 119 120 out, err := Format(json, tmpl, allocs) 121 if err != nil { 122 c.Ui.Error(err.Error()) 123 return 1 124 } 125 126 c.Ui.Output(out) 127 return 0 128 } 129 130 if len(args) != 1 { 131 c.Ui.Error(c.Help()) 132 return 1 133 } 134 allocID := args[0] 135 136 // Truncate the id unless full length is requested 137 length := shortId 138 if verbose { 139 length = fullId 140 } 141 142 // Query the allocation info 143 if len(allocID) == 1 { 144 c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters.")) 145 return 1 146 } 147 148 allocID = sanitizeUUIDPrefix(allocID) 149 allocs, _, err := client.Allocations().PrefixList(allocID) 150 if err != nil { 151 c.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err)) 152 return 1 153 } 154 if len(allocs) == 0 { 155 c.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID)) 156 return 1 157 } 158 if len(allocs) > 1 { 159 out := formatAllocListStubs(allocs, verbose, length) 160 c.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", out)) 161 return 0 162 } 163 // Prefix lookup matched a single allocation 164 alloc, _, err := client.Allocations().Info(allocs[0].ID, nil) 165 if err != nil { 166 c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err)) 167 return 1 168 } 169 170 // If output format is specified, format and output the data 171 if json || len(tmpl) > 0 { 172 out, err := Format(json, tmpl, alloc) 173 if err != nil { 174 c.Ui.Error(err.Error()) 175 return 1 176 } 177 178 c.Ui.Output(out) 179 return 0 180 } 181 182 // Format the allocation data 183 output, err := formatAllocBasicInfo(alloc, client, length, verbose) 184 if err != nil { 185 c.Ui.Error(err.Error()) 186 return 1 187 } 188 c.Ui.Output(output) 189 190 if short { 191 c.shortTaskStatus(alloc) 192 } else { 193 var statsErr error 194 var stats *api.AllocResourceUsage 195 stats, statsErr = client.Allocations().Stats(alloc, nil) 196 if statsErr != nil { 197 c.Ui.Output("") 198 if statsErr != api.NodeDownErr { 199 c.Ui.Error(fmt.Sprintf("Couldn't retrieve stats: %v", statsErr)) 200 } else { 201 c.Ui.Output("Omitting resource statistics since the node is down.") 202 } 203 } 204 c.outputTaskDetails(alloc, stats, displayStats) 205 } 206 207 // Format the detailed status 208 if verbose { 209 c.Ui.Output(c.Colorize().Color("\n[bold]Placement Metrics[reset]")) 210 c.Ui.Output(formatAllocMetrics(alloc.Metrics, true, " ")) 211 } 212 213 return 0 214 } 215 216 func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength int, verbose bool) (string, error) { 217 var formattedCreateTime, formattedModifyTime string 218 219 if verbose { 220 formattedCreateTime = formatUnixNanoTime(alloc.CreateTime) 221 formattedModifyTime = formatUnixNanoTime(alloc.ModifyTime) 222 } else { 223 formattedCreateTime = prettyTimeDiff(time.Unix(0, alloc.CreateTime), time.Now()) 224 formattedModifyTime = prettyTimeDiff(time.Unix(0, alloc.ModifyTime), time.Now()) 225 } 226 227 basic := []string{ 228 fmt.Sprintf("ID|%s", limit(alloc.ID, uuidLength)), 229 fmt.Sprintf("Eval ID|%s", limit(alloc.EvalID, uuidLength)), 230 fmt.Sprintf("Name|%s", alloc.Name), 231 fmt.Sprintf("Node ID|%s", limit(alloc.NodeID, uuidLength)), 232 fmt.Sprintf("Job ID|%s", alloc.JobID), 233 fmt.Sprintf("Job Version|%d", getVersion(alloc.Job)), 234 fmt.Sprintf("Client Status|%s", alloc.ClientStatus), 235 fmt.Sprintf("Client Description|%s", alloc.ClientDescription), 236 fmt.Sprintf("Desired Status|%s", alloc.DesiredStatus), 237 fmt.Sprintf("Desired Description|%s", alloc.DesiredDescription), 238 fmt.Sprintf("Created|%s", formattedCreateTime), 239 fmt.Sprintf("Modified|%s", formattedModifyTime), 240 } 241 242 if alloc.DeploymentID != "" { 243 health := "unset" 244 if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil { 245 if *alloc.DeploymentStatus.Healthy { 246 health = "healthy" 247 } else { 248 health = "unhealthy" 249 } 250 } 251 252 basic = append(basic, 253 fmt.Sprintf("Deployment ID|%s", limit(alloc.DeploymentID, uuidLength)), 254 fmt.Sprintf("Deployment Health|%s", health)) 255 256 // Check if this allocation is a canary 257 deployment, _, err := client.Deployments().Info(alloc.DeploymentID, nil) 258 if err != nil { 259 return "", fmt.Errorf("Error querying deployment %q: %s", alloc.DeploymentID, err) 260 } 261 262 canary := false 263 if state, ok := deployment.TaskGroups[alloc.TaskGroup]; ok { 264 for _, id := range state.PlacedCanaries { 265 if id == alloc.ID { 266 canary = true 267 break 268 } 269 } 270 } 271 272 if canary { 273 basic = append(basic, fmt.Sprintf("Canary|%v", true)) 274 } 275 } 276 277 if alloc.RescheduleTracker != nil && len(alloc.RescheduleTracker.Events) > 0 { 278 attempts, total := alloc.RescheduleInfo(time.Unix(0, alloc.ModifyTime)) 279 // Show this section only if the reschedule policy limits the number of attempts 280 if total > 0 { 281 reschedInfo := fmt.Sprintf("Reschedule Attempts|%d/%d", attempts, total) 282 basic = append(basic, reschedInfo) 283 } 284 } 285 if alloc.NextAllocation != "" { 286 basic = append(basic, 287 fmt.Sprintf("Replacement Alloc ID|%s", limit(alloc.NextAllocation, uuidLength))) 288 } 289 if alloc.FollowupEvalID != "" { 290 nextEvalTime := futureEvalTimePretty(alloc.FollowupEvalID, client) 291 if nextEvalTime != "" { 292 basic = append(basic, 293 fmt.Sprintf("Reschedule Eligibility|%s", nextEvalTime)) 294 } 295 } 296 297 if verbose { 298 basic = append(basic, 299 fmt.Sprintf("Evaluated Nodes|%d", alloc.Metrics.NodesEvaluated), 300 fmt.Sprintf("Filtered Nodes|%d", alloc.Metrics.NodesFiltered), 301 fmt.Sprintf("Exhausted Nodes|%d", alloc.Metrics.NodesExhausted), 302 fmt.Sprintf("Allocation Time|%s", alloc.Metrics.AllocationTime), 303 fmt.Sprintf("Failures|%d", alloc.Metrics.CoalescedFailures)) 304 } 305 306 return formatKV(basic), nil 307 } 308 309 // futureEvalTimePretty returns when the eval is eligible to reschedule 310 // relative to current time, based on the WaitUntil field 311 func futureEvalTimePretty(evalID string, client *api.Client) string { 312 evaluation, _, err := client.Evaluations().Info(evalID, nil) 313 // Eval time is not a critical output, 314 // don't return it on errors, if its not set or already in the past 315 if err != nil || evaluation.WaitUntil.IsZero() || time.Now().After(evaluation.WaitUntil) { 316 return "" 317 } 318 return prettyTimeDiff(evaluation.WaitUntil, time.Now()) 319 } 320 321 // outputTaskDetails prints task details for each task in the allocation, 322 // optionally printing verbose statistics if displayStats is set 323 func (c *AllocStatusCommand) outputTaskDetails(alloc *api.Allocation, stats *api.AllocResourceUsage, displayStats bool) { 324 for task := range c.sortedTaskStateIterator(alloc.TaskStates) { 325 state := alloc.TaskStates[task] 326 c.Ui.Output(c.Colorize().Color(fmt.Sprintf("\n[bold]Task %q is %q[reset]", task, state.State))) 327 c.outputTaskResources(alloc, task, stats, displayStats) 328 c.Ui.Output("") 329 c.outputTaskStatus(state) 330 } 331 } 332 333 func formatTaskTimes(t time.Time) string { 334 if t.IsZero() { 335 return "N/A" 336 } 337 338 return formatTime(t) 339 } 340 341 // outputTaskStatus prints out a list of the most recent events for the given 342 // task state. 343 func (c *AllocStatusCommand) outputTaskStatus(state *api.TaskState) { 344 basic := []string{ 345 fmt.Sprintf("Started At|%s", formatTaskTimes(state.StartedAt)), 346 fmt.Sprintf("Finished At|%s", formatTaskTimes(state.FinishedAt)), 347 fmt.Sprintf("Total Restarts|%d", state.Restarts), 348 fmt.Sprintf("Last Restart|%s", formatTaskTimes(state.LastRestart))} 349 350 c.Ui.Output("Task Events:") 351 c.Ui.Output(formatKV(basic)) 352 c.Ui.Output("") 353 354 c.Ui.Output("Recent Events:") 355 events := make([]string, len(state.Events)+1) 356 events[0] = "Time|Type|Description" 357 358 size := len(state.Events) 359 for i, event := range state.Events { 360 msg := event.DisplayMessage 361 if msg == "" { 362 msg = buildDisplayMessage(event) 363 } 364 formattedTime := formatUnixNanoTime(event.Time) 365 events[size-i] = fmt.Sprintf("%s|%s|%s", formattedTime, event.Type, msg) 366 // Reverse order so we are sorted by time 367 } 368 c.Ui.Output(formatList(events)) 369 } 370 371 func buildDisplayMessage(event *api.TaskEvent) string { 372 // Build up the description based on the event type. 373 var desc string 374 switch event.Type { 375 case api.TaskSetup: 376 desc = event.Message 377 case api.TaskStarted: 378 desc = "Task started by client" 379 case api.TaskReceived: 380 desc = "Task received by client" 381 case api.TaskFailedValidation: 382 if event.ValidationError != "" { 383 desc = event.ValidationError 384 } else { 385 desc = "Validation of task failed" 386 } 387 case api.TaskSetupFailure: 388 if event.SetupError != "" { 389 desc = event.SetupError 390 } else { 391 desc = "Task setup failed" 392 } 393 case api.TaskDriverFailure: 394 if event.DriverError != "" { 395 desc = event.DriverError 396 } else { 397 desc = "Failed to start task" 398 } 399 case api.TaskDownloadingArtifacts: 400 desc = "Client is downloading artifacts" 401 case api.TaskArtifactDownloadFailed: 402 if event.DownloadError != "" { 403 desc = event.DownloadError 404 } else { 405 desc = "Failed to download artifacts" 406 } 407 case api.TaskKilling: 408 if event.KillReason != "" { 409 desc = fmt.Sprintf("Killing task: %v", event.KillReason) 410 } else if event.KillTimeout != 0 { 411 desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout) 412 } else { 413 desc = "Sent interrupt" 414 } 415 case api.TaskKilled: 416 if event.KillError != "" { 417 desc = event.KillError 418 } else { 419 desc = "Task successfully killed" 420 } 421 case api.TaskTerminated: 422 var parts []string 423 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 424 425 if event.Signal != 0 { 426 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 427 } 428 429 if event.Message != "" { 430 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 431 } 432 desc = strings.Join(parts, ", ") 433 case api.TaskRestarting: 434 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 435 if event.RestartReason != "" && event.RestartReason != client.ReasonWithinPolicy { 436 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 437 } else { 438 desc = in 439 } 440 case api.TaskNotRestarting: 441 if event.RestartReason != "" { 442 desc = event.RestartReason 443 } else { 444 desc = "Task exceeded restart policy" 445 } 446 case api.TaskSiblingFailed: 447 if event.FailedSibling != "" { 448 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 449 } else { 450 desc = "Task's sibling failed" 451 } 452 case api.TaskSignaling: 453 sig := event.TaskSignal 454 reason := event.TaskSignalReason 455 456 if sig == "" && reason == "" { 457 desc = "Task being sent a signal" 458 } else if sig == "" { 459 desc = reason 460 } else if reason == "" { 461 desc = fmt.Sprintf("Task being sent signal %v", sig) 462 } else { 463 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 464 } 465 case api.TaskRestartSignal: 466 if event.RestartReason != "" { 467 desc = event.RestartReason 468 } else { 469 desc = "Task signaled to restart" 470 } 471 case api.TaskDriverMessage: 472 desc = event.DriverMessage 473 case api.TaskLeaderDead: 474 desc = "Leader Task in Group dead" 475 default: 476 desc = event.Message 477 } 478 479 return desc 480 } 481 482 // outputTaskResources prints the task resources for the passed task and if 483 // displayStats is set, verbose resource usage statistics 484 func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task string, stats *api.AllocResourceUsage, displayStats bool) { 485 resource, ok := alloc.TaskResources[task] 486 if !ok { 487 return 488 } 489 490 c.Ui.Output("Task Resources") 491 var addr []string 492 for _, nw := range resource.Networks { 493 ports := append(nw.DynamicPorts, nw.ReservedPorts...) 494 for _, port := range ports { 495 addr = append(addr, fmt.Sprintf("%v: %v:%v\n", port.Label, nw.IP, port.Value)) 496 } 497 } 498 var resourcesOutput []string 499 resourcesOutput = append(resourcesOutput, "CPU|Memory|Disk|IOPS|Addresses") 500 firstAddr := "" 501 if len(addr) > 0 { 502 firstAddr = addr[0] 503 } 504 505 // Display the rolled up stats. If possible prefer the live statistics 506 cpuUsage := strconv.Itoa(*resource.CPU) 507 memUsage := humanize.IBytes(uint64(*resource.MemoryMB * bytesPerMegabyte)) 508 if stats != nil { 509 if ru, ok := stats.Tasks[task]; ok && ru != nil && ru.ResourceUsage != nil { 510 if cs := ru.ResourceUsage.CpuStats; cs != nil { 511 cpuUsage = fmt.Sprintf("%v/%v", math.Floor(cs.TotalTicks), cpuUsage) 512 } 513 if ms := ru.ResourceUsage.MemoryStats; ms != nil { 514 memUsage = fmt.Sprintf("%v/%v", humanize.IBytes(ms.RSS), memUsage) 515 } 516 } 517 } 518 resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v MHz|%v|%v|%v|%v", 519 cpuUsage, 520 memUsage, 521 humanize.IBytes(uint64(*alloc.Resources.DiskMB*bytesPerMegabyte)), 522 *resource.IOPS, 523 firstAddr)) 524 for i := 1; i < len(addr); i++ { 525 resourcesOutput = append(resourcesOutput, fmt.Sprintf("||||%v", addr[i])) 526 } 527 c.Ui.Output(formatListWithSpaces(resourcesOutput)) 528 529 if stats != nil { 530 if ru, ok := stats.Tasks[task]; ok && ru != nil && displayStats && ru.ResourceUsage != nil { 531 c.Ui.Output("") 532 c.outputVerboseResourceUsage(task, ru.ResourceUsage) 533 } 534 } 535 } 536 537 // outputVerboseResourceUsage outputs the verbose resource usage for the passed 538 // task 539 func (c *AllocStatusCommand) outputVerboseResourceUsage(task string, resourceUsage *api.ResourceUsage) { 540 memoryStats := resourceUsage.MemoryStats 541 cpuStats := resourceUsage.CpuStats 542 if memoryStats != nil && len(memoryStats.Measured) > 0 { 543 c.Ui.Output("Memory Stats") 544 545 // Sort the measured stats 546 sort.Strings(memoryStats.Measured) 547 548 var measuredStats []string 549 for _, measured := range memoryStats.Measured { 550 switch measured { 551 case "RSS": 552 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.RSS)) 553 case "Cache": 554 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Cache)) 555 case "Swap": 556 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Swap)) 557 case "Max Usage": 558 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.MaxUsage)) 559 case "Kernel Usage": 560 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelUsage)) 561 case "Kernel Max Usage": 562 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelMaxUsage)) 563 } 564 } 565 566 out := make([]string, 2) 567 out[0] = strings.Join(memoryStats.Measured, "|") 568 out[1] = strings.Join(measuredStats, "|") 569 c.Ui.Output(formatList(out)) 570 c.Ui.Output("") 571 } 572 573 if cpuStats != nil && len(cpuStats.Measured) > 0 { 574 c.Ui.Output("CPU Stats") 575 576 // Sort the measured stats 577 sort.Strings(cpuStats.Measured) 578 579 var measuredStats []string 580 for _, measured := range cpuStats.Measured { 581 switch measured { 582 case "Percent": 583 percent := strconv.FormatFloat(cpuStats.Percent, 'f', 2, 64) 584 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 585 case "Throttled Periods": 586 measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledPeriods)) 587 case "Throttled Time": 588 measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledTime)) 589 case "User Mode": 590 percent := strconv.FormatFloat(cpuStats.UserMode, 'f', 2, 64) 591 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 592 case "System Mode": 593 percent := strconv.FormatFloat(cpuStats.SystemMode, 'f', 2, 64) 594 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 595 } 596 } 597 598 out := make([]string, 2) 599 out[0] = strings.Join(cpuStats.Measured, "|") 600 out[1] = strings.Join(measuredStats, "|") 601 c.Ui.Output(formatList(out)) 602 } 603 } 604 605 // shortTaskStatus prints out the current state of each task. 606 func (c *AllocStatusCommand) shortTaskStatus(alloc *api.Allocation) { 607 tasks := make([]string, 0, len(alloc.TaskStates)+1) 608 tasks = append(tasks, "Name|State|Last Event|Time") 609 for task := range c.sortedTaskStateIterator(alloc.TaskStates) { 610 state := alloc.TaskStates[task] 611 lastState := state.State 612 var lastEvent, lastTime string 613 614 l := len(state.Events) 615 if l != 0 { 616 last := state.Events[l-1] 617 lastEvent = last.Type 618 lastTime = formatUnixNanoTime(last.Time) 619 } 620 621 tasks = append(tasks, fmt.Sprintf("%s|%s|%s|%s", 622 task, lastState, lastEvent, lastTime)) 623 } 624 625 c.Ui.Output(c.Colorize().Color("\n[bold]Tasks[reset]")) 626 c.Ui.Output(formatList(tasks)) 627 } 628 629 // sortedTaskStateIterator is a helper that takes the task state map and returns a 630 // channel that returns the keys in a sorted order. 631 func (c *AllocStatusCommand) sortedTaskStateIterator(m map[string]*api.TaskState) <-chan string { 632 output := make(chan string, len(m)) 633 keys := make([]string, len(m)) 634 i := 0 635 for k := range m { 636 keys[i] = k 637 i++ 638 } 639 sort.Strings(keys) 640 641 for _, key := range keys { 642 output <- key 643 } 644 645 close(output) 646 return output 647 }