github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/command/alloc_status.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "math" 6 "sort" 7 "strconv" 8 "strings" 9 "time" 10 11 "github.com/dustin/go-humanize" 12 "github.com/mitchellh/colorstring" 13 14 "github.com/hashicorp/nomad/api" 15 "github.com/hashicorp/nomad/client" 16 ) 17 18 type AllocStatusCommand struct { 19 Meta 20 color *colorstring.Colorize 21 } 22 23 func (c *AllocStatusCommand) Help() string { 24 helpText := ` 25 Usage: nomad alloc-status [options] <allocation> 26 27 Display information about existing allocations and its tasks. This command can 28 be used to inspect the current status of an allocation, including its running 29 status, metadata, and verbose failure messages reported by internal 30 subsystems. 31 32 General Options: 33 34 ` + generalOptionsUsage() + ` 35 36 Alloc Status Options: 37 38 -short 39 Display short output. Shows only the most recent task event. 40 41 -stats 42 Display detailed resource usage statistics. 43 44 -verbose 45 Show full information. 46 47 -json 48 Output the allocation in its JSON format. 49 50 -t 51 Format and display allocation using a Go template. 52 ` 53 54 return strings.TrimSpace(helpText) 55 } 56 57 func (c *AllocStatusCommand) Synopsis() string { 58 return "Display allocation status information and metadata" 59 } 60 61 func (c *AllocStatusCommand) Run(args []string) int { 62 var short, displayStats, verbose, json bool 63 var tmpl string 64 65 flags := c.Meta.FlagSet("alloc-status", FlagSetClient) 66 flags.Usage = func() { c.Ui.Output(c.Help()) } 67 flags.BoolVar(&short, "short", false, "") 68 flags.BoolVar(&verbose, "verbose", false, "") 69 flags.BoolVar(&displayStats, "stats", false, "") 70 flags.BoolVar(&json, "json", false, "") 71 flags.StringVar(&tmpl, "t", "", "") 72 73 if err := flags.Parse(args); err != nil { 74 return 1 75 } 76 77 // Check that we got exactly one allocation ID 78 args = flags.Args() 79 80 // Get the HTTP client 81 client, err := c.Meta.Client() 82 if err != nil { 83 c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) 84 return 1 85 } 86 87 // If args not specified but output format is specified, format and output the allocations data list 88 if len(args) == 0 && json || len(tmpl) > 0 { 89 allocs, _, err := client.Allocations().List(nil) 90 if err != nil { 91 c.Ui.Error(fmt.Sprintf("Error querying allocations: %v", err)) 92 return 1 93 } 94 95 out, err := Format(json, tmpl, allocs) 96 if err != nil { 97 c.Ui.Error(err.Error()) 98 return 1 99 } 100 101 c.Ui.Output(out) 102 return 0 103 } 104 105 if len(args) != 1 { 106 c.Ui.Error(c.Help()) 107 return 1 108 } 109 allocID := args[0] 110 111 // Truncate the id unless full length is requested 112 length := shortId 113 if verbose { 114 length = fullId 115 } 116 117 // Query the allocation info 118 if len(allocID) == 1 { 119 c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters.")) 120 return 1 121 } 122 if len(allocID)%2 == 1 { 123 // Identifiers must be of even length, so we strip off the last byte 124 // to provide a consistent user experience. 125 allocID = allocID[:len(allocID)-1] 126 } 127 128 allocs, _, err := client.Allocations().PrefixList(allocID) 129 if err != nil { 130 c.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err)) 131 return 1 132 } 133 if len(allocs) == 0 { 134 c.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID)) 135 return 1 136 } 137 if len(allocs) > 1 { 138 out := formatAllocListStubs(allocs, verbose, length) 139 c.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", out)) 140 return 0 141 } 142 // Prefix lookup matched a single allocation 143 alloc, _, err := client.Allocations().Info(allocs[0].ID, nil) 144 if err != nil { 145 c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err)) 146 return 1 147 } 148 149 // If output format is specified, format and output the data 150 if json || len(tmpl) > 0 { 151 out, err := Format(json, tmpl, alloc) 152 if err != nil { 153 c.Ui.Error(err.Error()) 154 return 1 155 } 156 157 c.Ui.Output(out) 158 return 0 159 } 160 161 // Format the allocation data 162 output, err := formatAllocBasicInfo(alloc, client, length, verbose) 163 if err != nil { 164 c.Ui.Error(err.Error()) 165 return 1 166 } 167 c.Ui.Output(output) 168 169 if short { 170 c.shortTaskStatus(alloc) 171 } else { 172 var statsErr error 173 var stats *api.AllocResourceUsage 174 stats, statsErr = client.Allocations().Stats(alloc, nil) 175 if statsErr != nil { 176 c.Ui.Output("") 177 if statsErr != api.NodeDownErr { 178 c.Ui.Error(fmt.Sprintf("Couldn't retrieve stats (HINT: ensure Client.Advertise.HTTP is set): %v", statsErr)) 179 } else { 180 c.Ui.Output("Omitting resource statistics since the node is down.") 181 } 182 } 183 c.outputTaskDetails(alloc, stats, displayStats) 184 } 185 186 // Format the detailed status 187 if verbose { 188 c.Ui.Output(c.Colorize().Color("\n[bold]Placement Metrics[reset]")) 189 c.Ui.Output(formatAllocMetrics(alloc.Metrics, true, " ")) 190 } 191 192 return 0 193 } 194 195 func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength int, verbose bool) (string, error) { 196 basic := []string{ 197 fmt.Sprintf("ID|%s", limit(alloc.ID, uuidLength)), 198 fmt.Sprintf("Eval ID|%s", limit(alloc.EvalID, uuidLength)), 199 fmt.Sprintf("Name|%s", alloc.Name), 200 fmt.Sprintf("Node ID|%s", limit(alloc.NodeID, uuidLength)), 201 fmt.Sprintf("Job ID|%s", alloc.JobID), 202 fmt.Sprintf("Job Version|%d", *alloc.Job.Version), 203 fmt.Sprintf("Client Status|%s", alloc.ClientStatus), 204 fmt.Sprintf("Client Description|%s", alloc.ClientDescription), 205 fmt.Sprintf("Desired Status|%s", alloc.DesiredStatus), 206 fmt.Sprintf("Desired Description|%s", alloc.DesiredDescription), 207 fmt.Sprintf("Created At|%s", formatUnixNanoTime(alloc.CreateTime)), 208 } 209 210 if alloc.DeploymentID != "" { 211 health := "unset" 212 if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil { 213 if *alloc.DeploymentStatus.Healthy { 214 health = "healthy" 215 } else { 216 health = "unhealthy" 217 } 218 } 219 220 basic = append(basic, 221 fmt.Sprintf("Deployment ID|%s", limit(alloc.DeploymentID, uuidLength)), 222 fmt.Sprintf("Deployment Health|%s", health)) 223 224 // Check if this allocation is a canary 225 deployment, _, err := client.Deployments().Info(alloc.DeploymentID, nil) 226 if err != nil { 227 return "", fmt.Errorf("Error querying deployment %q: %s", alloc.DeploymentID, err) 228 } 229 230 canary := false 231 if state, ok := deployment.TaskGroups[alloc.TaskGroup]; ok { 232 for _, id := range state.PlacedCanaries { 233 if id == alloc.ID { 234 canary = true 235 break 236 } 237 } 238 } 239 240 if canary { 241 basic = append(basic, fmt.Sprintf("Canary|%v", true)) 242 } 243 } 244 245 if verbose { 246 basic = append(basic, 247 fmt.Sprintf("Evaluated Nodes|%d", alloc.Metrics.NodesEvaluated), 248 fmt.Sprintf("Filtered Nodes|%d", alloc.Metrics.NodesFiltered), 249 fmt.Sprintf("Exhausted Nodes|%d", alloc.Metrics.NodesExhausted), 250 fmt.Sprintf("Allocation Time|%s", alloc.Metrics.AllocationTime), 251 fmt.Sprintf("Failures|%d", alloc.Metrics.CoalescedFailures)) 252 } 253 254 return formatKV(basic), nil 255 } 256 257 // outputTaskDetails prints task details for each task in the allocation, 258 // optionally printing verbose statistics if displayStats is set 259 func (c *AllocStatusCommand) outputTaskDetails(alloc *api.Allocation, stats *api.AllocResourceUsage, displayStats bool) { 260 for task := range c.sortedTaskStateIterator(alloc.TaskStates) { 261 state := alloc.TaskStates[task] 262 c.Ui.Output(c.Colorize().Color(fmt.Sprintf("\n[bold]Task %q is %q[reset]", task, state.State))) 263 c.outputTaskResources(alloc, task, stats, displayStats) 264 c.Ui.Output("") 265 c.outputTaskStatus(state) 266 } 267 } 268 269 func formatTaskTimes(t time.Time) string { 270 if t.IsZero() { 271 return "N/A" 272 } 273 274 return formatTime(t) 275 } 276 277 // outputTaskStatus prints out a list of the most recent events for the given 278 // task state. 279 func (c *AllocStatusCommand) outputTaskStatus(state *api.TaskState) { 280 basic := []string{ 281 fmt.Sprintf("Started At|%s", formatTaskTimes(state.StartedAt)), 282 fmt.Sprintf("Finished At|%s", formatTaskTimes(state.FinishedAt)), 283 fmt.Sprintf("Total Restarts|%d", state.Restarts), 284 fmt.Sprintf("Last Restart|%s", formatTaskTimes(state.LastRestart))} 285 286 c.Ui.Output("Task Events:") 287 c.Ui.Output(formatKV(basic)) 288 c.Ui.Output("") 289 290 c.Ui.Output("Recent Events:") 291 events := make([]string, len(state.Events)+1) 292 events[0] = "Time|Type|Description" 293 294 size := len(state.Events) 295 for i, event := range state.Events { 296 formatedTime := formatUnixNanoTime(event.Time) 297 298 // Build up the description based on the event type. 299 var desc string 300 switch event.Type { 301 case api.TaskSetup: 302 desc = event.Message 303 case api.TaskStarted: 304 desc = "Task started by client" 305 case api.TaskReceived: 306 desc = "Task received by client" 307 case api.TaskFailedValidation: 308 if event.ValidationError != "" { 309 desc = event.ValidationError 310 } else { 311 desc = "Validation of task failed" 312 } 313 case api.TaskSetupFailure: 314 if event.SetupError != "" { 315 desc = event.SetupError 316 } else { 317 desc = "Task setup failed" 318 } 319 case api.TaskDriverFailure: 320 if event.DriverError != "" { 321 desc = event.DriverError 322 } else { 323 desc = "Failed to start task" 324 } 325 case api.TaskDownloadingArtifacts: 326 desc = "Client is downloading artifacts" 327 case api.TaskArtifactDownloadFailed: 328 if event.DownloadError != "" { 329 desc = event.DownloadError 330 } else { 331 desc = "Failed to download artifacts" 332 } 333 case api.TaskKilling: 334 if event.KillReason != "" { 335 desc = fmt.Sprintf("Killing task: %v", event.KillReason) 336 } else if event.KillTimeout != 0 { 337 desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout) 338 } else { 339 desc = "Sent interrupt" 340 } 341 case api.TaskKilled: 342 if event.KillError != "" { 343 desc = event.KillError 344 } else { 345 desc = "Task successfully killed" 346 } 347 case api.TaskTerminated: 348 var parts []string 349 parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode)) 350 351 if event.Signal != 0 { 352 parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal)) 353 } 354 355 if event.Message != "" { 356 parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message)) 357 } 358 desc = strings.Join(parts, ", ") 359 case api.TaskRestarting: 360 in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay)) 361 if event.RestartReason != "" && event.RestartReason != client.ReasonWithinPolicy { 362 desc = fmt.Sprintf("%s - %s", event.RestartReason, in) 363 } else { 364 desc = in 365 } 366 case api.TaskNotRestarting: 367 if event.RestartReason != "" { 368 desc = event.RestartReason 369 } else { 370 desc = "Task exceeded restart policy" 371 } 372 case api.TaskSiblingFailed: 373 if event.FailedSibling != "" { 374 desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling) 375 } else { 376 desc = "Task's sibling failed" 377 } 378 case api.TaskSignaling: 379 sig := event.TaskSignal 380 reason := event.TaskSignalReason 381 382 if sig == "" && reason == "" { 383 desc = "Task being sent a signal" 384 } else if sig == "" { 385 desc = reason 386 } else if reason == "" { 387 desc = fmt.Sprintf("Task being sent signal %v", sig) 388 } else { 389 desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason) 390 } 391 case api.TaskRestartSignal: 392 if event.RestartReason != "" { 393 desc = event.RestartReason 394 } else { 395 desc = "Task signaled to restart" 396 } 397 case api.TaskDriverMessage: 398 desc = event.DriverMessage 399 case api.TaskLeaderDead: 400 desc = "Leader Task in Group dead" 401 } 402 403 // Reverse order so we are sorted by time 404 events[size-i] = fmt.Sprintf("%s|%s|%s", formatedTime, event.Type, desc) 405 } 406 c.Ui.Output(formatList(events)) 407 } 408 409 // outputTaskResources prints the task resources for the passed task and if 410 // displayStats is set, verbose resource usage statistics 411 func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task string, stats *api.AllocResourceUsage, displayStats bool) { 412 resource, ok := alloc.TaskResources[task] 413 if !ok { 414 return 415 } 416 417 c.Ui.Output("Task Resources") 418 var addr []string 419 for _, nw := range resource.Networks { 420 ports := append(nw.DynamicPorts, nw.ReservedPorts...) 421 for _, port := range ports { 422 addr = append(addr, fmt.Sprintf("%v: %v:%v\n", port.Label, nw.IP, port.Value)) 423 } 424 } 425 var resourcesOutput []string 426 resourcesOutput = append(resourcesOutput, "CPU|Memory|Disk|IOPS|Addresses") 427 firstAddr := "" 428 if len(addr) > 0 { 429 firstAddr = addr[0] 430 } 431 432 // Display the rolled up stats. If possible prefer the live statistics 433 cpuUsage := strconv.Itoa(*resource.CPU) 434 memUsage := humanize.IBytes(uint64(*resource.MemoryMB * bytesPerMegabyte)) 435 if stats != nil { 436 if ru, ok := stats.Tasks[task]; ok && ru != nil && ru.ResourceUsage != nil { 437 if cs := ru.ResourceUsage.CpuStats; cs != nil { 438 cpuUsage = fmt.Sprintf("%v/%v", math.Floor(cs.TotalTicks), cpuUsage) 439 } 440 if ms := ru.ResourceUsage.MemoryStats; ms != nil { 441 memUsage = fmt.Sprintf("%v/%v", humanize.IBytes(ms.RSS), memUsage) 442 } 443 } 444 } 445 resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v MHz|%v|%v|%v|%v", 446 cpuUsage, 447 memUsage, 448 humanize.IBytes(uint64(*alloc.Resources.DiskMB*bytesPerMegabyte)), 449 *resource.IOPS, 450 firstAddr)) 451 for i := 1; i < len(addr); i++ { 452 resourcesOutput = append(resourcesOutput, fmt.Sprintf("||||%v", addr[i])) 453 } 454 c.Ui.Output(formatListWithSpaces(resourcesOutput)) 455 456 if stats != nil { 457 if ru, ok := stats.Tasks[task]; ok && ru != nil && displayStats && ru.ResourceUsage != nil { 458 c.Ui.Output("") 459 c.outputVerboseResourceUsage(task, ru.ResourceUsage) 460 } 461 } 462 } 463 464 // outputVerboseResourceUsage outputs the verbose resource usage for the passed 465 // task 466 func (c *AllocStatusCommand) outputVerboseResourceUsage(task string, resourceUsage *api.ResourceUsage) { 467 memoryStats := resourceUsage.MemoryStats 468 cpuStats := resourceUsage.CpuStats 469 if memoryStats != nil && len(memoryStats.Measured) > 0 { 470 c.Ui.Output("Memory Stats") 471 472 // Sort the measured stats 473 sort.Strings(memoryStats.Measured) 474 475 var measuredStats []string 476 for _, measured := range memoryStats.Measured { 477 switch measured { 478 case "RSS": 479 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.RSS)) 480 case "Cache": 481 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Cache)) 482 case "Swap": 483 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Swap)) 484 case "Max Usage": 485 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.MaxUsage)) 486 case "Kernel Usage": 487 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelUsage)) 488 case "Kernel Max Usage": 489 measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelMaxUsage)) 490 } 491 } 492 493 out := make([]string, 2) 494 out[0] = strings.Join(memoryStats.Measured, "|") 495 out[1] = strings.Join(measuredStats, "|") 496 c.Ui.Output(formatList(out)) 497 c.Ui.Output("") 498 } 499 500 if cpuStats != nil && len(cpuStats.Measured) > 0 { 501 c.Ui.Output("CPU Stats") 502 503 // Sort the measured stats 504 sort.Strings(cpuStats.Measured) 505 506 var measuredStats []string 507 for _, measured := range cpuStats.Measured { 508 switch measured { 509 case "Percent": 510 percent := strconv.FormatFloat(cpuStats.Percent, 'f', 2, 64) 511 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 512 case "Throttled Periods": 513 measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledPeriods)) 514 case "Throttled Time": 515 measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledTime)) 516 case "User Mode": 517 percent := strconv.FormatFloat(cpuStats.UserMode, 'f', 2, 64) 518 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 519 case "System Mode": 520 percent := strconv.FormatFloat(cpuStats.SystemMode, 'f', 2, 64) 521 measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent)) 522 } 523 } 524 525 out := make([]string, 2) 526 out[0] = strings.Join(cpuStats.Measured, "|") 527 out[1] = strings.Join(measuredStats, "|") 528 c.Ui.Output(formatList(out)) 529 } 530 } 531 532 // shortTaskStatus prints out the current state of each task. 533 func (c *AllocStatusCommand) shortTaskStatus(alloc *api.Allocation) { 534 tasks := make([]string, 0, len(alloc.TaskStates)+1) 535 tasks = append(tasks, "Name|State|Last Event|Time") 536 for task := range c.sortedTaskStateIterator(alloc.TaskStates) { 537 state := alloc.TaskStates[task] 538 lastState := state.State 539 var lastEvent, lastTime string 540 541 l := len(state.Events) 542 if l != 0 { 543 last := state.Events[l-1] 544 lastEvent = last.Type 545 lastTime = formatUnixNanoTime(last.Time) 546 } 547 548 tasks = append(tasks, fmt.Sprintf("%s|%s|%s|%s", 549 task, lastState, lastEvent, lastTime)) 550 } 551 552 c.Ui.Output(c.Colorize().Color("\n[bold]Tasks[reset]")) 553 c.Ui.Output(formatList(tasks)) 554 } 555 556 // sortedTaskStateIterator is a helper that takes the task state map and returns a 557 // channel that returns the keys in a sorted order. 558 func (c *AllocStatusCommand) sortedTaskStateIterator(m map[string]*api.TaskState) <-chan string { 559 output := make(chan string, len(m)) 560 keys := make([]string, len(m)) 561 i := 0 562 for k := range m { 563 keys[i] = k 564 i++ 565 } 566 sort.Strings(keys) 567 568 for _, key := range keys { 569 output <- key 570 } 571 572 close(output) 573 return output 574 }