github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/command/alloc_status.go (about)

     1  package command
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"sort"
     7  	"strconv"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/dustin/go-humanize"
    12  	"github.com/mitchellh/colorstring"
    13  
    14  	"github.com/hashicorp/nomad/api"
    15  	"github.com/hashicorp/nomad/client"
    16  )
    17  
// AllocStatusCommand implements the "alloc-status" CLI command, which displays
// status information and metadata for an allocation and its tasks.
type AllocStatusCommand struct {
	// Meta is embedded; the methods in this file rely on it for flag parsing
	// (FlagSet), API client construction (Client), terminal output (Ui) and
	// colorized output (Colorize).
	Meta
	// color is not referenced by any method visible in this file.
	// NOTE(review): presumably a cached colorizer — confirm before removing.
	color *colorstring.Colorize
}
    22  
    23  func (c *AllocStatusCommand) Help() string {
    24  	helpText := `
    25  Usage: nomad alloc-status [options] <allocation>
    26  
    27    Display information about existing allocations and its tasks. This command can
    28    be used to inspect the current status of an allocation, including its running
    29    status, metadata, and verbose failure messages reported by internal
    30    subsystems.
    31  
    32  General Options:
    33  
    34    ` + generalOptionsUsage() + `
    35  
    36  Alloc Status Options:
    37  
    38    -short
    39      Display short output. Shows only the most recent task event.
    40  
    41    -stats
    42      Display detailed resource usage statistics.
    43  
    44    -verbose
    45      Show full information.
    46  
    47    -json
    48      Output the allocation in its JSON format.
    49  
    50    -t
    51      Format and display allocation using a Go template.
    52  `
    53  
    54  	return strings.TrimSpace(helpText)
    55  }
    56  
    57  func (c *AllocStatusCommand) Synopsis() string {
    58  	return "Display allocation status information and metadata"
    59  }
    60  
    61  func (c *AllocStatusCommand) Run(args []string) int {
    62  	var short, displayStats, verbose, json bool
    63  	var tmpl string
    64  
    65  	flags := c.Meta.FlagSet("alloc-status", FlagSetClient)
    66  	flags.Usage = func() { c.Ui.Output(c.Help()) }
    67  	flags.BoolVar(&short, "short", false, "")
    68  	flags.BoolVar(&verbose, "verbose", false, "")
    69  	flags.BoolVar(&displayStats, "stats", false, "")
    70  	flags.BoolVar(&json, "json", false, "")
    71  	flags.StringVar(&tmpl, "t", "", "")
    72  
    73  	if err := flags.Parse(args); err != nil {
    74  		return 1
    75  	}
    76  
    77  	// Check that we got exactly one allocation ID
    78  	args = flags.Args()
    79  
    80  	// Get the HTTP client
    81  	client, err := c.Meta.Client()
    82  	if err != nil {
    83  		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
    84  		return 1
    85  	}
    86  
    87  	// If args not specified but output format is specified, format and output the allocations data list
    88  	if len(args) == 0 && json || len(tmpl) > 0 {
    89  		allocs, _, err := client.Allocations().List(nil)
    90  		if err != nil {
    91  			c.Ui.Error(fmt.Sprintf("Error querying allocations: %v", err))
    92  			return 1
    93  		}
    94  
    95  		out, err := Format(json, tmpl, allocs)
    96  		if err != nil {
    97  			c.Ui.Error(err.Error())
    98  			return 1
    99  		}
   100  
   101  		c.Ui.Output(out)
   102  		return 0
   103  	}
   104  
   105  	if len(args) != 1 {
   106  		c.Ui.Error(c.Help())
   107  		return 1
   108  	}
   109  	allocID := args[0]
   110  
   111  	// Truncate the id unless full length is requested
   112  	length := shortId
   113  	if verbose {
   114  		length = fullId
   115  	}
   116  
   117  	// Query the allocation info
   118  	if len(allocID) == 1 {
   119  		c.Ui.Error(fmt.Sprintf("Identifier must contain at least two characters."))
   120  		return 1
   121  	}
   122  	if len(allocID)%2 == 1 {
   123  		// Identifiers must be of even length, so we strip off the last byte
   124  		// to provide a consistent user experience.
   125  		allocID = allocID[:len(allocID)-1]
   126  	}
   127  
   128  	allocs, _, err := client.Allocations().PrefixList(allocID)
   129  	if err != nil {
   130  		c.Ui.Error(fmt.Sprintf("Error querying allocation: %v", err))
   131  		return 1
   132  	}
   133  	if len(allocs) == 0 {
   134  		c.Ui.Error(fmt.Sprintf("No allocation(s) with prefix or id %q found", allocID))
   135  		return 1
   136  	}
   137  	if len(allocs) > 1 {
   138  		out := formatAllocListStubs(allocs, verbose, length)
   139  		c.Ui.Output(fmt.Sprintf("Prefix matched multiple allocations\n\n%s", out))
   140  		return 0
   141  	}
   142  	// Prefix lookup matched a single allocation
   143  	alloc, _, err := client.Allocations().Info(allocs[0].ID, nil)
   144  	if err != nil {
   145  		c.Ui.Error(fmt.Sprintf("Error querying allocation: %s", err))
   146  		return 1
   147  	}
   148  
   149  	// If output format is specified, format and output the data
   150  	if json || len(tmpl) > 0 {
   151  		out, err := Format(json, tmpl, alloc)
   152  		if err != nil {
   153  			c.Ui.Error(err.Error())
   154  			return 1
   155  		}
   156  
   157  		c.Ui.Output(out)
   158  		return 0
   159  	}
   160  
   161  	// Format the allocation data
   162  	output, err := formatAllocBasicInfo(alloc, client, length, verbose)
   163  	if err != nil {
   164  		c.Ui.Error(err.Error())
   165  		return 1
   166  	}
   167  	c.Ui.Output(output)
   168  
   169  	if short {
   170  		c.shortTaskStatus(alloc)
   171  	} else {
   172  		var statsErr error
   173  		var stats *api.AllocResourceUsage
   174  		stats, statsErr = client.Allocations().Stats(alloc, nil)
   175  		if statsErr != nil {
   176  			c.Ui.Output("")
   177  			if statsErr != api.NodeDownErr {
   178  				c.Ui.Error(fmt.Sprintf("Couldn't retrieve stats (HINT: ensure Client.Advertise.HTTP is set): %v", statsErr))
   179  			} else {
   180  				c.Ui.Output("Omitting resource statistics since the node is down.")
   181  			}
   182  		}
   183  		c.outputTaskDetails(alloc, stats, displayStats)
   184  	}
   185  
   186  	// Format the detailed status
   187  	if verbose {
   188  		c.Ui.Output(c.Colorize().Color("\n[bold]Placement Metrics[reset]"))
   189  		c.Ui.Output(formatAllocMetrics(alloc.Metrics, true, "  "))
   190  	}
   191  
   192  	return 0
   193  }
   194  
   195  func formatAllocBasicInfo(alloc *api.Allocation, client *api.Client, uuidLength int, verbose bool) (string, error) {
   196  	basic := []string{
   197  		fmt.Sprintf("ID|%s", limit(alloc.ID, uuidLength)),
   198  		fmt.Sprintf("Eval ID|%s", limit(alloc.EvalID, uuidLength)),
   199  		fmt.Sprintf("Name|%s", alloc.Name),
   200  		fmt.Sprintf("Node ID|%s", limit(alloc.NodeID, uuidLength)),
   201  		fmt.Sprintf("Job ID|%s", alloc.JobID),
   202  		fmt.Sprintf("Job Version|%d", *alloc.Job.Version),
   203  		fmt.Sprintf("Client Status|%s", alloc.ClientStatus),
   204  		fmt.Sprintf("Client Description|%s", alloc.ClientDescription),
   205  		fmt.Sprintf("Desired Status|%s", alloc.DesiredStatus),
   206  		fmt.Sprintf("Desired Description|%s", alloc.DesiredDescription),
   207  		fmt.Sprintf("Created At|%s", formatUnixNanoTime(alloc.CreateTime)),
   208  	}
   209  
   210  	if alloc.DeploymentID != "" {
   211  		health := "unset"
   212  		if alloc.DeploymentStatus != nil && alloc.DeploymentStatus.Healthy != nil {
   213  			if *alloc.DeploymentStatus.Healthy {
   214  				health = "healthy"
   215  			} else {
   216  				health = "unhealthy"
   217  			}
   218  		}
   219  
   220  		basic = append(basic,
   221  			fmt.Sprintf("Deployment ID|%s", limit(alloc.DeploymentID, uuidLength)),
   222  			fmt.Sprintf("Deployment Health|%s", health))
   223  
   224  		// Check if this allocation is a canary
   225  		deployment, _, err := client.Deployments().Info(alloc.DeploymentID, nil)
   226  		if err != nil {
   227  			return "", fmt.Errorf("Error querying deployment %q: %s", alloc.DeploymentID, err)
   228  		}
   229  
   230  		canary := false
   231  		if state, ok := deployment.TaskGroups[alloc.TaskGroup]; ok {
   232  			for _, id := range state.PlacedCanaries {
   233  				if id == alloc.ID {
   234  					canary = true
   235  					break
   236  				}
   237  			}
   238  		}
   239  
   240  		if canary {
   241  			basic = append(basic, fmt.Sprintf("Canary|%v", true))
   242  		}
   243  	}
   244  
   245  	if verbose {
   246  		basic = append(basic,
   247  			fmt.Sprintf("Evaluated Nodes|%d", alloc.Metrics.NodesEvaluated),
   248  			fmt.Sprintf("Filtered Nodes|%d", alloc.Metrics.NodesFiltered),
   249  			fmt.Sprintf("Exhausted Nodes|%d", alloc.Metrics.NodesExhausted),
   250  			fmt.Sprintf("Allocation Time|%s", alloc.Metrics.AllocationTime),
   251  			fmt.Sprintf("Failures|%d", alloc.Metrics.CoalescedFailures))
   252  	}
   253  
   254  	return formatKV(basic), nil
   255  }
   256  
   257  // outputTaskDetails prints task details for each task in the allocation,
   258  // optionally printing verbose statistics if displayStats is set
   259  func (c *AllocStatusCommand) outputTaskDetails(alloc *api.Allocation, stats *api.AllocResourceUsage, displayStats bool) {
   260  	for task := range c.sortedTaskStateIterator(alloc.TaskStates) {
   261  		state := alloc.TaskStates[task]
   262  		c.Ui.Output(c.Colorize().Color(fmt.Sprintf("\n[bold]Task %q is %q[reset]", task, state.State)))
   263  		c.outputTaskResources(alloc, task, stats, displayStats)
   264  		c.Ui.Output("")
   265  		c.outputTaskStatus(state)
   266  	}
   267  }
   268  
   269  func formatTaskTimes(t time.Time) string {
   270  	if t.IsZero() {
   271  		return "N/A"
   272  	}
   273  
   274  	return formatTime(t)
   275  }
   276  
   277  // outputTaskStatus prints out a list of the most recent events for the given
   278  // task state.
   279  func (c *AllocStatusCommand) outputTaskStatus(state *api.TaskState) {
   280  	basic := []string{
   281  		fmt.Sprintf("Started At|%s", formatTaskTimes(state.StartedAt)),
   282  		fmt.Sprintf("Finished At|%s", formatTaskTimes(state.FinishedAt)),
   283  		fmt.Sprintf("Total Restarts|%d", state.Restarts),
   284  		fmt.Sprintf("Last Restart|%s", formatTaskTimes(state.LastRestart))}
   285  
   286  	c.Ui.Output("Task Events:")
   287  	c.Ui.Output(formatKV(basic))
   288  	c.Ui.Output("")
   289  
   290  	c.Ui.Output("Recent Events:")
   291  	events := make([]string, len(state.Events)+1)
   292  	events[0] = "Time|Type|Description"
   293  
   294  	size := len(state.Events)
   295  	for i, event := range state.Events {
   296  		formatedTime := formatUnixNanoTime(event.Time)
   297  
   298  		// Build up the description based on the event type.
   299  		var desc string
   300  		switch event.Type {
   301  		case api.TaskSetup:
   302  			desc = event.Message
   303  		case api.TaskStarted:
   304  			desc = "Task started by client"
   305  		case api.TaskReceived:
   306  			desc = "Task received by client"
   307  		case api.TaskFailedValidation:
   308  			if event.ValidationError != "" {
   309  				desc = event.ValidationError
   310  			} else {
   311  				desc = "Validation of task failed"
   312  			}
   313  		case api.TaskSetupFailure:
   314  			if event.SetupError != "" {
   315  				desc = event.SetupError
   316  			} else {
   317  				desc = "Task setup failed"
   318  			}
   319  		case api.TaskDriverFailure:
   320  			if event.DriverError != "" {
   321  				desc = event.DriverError
   322  			} else {
   323  				desc = "Failed to start task"
   324  			}
   325  		case api.TaskDownloadingArtifacts:
   326  			desc = "Client is downloading artifacts"
   327  		case api.TaskArtifactDownloadFailed:
   328  			if event.DownloadError != "" {
   329  				desc = event.DownloadError
   330  			} else {
   331  				desc = "Failed to download artifacts"
   332  			}
   333  		case api.TaskKilling:
   334  			if event.KillReason != "" {
   335  				desc = fmt.Sprintf("Killing task: %v", event.KillReason)
   336  			} else if event.KillTimeout != 0 {
   337  				desc = fmt.Sprintf("Sent interrupt. Waiting %v before force killing", event.KillTimeout)
   338  			} else {
   339  				desc = "Sent interrupt"
   340  			}
   341  		case api.TaskKilled:
   342  			if event.KillError != "" {
   343  				desc = event.KillError
   344  			} else {
   345  				desc = "Task successfully killed"
   346  			}
   347  		case api.TaskTerminated:
   348  			var parts []string
   349  			parts = append(parts, fmt.Sprintf("Exit Code: %d", event.ExitCode))
   350  
   351  			if event.Signal != 0 {
   352  				parts = append(parts, fmt.Sprintf("Signal: %d", event.Signal))
   353  			}
   354  
   355  			if event.Message != "" {
   356  				parts = append(parts, fmt.Sprintf("Exit Message: %q", event.Message))
   357  			}
   358  			desc = strings.Join(parts, ", ")
   359  		case api.TaskRestarting:
   360  			in := fmt.Sprintf("Task restarting in %v", time.Duration(event.StartDelay))
   361  			if event.RestartReason != "" && event.RestartReason != client.ReasonWithinPolicy {
   362  				desc = fmt.Sprintf("%s - %s", event.RestartReason, in)
   363  			} else {
   364  				desc = in
   365  			}
   366  		case api.TaskNotRestarting:
   367  			if event.RestartReason != "" {
   368  				desc = event.RestartReason
   369  			} else {
   370  				desc = "Task exceeded restart policy"
   371  			}
   372  		case api.TaskSiblingFailed:
   373  			if event.FailedSibling != "" {
   374  				desc = fmt.Sprintf("Task's sibling %q failed", event.FailedSibling)
   375  			} else {
   376  				desc = "Task's sibling failed"
   377  			}
   378  		case api.TaskSignaling:
   379  			sig := event.TaskSignal
   380  			reason := event.TaskSignalReason
   381  
   382  			if sig == "" && reason == "" {
   383  				desc = "Task being sent a signal"
   384  			} else if sig == "" {
   385  				desc = reason
   386  			} else if reason == "" {
   387  				desc = fmt.Sprintf("Task being sent signal %v", sig)
   388  			} else {
   389  				desc = fmt.Sprintf("Task being sent signal %v: %v", sig, reason)
   390  			}
   391  		case api.TaskRestartSignal:
   392  			if event.RestartReason != "" {
   393  				desc = event.RestartReason
   394  			} else {
   395  				desc = "Task signaled to restart"
   396  			}
   397  		case api.TaskDriverMessage:
   398  			desc = event.DriverMessage
   399  		case api.TaskLeaderDead:
   400  			desc = "Leader Task in Group dead"
   401  		}
   402  
   403  		// Reverse order so we are sorted by time
   404  		events[size-i] = fmt.Sprintf("%s|%s|%s", formatedTime, event.Type, desc)
   405  	}
   406  	c.Ui.Output(formatList(events))
   407  }
   408  
   409  // outputTaskResources prints the task resources for the passed task and if
   410  // displayStats is set, verbose resource usage statistics
   411  func (c *AllocStatusCommand) outputTaskResources(alloc *api.Allocation, task string, stats *api.AllocResourceUsage, displayStats bool) {
   412  	resource, ok := alloc.TaskResources[task]
   413  	if !ok {
   414  		return
   415  	}
   416  
   417  	c.Ui.Output("Task Resources")
   418  	var addr []string
   419  	for _, nw := range resource.Networks {
   420  		ports := append(nw.DynamicPorts, nw.ReservedPorts...)
   421  		for _, port := range ports {
   422  			addr = append(addr, fmt.Sprintf("%v: %v:%v\n", port.Label, nw.IP, port.Value))
   423  		}
   424  	}
   425  	var resourcesOutput []string
   426  	resourcesOutput = append(resourcesOutput, "CPU|Memory|Disk|IOPS|Addresses")
   427  	firstAddr := ""
   428  	if len(addr) > 0 {
   429  		firstAddr = addr[0]
   430  	}
   431  
   432  	// Display the rolled up stats. If possible prefer the live statistics
   433  	cpuUsage := strconv.Itoa(*resource.CPU)
   434  	memUsage := humanize.IBytes(uint64(*resource.MemoryMB * bytesPerMegabyte))
   435  	if stats != nil {
   436  		if ru, ok := stats.Tasks[task]; ok && ru != nil && ru.ResourceUsage != nil {
   437  			if cs := ru.ResourceUsage.CpuStats; cs != nil {
   438  				cpuUsage = fmt.Sprintf("%v/%v", math.Floor(cs.TotalTicks), cpuUsage)
   439  			}
   440  			if ms := ru.ResourceUsage.MemoryStats; ms != nil {
   441  				memUsage = fmt.Sprintf("%v/%v", humanize.IBytes(ms.RSS), memUsage)
   442  			}
   443  		}
   444  	}
   445  	resourcesOutput = append(resourcesOutput, fmt.Sprintf("%v MHz|%v|%v|%v|%v",
   446  		cpuUsage,
   447  		memUsage,
   448  		humanize.IBytes(uint64(*alloc.Resources.DiskMB*bytesPerMegabyte)),
   449  		*resource.IOPS,
   450  		firstAddr))
   451  	for i := 1; i < len(addr); i++ {
   452  		resourcesOutput = append(resourcesOutput, fmt.Sprintf("||||%v", addr[i]))
   453  	}
   454  	c.Ui.Output(formatListWithSpaces(resourcesOutput))
   455  
   456  	if stats != nil {
   457  		if ru, ok := stats.Tasks[task]; ok && ru != nil && displayStats && ru.ResourceUsage != nil {
   458  			c.Ui.Output("")
   459  			c.outputVerboseResourceUsage(task, ru.ResourceUsage)
   460  		}
   461  	}
   462  }
   463  
   464  // outputVerboseResourceUsage outputs the verbose resource usage for the passed
   465  // task
   466  func (c *AllocStatusCommand) outputVerboseResourceUsage(task string, resourceUsage *api.ResourceUsage) {
   467  	memoryStats := resourceUsage.MemoryStats
   468  	cpuStats := resourceUsage.CpuStats
   469  	if memoryStats != nil && len(memoryStats.Measured) > 0 {
   470  		c.Ui.Output("Memory Stats")
   471  
   472  		// Sort the measured stats
   473  		sort.Strings(memoryStats.Measured)
   474  
   475  		var measuredStats []string
   476  		for _, measured := range memoryStats.Measured {
   477  			switch measured {
   478  			case "RSS":
   479  				measuredStats = append(measuredStats, humanize.IBytes(memoryStats.RSS))
   480  			case "Cache":
   481  				measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Cache))
   482  			case "Swap":
   483  				measuredStats = append(measuredStats, humanize.IBytes(memoryStats.Swap))
   484  			case "Max Usage":
   485  				measuredStats = append(measuredStats, humanize.IBytes(memoryStats.MaxUsage))
   486  			case "Kernel Usage":
   487  				measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelUsage))
   488  			case "Kernel Max Usage":
   489  				measuredStats = append(measuredStats, humanize.IBytes(memoryStats.KernelMaxUsage))
   490  			}
   491  		}
   492  
   493  		out := make([]string, 2)
   494  		out[0] = strings.Join(memoryStats.Measured, "|")
   495  		out[1] = strings.Join(measuredStats, "|")
   496  		c.Ui.Output(formatList(out))
   497  		c.Ui.Output("")
   498  	}
   499  
   500  	if cpuStats != nil && len(cpuStats.Measured) > 0 {
   501  		c.Ui.Output("CPU Stats")
   502  
   503  		// Sort the measured stats
   504  		sort.Strings(cpuStats.Measured)
   505  
   506  		var measuredStats []string
   507  		for _, measured := range cpuStats.Measured {
   508  			switch measured {
   509  			case "Percent":
   510  				percent := strconv.FormatFloat(cpuStats.Percent, 'f', 2, 64)
   511  				measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent))
   512  			case "Throttled Periods":
   513  				measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledPeriods))
   514  			case "Throttled Time":
   515  				measuredStats = append(measuredStats, fmt.Sprintf("%v", cpuStats.ThrottledTime))
   516  			case "User Mode":
   517  				percent := strconv.FormatFloat(cpuStats.UserMode, 'f', 2, 64)
   518  				measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent))
   519  			case "System Mode":
   520  				percent := strconv.FormatFloat(cpuStats.SystemMode, 'f', 2, 64)
   521  				measuredStats = append(measuredStats, fmt.Sprintf("%v%%", percent))
   522  			}
   523  		}
   524  
   525  		out := make([]string, 2)
   526  		out[0] = strings.Join(cpuStats.Measured, "|")
   527  		out[1] = strings.Join(measuredStats, "|")
   528  		c.Ui.Output(formatList(out))
   529  	}
   530  }
   531  
   532  // shortTaskStatus prints out the current state of each task.
   533  func (c *AllocStatusCommand) shortTaskStatus(alloc *api.Allocation) {
   534  	tasks := make([]string, 0, len(alloc.TaskStates)+1)
   535  	tasks = append(tasks, "Name|State|Last Event|Time")
   536  	for task := range c.sortedTaskStateIterator(alloc.TaskStates) {
   537  		state := alloc.TaskStates[task]
   538  		lastState := state.State
   539  		var lastEvent, lastTime string
   540  
   541  		l := len(state.Events)
   542  		if l != 0 {
   543  			last := state.Events[l-1]
   544  			lastEvent = last.Type
   545  			lastTime = formatUnixNanoTime(last.Time)
   546  		}
   547  
   548  		tasks = append(tasks, fmt.Sprintf("%s|%s|%s|%s",
   549  			task, lastState, lastEvent, lastTime))
   550  	}
   551  
   552  	c.Ui.Output(c.Colorize().Color("\n[bold]Tasks[reset]"))
   553  	c.Ui.Output(formatList(tasks))
   554  }
   555  
   556  // sortedTaskStateIterator is a helper that takes the task state map and returns a
   557  // channel that returns the keys in a sorted order.
   558  func (c *AllocStatusCommand) sortedTaskStateIterator(m map[string]*api.TaskState) <-chan string {
   559  	output := make(chan string, len(m))
   560  	keys := make([]string, len(m))
   561  	i := 0
   562  	for k := range m {
   563  		keys[i] = k
   564  		i++
   565  	}
   566  	sort.Strings(keys)
   567  
   568  	for _, key := range keys {
   569  		output <- key
   570  	}
   571  
   572  	close(output)
   573  	return output
   574  }