github.com/mongey/nomad@v0.5.2/command/status.go (about)

     1  package command
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/gob"
     6  	"fmt"
     7  	"sort"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/hashicorp/nomad/api"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  )
    14  
const (
	// maxFailedTGs is the maximum number of task groups we show failure reasons
	// for before deferring to eval-status
	maxFailedTGs = 5
)
    20  
// StatusCommand implements the "nomad status" command, which displays status
// information about jobs.
type StatusCommand struct {
	Meta
	length    int  // number of ID characters to display; Run sets it to shortId or fullId
	evals     bool // value of the -evals flag
	allAllocs bool // value of the -all-allocs flag
	verbose   bool // value of the -verbose flag
}
    28  
    29  func (c *StatusCommand) Help() string {
    30  	helpText := `
    31  Usage: nomad status [options] <job>
    32  
    33    Display status information about jobs. If no job ID is given,
    34    a list of all known jobs will be dumped.
    35  
    36  General Options:
    37  
    38    ` + generalOptionsUsage() + `
    39  
    40  Status Options:
    41  
    42    -short
    43      Display short output. Used only when a single job is being
    44      queried, and drops verbose information about allocations.
    45  
    46    -evals
    47      Display the evaluations associated with the job.
    48  
    49    -all-allocs
    50      Display all allocations matching the job ID, including those from an older
    51      instance of the job.
    52  
    53    -verbose
    54      Display full information.
    55  `
    56  	return strings.TrimSpace(helpText)
    57  }
    58  
    59  func (c *StatusCommand) Synopsis() string {
    60  	return "Display status information about jobs"
    61  }
    62  
    63  func (c *StatusCommand) Run(args []string) int {
    64  	var short bool
    65  
    66  	flags := c.Meta.FlagSet("status", FlagSetClient)
    67  	flags.Usage = func() { c.Ui.Output(c.Help()) }
    68  	flags.BoolVar(&short, "short", false, "")
    69  	flags.BoolVar(&c.evals, "evals", false, "")
    70  	flags.BoolVar(&c.allAllocs, "all-allocs", false, "")
    71  	flags.BoolVar(&c.verbose, "verbose", false, "")
    72  
    73  	if err := flags.Parse(args); err != nil {
    74  		return 1
    75  	}
    76  
    77  	// Check that we either got no jobs or exactly one.
    78  	args = flags.Args()
    79  	if len(args) > 1 {
    80  		c.Ui.Error(c.Help())
    81  		return 1
    82  	}
    83  
    84  	// Truncate the id unless full length is requested
    85  	c.length = shortId
    86  	if c.verbose {
    87  		c.length = fullId
    88  	}
    89  
    90  	// Get the HTTP client
    91  	client, err := c.Meta.Client()
    92  	if err != nil {
    93  		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
    94  		return 1
    95  	}
    96  
    97  	// Invoke list mode if no job ID.
    98  	if len(args) == 0 {
    99  		jobs, _, err := client.Jobs().List(nil)
   100  		if err != nil {
   101  			c.Ui.Error(fmt.Sprintf("Error querying jobs: %s", err))
   102  			return 1
   103  		}
   104  
   105  		if len(jobs) == 0 {
   106  			// No output if we have no jobs
   107  			c.Ui.Output("No running jobs")
   108  		} else {
   109  			c.Ui.Output(createStatusListOutput(jobs))
   110  		}
   111  		return 0
   112  	}
   113  
   114  	// Try querying the job
   115  	jobID := args[0]
   116  	jobs, _, err := client.Jobs().PrefixList(jobID)
   117  	if err != nil {
   118  		c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
   119  		return 1
   120  	}
   121  	if len(jobs) == 0 {
   122  		c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID))
   123  		return 1
   124  	}
   125  	if len(jobs) > 1 && strings.TrimSpace(jobID) != jobs[0].ID {
   126  		c.Ui.Output(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", createStatusListOutput(jobs)))
   127  		return 0
   128  	}
   129  	// Prefix lookup matched a single job
   130  	job, _, err := client.Jobs().Info(jobs[0].ID, nil)
   131  	if err != nil {
   132  		c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
   133  		return 1
   134  	}
   135  
   136  	// Check if it is periodic
   137  	sJob, err := convertApiJob(job)
   138  	if err != nil {
   139  		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
   140  		return 1
   141  	}
   142  	periodic := sJob.IsPeriodic()
   143  
   144  	// Format the job info
   145  	basic := []string{
   146  		fmt.Sprintf("ID|%s", job.ID),
   147  		fmt.Sprintf("Name|%s", job.Name),
   148  		fmt.Sprintf("Type|%s", job.Type),
   149  		fmt.Sprintf("Priority|%d", job.Priority),
   150  		fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")),
   151  		fmt.Sprintf("Status|%s", job.Status),
   152  		fmt.Sprintf("Periodic|%v", periodic),
   153  	}
   154  
   155  	if periodic {
   156  		now := time.Now().UTC()
   157  		next := sJob.Periodic.Next(now)
   158  		basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s",
   159  			fmt.Sprintf("%s (%s from now)",
   160  				formatTime(next), formatTimeDifference(now, next, time.Second))))
   161  	}
   162  
   163  	c.Ui.Output(formatKV(basic))
   164  
   165  	// Exit early
   166  	if short {
   167  		return 0
   168  	}
   169  
   170  	// Print periodic job information
   171  	if periodic {
   172  		if err := c.outputPeriodicInfo(client, job); err != nil {
   173  			c.Ui.Error(err.Error())
   174  			return 1
   175  		}
   176  
   177  		return 0
   178  	}
   179  
   180  	if err := c.outputJobInfo(client, job); err != nil {
   181  		c.Ui.Error(err.Error())
   182  		return 1
   183  	}
   184  
   185  	return 0
   186  }
   187  
   188  // outputPeriodicInfo prints information about the passed periodic job. If a
   189  // request fails, an error is returned.
   190  func (c *StatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error {
   191  	// Generate the prefix that matches launched jobs from the periodic job.
   192  	prefix := fmt.Sprintf("%s%s", job.ID, structs.PeriodicLaunchSuffix)
   193  	children, _, err := client.Jobs().PrefixList(prefix)
   194  	if err != nil {
   195  		return fmt.Errorf("Error querying job: %s", err)
   196  	}
   197  
   198  	if len(children) == 0 {
   199  		c.Ui.Output("\nNo instances of periodic job found")
   200  		return nil
   201  	}
   202  
   203  	out := make([]string, 1)
   204  	out[0] = "ID|Status"
   205  	for _, child := range children {
   206  		// Ensure that we are only showing jobs whose parent is the requested
   207  		// job.
   208  		if child.ParentID != job.ID {
   209  			continue
   210  		}
   211  
   212  		out = append(out, fmt.Sprintf("%s|%s",
   213  			child.ID,
   214  			child.Status))
   215  	}
   216  
   217  	c.Ui.Output(fmt.Sprintf("\nPreviously launched jobs:\n%s", formatList(out)))
   218  	return nil
   219  }
   220  
   221  // outputJobInfo prints information about the passed non-periodic job. If a
   222  // request fails, an error is returned.
   223  func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error {
   224  	var evals, allocs []string
   225  
   226  	// Query the allocations
   227  	jobAllocs, _, err := client.Jobs().Allocations(job.ID, c.allAllocs, nil)
   228  	if err != nil {
   229  		return fmt.Errorf("Error querying job allocations: %s", err)
   230  	}
   231  
   232  	// Query the evaluations
   233  	jobEvals, _, err := client.Jobs().Evaluations(job.ID, nil)
   234  	if err != nil {
   235  		return fmt.Errorf("Error querying job evaluations: %s", err)
   236  	}
   237  
   238  	// Query the summary
   239  	summary, _, err := client.Jobs().Summary(job.ID, nil)
   240  	if err != nil {
   241  		return fmt.Errorf("Error querying job summary: %s", err)
   242  	}
   243  
   244  	// Format the summary
   245  	c.Ui.Output(c.Colorize().Color("\n[bold]Summary[reset]"))
   246  	if summary != nil {
   247  		summaries := make([]string, len(summary.Summary)+1)
   248  		summaries[0] = "Task Group|Queued|Starting|Running|Failed|Complete|Lost"
   249  		taskGroups := make([]string, 0, len(summary.Summary))
   250  		for taskGroup := range summary.Summary {
   251  			taskGroups = append(taskGroups, taskGroup)
   252  		}
   253  		sort.Strings(taskGroups)
   254  		for idx, taskGroup := range taskGroups {
   255  			tgs := summary.Summary[taskGroup]
   256  			summaries[idx+1] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d",
   257  				taskGroup, tgs.Queued, tgs.Starting,
   258  				tgs.Running, tgs.Failed,
   259  				tgs.Complete, tgs.Lost,
   260  			)
   261  		}
   262  		c.Ui.Output(formatList(summaries))
   263  	}
   264  
   265  	// Determine latest evaluation with failures whose follow up hasn't
   266  	// completed, this is done while formatting
   267  	var latestFailedPlacement *api.Evaluation
   268  	blockedEval := false
   269  
   270  	// Format the evals
   271  	evals = make([]string, len(jobEvals)+1)
   272  	evals[0] = "ID|Priority|Triggered By|Status|Placement Failures"
   273  	for i, eval := range jobEvals {
   274  		failures, _ := evalFailureStatus(eval)
   275  		evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s",
   276  			limit(eval.ID, c.length),
   277  			eval.Priority,
   278  			eval.TriggeredBy,
   279  			eval.Status,
   280  			failures,
   281  		)
   282  
   283  		if eval.Status == "blocked" {
   284  			blockedEval = true
   285  		}
   286  
   287  		if len(eval.FailedTGAllocs) == 0 {
   288  			// Skip evals without failures
   289  			continue
   290  		}
   291  
   292  		if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex {
   293  			latestFailedPlacement = eval
   294  		}
   295  	}
   296  
   297  	if c.verbose || c.evals {
   298  		c.Ui.Output(c.Colorize().Color("\n[bold]Evaluations[reset]"))
   299  		c.Ui.Output(formatList(evals))
   300  	}
   301  
   302  	if blockedEval && latestFailedPlacement != nil {
   303  		c.outputFailedPlacements(latestFailedPlacement)
   304  	}
   305  
   306  	// Format the allocs
   307  	c.Ui.Output(c.Colorize().Color("\n[bold]Allocations[reset]"))
   308  	if len(jobAllocs) > 0 {
   309  		allocs = make([]string, len(jobAllocs)+1)
   310  		allocs[0] = "ID|Eval ID|Node ID|Task Group|Desired|Status|Created At"
   311  		for i, alloc := range jobAllocs {
   312  			allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s|%s",
   313  				limit(alloc.ID, c.length),
   314  				limit(alloc.EvalID, c.length),
   315  				limit(alloc.NodeID, c.length),
   316  				alloc.TaskGroup,
   317  				alloc.DesiredStatus,
   318  				alloc.ClientStatus,
   319  				formatUnixNanoTime(alloc.CreateTime))
   320  		}
   321  
   322  		c.Ui.Output(formatList(allocs))
   323  	} else {
   324  		c.Ui.Output("No allocations placed")
   325  	}
   326  	return nil
   327  }
   328  
   329  func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) {
   330  	if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 {
   331  		return
   332  	}
   333  
   334  	c.Ui.Output(c.Colorize().Color("\n[bold]Placement Failure[reset]"))
   335  
   336  	sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs)
   337  	for i, tg := range sorted {
   338  		if i >= maxFailedTGs {
   339  			break
   340  		}
   341  
   342  		c.Ui.Output(fmt.Sprintf("Task Group %q:", tg))
   343  		metrics := failedEval.FailedTGAllocs[tg]
   344  		c.Ui.Output(formatAllocMetrics(metrics, false, "  "))
   345  		if i != len(sorted)-1 {
   346  			c.Ui.Output("")
   347  		}
   348  	}
   349  
   350  	if len(sorted) > maxFailedTGs {
   351  		trunc := fmt.Sprintf("\nPlacement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID)
   352  		c.Ui.Output(trunc)
   353  	}
   354  }
   355  
   356  // convertApiJob is used to take a *api.Job and convert it to an *struct.Job.
   357  // This function is just a hammer and probably needs to be revisited.
   358  func convertApiJob(in *api.Job) (*structs.Job, error) {
   359  	gob.Register(map[string]interface{}{})
   360  	gob.Register([]interface{}{})
   361  	var structJob *structs.Job
   362  	buf := new(bytes.Buffer)
   363  	if err := gob.NewEncoder(buf).Encode(in); err != nil {
   364  		return nil, err
   365  	}
   366  	if err := gob.NewDecoder(buf).Decode(&structJob); err != nil {
   367  		return nil, err
   368  	}
   369  	return structJob, nil
   370  }
   371  
   372  // list general information about a list of jobs
   373  func createStatusListOutput(jobs []*api.JobListStub) string {
   374  	out := make([]string, len(jobs)+1)
   375  	out[0] = "ID|Type|Priority|Status"
   376  	for i, job := range jobs {
   377  		out[i+1] = fmt.Sprintf("%s|%s|%d|%s",
   378  			job.ID,
   379  			job.Type,
   380  			job.Priority,
   381  			job.Status)
   382  	}
   383  	return formatList(out)
   384  }