github.com/mattyr/nomad@v0.3.3-0.20160919021406-3485a065154a/command/status.go (about)

     1  package command
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/gob"
     6  	"fmt"
     7  	"sort"
     8  	"strings"
     9  	"time"
    10  
    11  	"github.com/hashicorp/nomad/api"
    12  	"github.com/hashicorp/nomad/nomad/structs"
    13  )
    14  
const (
	// maxFailedTGs is the maximum number of task groups we show failure reasons
	// for before deferring to eval-status
	maxFailedTGs = 5
)
    20  
// StatusCommand implements the "status" command, which displays status
// information about jobs.
//
// The unexported fields are populated by Run:
//   - length is the identifier display length passed to limit; it is shortId
//     by default and fullId when -verbose is given.
//   - evals is set by the -evals flag and causes the evaluations table to be
//     printed.
//   - verbose is set by the -verbose flag; it selects full-length identifiers
//     and also causes the evaluations table to be printed.
type StatusCommand struct {
	Meta
	length  int
	evals   bool
	verbose bool
}
    27  
    28  func (c *StatusCommand) Help() string {
    29  	helpText := `
    30  Usage: nomad status [options] <job>
    31  
    32    Display status information about jobs. If no job ID is given,
    33    a list of all known jobs will be dumped.
    34  
    35  General Options:
    36  
    37    ` + generalOptionsUsage() + `
    38  
    39  Status Options:
    40  
    41    -short
    42      Display short output. Used only when a single job is being
    43      queried, and drops verbose information about allocations.
    44  
    45    -evals
    46      Display the evaluations associated with the job.
    47  
    48    -verbose
    49      Display full information.
    50  `
    51  	return strings.TrimSpace(helpText)
    52  }
    53  
    54  func (c *StatusCommand) Synopsis() string {
    55  	return "Display status information about jobs"
    56  }
    57  
// Run executes the status command with the passed command-line arguments.
//
// With no positional argument it lists all jobs. With one argument it treats
// the argument as a job ID or ID prefix: if the prefix is ambiguous the
// matching jobs are listed, otherwise details for the single matching job are
// printed. More than one positional argument is a usage error.
//
// It returns 0 on success and 1 on a flag parsing, usage, client
// initialization, query, or conversion error.
func (c *StatusCommand) Run(args []string) int {
	var short bool

	flags := c.Meta.FlagSet("status", FlagSetClient)
	flags.Usage = func() { c.Ui.Output(c.Help()) }
	flags.BoolVar(&short, "short", false, "")
	flags.BoolVar(&c.evals, "evals", false, "")
	flags.BoolVar(&c.verbose, "verbose", false, "")

	if err := flags.Parse(args); err != nil {
		return 1
	}

	// Check that we either got no jobs or exactly one.
	args = flags.Args()
	if len(args) > 1 {
		c.Ui.Error(c.Help())
		return 1
	}

	// Truncate the id unless full length is requested
	c.length = shortId
	if c.verbose {
		c.length = fullId
	}

	// Get the HTTP client
	client, err := c.Meta.Client()
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err))
		return 1
	}

	// Invoke list mode if no job ID.
	if len(args) == 0 {
		jobs, _, err := client.Jobs().List(nil)
		if err != nil {
			c.Ui.Error(fmt.Sprintf("Error querying jobs: %s", err))
			return 1
		}

		if len(jobs) == 0 {
			// An empty job list is not an error; print a notice instead of
			// an empty table.
			c.Ui.Output("No running jobs")
		} else {
			c.Ui.Output(createStatusListOutput(jobs))
		}
		return 0
	}

	// Try querying the job
	jobID := args[0]
	jobs, _, err := client.Jobs().PrefixList(jobID)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
		return 1
	}
	if len(jobs) == 0 {
		c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID))
		return 1
	}
	// Several jobs share the prefix: list them all, unless the argument is
	// exactly the ID of the first match, in which case that job is shown.
	if len(jobs) > 1 && strings.TrimSpace(jobID) != jobs[0].ID {
		c.Ui.Output(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", createStatusListOutput(jobs)))
		return 0
	}
	// Prefix lookup matched a single job
	job, _, err := client.Jobs().Info(jobs[0].ID, nil)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error querying job: %s", err))
		return 1
	}

	// Check if it is periodic. The job is converted to a *structs.Job because
	// IsPeriodic and Periodic.Next are called on that representation below.
	sJob, err := convertApiJob(job)
	if err != nil {
		c.Ui.Error(fmt.Sprintf("Error converting job: %s", err))
		return 1
	}
	periodic := sJob.IsPeriodic()

	// Format the job info
	basic := []string{
		fmt.Sprintf("ID|%s", job.ID),
		fmt.Sprintf("Name|%s", job.Name),
		fmt.Sprintf("Type|%s", job.Type),
		fmt.Sprintf("Priority|%d", job.Priority),
		fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")),
		fmt.Sprintf("Status|%s", job.Status),
		fmt.Sprintf("Periodic|%v", periodic),
	}

	// For periodic jobs also show the next launch time, computed relative to
	// the current UTC time, together with how far away it is.
	if periodic {
		now := time.Now().UTC()
		next := sJob.Periodic.Next(now)
		basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s",
			fmt.Sprintf("%s (%s from now)",
				formatTime(next), formatTimeDifference(now, next, time.Second))))
	}

	c.Ui.Output(formatKV(basic))

	// Exit early: -short stops after the basic job information.
	if short {
		return 0
	}

	// Print periodic job information
	if periodic {
		if err := c.outputPeriodicInfo(client, job); err != nil {
			c.Ui.Error(err.Error())
			return 1
		}

		return 0
	}

	// Non-periodic jobs get the summary, evaluations and allocations.
	if err := c.outputJobInfo(client, job); err != nil {
		c.Ui.Error(err.Error())
		return 1
	}

	return 0
}
   181  
   182  // outputPeriodicInfo prints information about the passed periodic job. If a
   183  // request fails, an error is returned.
   184  func (c *StatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error {
   185  	// Generate the prefix that matches launched jobs from the periodic job.
   186  	prefix := fmt.Sprintf("%s%s", job.ID, structs.PeriodicLaunchSuffix)
   187  	children, _, err := client.Jobs().PrefixList(prefix)
   188  	if err != nil {
   189  		return fmt.Errorf("Error querying job: %s", err)
   190  	}
   191  
   192  	if len(children) == 0 {
   193  		c.Ui.Output("\nNo instances of periodic job found")
   194  		return nil
   195  	}
   196  
   197  	out := make([]string, 1)
   198  	out[0] = "ID|Status"
   199  	for _, child := range children {
   200  		// Ensure that we are only showing jobs whose parent is the requested
   201  		// job.
   202  		if child.ParentID != job.ID {
   203  			continue
   204  		}
   205  
   206  		out = append(out, fmt.Sprintf("%s|%s",
   207  			child.ID,
   208  			child.Status))
   209  	}
   210  
   211  	c.Ui.Output(fmt.Sprintf("\nPreviously launched jobs:\n%s", formatList(out)))
   212  	return nil
   213  }
   214  
   215  // outputJobInfo prints information about the passed non-periodic job. If a
   216  // request fails, an error is returned.
   217  func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error {
   218  	var evals, allocs []string
   219  
   220  	// Query the allocations
   221  	jobAllocs, _, err := client.Jobs().Allocations(job.ID, nil)
   222  	if err != nil {
   223  		return fmt.Errorf("Error querying job allocations: %s", err)
   224  	}
   225  
   226  	// Query the evaluations
   227  	jobEvals, _, err := client.Jobs().Evaluations(job.ID, nil)
   228  	if err != nil {
   229  		return fmt.Errorf("Error querying job evaluations: %s", err)
   230  	}
   231  
   232  	// Query the summary
   233  	summary, _, err := client.Jobs().Summary(job.ID, nil)
   234  	if err != nil {
   235  		return fmt.Errorf("Error querying job summary: %s", err)
   236  	}
   237  
   238  	// Format the summary
   239  	c.Ui.Output(c.Colorize().Color("\n[bold]Summary[reset]"))
   240  	if summary != nil {
   241  		summaries := make([]string, len(summary.Summary)+1)
   242  		summaries[0] = "Task Group|Queued|Starting|Running|Failed|Complete|Lost"
   243  		taskGroups := make([]string, 0, len(summary.Summary))
   244  		for taskGroup := range summary.Summary {
   245  			taskGroups = append(taskGroups, taskGroup)
   246  		}
   247  		sort.Strings(taskGroups)
   248  		for idx, taskGroup := range taskGroups {
   249  			tgs := summary.Summary[taskGroup]
   250  			summaries[idx+1] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d",
   251  				taskGroup, tgs.Queued, tgs.Starting,
   252  				tgs.Running, tgs.Failed,
   253  				tgs.Complete, tgs.Lost,
   254  			)
   255  		}
   256  		c.Ui.Output(formatList(summaries))
   257  	}
   258  
   259  	// Determine latest evaluation with failures whose follow up hasn't
   260  	// completed, this is done while formatting
   261  	var latestFailedPlacement *api.Evaluation
   262  	blockedEval := false
   263  
   264  	// Format the evals
   265  	evals = make([]string, len(jobEvals)+1)
   266  	evals[0] = "ID|Priority|Triggered By|Status|Placement Failures"
   267  	for i, eval := range jobEvals {
   268  		failures, _ := evalFailureStatus(eval)
   269  		evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s",
   270  			limit(eval.ID, c.length),
   271  			eval.Priority,
   272  			eval.TriggeredBy,
   273  			eval.Status,
   274  			failures,
   275  		)
   276  
   277  		if eval.Status == "blocked" {
   278  			blockedEval = true
   279  		}
   280  
   281  		if len(eval.FailedTGAllocs) == 0 {
   282  			// Skip evals without failures
   283  			continue
   284  		}
   285  
   286  		if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex {
   287  			latestFailedPlacement = eval
   288  		}
   289  	}
   290  
   291  	if c.verbose || c.evals {
   292  		c.Ui.Output(c.Colorize().Color("\n[bold]Evaluations[reset]"))
   293  		c.Ui.Output(formatList(evals))
   294  	}
   295  
   296  	if blockedEval && latestFailedPlacement != nil {
   297  		c.outputFailedPlacements(latestFailedPlacement)
   298  	}
   299  
   300  	// Format the allocs
   301  	c.Ui.Output(c.Colorize().Color("\n[bold]Allocations[reset]"))
   302  	if len(jobAllocs) > 0 {
   303  		allocs = make([]string, len(jobAllocs)+1)
   304  		allocs[0] = "ID|Eval ID|Node ID|Task Group|Desired|Status|Created At"
   305  		for i, alloc := range jobAllocs {
   306  			allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s|%s",
   307  				limit(alloc.ID, c.length),
   308  				limit(alloc.EvalID, c.length),
   309  				limit(alloc.NodeID, c.length),
   310  				alloc.TaskGroup,
   311  				alloc.DesiredStatus,
   312  				alloc.ClientStatus,
   313  				formatUnixNanoTime(alloc.CreateTime))
   314  		}
   315  
   316  		c.Ui.Output(formatList(allocs))
   317  	} else {
   318  		c.Ui.Output("No allocations placed")
   319  	}
   320  	return nil
   321  }
   322  
   323  func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) {
   324  	if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 {
   325  		return
   326  	}
   327  
   328  	c.Ui.Output(c.Colorize().Color("\n[bold]Placement Failure[reset]"))
   329  
   330  	sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs)
   331  	for i, tg := range sorted {
   332  		if i >= maxFailedTGs {
   333  			break
   334  		}
   335  
   336  		c.Ui.Output(fmt.Sprintf("Task Group %q:", tg))
   337  		metrics := failedEval.FailedTGAllocs[tg]
   338  		c.Ui.Output(formatAllocMetrics(metrics, false, "  "))
   339  		if i != len(sorted)-1 {
   340  			c.Ui.Output("")
   341  		}
   342  	}
   343  
   344  	if len(sorted) > maxFailedTGs {
   345  		trunc := fmt.Sprintf("\nPlacement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID)
   346  		c.Ui.Output(trunc)
   347  	}
   348  }
   349  
   350  // convertApiJob is used to take a *api.Job and convert it to an *struct.Job.
   351  // This function is just a hammer and probably needs to be revisited.
   352  func convertApiJob(in *api.Job) (*structs.Job, error) {
   353  	gob.Register(map[string]interface{}{})
   354  	gob.Register([]interface{}{})
   355  	var structJob *structs.Job
   356  	buf := new(bytes.Buffer)
   357  	if err := gob.NewEncoder(buf).Encode(in); err != nil {
   358  		return nil, err
   359  	}
   360  	if err := gob.NewDecoder(buf).Decode(&structJob); err != nil {
   361  		return nil, err
   362  	}
   363  	return structJob, nil
   364  }
   365  
   366  // list general information about a list of jobs
   367  func createStatusListOutput(jobs []*api.JobListStub) string {
   368  	out := make([]string, len(jobs)+1)
   369  	out[0] = "ID|Type|Priority|Status"
   370  	for i, job := range jobs {
   371  		out[i+1] = fmt.Sprintf("%s|%s|%d|%s",
   372  			job.ID,
   373  			job.Type,
   374  			job.Priority,
   375  			job.Status)
   376  	}
   377  	return formatList(out)
   378  }