github.com/mattyr/nomad@v0.3.3-0.20160919021406-3485a065154a/command/status.go (about) 1 package command 2 3 import ( 4 "bytes" 5 "encoding/gob" 6 "fmt" 7 "sort" 8 "strings" 9 "time" 10 11 "github.com/hashicorp/nomad/api" 12 "github.com/hashicorp/nomad/nomad/structs" 13 ) 14 15 const ( 16 // maxFailedTGs is the maximum number of task groups we show failure reasons 17 // for before defering to eval-status 18 maxFailedTGs = 5 19 ) 20 21 type StatusCommand struct { 22 Meta 23 length int 24 evals bool 25 verbose bool 26 } 27 28 func (c *StatusCommand) Help() string { 29 helpText := ` 30 Usage: nomad status [options] <job> 31 32 Display status information about jobs. If no job ID is given, 33 a list of all known jobs will be dumped. 34 35 General Options: 36 37 ` + generalOptionsUsage() + ` 38 39 Status Options: 40 41 -short 42 Display short output. Used only when a single job is being 43 queried, and drops verbose information about allocations. 44 45 -evals 46 Display the evaluations associated with the job. 47 48 -verbose 49 Display full information. 50 ` 51 return strings.TrimSpace(helpText) 52 } 53 54 func (c *StatusCommand) Synopsis() string { 55 return "Display status information about jobs" 56 } 57 58 func (c *StatusCommand) Run(args []string) int { 59 var short bool 60 61 flags := c.Meta.FlagSet("status", FlagSetClient) 62 flags.Usage = func() { c.Ui.Output(c.Help()) } 63 flags.BoolVar(&short, "short", false, "") 64 flags.BoolVar(&c.evals, "evals", false, "") 65 flags.BoolVar(&c.verbose, "verbose", false, "") 66 67 if err := flags.Parse(args); err != nil { 68 return 1 69 } 70 71 // Check that we either got no jobs or exactly one. 72 args = flags.Args() 73 if len(args) > 1 { 74 c.Ui.Error(c.Help()) 75 return 1 76 } 77 78 // Truncate the id unless full length is requested 79 c.length = shortId 80 if c.verbose { 81 c.length = fullId 82 } 83 84 // Get the HTTP client 85 client, err := c.Meta.Client() 86 if err != nil { 87 c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) 88 return 1 89 } 90 91 // Invoke list mode if no job ID. 92 if len(args) == 0 { 93 jobs, _, err := client.Jobs().List(nil) 94 if err != nil { 95 c.Ui.Error(fmt.Sprintf("Error querying jobs: %s", err)) 96 return 1 97 } 98 99 if len(jobs) == 0 { 100 // No output if we have no jobs 101 c.Ui.Output("No running jobs") 102 } else { 103 c.Ui.Output(createStatusListOutput(jobs)) 104 } 105 return 0 106 } 107 108 // Try querying the job 109 jobID := args[0] 110 jobs, _, err := client.Jobs().PrefixList(jobID) 111 if err != nil { 112 c.Ui.Error(fmt.Sprintf("Error querying job: %s", err)) 113 return 1 114 } 115 if len(jobs) == 0 { 116 c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID)) 117 return 1 118 } 119 if len(jobs) > 1 && strings.TrimSpace(jobID) != jobs[0].ID { 120 c.Ui.Output(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", createStatusListOutput(jobs))) 121 return 0 122 } 123 // Prefix lookup matched a single job 124 job, _, err := client.Jobs().Info(jobs[0].ID, nil) 125 if err != nil { 126 c.Ui.Error(fmt.Sprintf("Error querying job: %s", err)) 127 return 1 128 } 129 130 // Check if it is periodic 131 sJob, err := convertApiJob(job) 132 if err != nil { 133 c.Ui.Error(fmt.Sprintf("Error converting job: %s", err)) 134 return 1 135 } 136 periodic := sJob.IsPeriodic() 137 138 // Format the job info 139 basic := []string{ 140 fmt.Sprintf("ID|%s", job.ID), 141 fmt.Sprintf("Name|%s", job.Name), 142 fmt.Sprintf("Type|%s", job.Type), 143 fmt.Sprintf("Priority|%d", job.Priority), 144 fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")), 145 fmt.Sprintf("Status|%s", job.Status), 146 fmt.Sprintf("Periodic|%v", periodic), 147 } 148 149 if periodic { 150 now := time.Now().UTC() 151 next := sJob.Periodic.Next(now) 152 basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s", 153 fmt.Sprintf("%s (%s from now)", 154 formatTime(next), formatTimeDifference(now, next, time.Second)))) 155 } 156 157 c.Ui.Output(formatKV(basic)) 158 159 // Exit early 160 if short { 161 return 0 162 } 163 164 // Print periodic job information 165 if periodic { 166 if err := c.outputPeriodicInfo(client, job); err != nil { 167 c.Ui.Error(err.Error()) 168 return 1 169 } 170 171 return 0 172 } 173 174 if err := c.outputJobInfo(client, job); err != nil { 175 c.Ui.Error(err.Error()) 176 return 1 177 } 178 179 return 0 180 } 181 182 // outputPeriodicInfo prints information about the passed periodic job. If a 183 // request fails, an error is returned. 184 func (c *StatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error { 185 // Generate the prefix that matches launched jobs from the periodic job. 186 prefix := fmt.Sprintf("%s%s", job.ID, structs.PeriodicLaunchSuffix) 187 children, _, err := client.Jobs().PrefixList(prefix) 188 if err != nil { 189 return fmt.Errorf("Error querying job: %s", err) 190 } 191 192 if len(children) == 0 { 193 c.Ui.Output("\nNo instances of periodic job found") 194 return nil 195 } 196 197 out := make([]string, 1) 198 out[0] = "ID|Status" 199 for _, child := range children { 200 // Ensure that we are only showing jobs whose parent is the requested 201 // job. 202 if child.ParentID != job.ID { 203 continue 204 } 205 206 out = append(out, fmt.Sprintf("%s|%s", 207 child.ID, 208 child.Status)) 209 } 210 211 c.Ui.Output(fmt.Sprintf("\nPreviously launched jobs:\n%s", formatList(out))) 212 return nil 213 } 214 215 // outputJobInfo prints information about the passed non-periodic job. If a 216 // request fails, an error is returned. 217 func (c *StatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { 218 var evals, allocs []string 219 220 // Query the allocations 221 jobAllocs, _, err := client.Jobs().Allocations(job.ID, nil) 222 if err != nil { 223 return fmt.Errorf("Error querying job allocations: %s", err) 224 } 225 226 // Query the evaluations 227 jobEvals, _, err := client.Jobs().Evaluations(job.ID, nil) 228 if err != nil { 229 return fmt.Errorf("Error querying job evaluations: %s", err) 230 } 231 232 // Query the summary 233 summary, _, err := client.Jobs().Summary(job.ID, nil) 234 if err != nil { 235 return fmt.Errorf("Error querying job summary: %s", err) 236 } 237 238 // Format the summary 239 c.Ui.Output(c.Colorize().Color("\n[bold]Summary[reset]")) 240 if summary != nil { 241 summaries := make([]string, len(summary.Summary)+1) 242 summaries[0] = "Task Group|Queued|Starting|Running|Failed|Complete|Lost" 243 taskGroups := make([]string, 0, len(summary.Summary)) 244 for taskGroup := range summary.Summary { 245 taskGroups = append(taskGroups, taskGroup) 246 } 247 sort.Strings(taskGroups) 248 for idx, taskGroup := range taskGroups { 249 tgs := summary.Summary[taskGroup] 250 summaries[idx+1] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d", 251 taskGroup, tgs.Queued, tgs.Starting, 252 tgs.Running, tgs.Failed, 253 tgs.Complete, tgs.Lost, 254 ) 255 } 256 c.Ui.Output(formatList(summaries)) 257 } 258 259 // Determine latest evaluation with failures whose follow up hasn't 260 // completed, this is done while formatting 261 var latestFailedPlacement *api.Evaluation 262 blockedEval := false 263 264 // Format the evals 265 evals = make([]string, len(jobEvals)+1) 266 evals[0] = "ID|Priority|Triggered By|Status|Placement Failures" 267 for i, eval := range jobEvals { 268 failures, _ := evalFailureStatus(eval) 269 evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s", 270 limit(eval.ID, c.length), 271 eval.Priority, 272 eval.TriggeredBy, 273 eval.Status, 274 failures, 275 ) 276 277 if eval.Status == "blocked" { 278 blockedEval = true 279 } 280 281 if len(eval.FailedTGAllocs) == 0 { 282 // Skip evals without failures 283 continue 284 } 285 286 if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex { 287 latestFailedPlacement = eval 288 } 289 } 290 291 if c.verbose || c.evals { 292 c.Ui.Output(c.Colorize().Color("\n[bold]Evaluations[reset]")) 293 c.Ui.Output(formatList(evals)) 294 } 295 296 if blockedEval && latestFailedPlacement != nil { 297 c.outputFailedPlacements(latestFailedPlacement) 298 } 299 300 // Format the allocs 301 c.Ui.Output(c.Colorize().Color("\n[bold]Allocations[reset]")) 302 if len(jobAllocs) > 0 { 303 allocs = make([]string, len(jobAllocs)+1) 304 allocs[0] = "ID|Eval ID|Node ID|Task Group|Desired|Status|Created At" 305 for i, alloc := range jobAllocs { 306 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%s|%s", 307 limit(alloc.ID, c.length), 308 limit(alloc.EvalID, c.length), 309 limit(alloc.NodeID, c.length), 310 alloc.TaskGroup, 311 alloc.DesiredStatus, 312 alloc.ClientStatus, 313 formatUnixNanoTime(alloc.CreateTime)) 314 } 315 316 c.Ui.Output(formatList(allocs)) 317 } else { 318 c.Ui.Output("No allocations placed") 319 } 320 return nil 321 } 322 323 func (c *StatusCommand) outputFailedPlacements(failedEval *api.Evaluation) { 324 if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 { 325 return 326 } 327 328 c.Ui.Output(c.Colorize().Color("\n[bold]Placement Failure[reset]")) 329 330 sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs) 331 for i, tg := range sorted { 332 if i >= maxFailedTGs { 333 break 334 } 335 336 c.Ui.Output(fmt.Sprintf("Task Group %q:", tg)) 337 metrics := failedEval.FailedTGAllocs[tg] 338 c.Ui.Output(formatAllocMetrics(metrics, false, " ")) 339 if i != len(sorted)-1 { 340 c.Ui.Output("") 341 } 342 } 343 344 if len(sorted) > maxFailedTGs { 345 trunc := fmt.Sprintf("\nPlacement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID) 346 c.Ui.Output(trunc) 347 } 348 } 349 350 // convertApiJob is used to take a *api.Job and convert it to an *struct.Job. 351 // This function is just a hammer and probably needs to be revisited. 352 func convertApiJob(in *api.Job) (*structs.Job, error) { 353 gob.Register(map[string]interface{}{}) 354 gob.Register([]interface{}{}) 355 var structJob *structs.Job 356 buf := new(bytes.Buffer) 357 if err := gob.NewEncoder(buf).Encode(in); err != nil { 358 return nil, err 359 } 360 if err := gob.NewDecoder(buf).Decode(&structJob); err != nil { 361 return nil, err 362 } 363 return structJob, nil 364 } 365 366 // list general information about a list of jobs 367 func createStatusListOutput(jobs []*api.JobListStub) string { 368 out := make([]string, len(jobs)+1) 369 out[0] = "ID|Type|Priority|Status" 370 for i, job := range jobs { 371 out[i+1] = fmt.Sprintf("%s|%s|%d|%s", 372 job.ID, 373 job.Type, 374 job.Priority, 375 job.Status) 376 } 377 return formatList(out) 378 }