github.com/djenriquez/nomad-1@v0.8.1/command/job_status.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "sort" 6 "strings" 7 "time" 8 9 "github.com/hashicorp/nomad/api" 10 "github.com/hashicorp/nomad/api/contexts" 11 "github.com/hashicorp/nomad/nomad/structs" 12 "github.com/posener/complete" 13 ) 14 15 const ( 16 // maxFailedTGs is the maximum number of task groups we show failure reasons 17 // for before deferring to eval-status 18 maxFailedTGs = 5 19 ) 20 21 type JobStatusCommand struct { 22 Meta 23 length int 24 evals bool 25 allAllocs bool 26 verbose bool 27 } 28 29 func (c *JobStatusCommand) Help() string { 30 helpText := ` 31 Usage: nomad status [options] <job> 32 33 Display status information about a job. If no job ID is given, a list of all 34 known jobs will be displayed. 35 36 General Options: 37 38 ` + generalOptionsUsage() + ` 39 40 Status Options: 41 42 -short 43 Display short output. Used only when a single job is being 44 queried, and drops verbose information about allocations. 45 46 -evals 47 Display the evaluations associated with the job. 48 49 -all-allocs 50 Display all allocations matching the job ID, including those from an older 51 instance of the job. 52 53 -verbose 54 Display full information. 55 ` 56 return strings.TrimSpace(helpText) 57 } 58 59 func (c *JobStatusCommand) Synopsis() string { 60 return "Display status information about a job" 61 } 62 63 func (c *JobStatusCommand) AutocompleteFlags() complete.Flags { 64 return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), 65 complete.Flags{ 66 "-all-allocs": complete.PredictNothing, 67 "-evals": complete.PredictNothing, 68 "-short": complete.PredictNothing, 69 "-verbose": complete.PredictNothing, 70 }) 71 } 72 73 func (c *JobStatusCommand) AutocompleteArgs() complete.Predictor { 74 return complete.PredictFunc(func(a complete.Args) []string { 75 client, err := c.Meta.Client() 76 if err != nil { 77 return nil 78 } 79 80 resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Jobs, nil) 81 if err != nil { 82 return []string{} 83 } 84 return resp.Matches[contexts.Jobs] 85 }) 86 } 87 88 func (c *JobStatusCommand) Run(args []string) int { 89 var short bool 90 91 flags := c.Meta.FlagSet("status", FlagSetClient) 92 flags.Usage = func() { c.Ui.Output(c.Help()) } 93 flags.BoolVar(&short, "short", false, "") 94 flags.BoolVar(&c.evals, "evals", false, "") 95 flags.BoolVar(&c.allAllocs, "all-allocs", false, "") 96 flags.BoolVar(&c.verbose, "verbose", false, "") 97 98 if err := flags.Parse(args); err != nil { 99 return 1 100 } 101 102 // Check that we either got no jobs or exactly one. 103 args = flags.Args() 104 if len(args) > 1 { 105 c.Ui.Error(c.Help()) 106 return 1 107 } 108 109 // Truncate the id unless full length is requested 110 c.length = shortId 111 if c.verbose { 112 c.length = fullId 113 } 114 115 // Get the HTTP client 116 client, err := c.Meta.Client() 117 if err != nil { 118 c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) 119 return 1 120 } 121 122 // Invoke list mode if no job ID. 123 if len(args) == 0 { 124 jobs, _, err := client.Jobs().List(nil) 125 if err != nil { 126 c.Ui.Error(fmt.Sprintf("Error querying jobs: %s", err)) 127 return 1 128 } 129 130 if len(jobs) == 0 { 131 // No output if we have no jobs 132 c.Ui.Output("No running jobs") 133 } else { 134 c.Ui.Output(createStatusListOutput(jobs)) 135 } 136 return 0 137 } 138 139 // Try querying the job 140 jobID := args[0] 141 142 jobs, _, err := client.Jobs().PrefixList(jobID) 143 if err != nil { 144 c.Ui.Error(fmt.Sprintf("Error querying job: %s", err)) 145 return 1 146 } 147 if len(jobs) == 0 { 148 c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID)) 149 return 1 150 } 151 if len(jobs) > 1 && strings.TrimSpace(jobID) != jobs[0].ID { 152 c.Ui.Error(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", createStatusListOutput(jobs))) 153 return 1 154 } 155 // Prefix lookup matched a single job 156 job, _, err := client.Jobs().Info(jobs[0].ID, nil) 157 if err != nil { 158 c.Ui.Error(fmt.Sprintf("Error querying job: %s", err)) 159 return 1 160 } 161 162 periodic := job.IsPeriodic() 163 parameterized := job.IsParameterized() 164 165 // Format the job info 166 basic := []string{ 167 fmt.Sprintf("ID|%s", *job.ID), 168 fmt.Sprintf("Name|%s", *job.Name), 169 fmt.Sprintf("Submit Date|%s", formatTime(getSubmitTime(job))), 170 fmt.Sprintf("Type|%s", *job.Type), 171 fmt.Sprintf("Priority|%d", *job.Priority), 172 fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")), 173 fmt.Sprintf("Status|%s", getStatusString(*job.Status, job.Stop)), 174 fmt.Sprintf("Periodic|%v", periodic), 175 fmt.Sprintf("Parameterized|%v", parameterized), 176 } 177 178 if periodic && !parameterized { 179 if *job.Stop { 180 basic = append(basic, fmt.Sprintf("Next Periodic Launch|none (job stopped)")) 181 } else { 182 location, err := job.Periodic.GetLocation() 183 if err == nil { 184 now := time.Now().In(location) 185 next := job.Periodic.Next(now) 186 basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s", 187 fmt.Sprintf("%s (%s from now)", 188 formatTime(next), formatTimeDifference(now, next, time.Second)))) 189 } 190 } 191 } 192 193 c.Ui.Output(formatKV(basic)) 194 195 // Exit early 196 if short { 197 return 0 198 } 199 200 // Print periodic job information 201 if periodic && !parameterized { 202 if err := c.outputPeriodicInfo(client, job); err != nil { 203 c.Ui.Error(err.Error()) 204 return 1 205 } 206 } else if parameterized { 207 if err := c.outputParameterizedInfo(client, job); err != nil { 208 c.Ui.Error(err.Error()) 209 return 1 210 } 211 } else { 212 if err := c.outputJobInfo(client, job); err != nil { 213 c.Ui.Error(err.Error()) 214 return 1 215 } 216 } 217 218 return 0 219 } 220 221 // outputPeriodicInfo prints information about the passed periodic job. If a 222 // request fails, an error is returned. 223 func (c *JobStatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error { 224 // Output the summary 225 if err := c.outputJobSummary(client, job); err != nil { 226 return err 227 } 228 229 // Generate the prefix that matches launched jobs from the periodic job. 230 prefix := fmt.Sprintf("%s%s", *job.ID, structs.PeriodicLaunchSuffix) 231 children, _, err := client.Jobs().PrefixList(prefix) 232 if err != nil { 233 return fmt.Errorf("Error querying job: %s", err) 234 } 235 236 if len(children) == 0 { 237 c.Ui.Output("\nNo instances of periodic job found") 238 return nil 239 } 240 241 out := make([]string, 1) 242 out[0] = "ID|Status" 243 for _, child := range children { 244 // Ensure that we are only showing jobs whose parent is the requested 245 // job. 246 if child.ParentID != *job.ID { 247 continue 248 } 249 250 out = append(out, fmt.Sprintf("%s|%s", 251 child.ID, 252 child.Status)) 253 } 254 255 c.Ui.Output(c.Colorize().Color("\n[bold]Previously Launched Jobs[reset]")) 256 c.Ui.Output(formatList(out)) 257 return nil 258 } 259 260 // outputParameterizedInfo prints information about a parameterized job. If a 261 // request fails, an error is returned. 262 func (c *JobStatusCommand) outputParameterizedInfo(client *api.Client, job *api.Job) error { 263 // Output parameterized job details 264 c.Ui.Output(c.Colorize().Color("\n[bold]Parameterized Job[reset]")) 265 parameterizedJob := make([]string, 3) 266 parameterizedJob[0] = fmt.Sprintf("Payload|%s", job.ParameterizedJob.Payload) 267 parameterizedJob[1] = fmt.Sprintf("Required Metadata|%v", strings.Join(job.ParameterizedJob.MetaRequired, ", ")) 268 parameterizedJob[2] = fmt.Sprintf("Optional Metadata|%v", strings.Join(job.ParameterizedJob.MetaOptional, ", ")) 269 c.Ui.Output(formatKV(parameterizedJob)) 270 271 // Output the summary 272 if err := c.outputJobSummary(client, job); err != nil { 273 return err 274 } 275 276 // Generate the prefix that matches launched jobs from the parameterized job. 277 prefix := fmt.Sprintf("%s%s", *job.ID, structs.DispatchLaunchSuffix) 278 children, _, err := client.Jobs().PrefixList(prefix) 279 if err != nil { 280 return fmt.Errorf("Error querying job: %s", err) 281 } 282 283 if len(children) == 0 { 284 c.Ui.Output("\nNo dispatched instances of parameterized job found") 285 return nil 286 } 287 288 out := make([]string, 1) 289 out[0] = "ID|Status" 290 for _, child := range children { 291 // Ensure that we are only showing jobs whose parent is the requested 292 // job. 293 if child.ParentID != *job.ID { 294 continue 295 } 296 297 out = append(out, fmt.Sprintf("%s|%s", 298 child.ID, 299 child.Status)) 300 } 301 302 c.Ui.Output(c.Colorize().Color("\n[bold]Dispatched Jobs[reset]")) 303 c.Ui.Output(formatList(out)) 304 return nil 305 } 306 307 // outputJobInfo prints information about the passed non-periodic job. If a 308 // request fails, an error is returned. 309 func (c *JobStatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { 310 311 // Query the allocations 312 jobAllocs, _, err := client.Jobs().Allocations(*job.ID, c.allAllocs, nil) 313 if err != nil { 314 return fmt.Errorf("Error querying job allocations: %s", err) 315 } 316 317 // Query the evaluations 318 jobEvals, _, err := client.Jobs().Evaluations(*job.ID, nil) 319 if err != nil { 320 return fmt.Errorf("Error querying job evaluations: %s", err) 321 } 322 323 latestDeployment, _, err := client.Jobs().LatestDeployment(*job.ID, nil) 324 if err != nil { 325 return fmt.Errorf("Error querying latest job deployment: %s", err) 326 } 327 328 // Output the summary 329 if err := c.outputJobSummary(client, job); err != nil { 330 return err 331 } 332 333 // Determine latest evaluation with failures whose follow up hasn't 334 // completed, this is done while formatting 335 var latestFailedPlacement *api.Evaluation 336 blockedEval := false 337 338 // Format the evals 339 evals := make([]string, len(jobEvals)+1) 340 evals[0] = "ID|Priority|Triggered By|Status|Placement Failures" 341 for i, eval := range jobEvals { 342 failures, _ := evalFailureStatus(eval) 343 evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s", 344 limit(eval.ID, c.length), 345 eval.Priority, 346 eval.TriggeredBy, 347 eval.Status, 348 failures, 349 ) 350 351 if eval.Status == "blocked" { 352 blockedEval = true 353 } 354 355 if len(eval.FailedTGAllocs) == 0 { 356 // Skip evals without failures 357 continue 358 } 359 360 if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex { 361 latestFailedPlacement = eval 362 } 363 } 364 365 if c.verbose || c.evals { 366 c.Ui.Output(c.Colorize().Color("\n[bold]Evaluations[reset]")) 367 c.Ui.Output(formatList(evals)) 368 } 369 370 if blockedEval && latestFailedPlacement != nil { 371 c.outputFailedPlacements(latestFailedPlacement) 372 } 373 374 c.outputReschedulingEvals(client, job, jobAllocs, c.length) 375 376 if latestDeployment != nil { 377 c.Ui.Output(c.Colorize().Color("\n[bold]Latest Deployment[reset]")) 378 c.Ui.Output(c.Colorize().Color(c.formatDeployment(latestDeployment))) 379 } 380 381 // Format the allocs 382 c.Ui.Output(c.Colorize().Color("\n[bold]Allocations[reset]")) 383 c.Ui.Output(formatAllocListStubs(jobAllocs, c.verbose, c.length)) 384 return nil 385 } 386 387 func (c *JobStatusCommand) formatDeployment(d *api.Deployment) string { 388 // Format the high-level elements 389 high := []string{ 390 fmt.Sprintf("ID|%s", limit(d.ID, c.length)), 391 fmt.Sprintf("Status|%s", d.Status), 392 fmt.Sprintf("Description|%s", d.StatusDescription), 393 } 394 395 base := formatKV(high) 396 if len(d.TaskGroups) == 0 { 397 return base 398 } 399 base += "\n\n[bold]Deployed[reset]\n" 400 base += formatDeploymentGroups(d, c.length) 401 return base 402 } 403 404 func formatAllocListStubs(stubs []*api.AllocationListStub, verbose bool, uuidLength int) string { 405 if len(stubs) == 0 { 406 return "No allocations placed" 407 } 408 409 allocs := make([]string, len(stubs)+1) 410 if verbose { 411 allocs[0] = "ID|Eval ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 412 for i, alloc := range stubs { 413 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%d|%s|%s|%s|%s", 414 limit(alloc.ID, uuidLength), 415 limit(alloc.EvalID, uuidLength), 416 limit(alloc.NodeID, uuidLength), 417 alloc.TaskGroup, 418 alloc.JobVersion, 419 alloc.DesiredStatus, 420 alloc.ClientStatus, 421 formatUnixNanoTime(alloc.CreateTime), 422 formatUnixNanoTime(alloc.ModifyTime)) 423 } 424 } else { 425 allocs[0] = "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 426 for i, alloc := range stubs { 427 now := time.Now() 428 createTimePretty := prettyTimeDiff(time.Unix(0, alloc.CreateTime), now) 429 modTimePretty := prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now) 430 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s", 431 limit(alloc.ID, uuidLength), 432 limit(alloc.NodeID, uuidLength), 433 alloc.TaskGroup, 434 alloc.JobVersion, 435 alloc.DesiredStatus, 436 alloc.ClientStatus, 437 createTimePretty, 438 modTimePretty) 439 } 440 } 441 442 return formatList(allocs) 443 } 444 445 func formatAllocList(allocations []*api.Allocation, verbose bool, uuidLength int) string { 446 if len(allocations) == 0 { 447 return "No allocations placed" 448 } 449 450 allocs := make([]string, len(allocations)+1) 451 if verbose { 452 allocs[0] = "ID|Eval ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 453 for i, alloc := range allocations { 454 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%d|%s|%s|%s|%s", 455 limit(alloc.ID, uuidLength), 456 limit(alloc.EvalID, uuidLength), 457 limit(alloc.NodeID, uuidLength), 458 alloc.TaskGroup, 459 getVersion(alloc.Job), 460 alloc.DesiredStatus, 461 alloc.ClientStatus, 462 formatUnixNanoTime(alloc.CreateTime), 463 formatUnixNanoTime(alloc.ModifyTime)) 464 } 465 } else { 466 allocs[0] = "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 467 for i, alloc := range allocations { 468 now := time.Now() 469 createTimePretty := prettyTimeDiff(time.Unix(0, alloc.CreateTime), now) 470 modTimePretty := prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now) 471 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s", 472 limit(alloc.ID, uuidLength), 473 limit(alloc.NodeID, uuidLength), 474 alloc.TaskGroup, 475 getVersion(alloc.Job), 476 alloc.DesiredStatus, 477 alloc.ClientStatus, 478 createTimePretty, 479 modTimePretty) 480 } 481 } 482 483 return formatList(allocs) 484 } 485 486 // outputJobSummary displays the given jobs summary and children job summary 487 // where appropriate 488 func (c *JobStatusCommand) outputJobSummary(client *api.Client, job *api.Job) error { 489 // Query the summary 490 summary, _, err := client.Jobs().Summary(*job.ID, nil) 491 if err != nil { 492 return fmt.Errorf("Error querying job summary: %s", err) 493 } 494 495 if summary == nil { 496 return nil 497 } 498 499 periodic := job.IsPeriodic() 500 parameterizedJob := job.IsParameterized() 501 502 // Print the summary 503 if !periodic && !parameterizedJob { 504 c.Ui.Output(c.Colorize().Color("\n[bold]Summary[reset]")) 505 summaries := make([]string, len(summary.Summary)+1) 506 summaries[0] = "Task Group|Queued|Starting|Running|Failed|Complete|Lost" 507 taskGroups := make([]string, 0, len(summary.Summary)) 508 for taskGroup := range summary.Summary { 509 taskGroups = append(taskGroups, taskGroup) 510 } 511 sort.Strings(taskGroups) 512 for idx, taskGroup := range taskGroups { 513 tgs := summary.Summary[taskGroup] 514 summaries[idx+1] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d", 515 taskGroup, tgs.Queued, tgs.Starting, 516 tgs.Running, tgs.Failed, 517 tgs.Complete, tgs.Lost, 518 ) 519 } 520 c.Ui.Output(formatList(summaries)) 521 } 522 523 // Always display the summary if we are periodic or parameterized, but 524 // only display if the summary is non-zero on normal jobs 525 if summary.Children != nil && (parameterizedJob || periodic || summary.Children.Sum() > 0) { 526 if parameterizedJob { 527 c.Ui.Output(c.Colorize().Color("\n[bold]Parameterized Job Summary[reset]")) 528 } else { 529 c.Ui.Output(c.Colorize().Color("\n[bold]Children Job Summary[reset]")) 530 } 531 summaries := make([]string, 2) 532 summaries[0] = "Pending|Running|Dead" 533 summaries[1] = fmt.Sprintf("%d|%d|%d", 534 summary.Children.Pending, summary.Children.Running, summary.Children.Dead) 535 c.Ui.Output(formatList(summaries)) 536 } 537 538 return nil 539 } 540 541 // outputReschedulingEvals displays eval IDs and time for any 542 // delayed evaluations by task group 543 func (c *JobStatusCommand) outputReschedulingEvals(client *api.Client, job *api.Job, allocListStubs []*api.AllocationListStub, uuidLength int) error { 544 // Get the most recent alloc ID by task group 545 546 mostRecentAllocs := make(map[string]*api.AllocationListStub) 547 for _, alloc := range allocListStubs { 548 a, ok := mostRecentAllocs[alloc.TaskGroup] 549 if !ok || alloc.ModifyTime > a.ModifyTime { 550 mostRecentAllocs[alloc.TaskGroup] = alloc 551 } 552 } 553 554 followUpEvalIds := make(map[string]string) 555 for tg, alloc := range mostRecentAllocs { 556 if alloc.FollowupEvalID != "" { 557 followUpEvalIds[tg] = alloc.FollowupEvalID 558 } 559 } 560 561 if len(followUpEvalIds) == 0 { 562 return nil 563 } 564 // Print the reschedule info section 565 var delayedEvalInfos []string 566 567 taskGroups := make([]string, 0, len(followUpEvalIds)) 568 for taskGroup := range followUpEvalIds { 569 taskGroups = append(taskGroups, taskGroup) 570 } 571 sort.Strings(taskGroups) 572 var evalDetails []string 573 for _, taskGroup := range taskGroups { 574 evalID := followUpEvalIds[taskGroup] 575 evaluation, _, err := client.Evaluations().Info(evalID, nil) 576 // Eval time is not critical output, 577 // so don't return it on errors, if its not set, or its already in the past 578 if err != nil || evaluation.WaitUntil.IsZero() || time.Now().After(evaluation.WaitUntil) { 579 continue 580 } 581 evalTime := prettyTimeDiff(evaluation.WaitUntil, time.Now()) 582 if c.verbose { 583 delayedEvalInfos = append(delayedEvalInfos, "Task Group|Reschedule Policy|Eval ID|Eval Time") 584 rp := job.LookupTaskGroup(taskGroup).ReschedulePolicy 585 evalDetails = append(evalDetails, fmt.Sprintf("%s|%s|%s|%s", taskGroup, rp.String(), limit(evalID, uuidLength), evalTime)) 586 } else { 587 delayedEvalInfos = append(delayedEvalInfos, "Task Group|Eval ID|Eval Time") 588 evalDetails = append(evalDetails, fmt.Sprintf("%s|%s|%s", taskGroup, limit(evalID, uuidLength), evalTime)) 589 } 590 } 591 if len(evalDetails) == 0 { 592 return nil 593 } 594 // Only show this section if there is pending evals 595 delayedEvalInfos = append(delayedEvalInfos, evalDetails...) 596 c.Ui.Output(c.Colorize().Color("\n[bold]Future Rescheduling Attempts[reset]")) 597 c.Ui.Output(formatList(delayedEvalInfos)) 598 return nil 599 } 600 601 func (c *JobStatusCommand) outputFailedPlacements(failedEval *api.Evaluation) { 602 if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 { 603 return 604 } 605 606 c.Ui.Output(c.Colorize().Color("\n[bold]Placement Failure[reset]")) 607 608 sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs) 609 for i, tg := range sorted { 610 if i >= maxFailedTGs { 611 break 612 } 613 614 c.Ui.Output(fmt.Sprintf("Task Group %q:", tg)) 615 metrics := failedEval.FailedTGAllocs[tg] 616 c.Ui.Output(formatAllocMetrics(metrics, false, " ")) 617 if i != len(sorted)-1 { 618 c.Ui.Output("") 619 } 620 } 621 622 if len(sorted) > maxFailedTGs { 623 trunc := fmt.Sprintf("\nPlacement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID) 624 c.Ui.Output(trunc) 625 } 626 } 627 628 // list general information about a list of jobs 629 func createStatusListOutput(jobs []*api.JobListStub) string { 630 out := make([]string, len(jobs)+1) 631 out[0] = "ID|Type|Priority|Status|Submit Date" 632 for i, job := range jobs { 633 out[i+1] = fmt.Sprintf("%s|%s|%d|%s|%s", 634 job.ID, 635 getTypeString(job), 636 job.Priority, 637 getStatusString(job.Status, &job.Stop), 638 formatTime(time.Unix(0, job.SubmitTime))) 639 } 640 return formatList(out) 641 } 642 643 func getTypeString(job *api.JobListStub) string { 644 t := job.Type 645 646 if job.Periodic { 647 t += "/periodic" 648 } 649 650 if job.ParameterizedJob { 651 t += "/parameterized" 652 } 653 654 return t 655 } 656 657 func getStatusString(status string, stop *bool) string { 658 if stop != nil && *stop { 659 return fmt.Sprintf("%s (stopped)", status) 660 } 661 return status 662 }