github.com/uchennaokeke444/nomad@v0.11.8/command/job_status.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "sort" 6 "strings" 7 "time" 8 9 "github.com/hashicorp/nomad/api" 10 "github.com/hashicorp/nomad/api/contexts" 11 "github.com/hashicorp/nomad/nomad/structs" 12 "github.com/posener/complete" 13 ) 14 15 const ( 16 // maxFailedTGs is the maximum number of task groups we show failure reasons 17 // for before deferring to eval-status 18 maxFailedTGs = 5 19 ) 20 21 type JobStatusCommand struct { 22 Meta 23 length int 24 evals bool 25 allAllocs bool 26 verbose bool 27 } 28 29 func (c *JobStatusCommand) Help() string { 30 helpText := ` 31 Usage: nomad status [options] <job> 32 33 Display status information about a job. If no job ID is given, a list of all 34 known jobs will be displayed. 35 36 General Options: 37 38 ` + generalOptionsUsage() + ` 39 40 Status Options: 41 42 -short 43 Display short output. Used only when a single job is being 44 queried, and drops verbose information about allocations. 45 46 -evals 47 Display the evaluations associated with the job. 48 49 -all-allocs 50 Display all allocations matching the job ID, including those from an older 51 instance of the job. 52 53 -verbose 54 Display full information. 55 ` 56 return strings.TrimSpace(helpText) 57 } 58 59 func (c *JobStatusCommand) Synopsis() string { 60 return "Display status information about a job" 61 } 62 63 func (c *JobStatusCommand) AutocompleteFlags() complete.Flags { 64 return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), 65 complete.Flags{ 66 "-all-allocs": complete.PredictNothing, 67 "-evals": complete.PredictNothing, 68 "-short": complete.PredictNothing, 69 "-verbose": complete.PredictNothing, 70 }) 71 } 72 73 func (c *JobStatusCommand) AutocompleteArgs() complete.Predictor { 74 return complete.PredictFunc(func(a complete.Args) []string { 75 client, err := c.Meta.Client() 76 if err != nil { 77 return nil 78 } 79 80 resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Jobs, nil) 81 if err != nil { 82 return []string{} 83 } 84 return resp.Matches[contexts.Jobs] 85 }) 86 } 87 88 func (c *JobStatusCommand) Name() string { return "status" } 89 90 func (c *JobStatusCommand) Run(args []string) int { 91 var short bool 92 93 flags := c.Meta.FlagSet(c.Name(), FlagSetClient) 94 flags.Usage = func() { c.Ui.Output(c.Help()) } 95 flags.BoolVar(&short, "short", false, "") 96 flags.BoolVar(&c.evals, "evals", false, "") 97 flags.BoolVar(&c.allAllocs, "all-allocs", false, "") 98 flags.BoolVar(&c.verbose, "verbose", false, "") 99 100 if err := flags.Parse(args); err != nil { 101 return 1 102 } 103 104 // Check that we either got no jobs or exactly one. 105 args = flags.Args() 106 if len(args) > 1 { 107 c.Ui.Error("This command takes either no arguments or one: <job>") 108 c.Ui.Error(commandErrorText(c)) 109 return 1 110 } 111 112 // Truncate the id unless full length is requested 113 c.length = shortId 114 if c.verbose { 115 c.length = fullId 116 } 117 118 // Get the HTTP client 119 client, err := c.Meta.Client() 120 if err != nil { 121 c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) 122 return 1 123 } 124 125 // Invoke list mode if no job ID. 126 if len(args) == 0 { 127 jobs, _, err := client.Jobs().List(nil) 128 if err != nil { 129 c.Ui.Error(fmt.Sprintf("Error querying jobs: %s", err)) 130 return 1 131 } 132 133 if len(jobs) == 0 { 134 // No output if we have no jobs 135 c.Ui.Output("No running jobs") 136 } else { 137 c.Ui.Output(createStatusListOutput(jobs)) 138 } 139 return 0 140 } 141 142 // Try querying the job 143 jobID := args[0] 144 145 jobs, _, err := client.Jobs().PrefixList(jobID) 146 if err != nil { 147 c.Ui.Error(fmt.Sprintf("Error querying job: %s", err)) 148 return 1 149 } 150 if len(jobs) == 0 { 151 c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID)) 152 return 1 153 } 154 if len(jobs) > 1 && strings.TrimSpace(jobID) != jobs[0].ID { 155 c.Ui.Error(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", createStatusListOutput(jobs))) 156 return 1 157 } 158 // Prefix lookup matched a single job 159 job, _, err := client.Jobs().Info(jobs[0].ID, nil) 160 if err != nil { 161 c.Ui.Error(fmt.Sprintf("Error querying job: %s", err)) 162 return 1 163 } 164 165 periodic := job.IsPeriodic() 166 parameterized := job.IsParameterized() 167 168 // Format the job info 169 basic := []string{ 170 fmt.Sprintf("ID|%s", *job.ID), 171 fmt.Sprintf("Name|%s", *job.Name), 172 fmt.Sprintf("Submit Date|%s", formatTime(time.Unix(0, *job.SubmitTime))), 173 fmt.Sprintf("Type|%s", *job.Type), 174 fmt.Sprintf("Priority|%d", *job.Priority), 175 fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")), 176 fmt.Sprintf("Namespace|%s", *job.Namespace), 177 fmt.Sprintf("Status|%s", getStatusString(*job.Status, job.Stop)), 178 fmt.Sprintf("Periodic|%v", periodic), 179 fmt.Sprintf("Parameterized|%v", parameterized), 180 } 181 182 if periodic && !parameterized { 183 if *job.Stop { 184 basic = append(basic, fmt.Sprintf("Next Periodic Launch|none (job stopped)")) 185 } else { 186 location, err := job.Periodic.GetLocation() 187 if err == nil { 188 now := time.Now().In(location) 189 next, err := job.Periodic.Next(now) 190 if err == nil { 191 basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s", 192 fmt.Sprintf("%s (%s from now)", 193 formatTime(next), formatTimeDifference(now, next, time.Second)))) 194 } 195 } 196 } 197 } 198 199 c.Ui.Output(formatKV(basic)) 200 201 // Exit early 202 if short { 203 return 0 204 } 205 206 // Print periodic job information 207 if periodic && !parameterized { 208 if err := c.outputPeriodicInfo(client, job); err != nil { 209 c.Ui.Error(err.Error()) 210 return 1 211 } 212 } else if parameterized { 213 if err := c.outputParameterizedInfo(client, job); err != nil { 214 c.Ui.Error(err.Error()) 215 return 1 216 } 217 } else { 218 if err := c.outputJobInfo(client, job); err != nil { 219 c.Ui.Error(err.Error()) 220 return 1 221 } 222 } 223 224 return 0 225 } 226 227 // outputPeriodicInfo prints information about the passed periodic job. If a 228 // request fails, an error is returned. 229 func (c *JobStatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error { 230 // Output the summary 231 if err := c.outputJobSummary(client, job); err != nil { 232 return err 233 } 234 235 // Generate the prefix that matches launched jobs from the periodic job. 236 prefix := fmt.Sprintf("%s%s", *job.ID, structs.PeriodicLaunchSuffix) 237 children, _, err := client.Jobs().PrefixList(prefix) 238 if err != nil { 239 return fmt.Errorf("Error querying job: %s", err) 240 } 241 242 if len(children) == 0 { 243 c.Ui.Output("\nNo instances of periodic job found") 244 return nil 245 } 246 247 out := make([]string, 1) 248 out[0] = "ID|Status" 249 for _, child := range children { 250 // Ensure that we are only showing jobs whose parent is the requested 251 // job. 252 if child.ParentID != *job.ID { 253 continue 254 } 255 256 out = append(out, fmt.Sprintf("%s|%s", 257 child.ID, 258 child.Status)) 259 } 260 261 c.Ui.Output(c.Colorize().Color("\n[bold]Previously Launched Jobs[reset]")) 262 c.Ui.Output(formatList(out)) 263 return nil 264 } 265 266 // outputParameterizedInfo prints information about a parameterized job. If a 267 // request fails, an error is returned. 268 func (c *JobStatusCommand) outputParameterizedInfo(client *api.Client, job *api.Job) error { 269 // Output parameterized job details 270 c.Ui.Output(c.Colorize().Color("\n[bold]Parameterized Job[reset]")) 271 parameterizedJob := make([]string, 3) 272 parameterizedJob[0] = fmt.Sprintf("Payload|%s", job.ParameterizedJob.Payload) 273 parameterizedJob[1] = fmt.Sprintf("Required Metadata|%v", strings.Join(job.ParameterizedJob.MetaRequired, ", ")) 274 parameterizedJob[2] = fmt.Sprintf("Optional Metadata|%v", strings.Join(job.ParameterizedJob.MetaOptional, ", ")) 275 c.Ui.Output(formatKV(parameterizedJob)) 276 277 // Output the summary 278 if err := c.outputJobSummary(client, job); err != nil { 279 return err 280 } 281 282 // Generate the prefix that matches launched jobs from the parameterized job. 283 prefix := fmt.Sprintf("%s%s", *job.ID, structs.DispatchLaunchSuffix) 284 children, _, err := client.Jobs().PrefixList(prefix) 285 if err != nil { 286 return fmt.Errorf("Error querying job: %s", err) 287 } 288 289 if len(children) == 0 { 290 c.Ui.Output("\nNo dispatched instances of parameterized job found") 291 return nil 292 } 293 294 out := make([]string, 1) 295 out[0] = "ID|Status" 296 for _, child := range children { 297 // Ensure that we are only showing jobs whose parent is the requested 298 // job. 299 if child.ParentID != *job.ID { 300 continue 301 } 302 303 out = append(out, fmt.Sprintf("%s|%s", 304 child.ID, 305 child.Status)) 306 } 307 308 c.Ui.Output(c.Colorize().Color("\n[bold]Dispatched Jobs[reset]")) 309 c.Ui.Output(formatList(out)) 310 return nil 311 } 312 313 // outputJobInfo prints information about the passed non-periodic job. If a 314 // request fails, an error is returned. 315 func (c *JobStatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { 316 317 // Query the allocations 318 jobAllocs, _, err := client.Jobs().Allocations(*job.ID, c.allAllocs, nil) 319 if err != nil { 320 return fmt.Errorf("Error querying job allocations: %s", err) 321 } 322 323 // Query the evaluations 324 jobEvals, _, err := client.Jobs().Evaluations(*job.ID, nil) 325 if err != nil { 326 return fmt.Errorf("Error querying job evaluations: %s", err) 327 } 328 329 latestDeployment, _, err := client.Jobs().LatestDeployment(*job.ID, nil) 330 if err != nil { 331 return fmt.Errorf("Error querying latest job deployment: %s", err) 332 } 333 334 // Output the summary 335 if err := c.outputJobSummary(client, job); err != nil { 336 return err 337 } 338 339 // Determine latest evaluation with failures whose follow up hasn't 340 // completed, this is done while formatting 341 var latestFailedPlacement *api.Evaluation 342 blockedEval := false 343 344 // Format the evals 345 evals := make([]string, len(jobEvals)+1) 346 evals[0] = "ID|Priority|Triggered By|Status|Placement Failures" 347 for i, eval := range jobEvals { 348 failures, _ := evalFailureStatus(eval) 349 evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s", 350 limit(eval.ID, c.length), 351 eval.Priority, 352 eval.TriggeredBy, 353 eval.Status, 354 failures, 355 ) 356 357 if eval.Status == "blocked" { 358 blockedEval = true 359 } 360 361 if len(eval.FailedTGAllocs) == 0 { 362 // Skip evals without failures 363 continue 364 } 365 366 if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex { 367 latestFailedPlacement = eval 368 } 369 } 370 371 if c.verbose || c.evals { 372 c.Ui.Output(c.Colorize().Color("\n[bold]Evaluations[reset]")) 373 c.Ui.Output(formatList(evals)) 374 } 375 376 if blockedEval && latestFailedPlacement != nil { 377 c.outputFailedPlacements(latestFailedPlacement) 378 } 379 380 c.outputReschedulingEvals(client, job, jobAllocs, c.length) 381 382 if latestDeployment != nil { 383 c.Ui.Output(c.Colorize().Color("\n[bold]Latest Deployment[reset]")) 384 c.Ui.Output(c.Colorize().Color(c.formatDeployment(latestDeployment))) 385 } 386 387 // Format the allocs 388 c.Ui.Output(c.Colorize().Color("\n[bold]Allocations[reset]")) 389 c.Ui.Output(formatAllocListStubs(jobAllocs, c.verbose, c.length)) 390 return nil 391 } 392 393 func (c *JobStatusCommand) formatDeployment(d *api.Deployment) string { 394 // Format the high-level elements 395 high := []string{ 396 fmt.Sprintf("ID|%s", limit(d.ID, c.length)), 397 fmt.Sprintf("Status|%s", d.Status), 398 fmt.Sprintf("Description|%s", d.StatusDescription), 399 } 400 401 base := formatKV(high) 402 if len(d.TaskGroups) == 0 { 403 return base 404 } 405 base += "\n\n[bold]Deployed[reset]\n" 406 base += formatDeploymentGroups(d, c.length) 407 return base 408 } 409 410 func formatAllocListStubs(stubs []*api.AllocationListStub, verbose bool, uuidLength int) string { 411 if len(stubs) == 0 { 412 return "No allocations placed" 413 } 414 415 allocs := make([]string, len(stubs)+1) 416 if verbose { 417 allocs[0] = "ID|Eval ID|Node ID|Node Name|Task Group|Version|Desired|Status|Created|Modified" 418 for i, alloc := range stubs { 419 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%d|%s|%s|%s|%s", 420 limit(alloc.ID, uuidLength), 421 limit(alloc.EvalID, uuidLength), 422 limit(alloc.NodeID, uuidLength), 423 alloc.NodeName, 424 alloc.TaskGroup, 425 alloc.JobVersion, 426 alloc.DesiredStatus, 427 alloc.ClientStatus, 428 formatUnixNanoTime(alloc.CreateTime), 429 formatUnixNanoTime(alloc.ModifyTime)) 430 } 431 } else { 432 allocs[0] = "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 433 for i, alloc := range stubs { 434 now := time.Now() 435 createTimePretty := prettyTimeDiff(time.Unix(0, alloc.CreateTime), now) 436 modTimePretty := prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now) 437 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s", 438 limit(alloc.ID, uuidLength), 439 limit(alloc.NodeID, uuidLength), 440 alloc.TaskGroup, 441 alloc.JobVersion, 442 alloc.DesiredStatus, 443 alloc.ClientStatus, 444 createTimePretty, 445 modTimePretty) 446 } 447 } 448 449 return formatList(allocs) 450 } 451 452 func formatAllocList(allocations []*api.Allocation, verbose bool, uuidLength int) string { 453 if len(allocations) == 0 { 454 return "No allocations placed" 455 } 456 457 allocs := make([]string, len(allocations)+1) 458 if verbose { 459 allocs[0] = "ID|Eval ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 460 for i, alloc := range allocations { 461 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%d|%s|%s|%s|%s", 462 limit(alloc.ID, uuidLength), 463 limit(alloc.EvalID, uuidLength), 464 limit(alloc.NodeID, uuidLength), 465 alloc.TaskGroup, 466 *alloc.Job.Version, 467 alloc.DesiredStatus, 468 alloc.ClientStatus, 469 formatUnixNanoTime(alloc.CreateTime), 470 formatUnixNanoTime(alloc.ModifyTime)) 471 } 472 } else { 473 allocs[0] = "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 474 for i, alloc := range allocations { 475 now := time.Now() 476 createTimePretty := prettyTimeDiff(time.Unix(0, alloc.CreateTime), now) 477 modTimePretty := prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now) 478 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s", 479 limit(alloc.ID, uuidLength), 480 limit(alloc.NodeID, uuidLength), 481 alloc.TaskGroup, 482 *alloc.Job.Version, 483 alloc.DesiredStatus, 484 alloc.ClientStatus, 485 createTimePretty, 486 modTimePretty) 487 } 488 } 489 490 return formatList(allocs) 491 } 492 493 // outputJobSummary displays the given jobs summary and children job summary 494 // where appropriate 495 func (c *JobStatusCommand) outputJobSummary(client *api.Client, job *api.Job) error { 496 // Query the summary 497 summary, _, err := client.Jobs().Summary(*job.ID, nil) 498 if err != nil { 499 return fmt.Errorf("Error querying job summary: %s", err) 500 } 501 502 if summary == nil { 503 return nil 504 } 505 506 periodic := job.IsPeriodic() 507 parameterizedJob := job.IsParameterized() 508 509 // Print the summary 510 if !periodic && !parameterizedJob { 511 c.Ui.Output(c.Colorize().Color("\n[bold]Summary[reset]")) 512 summaries := make([]string, len(summary.Summary)+1) 513 summaries[0] = "Task Group|Queued|Starting|Running|Failed|Complete|Lost" 514 taskGroups := make([]string, 0, len(summary.Summary)) 515 for taskGroup := range summary.Summary { 516 taskGroups = append(taskGroups, taskGroup) 517 } 518 sort.Strings(taskGroups) 519 for idx, taskGroup := range taskGroups { 520 tgs := summary.Summary[taskGroup] 521 summaries[idx+1] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d", 522 taskGroup, tgs.Queued, tgs.Starting, 523 tgs.Running, tgs.Failed, 524 tgs.Complete, tgs.Lost, 525 ) 526 } 527 c.Ui.Output(formatList(summaries)) 528 } 529 530 // Always display the summary if we are periodic or parameterized, but 531 // only display if the summary is non-zero on normal jobs 532 if summary.Children != nil && (parameterizedJob || periodic || summary.Children.Sum() > 0) { 533 if parameterizedJob { 534 c.Ui.Output(c.Colorize().Color("\n[bold]Parameterized Job Summary[reset]")) 535 } else { 536 c.Ui.Output(c.Colorize().Color("\n[bold]Children Job Summary[reset]")) 537 } 538 summaries := make([]string, 2) 539 summaries[0] = "Pending|Running|Dead" 540 summaries[1] = fmt.Sprintf("%d|%d|%d", 541 summary.Children.Pending, summary.Children.Running, summary.Children.Dead) 542 c.Ui.Output(formatList(summaries)) 543 } 544 545 return nil 546 } 547 548 // outputReschedulingEvals displays eval IDs and time for any 549 // delayed evaluations by task group 550 func (c *JobStatusCommand) outputReschedulingEvals(client *api.Client, job *api.Job, allocListStubs []*api.AllocationListStub, uuidLength int) error { 551 // Get the most recent alloc ID by task group 552 553 mostRecentAllocs := make(map[string]*api.AllocationListStub) 554 for _, alloc := range allocListStubs { 555 a, ok := mostRecentAllocs[alloc.TaskGroup] 556 if !ok || alloc.ModifyTime > a.ModifyTime { 557 mostRecentAllocs[alloc.TaskGroup] = alloc 558 } 559 } 560 561 followUpEvalIds := make(map[string]string) 562 for tg, alloc := range mostRecentAllocs { 563 if alloc.FollowupEvalID != "" { 564 followUpEvalIds[tg] = alloc.FollowupEvalID 565 } 566 } 567 568 if len(followUpEvalIds) == 0 { 569 return nil 570 } 571 // Print the reschedule info section 572 var delayedEvalInfos []string 573 574 taskGroups := make([]string, 0, len(followUpEvalIds)) 575 for taskGroup := range followUpEvalIds { 576 taskGroups = append(taskGroups, taskGroup) 577 } 578 sort.Strings(taskGroups) 579 var evalDetails []string 580 first := true 581 for _, taskGroup := range taskGroups { 582 evalID := followUpEvalIds[taskGroup] 583 evaluation, _, err := client.Evaluations().Info(evalID, nil) 584 // Eval time is not critical output, 585 // so don't return it on errors, if its not set, or its already in the past 586 if err != nil || evaluation.WaitUntil.IsZero() || time.Now().After(evaluation.WaitUntil) { 587 continue 588 } 589 evalTime := prettyTimeDiff(evaluation.WaitUntil, time.Now()) 590 if c.verbose { 591 if first { 592 delayedEvalInfos = append(delayedEvalInfos, "Task Group|Reschedule Policy|Eval ID|Eval Time") 593 } 594 rp := job.LookupTaskGroup(taskGroup).ReschedulePolicy 595 evalDetails = append(evalDetails, fmt.Sprintf("%s|%s|%s|%s", taskGroup, rp.String(), limit(evalID, uuidLength), evalTime)) 596 } else { 597 if first { 598 delayedEvalInfos = append(delayedEvalInfos, "Task Group|Eval ID|Eval Time") 599 } 600 evalDetails = append(evalDetails, fmt.Sprintf("%s|%s|%s", taskGroup, limit(evalID, uuidLength), evalTime)) 601 } 602 first = false 603 } 604 if len(evalDetails) == 0 { 605 return nil 606 } 607 // Only show this section if there is pending evals 608 delayedEvalInfos = append(delayedEvalInfos, evalDetails...) 609 c.Ui.Output(c.Colorize().Color("\n[bold]Future Rescheduling Attempts[reset]")) 610 c.Ui.Output(formatList(delayedEvalInfos)) 611 return nil 612 } 613 614 func (c *JobStatusCommand) outputFailedPlacements(failedEval *api.Evaluation) { 615 if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 { 616 return 617 } 618 619 c.Ui.Output(c.Colorize().Color("\n[bold]Placement Failure[reset]")) 620 621 sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs) 622 for i, tg := range sorted { 623 if i >= maxFailedTGs { 624 break 625 } 626 627 c.Ui.Output(fmt.Sprintf("Task Group %q:", tg)) 628 metrics := failedEval.FailedTGAllocs[tg] 629 c.Ui.Output(formatAllocMetrics(metrics, false, " ")) 630 if i != len(sorted)-1 { 631 c.Ui.Output("") 632 } 633 } 634 635 if len(sorted) > maxFailedTGs { 636 trunc := fmt.Sprintf("\nPlacement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID) 637 c.Ui.Output(trunc) 638 } 639 } 640 641 // list general information about a list of jobs 642 func createStatusListOutput(jobs []*api.JobListStub) string { 643 out := make([]string, len(jobs)+1) 644 out[0] = "ID|Type|Priority|Status|Submit Date" 645 for i, job := range jobs { 646 out[i+1] = fmt.Sprintf("%s|%s|%d|%s|%s", 647 job.ID, 648 getTypeString(job), 649 job.Priority, 650 getStatusString(job.Status, &job.Stop), 651 formatTime(time.Unix(0, job.SubmitTime))) 652 } 653 return formatList(out) 654 } 655 656 func getTypeString(job *api.JobListStub) string { 657 t := job.Type 658 659 if job.Periodic { 660 t += "/periodic" 661 } 662 663 if job.ParameterizedJob { 664 t += "/parameterized" 665 } 666 667 return t 668 } 669 670 func getStatusString(status string, stop *bool) string { 671 if stop != nil && *stop { 672 return fmt.Sprintf("%s (stopped)", status) 673 } 674 return status 675 }