github.com/ferranbt/nomad@v0.9.3-0.20190607002617-85c449b7667c/command/job_status.go (about) 1 package command 2 3 import ( 4 "fmt" 5 "sort" 6 "strings" 7 "time" 8 9 "github.com/hashicorp/nomad/api" 10 "github.com/hashicorp/nomad/api/contexts" 11 "github.com/hashicorp/nomad/nomad/structs" 12 "github.com/posener/complete" 13 ) 14 15 const ( 16 // maxFailedTGs is the maximum number of task groups we show failure reasons 17 // for before deferring to eval-status 18 maxFailedTGs = 5 19 ) 20 21 type JobStatusCommand struct { 22 Meta 23 length int 24 evals bool 25 allAllocs bool 26 verbose bool 27 } 28 29 func (c *JobStatusCommand) Help() string { 30 helpText := ` 31 Usage: nomad status [options] <job> 32 33 Display status information about a job. If no job ID is given, a list of all 34 known jobs will be displayed. 35 36 General Options: 37 38 ` + generalOptionsUsage() + ` 39 40 Status Options: 41 42 -short 43 Display short output. Used only when a single job is being 44 queried, and drops verbose information about allocations. 45 46 -evals 47 Display the evaluations associated with the job. 48 49 -all-allocs 50 Display all allocations matching the job ID, including those from an older 51 instance of the job. 52 53 -verbose 54 Display full information. 55 ` 56 return strings.TrimSpace(helpText) 57 } 58 59 func (c *JobStatusCommand) Synopsis() string { 60 return "Display status information about a job" 61 } 62 63 func (c *JobStatusCommand) AutocompleteFlags() complete.Flags { 64 return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient), 65 complete.Flags{ 66 "-all-allocs": complete.PredictNothing, 67 "-evals": complete.PredictNothing, 68 "-short": complete.PredictNothing, 69 "-verbose": complete.PredictNothing, 70 }) 71 } 72 73 func (c *JobStatusCommand) AutocompleteArgs() complete.Predictor { 74 return complete.PredictFunc(func(a complete.Args) []string { 75 client, err := c.Meta.Client() 76 if err != nil { 77 return nil 78 } 79 80 resp, _, err := client.Search().PrefixSearch(a.Last, contexts.Jobs, nil) 81 if err != nil { 82 return []string{} 83 } 84 return resp.Matches[contexts.Jobs] 85 }) 86 } 87 88 func (c *JobStatusCommand) Name() string { return "status" } 89 90 func (c *JobStatusCommand) Run(args []string) int { 91 var short bool 92 93 flags := c.Meta.FlagSet(c.Name(), FlagSetClient) 94 flags.Usage = func() { c.Ui.Output(c.Help()) } 95 flags.BoolVar(&short, "short", false, "") 96 flags.BoolVar(&c.evals, "evals", false, "") 97 flags.BoolVar(&c.allAllocs, "all-allocs", false, "") 98 flags.BoolVar(&c.verbose, "verbose", false, "") 99 100 if err := flags.Parse(args); err != nil { 101 return 1 102 } 103 104 // Check that we either got no jobs or exactly one. 105 args = flags.Args() 106 if len(args) > 1 { 107 c.Ui.Error("This command takes either no arguments or one: <job>") 108 c.Ui.Error(commandErrorText(c)) 109 return 1 110 } 111 112 // Truncate the id unless full length is requested 113 c.length = shortId 114 if c.verbose { 115 c.length = fullId 116 } 117 118 // Get the HTTP client 119 client, err := c.Meta.Client() 120 if err != nil { 121 c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err)) 122 return 1 123 } 124 125 // Invoke list mode if no job ID. 126 if len(args) == 0 { 127 jobs, _, err := client.Jobs().List(nil) 128 if err != nil { 129 c.Ui.Error(fmt.Sprintf("Error querying jobs: %s", err)) 130 return 1 131 } 132 133 if len(jobs) == 0 { 134 // No output if we have no jobs 135 c.Ui.Output("No running jobs") 136 } else { 137 c.Ui.Output(createStatusListOutput(jobs)) 138 } 139 return 0 140 } 141 142 // Try querying the job 143 jobID := args[0] 144 145 jobs, _, err := client.Jobs().PrefixList(jobID) 146 if err != nil { 147 c.Ui.Error(fmt.Sprintf("Error querying job: %s", err)) 148 return 1 149 } 150 if len(jobs) == 0 { 151 c.Ui.Error(fmt.Sprintf("No job(s) with prefix or id %q found", jobID)) 152 return 1 153 } 154 if len(jobs) > 1 && strings.TrimSpace(jobID) != jobs[0].ID { 155 c.Ui.Error(fmt.Sprintf("Prefix matched multiple jobs\n\n%s", createStatusListOutput(jobs))) 156 return 1 157 } 158 // Prefix lookup matched a single job 159 job, _, err := client.Jobs().Info(jobs[0].ID, nil) 160 if err != nil { 161 c.Ui.Error(fmt.Sprintf("Error querying job: %s", err)) 162 return 1 163 } 164 165 periodic := job.IsPeriodic() 166 parameterized := job.IsParameterized() 167 168 // Format the job info 169 basic := []string{ 170 fmt.Sprintf("ID|%s", *job.ID), 171 fmt.Sprintf("Name|%s", *job.Name), 172 fmt.Sprintf("Submit Date|%s", formatTime(getSubmitTime(job))), 173 fmt.Sprintf("Type|%s", *job.Type), 174 fmt.Sprintf("Priority|%d", *job.Priority), 175 fmt.Sprintf("Datacenters|%s", strings.Join(job.Datacenters, ",")), 176 fmt.Sprintf("Status|%s", getStatusString(*job.Status, job.Stop)), 177 fmt.Sprintf("Periodic|%v", periodic), 178 fmt.Sprintf("Parameterized|%v", parameterized), 179 } 180 181 if periodic && !parameterized { 182 if *job.Stop { 183 basic = append(basic, fmt.Sprintf("Next Periodic Launch|none (job stopped)")) 184 } else { 185 location, err := job.Periodic.GetLocation() 186 if err == nil { 187 now := time.Now().In(location) 188 next, err := job.Periodic.Next(now) 189 if err == nil { 190 basic = append(basic, fmt.Sprintf("Next Periodic Launch|%s", 191 fmt.Sprintf("%s (%s from now)", 192 formatTime(next), formatTimeDifference(now, next, time.Second)))) 193 } 194 } 195 } 196 } 197 198 c.Ui.Output(formatKV(basic)) 199 200 // Exit early 201 if short { 202 return 0 203 } 204 205 // Print periodic job information 206 if periodic && !parameterized { 207 if err := c.outputPeriodicInfo(client, job); err != nil { 208 c.Ui.Error(err.Error()) 209 return 1 210 } 211 } else if parameterized { 212 if err := c.outputParameterizedInfo(client, job); err != nil { 213 c.Ui.Error(err.Error()) 214 return 1 215 } 216 } else { 217 if err := c.outputJobInfo(client, job); err != nil { 218 c.Ui.Error(err.Error()) 219 return 1 220 } 221 } 222 223 return 0 224 } 225 226 // outputPeriodicInfo prints information about the passed periodic job. If a 227 // request fails, an error is returned. 228 func (c *JobStatusCommand) outputPeriodicInfo(client *api.Client, job *api.Job) error { 229 // Output the summary 230 if err := c.outputJobSummary(client, job); err != nil { 231 return err 232 } 233 234 // Generate the prefix that matches launched jobs from the periodic job. 235 prefix := fmt.Sprintf("%s%s", *job.ID, structs.PeriodicLaunchSuffix) 236 children, _, err := client.Jobs().PrefixList(prefix) 237 if err != nil { 238 return fmt.Errorf("Error querying job: %s", err) 239 } 240 241 if len(children) == 0 { 242 c.Ui.Output("\nNo instances of periodic job found") 243 return nil 244 } 245 246 out := make([]string, 1) 247 out[0] = "ID|Status" 248 for _, child := range children { 249 // Ensure that we are only showing jobs whose parent is the requested 250 // job. 251 if child.ParentID != *job.ID { 252 continue 253 } 254 255 out = append(out, fmt.Sprintf("%s|%s", 256 child.ID, 257 child.Status)) 258 } 259 260 c.Ui.Output(c.Colorize().Color("\n[bold]Previously Launched Jobs[reset]")) 261 c.Ui.Output(formatList(out)) 262 return nil 263 } 264 265 // outputParameterizedInfo prints information about a parameterized job. If a 266 // request fails, an error is returned. 267 func (c *JobStatusCommand) outputParameterizedInfo(client *api.Client, job *api.Job) error { 268 // Output parameterized job details 269 c.Ui.Output(c.Colorize().Color("\n[bold]Parameterized Job[reset]")) 270 parameterizedJob := make([]string, 3) 271 parameterizedJob[0] = fmt.Sprintf("Payload|%s", job.ParameterizedJob.Payload) 272 parameterizedJob[1] = fmt.Sprintf("Required Metadata|%v", strings.Join(job.ParameterizedJob.MetaRequired, ", ")) 273 parameterizedJob[2] = fmt.Sprintf("Optional Metadata|%v", strings.Join(job.ParameterizedJob.MetaOptional, ", ")) 274 c.Ui.Output(formatKV(parameterizedJob)) 275 276 // Output the summary 277 if err := c.outputJobSummary(client, job); err != nil { 278 return err 279 } 280 281 // Generate the prefix that matches launched jobs from the parameterized job. 282 prefix := fmt.Sprintf("%s%s", *job.ID, structs.DispatchLaunchSuffix) 283 children, _, err := client.Jobs().PrefixList(prefix) 284 if err != nil { 285 return fmt.Errorf("Error querying job: %s", err) 286 } 287 288 if len(children) == 0 { 289 c.Ui.Output("\nNo dispatched instances of parameterized job found") 290 return nil 291 } 292 293 out := make([]string, 1) 294 out[0] = "ID|Status" 295 for _, child := range children { 296 // Ensure that we are only showing jobs whose parent is the requested 297 // job. 298 if child.ParentID != *job.ID { 299 continue 300 } 301 302 out = append(out, fmt.Sprintf("%s|%s", 303 child.ID, 304 child.Status)) 305 } 306 307 c.Ui.Output(c.Colorize().Color("\n[bold]Dispatched Jobs[reset]")) 308 c.Ui.Output(formatList(out)) 309 return nil 310 } 311 312 // outputJobInfo prints information about the passed non-periodic job. If a 313 // request fails, an error is returned. 314 func (c *JobStatusCommand) outputJobInfo(client *api.Client, job *api.Job) error { 315 316 // Query the allocations 317 jobAllocs, _, err := client.Jobs().Allocations(*job.ID, c.allAllocs, nil) 318 if err != nil { 319 return fmt.Errorf("Error querying job allocations: %s", err) 320 } 321 322 // Query the evaluations 323 jobEvals, _, err := client.Jobs().Evaluations(*job.ID, nil) 324 if err != nil { 325 return fmt.Errorf("Error querying job evaluations: %s", err) 326 } 327 328 latestDeployment, _, err := client.Jobs().LatestDeployment(*job.ID, nil) 329 if err != nil { 330 return fmt.Errorf("Error querying latest job deployment: %s", err) 331 } 332 333 // Output the summary 334 if err := c.outputJobSummary(client, job); err != nil { 335 return err 336 } 337 338 // Determine latest evaluation with failures whose follow up hasn't 339 // completed, this is done while formatting 340 var latestFailedPlacement *api.Evaluation 341 blockedEval := false 342 343 // Format the evals 344 evals := make([]string, len(jobEvals)+1) 345 evals[0] = "ID|Priority|Triggered By|Status|Placement Failures" 346 for i, eval := range jobEvals { 347 failures, _ := evalFailureStatus(eval) 348 evals[i+1] = fmt.Sprintf("%s|%d|%s|%s|%s", 349 limit(eval.ID, c.length), 350 eval.Priority, 351 eval.TriggeredBy, 352 eval.Status, 353 failures, 354 ) 355 356 if eval.Status == "blocked" { 357 blockedEval = true 358 } 359 360 if len(eval.FailedTGAllocs) == 0 { 361 // Skip evals without failures 362 continue 363 } 364 365 if latestFailedPlacement == nil || latestFailedPlacement.CreateIndex < eval.CreateIndex { 366 latestFailedPlacement = eval 367 } 368 } 369 370 if c.verbose || c.evals { 371 c.Ui.Output(c.Colorize().Color("\n[bold]Evaluations[reset]")) 372 c.Ui.Output(formatList(evals)) 373 } 374 375 if blockedEval && latestFailedPlacement != nil { 376 c.outputFailedPlacements(latestFailedPlacement) 377 } 378 379 c.outputReschedulingEvals(client, job, jobAllocs, c.length) 380 381 if latestDeployment != nil { 382 c.Ui.Output(c.Colorize().Color("\n[bold]Latest Deployment[reset]")) 383 c.Ui.Output(c.Colorize().Color(c.formatDeployment(latestDeployment))) 384 } 385 386 // Format the allocs 387 c.Ui.Output(c.Colorize().Color("\n[bold]Allocations[reset]")) 388 c.Ui.Output(formatAllocListStubs(jobAllocs, c.verbose, c.length)) 389 return nil 390 } 391 392 func (c *JobStatusCommand) formatDeployment(d *api.Deployment) string { 393 // Format the high-level elements 394 high := []string{ 395 fmt.Sprintf("ID|%s", limit(d.ID, c.length)), 396 fmt.Sprintf("Status|%s", d.Status), 397 fmt.Sprintf("Description|%s", d.StatusDescription), 398 } 399 400 base := formatKV(high) 401 if len(d.TaskGroups) == 0 { 402 return base 403 } 404 base += "\n\n[bold]Deployed[reset]\n" 405 base += formatDeploymentGroups(d, c.length) 406 return base 407 } 408 409 func formatAllocListStubs(stubs []*api.AllocationListStub, verbose bool, uuidLength int) string { 410 if len(stubs) == 0 { 411 return "No allocations placed" 412 } 413 414 allocs := make([]string, len(stubs)+1) 415 if verbose { 416 allocs[0] = "ID|Eval ID|Node ID|Node Name|Task Group|Version|Desired|Status|Created|Modified" 417 for i, alloc := range stubs { 418 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%s|%d|%s|%s|%s|%s", 419 limit(alloc.ID, uuidLength), 420 limit(alloc.EvalID, uuidLength), 421 limit(alloc.NodeID, uuidLength), 422 alloc.NodeName, 423 alloc.TaskGroup, 424 alloc.JobVersion, 425 alloc.DesiredStatus, 426 alloc.ClientStatus, 427 formatUnixNanoTime(alloc.CreateTime), 428 formatUnixNanoTime(alloc.ModifyTime)) 429 } 430 } else { 431 allocs[0] = "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 432 for i, alloc := range stubs { 433 now := time.Now() 434 createTimePretty := prettyTimeDiff(time.Unix(0, alloc.CreateTime), now) 435 modTimePretty := prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now) 436 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s", 437 limit(alloc.ID, uuidLength), 438 limit(alloc.NodeID, uuidLength), 439 alloc.TaskGroup, 440 alloc.JobVersion, 441 alloc.DesiredStatus, 442 alloc.ClientStatus, 443 createTimePretty, 444 modTimePretty) 445 } 446 } 447 448 return formatList(allocs) 449 } 450 451 func formatAllocList(allocations []*api.Allocation, verbose bool, uuidLength int) string { 452 if len(allocations) == 0 { 453 return "No allocations placed" 454 } 455 456 allocs := make([]string, len(allocations)+1) 457 if verbose { 458 allocs[0] = "ID|Eval ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 459 for i, alloc := range allocations { 460 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%s|%d|%s|%s|%s|%s", 461 limit(alloc.ID, uuidLength), 462 limit(alloc.EvalID, uuidLength), 463 limit(alloc.NodeID, uuidLength), 464 alloc.TaskGroup, 465 getVersion(alloc.Job), 466 alloc.DesiredStatus, 467 alloc.ClientStatus, 468 formatUnixNanoTime(alloc.CreateTime), 469 formatUnixNanoTime(alloc.ModifyTime)) 470 } 471 } else { 472 allocs[0] = "ID|Node ID|Task Group|Version|Desired|Status|Created|Modified" 473 for i, alloc := range allocations { 474 now := time.Now() 475 createTimePretty := prettyTimeDiff(time.Unix(0, alloc.CreateTime), now) 476 modTimePretty := prettyTimeDiff(time.Unix(0, alloc.ModifyTime), now) 477 allocs[i+1] = fmt.Sprintf("%s|%s|%s|%d|%s|%s|%s|%s", 478 limit(alloc.ID, uuidLength), 479 limit(alloc.NodeID, uuidLength), 480 alloc.TaskGroup, 481 getVersion(alloc.Job), 482 alloc.DesiredStatus, 483 alloc.ClientStatus, 484 createTimePretty, 485 modTimePretty) 486 } 487 } 488 489 return formatList(allocs) 490 } 491 492 // outputJobSummary displays the given jobs summary and children job summary 493 // where appropriate 494 func (c *JobStatusCommand) outputJobSummary(client *api.Client, job *api.Job) error { 495 // Query the summary 496 summary, _, err := client.Jobs().Summary(*job.ID, nil) 497 if err != nil { 498 return fmt.Errorf("Error querying job summary: %s", err) 499 } 500 501 if summary == nil { 502 return nil 503 } 504 505 periodic := job.IsPeriodic() 506 parameterizedJob := job.IsParameterized() 507 508 // Print the summary 509 if !periodic && !parameterizedJob { 510 c.Ui.Output(c.Colorize().Color("\n[bold]Summary[reset]")) 511 summaries := make([]string, len(summary.Summary)+1) 512 summaries[0] = "Task Group|Queued|Starting|Running|Failed|Complete|Lost" 513 taskGroups := make([]string, 0, len(summary.Summary)) 514 for taskGroup := range summary.Summary { 515 taskGroups = append(taskGroups, taskGroup) 516 } 517 sort.Strings(taskGroups) 518 for idx, taskGroup := range taskGroups { 519 tgs := summary.Summary[taskGroup] 520 summaries[idx+1] = fmt.Sprintf("%s|%d|%d|%d|%d|%d|%d", 521 taskGroup, tgs.Queued, tgs.Starting, 522 tgs.Running, tgs.Failed, 523 tgs.Complete, tgs.Lost, 524 ) 525 } 526 c.Ui.Output(formatList(summaries)) 527 } 528 529 // Always display the summary if we are periodic or parameterized, but 530 // only display if the summary is non-zero on normal jobs 531 if summary.Children != nil && (parameterizedJob || periodic || summary.Children.Sum() > 0) { 532 if parameterizedJob { 533 c.Ui.Output(c.Colorize().Color("\n[bold]Parameterized Job Summary[reset]")) 534 } else { 535 c.Ui.Output(c.Colorize().Color("\n[bold]Children Job Summary[reset]")) 536 } 537 summaries := make([]string, 2) 538 summaries[0] = "Pending|Running|Dead" 539 summaries[1] = fmt.Sprintf("%d|%d|%d", 540 summary.Children.Pending, summary.Children.Running, summary.Children.Dead) 541 c.Ui.Output(formatList(summaries)) 542 } 543 544 return nil 545 } 546 547 // outputReschedulingEvals displays eval IDs and time for any 548 // delayed evaluations by task group 549 func (c *JobStatusCommand) outputReschedulingEvals(client *api.Client, job *api.Job, allocListStubs []*api.AllocationListStub, uuidLength int) error { 550 // Get the most recent alloc ID by task group 551 552 mostRecentAllocs := make(map[string]*api.AllocationListStub) 553 for _, alloc := range allocListStubs { 554 a, ok := mostRecentAllocs[alloc.TaskGroup] 555 if !ok || alloc.ModifyTime > a.ModifyTime { 556 mostRecentAllocs[alloc.TaskGroup] = alloc 557 } 558 } 559 560 followUpEvalIds := make(map[string]string) 561 for tg, alloc := range mostRecentAllocs { 562 if alloc.FollowupEvalID != "" { 563 followUpEvalIds[tg] = alloc.FollowupEvalID 564 } 565 } 566 567 if len(followUpEvalIds) == 0 { 568 return nil 569 } 570 // Print the reschedule info section 571 var delayedEvalInfos []string 572 573 taskGroups := make([]string, 0, len(followUpEvalIds)) 574 for taskGroup := range followUpEvalIds { 575 taskGroups = append(taskGroups, taskGroup) 576 } 577 sort.Strings(taskGroups) 578 var evalDetails []string 579 first := true 580 for _, taskGroup := range taskGroups { 581 evalID := followUpEvalIds[taskGroup] 582 evaluation, _, err := client.Evaluations().Info(evalID, nil) 583 // Eval time is not critical output, 584 // so don't return it on errors, if its not set, or its already in the past 585 if err != nil || evaluation.WaitUntil.IsZero() || time.Now().After(evaluation.WaitUntil) { 586 continue 587 } 588 evalTime := prettyTimeDiff(evaluation.WaitUntil, time.Now()) 589 if c.verbose { 590 if first { 591 delayedEvalInfos = append(delayedEvalInfos, "Task Group|Reschedule Policy|Eval ID|Eval Time") 592 } 593 rp := job.LookupTaskGroup(taskGroup).ReschedulePolicy 594 evalDetails = append(evalDetails, fmt.Sprintf("%s|%s|%s|%s", taskGroup, rp.String(), limit(evalID, uuidLength), evalTime)) 595 } else { 596 if first { 597 delayedEvalInfos = append(delayedEvalInfos, "Task Group|Eval ID|Eval Time") 598 } 599 evalDetails = append(evalDetails, fmt.Sprintf("%s|%s|%s", taskGroup, limit(evalID, uuidLength), evalTime)) 600 } 601 first = false 602 } 603 if len(evalDetails) == 0 { 604 return nil 605 } 606 // Only show this section if there is pending evals 607 delayedEvalInfos = append(delayedEvalInfos, evalDetails...) 608 c.Ui.Output(c.Colorize().Color("\n[bold]Future Rescheduling Attempts[reset]")) 609 c.Ui.Output(formatList(delayedEvalInfos)) 610 return nil 611 } 612 613 func (c *JobStatusCommand) outputFailedPlacements(failedEval *api.Evaluation) { 614 if failedEval == nil || len(failedEval.FailedTGAllocs) == 0 { 615 return 616 } 617 618 c.Ui.Output(c.Colorize().Color("\n[bold]Placement Failure[reset]")) 619 620 sorted := sortedTaskGroupFromMetrics(failedEval.FailedTGAllocs) 621 for i, tg := range sorted { 622 if i >= maxFailedTGs { 623 break 624 } 625 626 c.Ui.Output(fmt.Sprintf("Task Group %q:", tg)) 627 metrics := failedEval.FailedTGAllocs[tg] 628 c.Ui.Output(formatAllocMetrics(metrics, false, " ")) 629 if i != len(sorted)-1 { 630 c.Ui.Output("") 631 } 632 } 633 634 if len(sorted) > maxFailedTGs { 635 trunc := fmt.Sprintf("\nPlacement failures truncated. To see remainder run:\nnomad eval-status %s", failedEval.ID) 636 c.Ui.Output(trunc) 637 } 638 } 639 640 // list general information about a list of jobs 641 func createStatusListOutput(jobs []*api.JobListStub) string { 642 out := make([]string, len(jobs)+1) 643 out[0] = "ID|Type|Priority|Status|Submit Date" 644 for i, job := range jobs { 645 out[i+1] = fmt.Sprintf("%s|%s|%d|%s|%s", 646 job.ID, 647 getTypeString(job), 648 job.Priority, 649 getStatusString(job.Status, &job.Stop), 650 formatTime(time.Unix(0, job.SubmitTime))) 651 } 652 return formatList(out) 653 } 654 655 func getTypeString(job *api.JobListStub) string { 656 t := job.Type 657 658 if job.Periodic { 659 t += "/periodic" 660 } 661 662 if job.ParameterizedJob { 663 t += "/parameterized" 664 } 665 666 return t 667 } 668 669 func getStatusString(status string, stop *bool) string { 670 if stop != nil && *stop { 671 return fmt.Sprintf("%s (stopped)", status) 672 } 673 return status 674 }