github.com/panekj/cli@v0.0.0-20230304125325-467dd2f3797e/cli/command/service/progress/progress.go (about) 1 package progress 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "os/signal" 10 "strconv" 11 "strings" 12 "time" 13 14 "github.com/docker/docker/api/types" 15 "github.com/docker/docker/api/types/filters" 16 "github.com/docker/docker/api/types/swarm" 17 "github.com/docker/docker/client" 18 "github.com/docker/docker/pkg/progress" 19 "github.com/docker/docker/pkg/streamformatter" 20 "github.com/docker/docker/pkg/stringid" 21 ) 22 23 var ( 24 numberedStates = map[swarm.TaskState]int64{ 25 swarm.TaskStateNew: 1, 26 swarm.TaskStateAllocated: 2, 27 swarm.TaskStatePending: 3, 28 swarm.TaskStateAssigned: 4, 29 swarm.TaskStateAccepted: 5, 30 swarm.TaskStatePreparing: 6, 31 swarm.TaskStateReady: 7, 32 swarm.TaskStateStarting: 8, 33 swarm.TaskStateRunning: 9, 34 35 // The following states are not actually shown in progress 36 // output, but are used internally for ordering. 37 swarm.TaskStateComplete: 10, 38 swarm.TaskStateShutdown: 11, 39 swarm.TaskStateFailed: 12, 40 swarm.TaskStateRejected: 13, 41 } 42 43 longestState int 44 ) 45 46 const ( 47 maxProgress = 9 48 maxProgressBars = 20 49 maxJobProgress = 10 50 ) 51 52 type progressUpdater interface { 53 update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) 54 } 55 56 func init() { 57 for state := range numberedStates { 58 // for jobs, we use the "complete" state, and so it should be factored 59 // in to the computation of the longest state. 60 if (!terminalState(state) || state == swarm.TaskStateComplete) && len(state) > longestState { 61 longestState = len(state) 62 } 63 } 64 } 65 66 func terminalState(state swarm.TaskState) bool { 67 return numberedStates[state] > numberedStates[swarm.TaskStateRunning] 68 } 69 70 // ServiceProgress outputs progress information for convergence of a service. 71 // 72 //nolint:gocyclo 73 func ServiceProgress(ctx context.Context, client client.APIClient, serviceID string, progressWriter io.WriteCloser) error { 74 defer progressWriter.Close() 75 76 progressOut := streamformatter.NewJSONProgressOutput(progressWriter, false) 77 78 sigint := make(chan os.Signal, 1) 79 signal.Notify(sigint, os.Interrupt) 80 defer signal.Stop(sigint) 81 82 taskFilter := filters.NewArgs() 83 taskFilter.Add("service", serviceID) 84 taskFilter.Add("_up-to-date", "true") 85 86 getUpToDateTasks := func() ([]swarm.Task, error) { 87 return client.TaskList(ctx, types.TaskListOptions{Filters: taskFilter}) 88 } 89 90 var ( 91 updater progressUpdater 92 converged bool 93 convergedAt time.Time 94 monitor = 5 * time.Second 95 rollback bool 96 message *progress.Progress 97 ) 98 99 for { 100 service, _, err := client.ServiceInspectWithRaw(ctx, serviceID, types.ServiceInspectOptions{}) 101 if err != nil { 102 return err 103 } 104 105 if service.Spec.UpdateConfig != nil && service.Spec.UpdateConfig.Monitor != 0 { 106 monitor = service.Spec.UpdateConfig.Monitor 107 } 108 109 if updater == nil { 110 updater, err = initializeUpdater(service, progressOut) 111 if err != nil { 112 return err 113 } 114 } 115 116 if service.UpdateStatus != nil { 117 switch service.UpdateStatus.State { 118 case swarm.UpdateStateUpdating: 119 rollback = false 120 case swarm.UpdateStateCompleted: 121 if !converged { 122 return nil 123 } 124 case swarm.UpdateStatePaused: 125 return fmt.Errorf("service update paused: %s", service.UpdateStatus.Message) 126 case swarm.UpdateStateRollbackStarted: 127 if !rollback && service.UpdateStatus.Message != "" { 128 progressOut.WriteProgress(progress.Progress{ 129 ID: "rollback", 130 Action: service.UpdateStatus.Message, 131 }) 132 } 133 rollback = true 134 case swarm.UpdateStateRollbackPaused: 135 return fmt.Errorf("service rollback paused: %s", service.UpdateStatus.Message) 136 case swarm.UpdateStateRollbackCompleted: 137 if !converged { 138 message = &progress.Progress{ID: "rollback", Message: service.UpdateStatus.Message} 139 } 140 rollback = true 141 } 142 } 143 if converged && time.Since(convergedAt) >= monitor { 144 progressOut.WriteProgress(progress.Progress{ 145 ID: "verify", 146 Action: "Service converged", 147 }) 148 if message != nil { 149 progressOut.WriteProgress(*message) 150 } 151 return nil 152 } 153 154 tasks, err := getUpToDateTasks() 155 if err != nil { 156 return err 157 } 158 159 activeNodes, err := getActiveNodes(ctx, client) 160 if err != nil { 161 return err 162 } 163 164 converged, err = updater.update(service, tasks, activeNodes, rollback) 165 if err != nil { 166 return err 167 } 168 if converged { 169 // if the service is a job, there's no need to verify it. jobs are 170 // stay done once they're done. skip the verification and just end 171 // the progress monitoring. 172 // 173 // only job services have a non-nil job status, which means we can 174 // use the presence of this field to check if the service is a job 175 // here. 176 if service.JobStatus != nil { 177 progress.Message(progressOut, "", "job complete") 178 return nil 179 } 180 181 if convergedAt.IsZero() { 182 convergedAt = time.Now() 183 } 184 wait := monitor - time.Since(convergedAt) 185 if wait >= 0 { 186 progressOut.WriteProgress(progress.Progress{ 187 // Ideally this would have no ID, but 188 // the progress rendering code behaves 189 // poorly on an "action" with no ID. It 190 // returns the cursor to the beginning 191 // of the line, so the first character 192 // may be difficult to read. Then the 193 // output is overwritten by the shell 194 // prompt when the command finishes. 195 ID: "verify", 196 Action: fmt.Sprintf("Waiting %d seconds to verify that tasks are stable...", wait/time.Second+1), 197 }) 198 } 199 } else { 200 if !convergedAt.IsZero() { 201 progressOut.WriteProgress(progress.Progress{ 202 ID: "verify", 203 Action: "Detected task failure", 204 }) 205 } 206 convergedAt = time.Time{} 207 } 208 209 select { 210 case <-time.After(200 * time.Millisecond): 211 case <-sigint: 212 if !converged { 213 progress.Message(progressOut, "", "Operation continuing in background.") 214 progress.Messagef(progressOut, "", "Use `docker service ps %s` to check progress.", serviceID) 215 } 216 return nil 217 } 218 } 219 } 220 221 func getActiveNodes(ctx context.Context, client client.APIClient) (map[string]struct{}, error) { 222 nodes, err := client.NodeList(ctx, types.NodeListOptions{}) 223 if err != nil { 224 return nil, err 225 } 226 227 activeNodes := make(map[string]struct{}) 228 for _, n := range nodes { 229 if n.Status.State != swarm.NodeStateDown { 230 activeNodes[n.ID] = struct{}{} 231 } 232 } 233 return activeNodes, nil 234 } 235 236 func initializeUpdater(service swarm.Service, progressOut progress.Output) (progressUpdater, error) { 237 if service.Spec.Mode.Replicated != nil && service.Spec.Mode.Replicated.Replicas != nil { 238 return &replicatedProgressUpdater{ 239 progressOut: progressOut, 240 }, nil 241 } 242 if service.Spec.Mode.Global != nil { 243 return &globalProgressUpdater{ 244 progressOut: progressOut, 245 }, nil 246 } 247 if service.Spec.Mode.ReplicatedJob != nil { 248 return newReplicatedJobProgressUpdater(service, progressOut), nil 249 } 250 if service.Spec.Mode.GlobalJob != nil { 251 return &globalJobProgressUpdater{ 252 progressOut: progressOut, 253 }, nil 254 } 255 return nil, errors.New("unrecognized service mode") 256 } 257 258 func writeOverallProgress(progressOut progress.Output, numerator, denominator int, rollback bool) { 259 if rollback { 260 progressOut.WriteProgress(progress.Progress{ 261 ID: "overall progress", 262 Action: fmt.Sprintf("rolling back update: %d out of %d tasks", numerator, denominator), 263 }) 264 return 265 } 266 progressOut.WriteProgress(progress.Progress{ 267 ID: "overall progress", 268 Action: fmt.Sprintf("%d out of %d tasks", numerator, denominator), 269 }) 270 } 271 272 func truncError(errMsg string) string { 273 // Remove newlines from the error, which corrupt the output. 274 errMsg = strings.Replace(errMsg, "\n", " ", -1) 275 276 // Limit the length to 75 characters, so that even on narrow terminals 277 // this will not overflow to the next line. 278 if len(errMsg) > 75 { 279 errMsg = errMsg[:74] + "…" 280 } 281 return errMsg 282 } 283 284 type replicatedProgressUpdater struct { 285 progressOut progress.Output 286 287 // used for mapping slots to a contiguous space 288 // this also causes progress bars to appear in order 289 slotMap map[int]int 290 291 initialized bool 292 done bool 293 } 294 295 func (u *replicatedProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 296 if service.Spec.Mode.Replicated == nil || service.Spec.Mode.Replicated.Replicas == nil { 297 return false, errors.New("no replica count") 298 } 299 replicas := *service.Spec.Mode.Replicated.Replicas 300 301 if !u.initialized { 302 u.slotMap = make(map[int]int) 303 304 // Draw progress bars in order 305 writeOverallProgress(u.progressOut, 0, int(replicas), rollback) 306 307 if replicas <= maxProgressBars { 308 for i := uint64(1); i <= replicas; i++ { 309 progress.Update(u.progressOut, fmt.Sprintf("%d/%d", i, replicas), " ") 310 } 311 } 312 u.initialized = true 313 } 314 315 tasksBySlot := u.tasksBySlot(tasks, activeNodes) 316 317 // If we had reached a converged state, check if we are still converged. 318 if u.done { 319 for _, task := range tasksBySlot { 320 if task.Status.State != swarm.TaskStateRunning { 321 u.done = false 322 break 323 } 324 } 325 } 326 327 running := uint64(0) 328 329 for _, task := range tasksBySlot { 330 mappedSlot := u.slotMap[task.Slot] 331 if mappedSlot == 0 { 332 mappedSlot = len(u.slotMap) + 1 333 u.slotMap[task.Slot] = mappedSlot 334 } 335 336 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 337 running++ 338 } 339 340 u.writeTaskProgress(task, mappedSlot, replicas) 341 } 342 343 if !u.done { 344 writeOverallProgress(u.progressOut, int(running), int(replicas), rollback) 345 346 if running == replicas { 347 u.done = true 348 } 349 } 350 351 return running == replicas, nil 352 } 353 354 func (u *replicatedProgressUpdater) tasksBySlot(tasks []swarm.Task, activeNodes map[string]struct{}) map[int]swarm.Task { 355 // If there are multiple tasks with the same slot number, favor the one 356 // with the *lowest* desired state. This can happen in restart 357 // scenarios. 358 tasksBySlot := make(map[int]swarm.Task) 359 for _, task := range tasks { 360 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 361 continue 362 } 363 if existingTask, ok := tasksBySlot[task.Slot]; ok { 364 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 365 continue 366 } 367 // If the desired states match, observed state breaks 368 // ties. This can happen with the "start first" service 369 // update mode. 370 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 371 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 372 continue 373 } 374 } 375 if task.NodeID != "" { 376 if _, nodeActive := activeNodes[task.NodeID]; !nodeActive { 377 continue 378 } 379 } 380 tasksBySlot[task.Slot] = task 381 } 382 383 return tasksBySlot 384 } 385 386 func (u *replicatedProgressUpdater) writeTaskProgress(task swarm.Task, mappedSlot int, replicas uint64) { 387 if u.done || replicas > maxProgressBars || uint64(mappedSlot) > replicas { 388 return 389 } 390 391 if task.Status.Err != "" { 392 u.progressOut.WriteProgress(progress.Progress{ 393 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 394 Action: truncError(task.Status.Err), 395 }) 396 return 397 } 398 399 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 400 u.progressOut.WriteProgress(progress.Progress{ 401 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 402 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 403 Current: numberedStates[task.Status.State], 404 Total: maxProgress, 405 HideCounts: true, 406 }) 407 } 408 } 409 410 type globalProgressUpdater struct { 411 progressOut progress.Output 412 413 initialized bool 414 done bool 415 } 416 417 func (u *globalProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 418 tasksByNode := u.tasksByNode(tasks) 419 420 // We don't have perfect knowledge of how many nodes meet the 421 // constraints for this service. But the orchestrator creates tasks 422 // for all eligible nodes at the same time, so we should see all those 423 // nodes represented among the up-to-date tasks. 424 nodeCount := len(tasksByNode) 425 426 if !u.initialized { 427 if nodeCount == 0 { 428 // Two possibilities: either the orchestrator hasn't created 429 // the tasks yet, or the service doesn't meet constraints for 430 // any node. Either way, we wait. 431 u.progressOut.WriteProgress(progress.Progress{ 432 ID: "overall progress", 433 Action: "waiting for new tasks", 434 }) 435 return false, nil 436 } 437 438 writeOverallProgress(u.progressOut, 0, nodeCount, rollback) 439 u.initialized = true 440 } 441 442 // If we had reached a converged state, check if we are still converged. 443 if u.done { 444 for _, task := range tasksByNode { 445 if task.Status.State != swarm.TaskStateRunning { 446 u.done = false 447 break 448 } 449 } 450 } 451 452 running := 0 453 454 for _, task := range tasksByNode { 455 if _, nodeActive := activeNodes[task.NodeID]; nodeActive { 456 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 457 running++ 458 } 459 460 u.writeTaskProgress(task, nodeCount) 461 } 462 } 463 464 if !u.done { 465 writeOverallProgress(u.progressOut, running, nodeCount, rollback) 466 467 if running == nodeCount { 468 u.done = true 469 } 470 } 471 472 return running == nodeCount, nil 473 } 474 475 func (u *globalProgressUpdater) tasksByNode(tasks []swarm.Task) map[string]swarm.Task { 476 // If there are multiple tasks with the same node ID, favor the one 477 // with the *lowest* desired state. This can happen in restart 478 // scenarios. 479 tasksByNode := make(map[string]swarm.Task) 480 for _, task := range tasks { 481 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 482 continue 483 } 484 if existingTask, ok := tasksByNode[task.NodeID]; ok { 485 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 486 continue 487 } 488 489 // If the desired states match, observed state breaks 490 // ties. This can happen with the "start first" service 491 // update mode. 492 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 493 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 494 continue 495 } 496 497 } 498 tasksByNode[task.NodeID] = task 499 } 500 501 return tasksByNode 502 } 503 504 func (u *globalProgressUpdater) writeTaskProgress(task swarm.Task, nodeCount int) { 505 if u.done || nodeCount > maxProgressBars { 506 return 507 } 508 509 if task.Status.Err != "" { 510 u.progressOut.WriteProgress(progress.Progress{ 511 ID: stringid.TruncateID(task.NodeID), 512 Action: truncError(task.Status.Err), 513 }) 514 return 515 } 516 517 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 518 u.progressOut.WriteProgress(progress.Progress{ 519 ID: stringid.TruncateID(task.NodeID), 520 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 521 Current: numberedStates[task.Status.State], 522 Total: maxProgress, 523 HideCounts: true, 524 }) 525 } 526 } 527 528 // replicatedJobProgressUpdater outputs the progress of a replicated job. This 529 // progress consists of a few main elements. 530 // 531 // The first is the progress bar for the job as a whole. This shows the number 532 // of completed out of total tasks for the job. Tasks that are currently 533 // running are not counted. 534 // 535 // The second is the status of the "active" tasks for the job. We count a task 536 // as "active" if it has any non-terminal state, not just running. This is 537 // shown as a fraction of the maximum concurrent tasks that can be running, 538 // which is the less of MaxConcurrent or TotalCompletions - completed tasks. 539 type replicatedJobProgressUpdater struct { 540 progressOut progress.Output 541 542 // jobIteration is the service's job iteration, used to exclude tasks 543 // belonging to earlier iterations. 544 jobIteration uint64 545 546 // concurrent is the value of MaxConcurrent as an int. That is, the maximum 547 // number of tasks allowed to be run simultaneously. 548 concurrent int 549 550 // total is the value of TotalCompletions, the number of complete tasks 551 // desired. 552 total int 553 554 // initialized is set to true after the first time update is called. the 555 // first time update is called, the components of the progress UI are all 556 // written out in an initial pass. this ensure that they will subsequently 557 // be in order, no matter how they are updated. 558 initialized bool 559 560 // progressDigits is the number digits in total, so that we know how much 561 // to pad the job progress field with. 562 // 563 // when we're writing the number of completed over total tasks, we need to 564 // pad the numerator with spaces, so that the bar doesn't jump around. 565 // we'll compute that once on init, and then reuse it over and over. 566 // 567 // we compute this in the least clever way possible: convert to string 568 // with strconv.Itoa, then take the len. 569 progressDigits int 570 571 // activeDigits is the same, but for active tasks, and it applies to both 572 // the numerator and denominator. 573 activeDigits int 574 } 575 576 func newReplicatedJobProgressUpdater(service swarm.Service, progressOut progress.Output) *replicatedJobProgressUpdater { 577 u := &replicatedJobProgressUpdater{ 578 progressOut: progressOut, 579 concurrent: int(*service.Spec.Mode.ReplicatedJob.MaxConcurrent), 580 total: int(*service.Spec.Mode.ReplicatedJob.TotalCompletions), 581 jobIteration: service.JobStatus.JobIteration.Index, 582 } 583 u.progressDigits = len(strconv.Itoa(u.total)) 584 u.activeDigits = len(strconv.Itoa(u.concurrent)) 585 586 return u 587 } 588 589 // update writes out the progress of the replicated job. 590 func (u *replicatedJobProgressUpdater) update(_ swarm.Service, tasks []swarm.Task, _ map[string]struct{}, _ bool) (bool, error) { 591 if !u.initialized { 592 u.writeOverallProgress(0, 0) 593 594 // only write out progress bars if there will be less than the maximum 595 if u.total <= maxProgressBars { 596 for i := 1; i <= u.total; i++ { 597 u.progressOut.WriteProgress(progress.Progress{ 598 ID: fmt.Sprintf("%d/%d", i, u.total), 599 Action: " ", 600 }) 601 } 602 } 603 u.initialized = true 604 } 605 606 // tasksBySlot is a mapping of slot number to the task valid for that slot. 607 // it deduplicated tasks occupying the same numerical slot but in different 608 // states. 609 tasksBySlot := make(map[int]swarm.Task) 610 for _, task := range tasks { 611 // first, check if the task belongs to this service iteration. skip 612 // tasks belonging to other iterations. 613 if task.JobIteration == nil || task.JobIteration.Index != u.jobIteration { 614 continue 615 } 616 617 // then, if the task is in an unknown state, ignore it. 618 if numberedStates[task.DesiredState] == 0 || 619 numberedStates[task.Status.State] == 0 { 620 continue 621 } 622 623 // finally, check if the task already exists in the map 624 if existing, ok := tasksBySlot[task.Slot]; ok { 625 // if so, use the task with the lower actual state 626 if numberedStates[existing.Status.State] > numberedStates[task.Status.State] { 627 tasksBySlot[task.Slot] = task 628 } 629 } else { 630 // otherwise, just add it to the map. 631 tasksBySlot[task.Slot] = task 632 } 633 } 634 635 activeTasks := 0 636 completeTasks := 0 637 638 for i := 0; i < len(tasksBySlot); i++ { 639 task := tasksBySlot[i] 640 u.writeTaskProgress(task) 641 642 if numberedStates[task.Status.State] < numberedStates[swarm.TaskStateComplete] { 643 activeTasks++ 644 } 645 646 if task.Status.State == swarm.TaskStateComplete { 647 completeTasks++ 648 } 649 } 650 651 u.writeOverallProgress(activeTasks, completeTasks) 652 653 return completeTasks == u.total, nil 654 } 655 656 func (u *replicatedJobProgressUpdater) writeOverallProgress(active, completed int) { 657 u.progressOut.WriteProgress(progress.Progress{ 658 ID: "job progress", 659 Action: fmt.Sprintf( 660 // * means "use the next positional arg to compute padding" 661 "%*d out of %d complete", u.progressDigits, completed, u.total, 662 ), 663 Current: int64(completed), 664 Total: int64(u.total), 665 HideCounts: true, 666 }) 667 668 // actualDesired is the lesser of MaxConcurrent, or the remaining tasks 669 actualDesired := u.total - completed 670 if actualDesired > u.concurrent { 671 actualDesired = u.concurrent 672 } 673 674 u.progressOut.WriteProgress(progress.Progress{ 675 ID: "active tasks", 676 Action: fmt.Sprintf( 677 // [n] notation lets us select a specific argument, 1-indexed 678 // putting the [1] before the star means "make the string this 679 // length". putting the [2] or the [3] means "use this argument 680 // here" 681 // 682 // we pad both the numerator and the denominator because, as the 683 // job reaches its conclusion, the number of possible concurrent 684 // tasks will go down, as fewer than MaxConcurrent tasks are needed 685 // to complete the job. 686 "%[1]*[2]d out of %[1]*[3]d tasks", u.activeDigits, active, actualDesired, 687 ), 688 }) 689 } 690 691 func (u *replicatedJobProgressUpdater) writeTaskProgress(task swarm.Task) { 692 if u.total > maxProgressBars { 693 return 694 } 695 696 if task.Status.Err != "" { 697 u.progressOut.WriteProgress(progress.Progress{ 698 ID: fmt.Sprintf("%d/%d", task.Slot+1, u.total), 699 Action: truncError(task.Status.Err), 700 }) 701 return 702 } 703 704 u.progressOut.WriteProgress(progress.Progress{ 705 ID: fmt.Sprintf("%d/%d", task.Slot+1, u.total), 706 Action: fmt.Sprintf("%-*s", longestState, task.Status.State), 707 Current: numberedStates[task.Status.State], 708 Total: maxJobProgress, 709 HideCounts: true, 710 }) 711 } 712 713 // globalJobProgressUpdater is the progressUpdater for GlobalJob-mode services. 714 // Because GlobalJob services are so much simpler than ReplicatedJob services, 715 // this updater is in turn simpler as well. 716 type globalJobProgressUpdater struct { 717 progressOut progress.Output 718 719 // initialized is used to detect the first pass of update, and to perform 720 // first time initialization logic at that time. 721 initialized bool 722 723 // total is the total number of tasks expected for this job 724 total int 725 726 // progressDigits is the number of spaces to pad the numerator of the job 727 // progress field 728 progressDigits int 729 730 taskNodes map[string]struct{} 731 } 732 733 func (u *globalJobProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, _ bool) (bool, error) { 734 if !u.initialized { 735 // if there are not yet tasks, then return early. 736 if len(tasks) == 0 && len(activeNodes) != 0 { 737 u.progressOut.WriteProgress(progress.Progress{ 738 ID: "job progress", 739 Action: "waiting for tasks", 740 }) 741 return false, nil 742 } 743 744 // when a global job starts, all of its tasks are created at once, so 745 // we can use len(tasks) to know how many we're expecting. 746 u.taskNodes = map[string]struct{}{} 747 748 for _, task := range tasks { 749 // skip any tasks not belonging to this job iteration. 750 if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index { 751 continue 752 } 753 754 // collect the list of all node IDs for this service. 755 // 756 // basically, global jobs will execute on any new nodes that join 757 // the cluster in the future. to avoid making things complicated, 758 // we will only check the progress of the initial set of nodes. if 759 // any new nodes come online during the operation, we will ignore 760 // them. 761 u.taskNodes[task.NodeID] = struct{}{} 762 } 763 764 u.total = len(u.taskNodes) 765 u.progressDigits = len(strconv.Itoa(u.total)) 766 767 u.writeOverallProgress(0) 768 u.initialized = true 769 } 770 771 // tasksByNodeID maps a NodeID to the latest task for that Node ID. this 772 // lets us pick only the latest task for any given node. 773 tasksByNodeID := map[string]swarm.Task{} 774 775 for _, task := range tasks { 776 // skip any tasks not belonging to this job iteration 777 if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index { 778 continue 779 } 780 781 // if the task is not on one of the initial set of nodes, ignore it. 782 if _, ok := u.taskNodes[task.NodeID]; !ok { 783 continue 784 } 785 786 // if there is already a task recorded for this node, choose the one 787 // with the lower state 788 if oldtask, ok := tasksByNodeID[task.NodeID]; ok { 789 if numberedStates[oldtask.Status.State] > numberedStates[task.Status.State] { 790 tasksByNodeID[task.NodeID] = task 791 } 792 } else { 793 tasksByNodeID[task.NodeID] = task 794 } 795 } 796 797 complete := 0 798 for _, task := range tasksByNodeID { 799 u.writeTaskProgress(task) 800 if task.Status.State == swarm.TaskStateComplete { 801 complete++ 802 } 803 } 804 805 u.writeOverallProgress(complete) 806 return complete == u.total, nil 807 } 808 809 func (u *globalJobProgressUpdater) writeTaskProgress(task swarm.Task) { 810 if u.total > maxProgressBars { 811 return 812 } 813 814 if task.Status.Err != "" { 815 u.progressOut.WriteProgress(progress.Progress{ 816 ID: task.NodeID, 817 Action: truncError(task.Status.Err), 818 }) 819 return 820 } 821 822 u.progressOut.WriteProgress(progress.Progress{ 823 ID: task.NodeID, 824 Action: fmt.Sprintf("%-*s", longestState, task.Status.State), 825 Current: numberedStates[task.Status.State], 826 Total: maxJobProgress, 827 HideCounts: true, 828 }) 829 } 830 831 func (u *globalJobProgressUpdater) writeOverallProgress(complete int) { 832 // all tasks for a global job are active at once, so we only write out the 833 // total progress. 834 u.progressOut.WriteProgress(progress.Progress{ 835 // see (*replicatedJobProgressUpdater).writeOverallProgress for an 836 // explanation fo the advanced fmt use in this function. 837 ID: "job progress", 838 Action: fmt.Sprintf( 839 "%*d out of %d complete", u.progressDigits, complete, u.total, 840 ), 841 Current: int64(complete), 842 Total: int64(u.total), 843 HideCounts: true, 844 }) 845 }