github.com/khulnasoft/cli@v0.0.0-20240402070845-01bcad7beefa/cli/command/service/progress/progress.go (about) 1 package progress 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "os/signal" 10 "strconv" 11 "strings" 12 "time" 13 14 "github.com/docker/docker/api/types" 15 "github.com/docker/docker/api/types/filters" 16 "github.com/docker/docker/api/types/swarm" 17 "github.com/docker/docker/client" 18 "github.com/docker/docker/pkg/progress" 19 "github.com/docker/docker/pkg/streamformatter" 20 "github.com/docker/docker/pkg/stringid" 21 ) 22 23 var ( 24 numberedStates = map[swarm.TaskState]int64{ 25 swarm.TaskStateNew: 1, 26 swarm.TaskStateAllocated: 2, 27 swarm.TaskStatePending: 3, 28 swarm.TaskStateAssigned: 4, 29 swarm.TaskStateAccepted: 5, 30 swarm.TaskStatePreparing: 6, 31 swarm.TaskStateReady: 7, 32 swarm.TaskStateStarting: 8, 33 swarm.TaskStateRunning: 9, 34 35 // The following states are not actually shown in progress 36 // output, but are used internally for ordering. 37 swarm.TaskStateComplete: 10, 38 swarm.TaskStateShutdown: 11, 39 swarm.TaskStateFailed: 12, 40 swarm.TaskStateRejected: 13, 41 } 42 43 longestState int 44 ) 45 46 const ( 47 maxProgress = 9 48 maxProgressBars = 20 49 maxJobProgress = 10 50 ) 51 52 type progressUpdater interface { 53 update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) 54 } 55 56 func init() { 57 for state := range numberedStates { 58 // for jobs, we use the "complete" state, and so it should be factored 59 // in to the computation of the longest state. 60 if (!terminalState(state) || state == swarm.TaskStateComplete) && len(state) > longestState { 61 longestState = len(state) 62 } 63 } 64 } 65 66 func terminalState(state swarm.TaskState) bool { 67 return numberedStates[state] > numberedStates[swarm.TaskStateRunning] 68 } 69 70 // ServiceProgress outputs progress information for convergence of a service. 71 // 72 //nolint:gocyclo 73 func ServiceProgress(ctx context.Context, apiClient client.APIClient, serviceID string, progressWriter io.WriteCloser) error { 74 defer progressWriter.Close() 75 76 progressOut := streamformatter.NewJSONProgressOutput(progressWriter, false) 77 78 sigint := make(chan os.Signal, 1) 79 signal.Notify(sigint, os.Interrupt) 80 defer signal.Stop(sigint) 81 82 taskFilter := filters.NewArgs() 83 taskFilter.Add("service", serviceID) 84 taskFilter.Add("_up-to-date", "true") 85 86 getUpToDateTasks := func() ([]swarm.Task, error) { 87 return apiClient.TaskList(ctx, types.TaskListOptions{Filters: taskFilter}) 88 } 89 90 var ( 91 updater progressUpdater 92 converged bool 93 convergedAt time.Time 94 monitor = 5 * time.Second 95 rollback bool 96 message *progress.Progress 97 ) 98 99 for { 100 service, _, err := apiClient.ServiceInspectWithRaw(ctx, serviceID, types.ServiceInspectOptions{}) 101 if err != nil { 102 return err 103 } 104 105 if service.Spec.UpdateConfig != nil && service.Spec.UpdateConfig.Monitor != 0 { 106 monitor = service.Spec.UpdateConfig.Monitor 107 } 108 109 if updater == nil { 110 updater, err = initializeUpdater(service, progressOut) 111 if err != nil { 112 return err 113 } 114 } 115 116 if service.UpdateStatus != nil { 117 switch service.UpdateStatus.State { 118 case swarm.UpdateStateUpdating: 119 rollback = false 120 case swarm.UpdateStateCompleted: 121 if !converged { 122 return nil 123 } 124 case swarm.UpdateStatePaused: 125 return fmt.Errorf("service update paused: %s", service.UpdateStatus.Message) 126 case swarm.UpdateStateRollbackStarted: 127 if !rollback && service.UpdateStatus.Message != "" { 128 progressOut.WriteProgress(progress.Progress{ 129 ID: "rollback", 130 Action: service.UpdateStatus.Message, 131 }) 132 } 133 rollback = true 134 case swarm.UpdateStateRollbackPaused: 135 return fmt.Errorf("service rollback paused: %s", service.UpdateStatus.Message) 136 case swarm.UpdateStateRollbackCompleted: 137 if !converged { 138 message = &progress.Progress{ID: "rollback", Message: service.UpdateStatus.Message} 139 } 140 rollback = true 141 } 142 } 143 if converged && time.Since(convergedAt) >= monitor { 144 progressOut.WriteProgress(progress.Progress{ 145 ID: "verify", 146 Action: fmt.Sprintf("Service %s converged", serviceID), 147 }) 148 if message != nil { 149 progressOut.WriteProgress(*message) 150 } 151 return nil 152 } 153 154 tasks, err := getUpToDateTasks() 155 if err != nil { 156 return err 157 } 158 159 activeNodes, err := getActiveNodes(ctx, apiClient) 160 if err != nil { 161 return err 162 } 163 164 converged, err = updater.update(service, tasks, activeNodes, rollback) 165 if err != nil { 166 return err 167 } 168 if converged { 169 // if the service is a job, there's no need to verify it. jobs are 170 // stay done once they're done. skip the verification and just end 171 // the progress monitoring. 172 // 173 // only job services have a non-nil job status, which means we can 174 // use the presence of this field to check if the service is a job 175 // here. 176 if service.JobStatus != nil { 177 progress.Message(progressOut, "", "job complete") 178 return nil 179 } 180 181 if convergedAt.IsZero() { 182 convergedAt = time.Now() 183 } 184 wait := monitor - time.Since(convergedAt) 185 if wait >= 0 { 186 progressOut.WriteProgress(progress.Progress{ 187 // Ideally this would have no ID, but 188 // the progress rendering code behaves 189 // poorly on an "action" with no ID. It 190 // returns the cursor to the beginning 191 // of the line, so the first character 192 // may be difficult to read. Then the 193 // output is overwritten by the shell 194 // prompt when the command finishes. 195 ID: "verify", 196 Action: fmt.Sprintf("Waiting %d seconds to verify that tasks are stable...", wait/time.Second+1), 197 }) 198 } 199 } else { 200 if !convergedAt.IsZero() { 201 progressOut.WriteProgress(progress.Progress{ 202 ID: "verify", 203 Action: "Detected task failure", 204 }) 205 } 206 convergedAt = time.Time{} 207 } 208 209 select { 210 case <-time.After(200 * time.Millisecond): 211 case <-sigint: 212 if !converged { 213 progress.Message(progressOut, "", "Operation continuing in background.") 214 progress.Messagef(progressOut, "", "Use `docker service ps %s` to check progress.", serviceID) 215 } 216 return nil 217 } 218 } 219 } 220 221 func getActiveNodes(ctx context.Context, apiClient client.APIClient) (map[string]struct{}, error) { 222 nodes, err := apiClient.NodeList(ctx, types.NodeListOptions{}) 223 if err != nil { 224 return nil, err 225 } 226 227 activeNodes := make(map[string]struct{}) 228 for _, n := range nodes { 229 if n.Status.State != swarm.NodeStateDown { 230 activeNodes[n.ID] = struct{}{} 231 } 232 } 233 return activeNodes, nil 234 } 235 236 func initializeUpdater(service swarm.Service, progressOut progress.Output) (progressUpdater, error) { 237 if service.Spec.Mode.Replicated != nil && service.Spec.Mode.Replicated.Replicas != nil { 238 return &replicatedProgressUpdater{ 239 progressOut: progressOut, 240 }, nil 241 } 242 if service.Spec.Mode.Global != nil { 243 return &globalProgressUpdater{ 244 progressOut: progressOut, 245 }, nil 246 } 247 if service.Spec.Mode.ReplicatedJob != nil { 248 return newReplicatedJobProgressUpdater(service, progressOut), nil 249 } 250 if service.Spec.Mode.GlobalJob != nil { 251 return &globalJobProgressUpdater{ 252 progressOut: progressOut, 253 }, nil 254 } 255 return nil, errors.New("unrecognized service mode") 256 } 257 258 func writeOverallProgress(progressOut progress.Output, numerator, denominator int, rollback bool) { 259 if rollback { 260 progressOut.WriteProgress(progress.Progress{ 261 ID: "overall progress", 262 Action: fmt.Sprintf("rolling back update: %d out of %d tasks", numerator, denominator), 263 }) 264 return 265 } 266 progressOut.WriteProgress(progress.Progress{ 267 ID: "overall progress", 268 Action: fmt.Sprintf("%d out of %d tasks", numerator, denominator), 269 }) 270 } 271 272 func truncError(errMsg string) string { 273 // Remove newlines from the error, which corrupt the output. 274 errMsg = strings.ReplaceAll(errMsg, "\n", " ") 275 276 // Limit the length to 75 characters, so that even on narrow terminals 277 // this will not overflow to the next line. 278 if len(errMsg) > 75 { 279 errMsg = errMsg[:74] + "…" 280 } 281 return errMsg 282 } 283 284 type replicatedProgressUpdater struct { 285 progressOut progress.Output 286 287 // used for mapping slots to a contiguous space 288 // this also causes progress bars to appear in order 289 slotMap map[int]int 290 291 initialized bool 292 done bool 293 } 294 295 func (u *replicatedProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 296 if service.Spec.Mode.Replicated == nil || service.Spec.Mode.Replicated.Replicas == nil { 297 return false, errors.New("no replica count") 298 } 299 replicas := *service.Spec.Mode.Replicated.Replicas 300 301 if !u.initialized { 302 u.slotMap = make(map[int]int) 303 304 // Draw progress bars in order 305 writeOverallProgress(u.progressOut, 0, int(replicas), rollback) 306 307 if replicas <= maxProgressBars { 308 for i := uint64(1); i <= replicas; i++ { 309 progress.Update(u.progressOut, fmt.Sprintf("%d/%d", i, replicas), " ") 310 } 311 } 312 u.initialized = true 313 } 314 315 tasksBySlot := u.tasksBySlot(tasks, activeNodes) 316 317 // If we had reached a converged state, check if we are still converged. 318 if u.done { 319 for _, task := range tasksBySlot { 320 if task.Status.State != swarm.TaskStateRunning { 321 u.done = false 322 break 323 } 324 } 325 } 326 327 running := uint64(0) 328 329 for _, task := range tasksBySlot { 330 mappedSlot := u.slotMap[task.Slot] 331 if mappedSlot == 0 { 332 mappedSlot = len(u.slotMap) + 1 333 u.slotMap[task.Slot] = mappedSlot 334 } 335 336 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 337 running++ 338 } 339 340 u.writeTaskProgress(task, mappedSlot, replicas) 341 } 342 343 if !u.done { 344 writeOverallProgress(u.progressOut, int(running), int(replicas), rollback) 345 346 if running == replicas { 347 u.done = true 348 } 349 } 350 351 return running == replicas, nil 352 } 353 354 func (u *replicatedProgressUpdater) tasksBySlot(tasks []swarm.Task, activeNodes map[string]struct{}) map[int]swarm.Task { 355 // If there are multiple tasks with the same slot number, favor the one 356 // with the *lowest* desired state. This can happen in restart 357 // scenarios. 358 tasksBySlot := make(map[int]swarm.Task) 359 for _, task := range tasks { 360 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 361 continue 362 } 363 if existingTask, ok := tasksBySlot[task.Slot]; ok { 364 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 365 continue 366 } 367 // If the desired states match, observed state breaks 368 // ties. This can happen with the "start first" service 369 // update mode. 370 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 371 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 372 continue 373 } 374 } 375 if task.NodeID != "" { 376 if _, nodeActive := activeNodes[task.NodeID]; !nodeActive { 377 continue 378 } 379 } 380 tasksBySlot[task.Slot] = task 381 } 382 383 return tasksBySlot 384 } 385 386 func (u *replicatedProgressUpdater) writeTaskProgress(task swarm.Task, mappedSlot int, replicas uint64) { 387 if u.done || replicas > maxProgressBars || uint64(mappedSlot) > replicas { 388 return 389 } 390 391 if task.Status.Err != "" { 392 u.progressOut.WriteProgress(progress.Progress{ 393 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 394 Action: truncError(task.Status.Err), 395 }) 396 return 397 } 398 399 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 400 u.progressOut.WriteProgress(progress.Progress{ 401 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 402 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 403 Current: numberedStates[task.Status.State], 404 Total: maxProgress, 405 HideCounts: true, 406 }) 407 } 408 } 409 410 type globalProgressUpdater struct { 411 progressOut progress.Output 412 413 initialized bool 414 done bool 415 } 416 417 func (u *globalProgressUpdater) update(_ swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 418 tasksByNode := u.tasksByNode(tasks) 419 420 // We don't have perfect knowledge of how many nodes meet the 421 // constraints for this service. But the orchestrator creates tasks 422 // for all eligible nodes at the same time, so we should see all those 423 // nodes represented among the up-to-date tasks. 424 nodeCount := len(tasksByNode) 425 426 if !u.initialized { 427 if nodeCount == 0 { 428 // Two possibilities: either the orchestrator hasn't created 429 // the tasks yet, or the service doesn't meet constraints for 430 // any node. Either way, we wait. 431 u.progressOut.WriteProgress(progress.Progress{ 432 ID: "overall progress", 433 Action: "waiting for new tasks", 434 }) 435 return false, nil 436 } 437 438 writeOverallProgress(u.progressOut, 0, nodeCount, rollback) 439 u.initialized = true 440 } 441 442 // If we had reached a converged state, check if we are still converged. 443 if u.done { 444 for _, task := range tasksByNode { 445 if task.Status.State != swarm.TaskStateRunning { 446 u.done = false 447 break 448 } 449 } 450 } 451 452 running := 0 453 454 for _, task := range tasksByNode { 455 if _, nodeActive := activeNodes[task.NodeID]; nodeActive { 456 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 457 running++ 458 } 459 460 u.writeTaskProgress(task, nodeCount) 461 } 462 } 463 464 if !u.done { 465 writeOverallProgress(u.progressOut, running, nodeCount, rollback) 466 467 if running == nodeCount { 468 u.done = true 469 } 470 } 471 472 return running == nodeCount, nil 473 } 474 475 func (u *globalProgressUpdater) tasksByNode(tasks []swarm.Task) map[string]swarm.Task { 476 // If there are multiple tasks with the same node ID, favor the one 477 // with the *lowest* desired state. This can happen in restart 478 // scenarios. 479 tasksByNode := make(map[string]swarm.Task) 480 for _, task := range tasks { 481 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 482 continue 483 } 484 if existingTask, ok := tasksByNode[task.NodeID]; ok { 485 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 486 continue 487 } 488 489 // If the desired states match, observed state breaks 490 // ties. This can happen with the "start first" service 491 // update mode. 492 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 493 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 494 continue 495 } 496 } 497 tasksByNode[task.NodeID] = task 498 } 499 500 return tasksByNode 501 } 502 503 func (u *globalProgressUpdater) writeTaskProgress(task swarm.Task, nodeCount int) { 504 if u.done || nodeCount > maxProgressBars { 505 return 506 } 507 508 if task.Status.Err != "" { 509 u.progressOut.WriteProgress(progress.Progress{ 510 ID: stringid.TruncateID(task.NodeID), 511 Action: truncError(task.Status.Err), 512 }) 513 return 514 } 515 516 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 517 u.progressOut.WriteProgress(progress.Progress{ 518 ID: stringid.TruncateID(task.NodeID), 519 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 520 Current: numberedStates[task.Status.State], 521 Total: maxProgress, 522 HideCounts: true, 523 }) 524 } 525 } 526 527 // replicatedJobProgressUpdater outputs the progress of a replicated job. This 528 // progress consists of a few main elements. 529 // 530 // The first is the progress bar for the job as a whole. This shows the number 531 // of completed out of total tasks for the job. Tasks that are currently 532 // running are not counted. 533 // 534 // The second is the status of the "active" tasks for the job. We count a task 535 // as "active" if it has any non-terminal state, not just running. This is 536 // shown as a fraction of the maximum concurrent tasks that can be running, 537 // which is the less of MaxConcurrent or TotalCompletions - completed tasks. 538 type replicatedJobProgressUpdater struct { 539 progressOut progress.Output 540 541 // jobIteration is the service's job iteration, used to exclude tasks 542 // belonging to earlier iterations. 543 jobIteration uint64 544 545 // concurrent is the value of MaxConcurrent as an int. That is, the maximum 546 // number of tasks allowed to be run simultaneously. 547 concurrent int 548 549 // total is the value of TotalCompletions, the number of complete tasks 550 // desired. 551 total int 552 553 // initialized is set to true after the first time update is called. the 554 // first time update is called, the components of the progress UI are all 555 // written out in an initial pass. this ensure that they will subsequently 556 // be in order, no matter how they are updated. 557 initialized bool 558 559 // progressDigits is the number digits in total, so that we know how much 560 // to pad the job progress field with. 561 // 562 // when we're writing the number of completed over total tasks, we need to 563 // pad the numerator with spaces, so that the bar doesn't jump around. 564 // we'll compute that once on init, and then reuse it over and over. 565 // 566 // we compute this in the least clever way possible: convert to string 567 // with strconv.Itoa, then take the len. 568 progressDigits int 569 570 // activeDigits is the same, but for active tasks, and it applies to both 571 // the numerator and denominator. 572 activeDigits int 573 } 574 575 func newReplicatedJobProgressUpdater(service swarm.Service, progressOut progress.Output) *replicatedJobProgressUpdater { 576 u := &replicatedJobProgressUpdater{ 577 progressOut: progressOut, 578 concurrent: int(*service.Spec.Mode.ReplicatedJob.MaxConcurrent), 579 total: int(*service.Spec.Mode.ReplicatedJob.TotalCompletions), 580 jobIteration: service.JobStatus.JobIteration.Index, 581 } 582 u.progressDigits = len(strconv.Itoa(u.total)) 583 u.activeDigits = len(strconv.Itoa(u.concurrent)) 584 585 return u 586 } 587 588 // update writes out the progress of the replicated job. 589 func (u *replicatedJobProgressUpdater) update(_ swarm.Service, tasks []swarm.Task, _ map[string]struct{}, _ bool) (bool, error) { 590 if !u.initialized { 591 u.writeOverallProgress(0, 0) 592 593 // only write out progress bars if there will be less than the maximum 594 if u.total <= maxProgressBars { 595 for i := 1; i <= u.total; i++ { 596 u.progressOut.WriteProgress(progress.Progress{ 597 ID: fmt.Sprintf("%d/%d", i, u.total), 598 Action: " ", 599 }) 600 } 601 } 602 u.initialized = true 603 } 604 605 // tasksBySlot is a mapping of slot number to the task valid for that slot. 606 // it deduplicated tasks occupying the same numerical slot but in different 607 // states. 608 tasksBySlot := make(map[int]swarm.Task) 609 for _, task := range tasks { 610 // first, check if the task belongs to this service iteration. skip 611 // tasks belonging to other iterations. 612 if task.JobIteration == nil || task.JobIteration.Index != u.jobIteration { 613 continue 614 } 615 616 // then, if the task is in an unknown state, ignore it. 617 if numberedStates[task.DesiredState] == 0 || 618 numberedStates[task.Status.State] == 0 { 619 continue 620 } 621 622 // finally, check if the task already exists in the map 623 if existing, ok := tasksBySlot[task.Slot]; ok { 624 // if so, use the task with the lower actual state 625 if numberedStates[existing.Status.State] > numberedStates[task.Status.State] { 626 tasksBySlot[task.Slot] = task 627 } 628 } else { 629 // otherwise, just add it to the map. 630 tasksBySlot[task.Slot] = task 631 } 632 } 633 634 activeTasks := 0 635 completeTasks := 0 636 637 for i := 0; i < len(tasksBySlot); i++ { 638 task := tasksBySlot[i] 639 u.writeTaskProgress(task) 640 641 if numberedStates[task.Status.State] < numberedStates[swarm.TaskStateComplete] { 642 activeTasks++ 643 } 644 645 if task.Status.State == swarm.TaskStateComplete { 646 completeTasks++ 647 } 648 } 649 650 u.writeOverallProgress(activeTasks, completeTasks) 651 652 return completeTasks == u.total, nil 653 } 654 655 func (u *replicatedJobProgressUpdater) writeOverallProgress(active, completed int) { 656 u.progressOut.WriteProgress(progress.Progress{ 657 ID: "job progress", 658 Action: fmt.Sprintf( 659 // * means "use the next positional arg to compute padding" 660 "%*d out of %d complete", u.progressDigits, completed, u.total, 661 ), 662 Current: int64(completed), 663 Total: int64(u.total), 664 HideCounts: true, 665 }) 666 667 // actualDesired is the lesser of MaxConcurrent, or the remaining tasks 668 actualDesired := u.total - completed 669 if actualDesired > u.concurrent { 670 actualDesired = u.concurrent 671 } 672 673 u.progressOut.WriteProgress(progress.Progress{ 674 ID: "active tasks", 675 Action: fmt.Sprintf( 676 // [n] notation lets us select a specific argument, 1-indexed 677 // putting the [1] before the star means "make the string this 678 // length". putting the [2] or the [3] means "use this argument 679 // here" 680 // 681 // we pad both the numerator and the denominator because, as the 682 // job reaches its conclusion, the number of possible concurrent 683 // tasks will go down, as fewer than MaxConcurrent tasks are needed 684 // to complete the job. 685 "%[1]*[2]d out of %[1]*[3]d tasks", u.activeDigits, active, actualDesired, 686 ), 687 }) 688 } 689 690 func (u *replicatedJobProgressUpdater) writeTaskProgress(task swarm.Task) { 691 if u.total > maxProgressBars { 692 return 693 } 694 695 if task.Status.Err != "" { 696 u.progressOut.WriteProgress(progress.Progress{ 697 ID: fmt.Sprintf("%d/%d", task.Slot+1, u.total), 698 Action: truncError(task.Status.Err), 699 }) 700 return 701 } 702 703 u.progressOut.WriteProgress(progress.Progress{ 704 ID: fmt.Sprintf("%d/%d", task.Slot+1, u.total), 705 Action: fmt.Sprintf("%-*s", longestState, task.Status.State), 706 Current: numberedStates[task.Status.State], 707 Total: maxJobProgress, 708 HideCounts: true, 709 }) 710 } 711 712 // globalJobProgressUpdater is the progressUpdater for GlobalJob-mode services. 713 // Because GlobalJob services are so much simpler than ReplicatedJob services, 714 // this updater is in turn simpler as well. 715 type globalJobProgressUpdater struct { 716 progressOut progress.Output 717 718 // initialized is used to detect the first pass of update, and to perform 719 // first time initialization logic at that time. 720 initialized bool 721 722 // total is the total number of tasks expected for this job 723 total int 724 725 // progressDigits is the number of spaces to pad the numerator of the job 726 // progress field 727 progressDigits int 728 729 taskNodes map[string]struct{} 730 } 731 732 func (u *globalJobProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, _ bool) (bool, error) { 733 if !u.initialized { 734 // if there are not yet tasks, then return early. 735 if len(tasks) == 0 && len(activeNodes) != 0 { 736 u.progressOut.WriteProgress(progress.Progress{ 737 ID: "job progress", 738 Action: "waiting for tasks", 739 }) 740 return false, nil 741 } 742 743 // when a global job starts, all of its tasks are created at once, so 744 // we can use len(tasks) to know how many we're expecting. 745 u.taskNodes = map[string]struct{}{} 746 747 for _, task := range tasks { 748 // skip any tasks not belonging to this job iteration. 749 if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index { 750 continue 751 } 752 753 // collect the list of all node IDs for this service. 754 // 755 // basically, global jobs will execute on any new nodes that join 756 // the cluster in the future. to avoid making things complicated, 757 // we will only check the progress of the initial set of nodes. if 758 // any new nodes come online during the operation, we will ignore 759 // them. 760 u.taskNodes[task.NodeID] = struct{}{} 761 } 762 763 u.total = len(u.taskNodes) 764 u.progressDigits = len(strconv.Itoa(u.total)) 765 766 u.writeOverallProgress(0) 767 u.initialized = true 768 } 769 770 // tasksByNodeID maps a NodeID to the latest task for that Node ID. this 771 // lets us pick only the latest task for any given node. 772 tasksByNodeID := map[string]swarm.Task{} 773 774 for _, task := range tasks { 775 // skip any tasks not belonging to this job iteration 776 if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index { 777 continue 778 } 779 780 // if the task is not on one of the initial set of nodes, ignore it. 781 if _, ok := u.taskNodes[task.NodeID]; !ok { 782 continue 783 } 784 785 // if there is already a task recorded for this node, choose the one 786 // with the lower state 787 if oldtask, ok := tasksByNodeID[task.NodeID]; ok { 788 if numberedStates[oldtask.Status.State] > numberedStates[task.Status.State] { 789 tasksByNodeID[task.NodeID] = task 790 } 791 } else { 792 tasksByNodeID[task.NodeID] = task 793 } 794 } 795 796 complete := 0 797 for _, task := range tasksByNodeID { 798 u.writeTaskProgress(task) 799 if task.Status.State == swarm.TaskStateComplete { 800 complete++ 801 } 802 } 803 804 u.writeOverallProgress(complete) 805 return complete == u.total, nil 806 } 807 808 func (u *globalJobProgressUpdater) writeTaskProgress(task swarm.Task) { 809 if u.total > maxProgressBars { 810 return 811 } 812 813 if task.Status.Err != "" { 814 u.progressOut.WriteProgress(progress.Progress{ 815 ID: task.NodeID, 816 Action: truncError(task.Status.Err), 817 }) 818 return 819 } 820 821 u.progressOut.WriteProgress(progress.Progress{ 822 ID: task.NodeID, 823 Action: fmt.Sprintf("%-*s", longestState, task.Status.State), 824 Current: numberedStates[task.Status.State], 825 Total: maxJobProgress, 826 HideCounts: true, 827 }) 828 } 829 830 func (u *globalJobProgressUpdater) writeOverallProgress(complete int) { 831 // all tasks for a global job are active at once, so we only write out the 832 // total progress. 833 u.progressOut.WriteProgress(progress.Progress{ 834 // see (*replicatedJobProgressUpdater).writeOverallProgress for an 835 // explanation fo the advanced fmt use in this function. 836 ID: "job progress", 837 Action: fmt.Sprintf( 838 "%*d out of %d complete", u.progressDigits, complete, u.total, 839 ), 840 Current: int64(complete), 841 Total: int64(u.total), 842 HideCounts: true, 843 }) 844 }