github.com/ali-iotechsys/cli@v20.10.0+incompatible/cli/command/service/progress/progress.go (about) 1 package progress 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "os/signal" 10 "strconv" 11 "strings" 12 "time" 13 14 "github.com/docker/docker/api/types" 15 "github.com/docker/docker/api/types/filters" 16 "github.com/docker/docker/api/types/swarm" 17 "github.com/docker/docker/client" 18 "github.com/docker/docker/pkg/progress" 19 "github.com/docker/docker/pkg/streamformatter" 20 "github.com/docker/docker/pkg/stringid" 21 ) 22 23 var ( 24 numberedStates = map[swarm.TaskState]int64{ 25 swarm.TaskStateNew: 1, 26 swarm.TaskStateAllocated: 2, 27 swarm.TaskStatePending: 3, 28 swarm.TaskStateAssigned: 4, 29 swarm.TaskStateAccepted: 5, 30 swarm.TaskStatePreparing: 6, 31 swarm.TaskStateReady: 7, 32 swarm.TaskStateStarting: 8, 33 swarm.TaskStateRunning: 9, 34 35 // The following states are not actually shown in progress 36 // output, but are used internally for ordering. 37 swarm.TaskStateComplete: 10, 38 swarm.TaskStateShutdown: 11, 39 swarm.TaskStateFailed: 12, 40 swarm.TaskStateRejected: 13, 41 } 42 43 longestState int 44 ) 45 46 const ( 47 maxProgress = 9 48 maxProgressBars = 20 49 maxJobProgress = 10 50 ) 51 52 type progressUpdater interface { 53 update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) 54 } 55 56 func init() { 57 for state := range numberedStates { 58 // for jobs, we use the "complete" state, and so it should be factored 59 // in to the computation of the longest state. 60 if (!terminalState(state) || state == swarm.TaskStateComplete) && len(state) > longestState { 61 longestState = len(state) 62 } 63 } 64 } 65 66 func terminalState(state swarm.TaskState) bool { 67 return numberedStates[state] > numberedStates[swarm.TaskStateRunning] 68 } 69 70 func stateToProgress(state swarm.TaskState, rollback bool) int64 { 71 if !rollback { 72 return numberedStates[state] 73 } 74 return numberedStates[swarm.TaskStateRunning] - numberedStates[state] 75 } 76 77 // ServiceProgress outputs progress information for convergence of a service. 78 // nolint: gocyclo 79 func ServiceProgress(ctx context.Context, client client.APIClient, serviceID string, progressWriter io.WriteCloser) error { 80 defer progressWriter.Close() 81 82 progressOut := streamformatter.NewJSONProgressOutput(progressWriter, false) 83 84 sigint := make(chan os.Signal, 1) 85 signal.Notify(sigint, os.Interrupt) 86 defer signal.Stop(sigint) 87 88 taskFilter := filters.NewArgs() 89 taskFilter.Add("service", serviceID) 90 taskFilter.Add("_up-to-date", "true") 91 92 getUpToDateTasks := func() ([]swarm.Task, error) { 93 return client.TaskList(ctx, types.TaskListOptions{Filters: taskFilter}) 94 } 95 96 var ( 97 updater progressUpdater 98 converged bool 99 convergedAt time.Time 100 monitor = 5 * time.Second 101 rollback bool 102 ) 103 104 for { 105 service, _, err := client.ServiceInspectWithRaw(ctx, serviceID, types.ServiceInspectOptions{}) 106 if err != nil { 107 return err 108 } 109 110 if service.Spec.UpdateConfig != nil && service.Spec.UpdateConfig.Monitor != 0 { 111 monitor = service.Spec.UpdateConfig.Monitor 112 } 113 114 if updater == nil { 115 updater, err = initializeUpdater(service, progressOut) 116 if err != nil { 117 return err 118 } 119 } 120 121 if service.UpdateStatus != nil { 122 switch service.UpdateStatus.State { 123 case swarm.UpdateStateUpdating: 124 rollback = false 125 case swarm.UpdateStateCompleted: 126 if !converged { 127 return nil 128 } 129 case swarm.UpdateStatePaused: 130 return fmt.Errorf("service update paused: %s", service.UpdateStatus.Message) 131 case swarm.UpdateStateRollbackStarted: 132 if !rollback && service.UpdateStatus.Message != "" { 133 progressOut.WriteProgress(progress.Progress{ 134 ID: "rollback", 135 Action: service.UpdateStatus.Message, 136 }) 137 } 138 rollback = true 139 case swarm.UpdateStateRollbackPaused: 140 return fmt.Errorf("service rollback paused: %s", service.UpdateStatus.Message) 141 case swarm.UpdateStateRollbackCompleted: 142 if !converged { 143 return fmt.Errorf("service rolled back: %s", service.UpdateStatus.Message) 144 } 145 } 146 } 147 if converged && time.Since(convergedAt) >= monitor { 148 progressOut.WriteProgress(progress.Progress{ 149 ID: "verify", 150 Action: "Service converged", 151 }) 152 153 return nil 154 } 155 156 tasks, err := getUpToDateTasks() 157 if err != nil { 158 return err 159 } 160 161 activeNodes, err := getActiveNodes(ctx, client) 162 if err != nil { 163 return err 164 } 165 166 converged, err = updater.update(service, tasks, activeNodes, rollback) 167 if err != nil { 168 return err 169 } 170 if converged { 171 // if the service is a job, there's no need to verify it. jobs are 172 // stay done once they're done. skip the verification and just end 173 // the progress monitoring. 174 // 175 // only job services have a non-nil job status, which means we can 176 // use the presence of this field to check if the service is a job 177 // here. 178 if service.JobStatus != nil { 179 progress.Message(progressOut, "", "job complete") 180 return nil 181 } 182 183 if convergedAt.IsZero() { 184 convergedAt = time.Now() 185 } 186 wait := monitor - time.Since(convergedAt) 187 if wait >= 0 { 188 progressOut.WriteProgress(progress.Progress{ 189 // Ideally this would have no ID, but 190 // the progress rendering code behaves 191 // poorly on an "action" with no ID. It 192 // returns the cursor to the beginning 193 // of the line, so the first character 194 // may be difficult to read. Then the 195 // output is overwritten by the shell 196 // prompt when the command finishes. 197 ID: "verify", 198 Action: fmt.Sprintf("Waiting %d seconds to verify that tasks are stable...", wait/time.Second+1), 199 }) 200 } 201 } else { 202 if !convergedAt.IsZero() { 203 progressOut.WriteProgress(progress.Progress{ 204 ID: "verify", 205 Action: "Detected task failure", 206 }) 207 } 208 convergedAt = time.Time{} 209 } 210 211 select { 212 case <-time.After(200 * time.Millisecond): 213 case <-sigint: 214 if !converged { 215 progress.Message(progressOut, "", "Operation continuing in background.") 216 progress.Messagef(progressOut, "", "Use `docker service ps %s` to check progress.", serviceID) 217 } 218 return nil 219 } 220 } 221 } 222 223 func getActiveNodes(ctx context.Context, client client.APIClient) (map[string]struct{}, error) { 224 nodes, err := client.NodeList(ctx, types.NodeListOptions{}) 225 if err != nil { 226 return nil, err 227 } 228 229 activeNodes := make(map[string]struct{}) 230 for _, n := range nodes { 231 if n.Status.State != swarm.NodeStateDown { 232 activeNodes[n.ID] = struct{}{} 233 } 234 } 235 return activeNodes, nil 236 } 237 238 func initializeUpdater(service swarm.Service, progressOut progress.Output) (progressUpdater, error) { 239 if service.Spec.Mode.Replicated != nil && service.Spec.Mode.Replicated.Replicas != nil { 240 return &replicatedProgressUpdater{ 241 progressOut: progressOut, 242 }, nil 243 } 244 if service.Spec.Mode.Global != nil { 245 return &globalProgressUpdater{ 246 progressOut: progressOut, 247 }, nil 248 } 249 if service.Spec.Mode.ReplicatedJob != nil { 250 return newReplicatedJobProgressUpdater(service, progressOut), nil 251 } 252 if service.Spec.Mode.GlobalJob != nil { 253 return &globalJobProgressUpdater{ 254 progressOut: progressOut, 255 }, nil 256 } 257 return nil, errors.New("unrecognized service mode") 258 } 259 260 func writeOverallProgress(progressOut progress.Output, numerator, denominator int, rollback bool) { 261 if rollback { 262 progressOut.WriteProgress(progress.Progress{ 263 ID: "overall progress", 264 Action: fmt.Sprintf("rolling back update: %d out of %d tasks", numerator, denominator), 265 }) 266 return 267 } 268 progressOut.WriteProgress(progress.Progress{ 269 ID: "overall progress", 270 Action: fmt.Sprintf("%d out of %d tasks", numerator, denominator), 271 }) 272 } 273 274 func truncError(errMsg string) string { 275 // Remove newlines from the error, which corrupt the output. 276 errMsg = strings.Replace(errMsg, "\n", " ", -1) 277 278 // Limit the length to 75 characters, so that even on narrow terminals 279 // this will not overflow to the next line. 280 if len(errMsg) > 75 { 281 errMsg = errMsg[:74] + "…" 282 } 283 return errMsg 284 } 285 286 type replicatedProgressUpdater struct { 287 progressOut progress.Output 288 289 // used for mapping slots to a contiguous space 290 // this also causes progress bars to appear in order 291 slotMap map[int]int 292 293 initialized bool 294 done bool 295 } 296 297 func (u *replicatedProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 298 if service.Spec.Mode.Replicated == nil || service.Spec.Mode.Replicated.Replicas == nil { 299 return false, errors.New("no replica count") 300 } 301 replicas := *service.Spec.Mode.Replicated.Replicas 302 303 if !u.initialized { 304 u.slotMap = make(map[int]int) 305 306 // Draw progress bars in order 307 writeOverallProgress(u.progressOut, 0, int(replicas), rollback) 308 309 if replicas <= maxProgressBars { 310 for i := uint64(1); i <= replicas; i++ { 311 progress.Update(u.progressOut, fmt.Sprintf("%d/%d", i, replicas), " ") 312 } 313 } 314 u.initialized = true 315 } 316 317 tasksBySlot := u.tasksBySlot(tasks, activeNodes) 318 319 // If we had reached a converged state, check if we are still converged. 320 if u.done { 321 for _, task := range tasksBySlot { 322 if task.Status.State != swarm.TaskStateRunning { 323 u.done = false 324 break 325 } 326 } 327 } 328 329 running := uint64(0) 330 331 for _, task := range tasksBySlot { 332 mappedSlot := u.slotMap[task.Slot] 333 if mappedSlot == 0 { 334 mappedSlot = len(u.slotMap) + 1 335 u.slotMap[task.Slot] = mappedSlot 336 } 337 338 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 339 running++ 340 } 341 342 u.writeTaskProgress(task, mappedSlot, replicas, rollback) 343 } 344 345 if !u.done { 346 writeOverallProgress(u.progressOut, int(running), int(replicas), rollback) 347 348 if running == replicas { 349 u.done = true 350 } 351 } 352 353 return running == replicas, nil 354 } 355 356 func (u *replicatedProgressUpdater) tasksBySlot(tasks []swarm.Task, activeNodes map[string]struct{}) map[int]swarm.Task { 357 // If there are multiple tasks with the same slot number, favor the one 358 // with the *lowest* desired state. This can happen in restart 359 // scenarios. 360 tasksBySlot := make(map[int]swarm.Task) 361 for _, task := range tasks { 362 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 363 continue 364 } 365 if existingTask, ok := tasksBySlot[task.Slot]; ok { 366 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 367 continue 368 } 369 // If the desired states match, observed state breaks 370 // ties. This can happen with the "start first" service 371 // update mode. 372 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 373 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 374 continue 375 } 376 } 377 if task.NodeID != "" { 378 if _, nodeActive := activeNodes[task.NodeID]; !nodeActive { 379 continue 380 } 381 } 382 tasksBySlot[task.Slot] = task 383 } 384 385 return tasksBySlot 386 } 387 388 func (u *replicatedProgressUpdater) writeTaskProgress(task swarm.Task, mappedSlot int, replicas uint64, rollback bool) { 389 if u.done || replicas > maxProgressBars || uint64(mappedSlot) > replicas { 390 return 391 } 392 393 if task.Status.Err != "" { 394 u.progressOut.WriteProgress(progress.Progress{ 395 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 396 Action: truncError(task.Status.Err), 397 }) 398 return 399 } 400 401 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 402 u.progressOut.WriteProgress(progress.Progress{ 403 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 404 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 405 Current: stateToProgress(task.Status.State, rollback), 406 Total: maxProgress, 407 HideCounts: true, 408 }) 409 } 410 } 411 412 type globalProgressUpdater struct { 413 progressOut progress.Output 414 415 initialized bool 416 done bool 417 } 418 419 func (u *globalProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 420 tasksByNode := u.tasksByNode(tasks) 421 422 // We don't have perfect knowledge of how many nodes meet the 423 // constraints for this service. But the orchestrator creates tasks 424 // for all eligible nodes at the same time, so we should see all those 425 // nodes represented among the up-to-date tasks. 426 nodeCount := len(tasksByNode) 427 428 if !u.initialized { 429 if nodeCount == 0 { 430 // Two possibilities: either the orchestrator hasn't created 431 // the tasks yet, or the service doesn't meet constraints for 432 // any node. Either way, we wait. 433 u.progressOut.WriteProgress(progress.Progress{ 434 ID: "overall progress", 435 Action: "waiting for new tasks", 436 }) 437 return false, nil 438 } 439 440 writeOverallProgress(u.progressOut, 0, nodeCount, rollback) 441 u.initialized = true 442 } 443 444 // If we had reached a converged state, check if we are still converged. 445 if u.done { 446 for _, task := range tasksByNode { 447 if task.Status.State != swarm.TaskStateRunning { 448 u.done = false 449 break 450 } 451 } 452 } 453 454 running := 0 455 456 for _, task := range tasksByNode { 457 if _, nodeActive := activeNodes[task.NodeID]; nodeActive { 458 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 459 running++ 460 } 461 462 u.writeTaskProgress(task, nodeCount, rollback) 463 } 464 } 465 466 if !u.done { 467 writeOverallProgress(u.progressOut, running, nodeCount, rollback) 468 469 if running == nodeCount { 470 u.done = true 471 } 472 } 473 474 return running == nodeCount, nil 475 } 476 477 func (u *globalProgressUpdater) tasksByNode(tasks []swarm.Task) map[string]swarm.Task { 478 // If there are multiple tasks with the same node ID, favor the one 479 // with the *lowest* desired state. This can happen in restart 480 // scenarios. 481 tasksByNode := make(map[string]swarm.Task) 482 for _, task := range tasks { 483 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 484 continue 485 } 486 if existingTask, ok := tasksByNode[task.NodeID]; ok { 487 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 488 continue 489 } 490 491 // If the desired states match, observed state breaks 492 // ties. This can happen with the "start first" service 493 // update mode. 494 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 495 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 496 continue 497 } 498 499 } 500 tasksByNode[task.NodeID] = task 501 } 502 503 return tasksByNode 504 } 505 506 func (u *globalProgressUpdater) writeTaskProgress(task swarm.Task, nodeCount int, rollback bool) { 507 if u.done || nodeCount > maxProgressBars { 508 return 509 } 510 511 if task.Status.Err != "" { 512 u.progressOut.WriteProgress(progress.Progress{ 513 ID: stringid.TruncateID(task.NodeID), 514 Action: truncError(task.Status.Err), 515 }) 516 return 517 } 518 519 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 520 u.progressOut.WriteProgress(progress.Progress{ 521 ID: stringid.TruncateID(task.NodeID), 522 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 523 Current: stateToProgress(task.Status.State, rollback), 524 Total: maxProgress, 525 HideCounts: true, 526 }) 527 } 528 } 529 530 // replicatedJobProgressUpdater outputs the progress of a replicated job. This 531 // progress consists of a few main elements. 532 // 533 // The first is the progress bar for the job as a whole. This shows the number 534 // of completed out of total tasks for the job. Tasks that are currently 535 // running are not counted. 536 // 537 // The second is the status of the "active" tasks for the job. We count a task 538 // as "active" if it has any non-terminal state, not just running. This is 539 // shown as a fraction of the maximum concurrent tasks that can be running, 540 // which is the less of MaxConcurrent or TotalCompletions - completed tasks. 541 type replicatedJobProgressUpdater struct { 542 progressOut progress.Output 543 544 // jobIteration is the service's job iteration, used to exclude tasks 545 // belonging to earlier iterations. 546 jobIteration uint64 547 548 // concurrent is the value of MaxConcurrent as an int. That is, the maximum 549 // number of tasks allowed to be run simultaneously. 550 concurrent int 551 552 // total is the value of TotalCompletions, the number of complete tasks 553 // desired. 554 total int 555 556 // initialized is set to true after the first time update is called. the 557 // first time update is called, the components of the progress UI are all 558 // written out in an initial pass. this ensure that they will subsequently 559 // be in order, no matter how they are updated. 560 initialized bool 561 562 // progressDigits is the number digits in total, so that we know how much 563 // to pad the job progress field with. 564 // 565 // when we're writing the number of completed over total tasks, we need to 566 // pad the numerator with spaces, so that the bar doesn't jump around. 567 // we'll compute that once on init, and then reuse it over and over. 568 // 569 // we compute this in the least clever way possible: convert to string 570 // with strconv.Itoa, then take the len. 571 progressDigits int 572 573 // activeDigits is the same, but for active tasks, and it applies to both 574 // the numerator and denominator. 575 activeDigits int 576 } 577 578 func newReplicatedJobProgressUpdater(service swarm.Service, progressOut progress.Output) *replicatedJobProgressUpdater { 579 u := &replicatedJobProgressUpdater{ 580 progressOut: progressOut, 581 concurrent: int(*service.Spec.Mode.ReplicatedJob.MaxConcurrent), 582 total: int(*service.Spec.Mode.ReplicatedJob.TotalCompletions), 583 jobIteration: service.JobStatus.JobIteration.Index, 584 } 585 u.progressDigits = len(strconv.Itoa(u.total)) 586 u.activeDigits = len(strconv.Itoa(u.concurrent)) 587 588 return u 589 } 590 591 // update writes out the progress of the replicated job. 592 func (u *replicatedJobProgressUpdater) update(_ swarm.Service, tasks []swarm.Task, _ map[string]struct{}, _ bool) (bool, error) { 593 if !u.initialized { 594 u.writeOverallProgress(0, 0) 595 596 // only write out progress bars if there will be less than the maximum 597 if u.total <= maxProgressBars { 598 for i := 1; i <= u.total; i++ { 599 u.progressOut.WriteProgress(progress.Progress{ 600 ID: fmt.Sprintf("%d/%d", i, u.total), 601 Action: " ", 602 }) 603 } 604 } 605 u.initialized = true 606 } 607 608 // tasksBySlot is a mapping of slot number to the task valid for that slot. 609 // it deduplicated tasks occupying the same numerical slot but in different 610 // states. 611 tasksBySlot := make(map[int]swarm.Task) 612 for _, task := range tasks { 613 // first, check if the task belongs to this service iteration. skip 614 // tasks belonging to other iterations. 615 if task.JobIteration == nil || task.JobIteration.Index != u.jobIteration { 616 continue 617 } 618 619 // then, if the task is in an unknown state, ignore it. 620 if numberedStates[task.DesiredState] == 0 || 621 numberedStates[task.Status.State] == 0 { 622 continue 623 } 624 625 // finally, check if the task already exists in the map 626 if existing, ok := tasksBySlot[task.Slot]; ok { 627 // if so, use the task with the lower actual state 628 if numberedStates[existing.Status.State] > numberedStates[task.Status.State] { 629 tasksBySlot[task.Slot] = task 630 } 631 } else { 632 // otherwise, just add it to the map. 633 tasksBySlot[task.Slot] = task 634 } 635 } 636 637 activeTasks := 0 638 completeTasks := 0 639 640 for i := 0; i < len(tasksBySlot); i++ { 641 task := tasksBySlot[i] 642 u.writeTaskProgress(task) 643 644 if numberedStates[task.Status.State] < numberedStates[swarm.TaskStateComplete] { 645 activeTasks++ 646 } 647 648 if task.Status.State == swarm.TaskStateComplete { 649 completeTasks++ 650 } 651 } 652 653 u.writeOverallProgress(activeTasks, completeTasks) 654 655 return completeTasks == u.total, nil 656 } 657 658 func (u *replicatedJobProgressUpdater) writeOverallProgress(active, completed int) { 659 u.progressOut.WriteProgress(progress.Progress{ 660 ID: "job progress", 661 Action: fmt.Sprintf( 662 // * means "use the next positional arg to compute padding" 663 "%*d out of %d complete", u.progressDigits, completed, u.total, 664 ), 665 Current: int64(completed), 666 Total: int64(u.total), 667 HideCounts: true, 668 }) 669 670 // actualDesired is the lesser of MaxConcurrent, or the remaining tasks 671 actualDesired := u.total - completed 672 if actualDesired > u.concurrent { 673 actualDesired = u.concurrent 674 } 675 676 u.progressOut.WriteProgress(progress.Progress{ 677 ID: "active tasks", 678 Action: fmt.Sprintf( 679 // [n] notation lets us select a specific argument, 1-indexed 680 // putting the [1] before the star means "make the string this 681 // length". putting the [2] or the [3] means "use this argument 682 // here" 683 // 684 // we pad both the numerator and the denominator because, as the 685 // job reaches its conclusion, the number of possible concurrent 686 // tasks will go down, as fewer than MaxConcurrent tasks are needed 687 // to complete the job. 688 "%[1]*[2]d out of %[1]*[3]d tasks", u.activeDigits, active, actualDesired, 689 ), 690 }) 691 } 692 693 func (u *replicatedJobProgressUpdater) writeTaskProgress(task swarm.Task) { 694 if u.total > maxProgressBars { 695 return 696 } 697 698 if task.Status.Err != "" { 699 u.progressOut.WriteProgress(progress.Progress{ 700 ID: fmt.Sprintf("%d/%d", task.Slot+1, u.total), 701 Action: truncError(task.Status.Err), 702 }) 703 return 704 } 705 706 u.progressOut.WriteProgress(progress.Progress{ 707 ID: fmt.Sprintf("%d/%d", task.Slot+1, u.total), 708 Action: fmt.Sprintf("%-*s", longestState, task.Status.State), 709 Current: numberedStates[task.Status.State], 710 Total: maxJobProgress, 711 HideCounts: true, 712 }) 713 } 714 715 // globalJobProgressUpdater is the progressUpdater for GlobalJob-mode services. 716 // Because GlobalJob services are so much simpler than ReplicatedJob services, 717 // this updater is in turn simpler as well. 718 type globalJobProgressUpdater struct { 719 progressOut progress.Output 720 721 // initialized is used to detect the first pass of update, and to perform 722 // first time initialization logic at that time. 723 initialized bool 724 725 // total is the total number of tasks expected for this job 726 total int 727 728 // progressDigits is the number of spaces to pad the numerator of the job 729 // progress field 730 progressDigits int 731 732 taskNodes map[string]struct{} 733 } 734 735 func (u *globalJobProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, _ bool) (bool, error) { 736 if !u.initialized { 737 // if there are not yet tasks, then return early. 738 if len(tasks) == 0 && len(activeNodes) != 0 { 739 u.progressOut.WriteProgress(progress.Progress{ 740 ID: "job progress", 741 Action: "waiting for tasks", 742 }) 743 return false, nil 744 } 745 746 // when a global job starts, all of its tasks are created at once, so 747 // we can use len(tasks) to know how many we're expecting. 748 u.taskNodes = map[string]struct{}{} 749 750 for _, task := range tasks { 751 // skip any tasks not belonging to this job iteration. 752 if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index { 753 continue 754 } 755 756 // collect the list of all node IDs for this service. 757 // 758 // basically, global jobs will execute on any new nodes that join 759 // the cluster in the future. to avoid making things complicated, 760 // we will only check the progress of the initial set of nodes. if 761 // any new nodes come online during the operation, we will ignore 762 // them. 763 u.taskNodes[task.NodeID] = struct{}{} 764 } 765 766 u.total = len(u.taskNodes) 767 u.progressDigits = len(strconv.Itoa(u.total)) 768 769 u.writeOverallProgress(0) 770 u.initialized = true 771 } 772 773 // tasksByNodeID maps a NodeID to the latest task for that Node ID. this 774 // lets us pick only the latest task for any given node. 775 tasksByNodeID := map[string]swarm.Task{} 776 777 for _, task := range tasks { 778 // skip any tasks not belonging to this job iteration 779 if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index { 780 continue 781 } 782 783 // if the task is not on one of the initial set of nodes, ignore it. 784 if _, ok := u.taskNodes[task.NodeID]; !ok { 785 continue 786 } 787 788 // if there is already a task recorded for this node, choose the one 789 // with the lower state 790 if oldtask, ok := tasksByNodeID[task.NodeID]; ok { 791 if numberedStates[oldtask.Status.State] > numberedStates[task.Status.State] { 792 tasksByNodeID[task.NodeID] = task 793 } 794 } else { 795 tasksByNodeID[task.NodeID] = task 796 } 797 } 798 799 complete := 0 800 for _, task := range tasksByNodeID { 801 u.writeTaskProgress(task) 802 if task.Status.State == swarm.TaskStateComplete { 803 complete++ 804 } 805 } 806 807 u.writeOverallProgress(complete) 808 return complete == u.total, nil 809 } 810 811 func (u *globalJobProgressUpdater) writeTaskProgress(task swarm.Task) { 812 if u.total > maxProgressBars { 813 return 814 } 815 816 if task.Status.Err != "" { 817 u.progressOut.WriteProgress(progress.Progress{ 818 ID: task.NodeID, 819 Action: truncError(task.Status.Err), 820 }) 821 return 822 } 823 824 u.progressOut.WriteProgress(progress.Progress{ 825 ID: task.NodeID, 826 Action: fmt.Sprintf("%-*s", longestState, task.Status.State), 827 Current: numberedStates[task.Status.State], 828 Total: maxJobProgress, 829 HideCounts: true, 830 }) 831 } 832 833 func (u *globalJobProgressUpdater) writeOverallProgress(complete int) { 834 // all tasks for a global job are active at once, so we only write out the 835 // total progress. 836 u.progressOut.WriteProgress(progress.Progress{ 837 // see (*replicatedJobProgressUpdater).writeOverallProgress for an 838 // explanation fo the advanced fmt use in this function. 839 ID: "job progress", 840 Action: fmt.Sprintf( 841 "%*d out of %d complete", u.progressDigits, complete, u.total, 842 ), 843 Current: int64(complete), 844 Total: int64(u.total), 845 HideCounts: true, 846 }) 847 }