github.com/xeptore/docker-cli@v20.10.14+incompatible/cli/command/service/progress/progress.go (about) 1 package progress 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "os" 9 "os/signal" 10 "strconv" 11 "strings" 12 "time" 13 14 "github.com/docker/docker/api/types" 15 "github.com/docker/docker/api/types/filters" 16 "github.com/docker/docker/api/types/swarm" 17 "github.com/docker/docker/client" 18 "github.com/docker/docker/pkg/progress" 19 "github.com/docker/docker/pkg/streamformatter" 20 "github.com/docker/docker/pkg/stringid" 21 ) 22 23 var ( 24 numberedStates = map[swarm.TaskState]int64{ 25 swarm.TaskStateNew: 1, 26 swarm.TaskStateAllocated: 2, 27 swarm.TaskStatePending: 3, 28 swarm.TaskStateAssigned: 4, 29 swarm.TaskStateAccepted: 5, 30 swarm.TaskStatePreparing: 6, 31 swarm.TaskStateReady: 7, 32 swarm.TaskStateStarting: 8, 33 swarm.TaskStateRunning: 9, 34 35 // The following states are not actually shown in progress 36 // output, but are used internally for ordering. 37 swarm.TaskStateComplete: 10, 38 swarm.TaskStateShutdown: 11, 39 swarm.TaskStateFailed: 12, 40 swarm.TaskStateRejected: 13, 41 } 42 43 longestState int 44 ) 45 46 const ( 47 maxProgress = 9 48 maxProgressBars = 20 49 maxJobProgress = 10 50 ) 51 52 type progressUpdater interface { 53 update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) 54 } 55 56 func init() { 57 for state := range numberedStates { 58 // for jobs, we use the "complete" state, and so it should be factored 59 // in to the computation of the longest state. 60 if (!terminalState(state) || state == swarm.TaskStateComplete) && len(state) > longestState { 61 longestState = len(state) 62 } 63 } 64 } 65 66 func terminalState(state swarm.TaskState) bool { 67 return numberedStates[state] > numberedStates[swarm.TaskStateRunning] 68 } 69 70 func stateToProgress(state swarm.TaskState, rollback bool) int64 { 71 if !rollback { 72 return numberedStates[state] 73 } 74 return numberedStates[swarm.TaskStateRunning] - numberedStates[state] 75 } 76 77 // ServiceProgress outputs progress information for convergence of a service. 78 // nolint: gocyclo 79 func ServiceProgress(ctx context.Context, client client.APIClient, serviceID string, progressWriter io.WriteCloser) error { 80 defer progressWriter.Close() 81 82 progressOut := streamformatter.NewJSONProgressOutput(progressWriter, false) 83 84 sigint := make(chan os.Signal, 1) 85 signal.Notify(sigint, os.Interrupt) 86 defer signal.Stop(sigint) 87 88 taskFilter := filters.NewArgs() 89 taskFilter.Add("service", serviceID) 90 taskFilter.Add("_up-to-date", "true") 91 92 getUpToDateTasks := func() ([]swarm.Task, error) { 93 return client.TaskList(ctx, types.TaskListOptions{Filters: taskFilter}) 94 } 95 96 var ( 97 updater progressUpdater 98 converged bool 99 convergedAt time.Time 100 monitor = 5 * time.Second 101 rollback bool 102 message *progress.Progress 103 ) 104 105 for { 106 service, _, err := client.ServiceInspectWithRaw(ctx, serviceID, types.ServiceInspectOptions{}) 107 if err != nil { 108 return err 109 } 110 111 if service.Spec.UpdateConfig != nil && service.Spec.UpdateConfig.Monitor != 0 { 112 monitor = service.Spec.UpdateConfig.Monitor 113 } 114 115 if updater == nil { 116 updater, err = initializeUpdater(service, progressOut) 117 if err != nil { 118 return err 119 } 120 } 121 122 if service.UpdateStatus != nil { 123 switch service.UpdateStatus.State { 124 case swarm.UpdateStateUpdating: 125 rollback = false 126 case swarm.UpdateStateCompleted: 127 if !converged { 128 return nil 129 } 130 case swarm.UpdateStatePaused: 131 return fmt.Errorf("service update paused: %s", service.UpdateStatus.Message) 132 case swarm.UpdateStateRollbackStarted: 133 if !rollback && service.UpdateStatus.Message != "" { 134 progressOut.WriteProgress(progress.Progress{ 135 ID: "rollback", 136 Action: service.UpdateStatus.Message, 137 }) 138 } 139 rollback = true 140 case swarm.UpdateStateRollbackPaused: 141 return fmt.Errorf("service rollback paused: %s", service.UpdateStatus.Message) 142 case swarm.UpdateStateRollbackCompleted: 143 if !converged { 144 message = &progress.Progress{ID: "rollback", Message: service.UpdateStatus.Message} 145 } 146 rollback = true 147 } 148 } 149 if converged && time.Since(convergedAt) >= monitor { 150 progressOut.WriteProgress(progress.Progress{ 151 ID: "verify", 152 Action: "Service converged", 153 }) 154 if message != nil { 155 progressOut.WriteProgress(*message) 156 } 157 return nil 158 } 159 160 tasks, err := getUpToDateTasks() 161 if err != nil { 162 return err 163 } 164 165 activeNodes, err := getActiveNodes(ctx, client) 166 if err != nil { 167 return err 168 } 169 170 converged, err = updater.update(service, tasks, activeNodes, rollback) 171 if err != nil { 172 return err 173 } 174 if converged { 175 // if the service is a job, there's no need to verify it. jobs are 176 // stay done once they're done. skip the verification and just end 177 // the progress monitoring. 178 // 179 // only job services have a non-nil job status, which means we can 180 // use the presence of this field to check if the service is a job 181 // here. 182 if service.JobStatus != nil { 183 progress.Message(progressOut, "", "job complete") 184 return nil 185 } 186 187 if convergedAt.IsZero() { 188 convergedAt = time.Now() 189 } 190 wait := monitor - time.Since(convergedAt) 191 if wait >= 0 { 192 progressOut.WriteProgress(progress.Progress{ 193 // Ideally this would have no ID, but 194 // the progress rendering code behaves 195 // poorly on an "action" with no ID. It 196 // returns the cursor to the beginning 197 // of the line, so the first character 198 // may be difficult to read. Then the 199 // output is overwritten by the shell 200 // prompt when the command finishes. 201 ID: "verify", 202 Action: fmt.Sprintf("Waiting %d seconds to verify that tasks are stable...", wait/time.Second+1), 203 }) 204 } 205 } else { 206 if !convergedAt.IsZero() { 207 progressOut.WriteProgress(progress.Progress{ 208 ID: "verify", 209 Action: "Detected task failure", 210 }) 211 } 212 convergedAt = time.Time{} 213 } 214 215 select { 216 case <-time.After(200 * time.Millisecond): 217 case <-sigint: 218 if !converged { 219 progress.Message(progressOut, "", "Operation continuing in background.") 220 progress.Messagef(progressOut, "", "Use `docker service ps %s` to check progress.", serviceID) 221 } 222 return nil 223 } 224 } 225 } 226 227 func getActiveNodes(ctx context.Context, client client.APIClient) (map[string]struct{}, error) { 228 nodes, err := client.NodeList(ctx, types.NodeListOptions{}) 229 if err != nil { 230 return nil, err 231 } 232 233 activeNodes := make(map[string]struct{}) 234 for _, n := range nodes { 235 if n.Status.State != swarm.NodeStateDown { 236 activeNodes[n.ID] = struct{}{} 237 } 238 } 239 return activeNodes, nil 240 } 241 242 func initializeUpdater(service swarm.Service, progressOut progress.Output) (progressUpdater, error) { 243 if service.Spec.Mode.Replicated != nil && service.Spec.Mode.Replicated.Replicas != nil { 244 return &replicatedProgressUpdater{ 245 progressOut: progressOut, 246 }, nil 247 } 248 if service.Spec.Mode.Global != nil { 249 return &globalProgressUpdater{ 250 progressOut: progressOut, 251 }, nil 252 } 253 if service.Spec.Mode.ReplicatedJob != nil { 254 return newReplicatedJobProgressUpdater(service, progressOut), nil 255 } 256 if service.Spec.Mode.GlobalJob != nil { 257 return &globalJobProgressUpdater{ 258 progressOut: progressOut, 259 }, nil 260 } 261 return nil, errors.New("unrecognized service mode") 262 } 263 264 func writeOverallProgress(progressOut progress.Output, numerator, denominator int, rollback bool) { 265 if rollback { 266 progressOut.WriteProgress(progress.Progress{ 267 ID: "overall progress", 268 Action: fmt.Sprintf("rolling back update: %d out of %d tasks", numerator, denominator), 269 }) 270 return 271 } 272 progressOut.WriteProgress(progress.Progress{ 273 ID: "overall progress", 274 Action: fmt.Sprintf("%d out of %d tasks", numerator, denominator), 275 }) 276 } 277 278 func truncError(errMsg string) string { 279 // Remove newlines from the error, which corrupt the output. 280 errMsg = strings.Replace(errMsg, "\n", " ", -1) 281 282 // Limit the length to 75 characters, so that even on narrow terminals 283 // this will not overflow to the next line. 284 if len(errMsg) > 75 { 285 errMsg = errMsg[:74] + "…" 286 } 287 return errMsg 288 } 289 290 type replicatedProgressUpdater struct { 291 progressOut progress.Output 292 293 // used for mapping slots to a contiguous space 294 // this also causes progress bars to appear in order 295 slotMap map[int]int 296 297 initialized bool 298 done bool 299 } 300 301 func (u *replicatedProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 302 if service.Spec.Mode.Replicated == nil || service.Spec.Mode.Replicated.Replicas == nil { 303 return false, errors.New("no replica count") 304 } 305 replicas := *service.Spec.Mode.Replicated.Replicas 306 307 if !u.initialized { 308 u.slotMap = make(map[int]int) 309 310 // Draw progress bars in order 311 writeOverallProgress(u.progressOut, 0, int(replicas), rollback) 312 313 if replicas <= maxProgressBars { 314 for i := uint64(1); i <= replicas; i++ { 315 progress.Update(u.progressOut, fmt.Sprintf("%d/%d", i, replicas), " ") 316 } 317 } 318 u.initialized = true 319 } 320 321 tasksBySlot := u.tasksBySlot(tasks, activeNodes) 322 323 // If we had reached a converged state, check if we are still converged. 324 if u.done { 325 for _, task := range tasksBySlot { 326 if task.Status.State != swarm.TaskStateRunning { 327 u.done = false 328 break 329 } 330 } 331 } 332 333 running := uint64(0) 334 335 for _, task := range tasksBySlot { 336 mappedSlot := u.slotMap[task.Slot] 337 if mappedSlot == 0 { 338 mappedSlot = len(u.slotMap) + 1 339 u.slotMap[task.Slot] = mappedSlot 340 } 341 342 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 343 running++ 344 } 345 346 u.writeTaskProgress(task, mappedSlot, replicas, rollback) 347 } 348 349 if !u.done { 350 writeOverallProgress(u.progressOut, int(running), int(replicas), rollback) 351 352 if running == replicas { 353 u.done = true 354 } 355 } 356 357 return running == replicas, nil 358 } 359 360 func (u *replicatedProgressUpdater) tasksBySlot(tasks []swarm.Task, activeNodes map[string]struct{}) map[int]swarm.Task { 361 // If there are multiple tasks with the same slot number, favor the one 362 // with the *lowest* desired state. This can happen in restart 363 // scenarios. 364 tasksBySlot := make(map[int]swarm.Task) 365 for _, task := range tasks { 366 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 367 continue 368 } 369 if existingTask, ok := tasksBySlot[task.Slot]; ok { 370 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 371 continue 372 } 373 // If the desired states match, observed state breaks 374 // ties. This can happen with the "start first" service 375 // update mode. 376 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 377 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 378 continue 379 } 380 } 381 if task.NodeID != "" { 382 if _, nodeActive := activeNodes[task.NodeID]; !nodeActive { 383 continue 384 } 385 } 386 tasksBySlot[task.Slot] = task 387 } 388 389 return tasksBySlot 390 } 391 392 func (u *replicatedProgressUpdater) writeTaskProgress(task swarm.Task, mappedSlot int, replicas uint64, rollback bool) { 393 if u.done || replicas > maxProgressBars || uint64(mappedSlot) > replicas { 394 return 395 } 396 397 if task.Status.Err != "" { 398 u.progressOut.WriteProgress(progress.Progress{ 399 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 400 Action: truncError(task.Status.Err), 401 }) 402 return 403 } 404 405 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 406 u.progressOut.WriteProgress(progress.Progress{ 407 ID: fmt.Sprintf("%d/%d", mappedSlot, replicas), 408 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 409 Current: stateToProgress(task.Status.State, rollback), 410 Total: maxProgress, 411 HideCounts: true, 412 }) 413 } 414 } 415 416 type globalProgressUpdater struct { 417 progressOut progress.Output 418 419 initialized bool 420 done bool 421 } 422 423 func (u *globalProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) { 424 tasksByNode := u.tasksByNode(tasks) 425 426 // We don't have perfect knowledge of how many nodes meet the 427 // constraints for this service. But the orchestrator creates tasks 428 // for all eligible nodes at the same time, so we should see all those 429 // nodes represented among the up-to-date tasks. 430 nodeCount := len(tasksByNode) 431 432 if !u.initialized { 433 if nodeCount == 0 { 434 // Two possibilities: either the orchestrator hasn't created 435 // the tasks yet, or the service doesn't meet constraints for 436 // any node. Either way, we wait. 437 u.progressOut.WriteProgress(progress.Progress{ 438 ID: "overall progress", 439 Action: "waiting for new tasks", 440 }) 441 return false, nil 442 } 443 444 writeOverallProgress(u.progressOut, 0, nodeCount, rollback) 445 u.initialized = true 446 } 447 448 // If we had reached a converged state, check if we are still converged. 449 if u.done { 450 for _, task := range tasksByNode { 451 if task.Status.State != swarm.TaskStateRunning { 452 u.done = false 453 break 454 } 455 } 456 } 457 458 running := 0 459 460 for _, task := range tasksByNode { 461 if _, nodeActive := activeNodes[task.NodeID]; nodeActive { 462 if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning { 463 running++ 464 } 465 466 u.writeTaskProgress(task, nodeCount, rollback) 467 } 468 } 469 470 if !u.done { 471 writeOverallProgress(u.progressOut, running, nodeCount, rollback) 472 473 if running == nodeCount { 474 u.done = true 475 } 476 } 477 478 return running == nodeCount, nil 479 } 480 481 func (u *globalProgressUpdater) tasksByNode(tasks []swarm.Task) map[string]swarm.Task { 482 // If there are multiple tasks with the same node ID, favor the one 483 // with the *lowest* desired state. This can happen in restart 484 // scenarios. 485 tasksByNode := make(map[string]swarm.Task) 486 for _, task := range tasks { 487 if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 { 488 continue 489 } 490 if existingTask, ok := tasksByNode[task.NodeID]; ok { 491 if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] { 492 continue 493 } 494 495 // If the desired states match, observed state breaks 496 // ties. This can happen with the "start first" service 497 // update mode. 498 if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] && 499 numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] { 500 continue 501 } 502 503 } 504 tasksByNode[task.NodeID] = task 505 } 506 507 return tasksByNode 508 } 509 510 func (u *globalProgressUpdater) writeTaskProgress(task swarm.Task, nodeCount int, rollback bool) { 511 if u.done || nodeCount > maxProgressBars { 512 return 513 } 514 515 if task.Status.Err != "" { 516 u.progressOut.WriteProgress(progress.Progress{ 517 ID: stringid.TruncateID(task.NodeID), 518 Action: truncError(task.Status.Err), 519 }) 520 return 521 } 522 523 if !terminalState(task.DesiredState) && !terminalState(task.Status.State) { 524 u.progressOut.WriteProgress(progress.Progress{ 525 ID: stringid.TruncateID(task.NodeID), 526 Action: fmt.Sprintf("%-[1]*s", longestState, task.Status.State), 527 Current: stateToProgress(task.Status.State, rollback), 528 Total: maxProgress, 529 HideCounts: true, 530 }) 531 } 532 } 533 534 // replicatedJobProgressUpdater outputs the progress of a replicated job. This 535 // progress consists of a few main elements. 536 // 537 // The first is the progress bar for the job as a whole. This shows the number 538 // of completed out of total tasks for the job. Tasks that are currently 539 // running are not counted. 540 // 541 // The second is the status of the "active" tasks for the job. We count a task 542 // as "active" if it has any non-terminal state, not just running. This is 543 // shown as a fraction of the maximum concurrent tasks that can be running, 544 // which is the less of MaxConcurrent or TotalCompletions - completed tasks. 545 type replicatedJobProgressUpdater struct { 546 progressOut progress.Output 547 548 // jobIteration is the service's job iteration, used to exclude tasks 549 // belonging to earlier iterations. 550 jobIteration uint64 551 552 // concurrent is the value of MaxConcurrent as an int. That is, the maximum 553 // number of tasks allowed to be run simultaneously. 554 concurrent int 555 556 // total is the value of TotalCompletions, the number of complete tasks 557 // desired. 558 total int 559 560 // initialized is set to true after the first time update is called. the 561 // first time update is called, the components of the progress UI are all 562 // written out in an initial pass. this ensure that they will subsequently 563 // be in order, no matter how they are updated. 564 initialized bool 565 566 // progressDigits is the number digits in total, so that we know how much 567 // to pad the job progress field with. 568 // 569 // when we're writing the number of completed over total tasks, we need to 570 // pad the numerator with spaces, so that the bar doesn't jump around. 571 // we'll compute that once on init, and then reuse it over and over. 572 // 573 // we compute this in the least clever way possible: convert to string 574 // with strconv.Itoa, then take the len. 575 progressDigits int 576 577 // activeDigits is the same, but for active tasks, and it applies to both 578 // the numerator and denominator. 579 activeDigits int 580 } 581 582 func newReplicatedJobProgressUpdater(service swarm.Service, progressOut progress.Output) *replicatedJobProgressUpdater { 583 u := &replicatedJobProgressUpdater{ 584 progressOut: progressOut, 585 concurrent: int(*service.Spec.Mode.ReplicatedJob.MaxConcurrent), 586 total: int(*service.Spec.Mode.ReplicatedJob.TotalCompletions), 587 jobIteration: service.JobStatus.JobIteration.Index, 588 } 589 u.progressDigits = len(strconv.Itoa(u.total)) 590 u.activeDigits = len(strconv.Itoa(u.concurrent)) 591 592 return u 593 } 594 595 // update writes out the progress of the replicated job. 596 func (u *replicatedJobProgressUpdater) update(_ swarm.Service, tasks []swarm.Task, _ map[string]struct{}, _ bool) (bool, error) { 597 if !u.initialized { 598 u.writeOverallProgress(0, 0) 599 600 // only write out progress bars if there will be less than the maximum 601 if u.total <= maxProgressBars { 602 for i := 1; i <= u.total; i++ { 603 u.progressOut.WriteProgress(progress.Progress{ 604 ID: fmt.Sprintf("%d/%d", i, u.total), 605 Action: " ", 606 }) 607 } 608 } 609 u.initialized = true 610 } 611 612 // tasksBySlot is a mapping of slot number to the task valid for that slot. 613 // it deduplicated tasks occupying the same numerical slot but in different 614 // states. 615 tasksBySlot := make(map[int]swarm.Task) 616 for _, task := range tasks { 617 // first, check if the task belongs to this service iteration. skip 618 // tasks belonging to other iterations. 619 if task.JobIteration == nil || task.JobIteration.Index != u.jobIteration { 620 continue 621 } 622 623 // then, if the task is in an unknown state, ignore it. 624 if numberedStates[task.DesiredState] == 0 || 625 numberedStates[task.Status.State] == 0 { 626 continue 627 } 628 629 // finally, check if the task already exists in the map 630 if existing, ok := tasksBySlot[task.Slot]; ok { 631 // if so, use the task with the lower actual state 632 if numberedStates[existing.Status.State] > numberedStates[task.Status.State] { 633 tasksBySlot[task.Slot] = task 634 } 635 } else { 636 // otherwise, just add it to the map. 637 tasksBySlot[task.Slot] = task 638 } 639 } 640 641 activeTasks := 0 642 completeTasks := 0 643 644 for i := 0; i < len(tasksBySlot); i++ { 645 task := tasksBySlot[i] 646 u.writeTaskProgress(task) 647 648 if numberedStates[task.Status.State] < numberedStates[swarm.TaskStateComplete] { 649 activeTasks++ 650 } 651 652 if task.Status.State == swarm.TaskStateComplete { 653 completeTasks++ 654 } 655 } 656 657 u.writeOverallProgress(activeTasks, completeTasks) 658 659 return completeTasks == u.total, nil 660 } 661 662 func (u *replicatedJobProgressUpdater) writeOverallProgress(active, completed int) { 663 u.progressOut.WriteProgress(progress.Progress{ 664 ID: "job progress", 665 Action: fmt.Sprintf( 666 // * means "use the next positional arg to compute padding" 667 "%*d out of %d complete", u.progressDigits, completed, u.total, 668 ), 669 Current: int64(completed), 670 Total: int64(u.total), 671 HideCounts: true, 672 }) 673 674 // actualDesired is the lesser of MaxConcurrent, or the remaining tasks 675 actualDesired := u.total - completed 676 if actualDesired > u.concurrent { 677 actualDesired = u.concurrent 678 } 679 680 u.progressOut.WriteProgress(progress.Progress{ 681 ID: "active tasks", 682 Action: fmt.Sprintf( 683 // [n] notation lets us select a specific argument, 1-indexed 684 // putting the [1] before the star means "make the string this 685 // length". putting the [2] or the [3] means "use this argument 686 // here" 687 // 688 // we pad both the numerator and the denominator because, as the 689 // job reaches its conclusion, the number of possible concurrent 690 // tasks will go down, as fewer than MaxConcurrent tasks are needed 691 // to complete the job. 692 "%[1]*[2]d out of %[1]*[3]d tasks", u.activeDigits, active, actualDesired, 693 ), 694 }) 695 } 696 697 func (u *replicatedJobProgressUpdater) writeTaskProgress(task swarm.Task) { 698 if u.total > maxProgressBars { 699 return 700 } 701 702 if task.Status.Err != "" { 703 u.progressOut.WriteProgress(progress.Progress{ 704 ID: fmt.Sprintf("%d/%d", task.Slot+1, u.total), 705 Action: truncError(task.Status.Err), 706 }) 707 return 708 } 709 710 u.progressOut.WriteProgress(progress.Progress{ 711 ID: fmt.Sprintf("%d/%d", task.Slot+1, u.total), 712 Action: fmt.Sprintf("%-*s", longestState, task.Status.State), 713 Current: numberedStates[task.Status.State], 714 Total: maxJobProgress, 715 HideCounts: true, 716 }) 717 } 718 719 // globalJobProgressUpdater is the progressUpdater for GlobalJob-mode services. 720 // Because GlobalJob services are so much simpler than ReplicatedJob services, 721 // this updater is in turn simpler as well. 722 type globalJobProgressUpdater struct { 723 progressOut progress.Output 724 725 // initialized is used to detect the first pass of update, and to perform 726 // first time initialization logic at that time. 727 initialized bool 728 729 // total is the total number of tasks expected for this job 730 total int 731 732 // progressDigits is the number of spaces to pad the numerator of the job 733 // progress field 734 progressDigits int 735 736 taskNodes map[string]struct{} 737 } 738 739 func (u *globalJobProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, _ bool) (bool, error) { 740 if !u.initialized { 741 // if there are not yet tasks, then return early. 742 if len(tasks) == 0 && len(activeNodes) != 0 { 743 u.progressOut.WriteProgress(progress.Progress{ 744 ID: "job progress", 745 Action: "waiting for tasks", 746 }) 747 return false, nil 748 } 749 750 // when a global job starts, all of its tasks are created at once, so 751 // we can use len(tasks) to know how many we're expecting. 752 u.taskNodes = map[string]struct{}{} 753 754 for _, task := range tasks { 755 // skip any tasks not belonging to this job iteration. 756 if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index { 757 continue 758 } 759 760 // collect the list of all node IDs for this service. 761 // 762 // basically, global jobs will execute on any new nodes that join 763 // the cluster in the future. to avoid making things complicated, 764 // we will only check the progress of the initial set of nodes. if 765 // any new nodes come online during the operation, we will ignore 766 // them. 767 u.taskNodes[task.NodeID] = struct{}{} 768 } 769 770 u.total = len(u.taskNodes) 771 u.progressDigits = len(strconv.Itoa(u.total)) 772 773 u.writeOverallProgress(0) 774 u.initialized = true 775 } 776 777 // tasksByNodeID maps a NodeID to the latest task for that Node ID. this 778 // lets us pick only the latest task for any given node. 779 tasksByNodeID := map[string]swarm.Task{} 780 781 for _, task := range tasks { 782 // skip any tasks not belonging to this job iteration 783 if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index { 784 continue 785 } 786 787 // if the task is not on one of the initial set of nodes, ignore it. 788 if _, ok := u.taskNodes[task.NodeID]; !ok { 789 continue 790 } 791 792 // if there is already a task recorded for this node, choose the one 793 // with the lower state 794 if oldtask, ok := tasksByNodeID[task.NodeID]; ok { 795 if numberedStates[oldtask.Status.State] > numberedStates[task.Status.State] { 796 tasksByNodeID[task.NodeID] = task 797 } 798 } else { 799 tasksByNodeID[task.NodeID] = task 800 } 801 } 802 803 complete := 0 804 for _, task := range tasksByNodeID { 805 u.writeTaskProgress(task) 806 if task.Status.State == swarm.TaskStateComplete { 807 complete++ 808 } 809 } 810 811 u.writeOverallProgress(complete) 812 return complete == u.total, nil 813 } 814 815 func (u *globalJobProgressUpdater) writeTaskProgress(task swarm.Task) { 816 if u.total > maxProgressBars { 817 return 818 } 819 820 if task.Status.Err != "" { 821 u.progressOut.WriteProgress(progress.Progress{ 822 ID: task.NodeID, 823 Action: truncError(task.Status.Err), 824 }) 825 return 826 } 827 828 u.progressOut.WriteProgress(progress.Progress{ 829 ID: task.NodeID, 830 Action: fmt.Sprintf("%-*s", longestState, task.Status.State), 831 Current: numberedStates[task.Status.State], 832 Total: maxJobProgress, 833 HideCounts: true, 834 }) 835 } 836 837 func (u *globalJobProgressUpdater) writeOverallProgress(complete int) { 838 // all tasks for a global job are active at once, so we only write out the 839 // total progress. 840 u.progressOut.WriteProgress(progress.Progress{ 841 // see (*replicatedJobProgressUpdater).writeOverallProgress for an 842 // explanation fo the advanced fmt use in this function. 843 ID: "job progress", 844 Action: fmt.Sprintf( 845 "%*d out of %d complete", u.progressDigits, complete, u.total, 846 ), 847 Current: int64(complete), 848 Total: int64(u.total), 849 HideCounts: true, 850 }) 851 }