github.com/panekj/cli@v0.0.0-20230304125325-467dd2f3797e/cli/command/service/progress/progress.go (about)

     1  package progress
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"os/signal"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/docker/docker/api/types"
    15  	"github.com/docker/docker/api/types/filters"
    16  	"github.com/docker/docker/api/types/swarm"
    17  	"github.com/docker/docker/client"
    18  	"github.com/docker/docker/pkg/progress"
    19  	"github.com/docker/docker/pkg/streamformatter"
    20  	"github.com/docker/docker/pkg/stringid"
    21  )
    22  
var (
	// numberedStates assigns every task state this package understands a
	// strictly increasing rank. The rank is used as the "current" value of
	// per-task progress bars and to order tasks when several of them compete
	// for the same slot or node. A lookup that yields 0 means the state is
	// not known to this package.
	numberedStates = map[swarm.TaskState]int64{
		swarm.TaskStateNew:       1,
		swarm.TaskStateAllocated: 2,
		swarm.TaskStatePending:   3,
		swarm.TaskStateAssigned:  4,
		swarm.TaskStateAccepted:  5,
		swarm.TaskStatePreparing: 6,
		swarm.TaskStateReady:     7,
		swarm.TaskStateStarting:  8,
		swarm.TaskStateRunning:   9,

		// The following states are not actually shown in progress
		// output, but are used internally for ordering.
		swarm.TaskStateComplete: 10,
		swarm.TaskStateShutdown: 11,
		swarm.TaskStateFailed:   12,
		swarm.TaskStateRejected: 13,
	}

	// longestState is the length of the longest state name that may be
	// displayed. It is computed in init and used as the padding width when
	// formatting state names.
	longestState int
)
    45  
const (
	// maxProgress is the total of a service task's progress bar; it matches
	// the rank of the "running" state in numberedStates.
	maxProgress = 9
	// maxProgressBars is the largest number of tasks for which individual
	// progress bars are drawn; above it only the summary lines are written.
	maxProgressBars = 20
	// maxJobProgress is the total of a job task's progress bar; it matches
	// the rank of the "complete" state in numberedStates.
	maxJobProgress = 10
)
    51  
// progressUpdater renders the progress of one kind of service (replicated,
// global, replicated job or global job).
//
// update is called once per polling iteration by ServiceProgress with the
// current service object, its up-to-date tasks, the set of node IDs that are
// not down, and whether a rollback is in progress. It returns true when the
// service is considered converged.
type progressUpdater interface {
	update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error)
}
    55  
    56  func init() {
    57  	for state := range numberedStates {
    58  		// for jobs, we use the "complete" state, and so it should be factored
    59  		// in to the computation of the longest state.
    60  		if (!terminalState(state) || state == swarm.TaskStateComplete) && len(state) > longestState {
    61  			longestState = len(state)
    62  		}
    63  	}
    64  }
    65  
    66  func terminalState(state swarm.TaskState) bool {
    67  	return numberedStates[state] > numberedStates[swarm.TaskStateRunning]
    68  }
    69  
// ServiceProgress outputs progress information for convergence of a service.
//
// It repeatedly inspects the service, lists its up-to-date tasks and the
// cluster's nodes, and hands them to a mode-specific progressUpdater that
// renders JSON progress messages on progressWriter. The loop pauses 200ms
// between iterations. progressWriter is closed before the function returns.
//
// The function returns nil when:
//   - the service has stayed converged for the monitor period (5 seconds
//     unless the service's update config specifies a non-zero Monitor),
//   - a job service has converged (jobs are not verified),
//   - the update status is "completed" while the service has not been
//     observed as converged, or
//   - an interrupt signal is received.
//
// It returns an error when an API call or the updater fails, or when the
// update or rollback is reported as paused.
//
//nolint:gocyclo
func ServiceProgress(ctx context.Context, client client.APIClient, serviceID string, progressWriter io.WriteCloser) error {
	defer progressWriter.Close()

	progressOut := streamformatter.NewJSONProgressOutput(progressWriter, false)

	// Catch interrupts so that the monitoring can be abandoned without
	// treating it as a failure; the handler is removed on return.
	sigint := make(chan os.Signal, 1)
	signal.Notify(sigint, os.Interrupt)
	defer signal.Stop(sigint)

	// Only tasks of this service that match the "_up-to-date" filter are
	// requested.
	taskFilter := filters.NewArgs()
	taskFilter.Add("service", serviceID)
	taskFilter.Add("_up-to-date", "true")

	getUpToDateTasks := func() ([]swarm.Task, error) {
		return client.TaskList(ctx, types.TaskListOptions{Filters: taskFilter})
	}

	var (
		// updater is created lazily from the first inspected service.
		updater progressUpdater
		// converged is the result of the most recent updater.update call.
		converged bool
		// convergedAt is when the current converged streak began; the zero
		// value means "not currently converged".
		convergedAt time.Time
		// monitor is how long the service must stay converged.
		monitor = 5 * time.Second
		// rollback is passed to the updater so it can label its output.
		rollback bool
		// message, if set, is written after the final "Service converged".
		message *progress.Progress
	)

	for {
		service, _, err := client.ServiceInspectWithRaw(ctx, serviceID, types.ServiceInspectOptions{})
		if err != nil {
			return err
		}

		if service.Spec.UpdateConfig != nil && service.Spec.UpdateConfig.Monitor != 0 {
			monitor = service.Spec.UpdateConfig.Monitor
		}

		if updater == nil {
			updater, err = initializeUpdater(service, progressOut)
			if err != nil {
				return err
			}
		}

		if service.UpdateStatus != nil {
			switch service.UpdateStatus.State {
			case swarm.UpdateStateUpdating:
				rollback = false
			case swarm.UpdateStateCompleted:
				// The update is reported as completed; unless we are in
				// the middle of verifying a converged service, stop here.
				if !converged {
					return nil
				}
			case swarm.UpdateStatePaused:
				return fmt.Errorf("service update paused: %s", service.UpdateStatus.Message)
			case swarm.UpdateStateRollbackStarted:
				// Announce the rollback once, the first time it is seen.
				if !rollback && service.UpdateStatus.Message != "" {
					progressOut.WriteProgress(progress.Progress{
						ID:     "rollback",
						Action: service.UpdateStatus.Message,
					})
				}
				rollback = true
			case swarm.UpdateStateRollbackPaused:
				return fmt.Errorf("service rollback paused: %s", service.UpdateStatus.Message)
			case swarm.UpdateStateRollbackCompleted:
				// Remember the rollback message so it can be shown after
				// the service has been verified.
				if !converged {
					message = &progress.Progress{ID: "rollback", Message: service.UpdateStatus.Message}
				}
				rollback = true
			}
		}
		// converged still holds the previous iteration's result here: once
		// the service has stayed converged for the whole monitor period,
		// report success and stop.
		if converged && time.Since(convergedAt) >= monitor {
			progressOut.WriteProgress(progress.Progress{
				ID:     "verify",
				Action: "Service converged",
			})
			if message != nil {
				progressOut.WriteProgress(*message)
			}
			return nil
		}

		tasks, err := getUpToDateTasks()
		if err != nil {
			return err
		}

		activeNodes, err := getActiveNodes(ctx, client)
		if err != nil {
			return err
		}

		converged, err = updater.update(service, tasks, activeNodes, rollback)
		if err != nil {
			return err
		}
		if converged {
			// if the service is a job, there's no need to verify it. jobs
			// stay done once they're done. skip the verification and just end
			// the progress monitoring.
			//
			// only job services have a non-nil job status, which means we can
			// use the presence of this field to check if the service is a job
			// here.
			if service.JobStatus != nil {
				progress.Message(progressOut, "", "job complete")
				return nil
			}

			// Start the verification timer on the first converged
			// iteration of a streak.
			if convergedAt.IsZero() {
				convergedAt = time.Now()
			}
			wait := monitor - time.Since(convergedAt)
			if wait >= 0 {
				progressOut.WriteProgress(progress.Progress{
					// Ideally this would have no ID, but
					// the progress rendering code behaves
					// poorly on an "action" with no ID. It
					// returns the cursor to the beginning
					// of the line, so the first character
					// may be difficult to read. Then the
					// output is overwritten by the shell
					// prompt when the command finishes.
					ID:     "verify",
					Action: fmt.Sprintf("Waiting %d seconds to verify that tasks are stable...", wait/time.Second+1),
				})
			}
		} else {
			// Convergence was lost during verification: report it and
			// reset the timer.
			if !convergedAt.IsZero() {
				progressOut.WriteProgress(progress.Progress{
					ID:     "verify",
					Action: "Detected task failure",
				})
			}
			convergedAt = time.Time{}
		}

		// Wait before polling again, or stop on interrupt. The hint about
		// background operation is only printed while not yet converged.
		select {
		case <-time.After(200 * time.Millisecond):
		case <-sigint:
			if !converged {
				progress.Message(progressOut, "", "Operation continuing in background.")
				progress.Messagef(progressOut, "", "Use `docker service ps %s` to check progress.", serviceID)
			}
			return nil
		}
	}
}
   220  
   221  func getActiveNodes(ctx context.Context, client client.APIClient) (map[string]struct{}, error) {
   222  	nodes, err := client.NodeList(ctx, types.NodeListOptions{})
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  
   227  	activeNodes := make(map[string]struct{})
   228  	for _, n := range nodes {
   229  		if n.Status.State != swarm.NodeStateDown {
   230  			activeNodes[n.ID] = struct{}{}
   231  		}
   232  	}
   233  	return activeNodes, nil
   234  }
   235  
   236  func initializeUpdater(service swarm.Service, progressOut progress.Output) (progressUpdater, error) {
   237  	if service.Spec.Mode.Replicated != nil && service.Spec.Mode.Replicated.Replicas != nil {
   238  		return &replicatedProgressUpdater{
   239  			progressOut: progressOut,
   240  		}, nil
   241  	}
   242  	if service.Spec.Mode.Global != nil {
   243  		return &globalProgressUpdater{
   244  			progressOut: progressOut,
   245  		}, nil
   246  	}
   247  	if service.Spec.Mode.ReplicatedJob != nil {
   248  		return newReplicatedJobProgressUpdater(service, progressOut), nil
   249  	}
   250  	if service.Spec.Mode.GlobalJob != nil {
   251  		return &globalJobProgressUpdater{
   252  			progressOut: progressOut,
   253  		}, nil
   254  	}
   255  	return nil, errors.New("unrecognized service mode")
   256  }
   257  
   258  func writeOverallProgress(progressOut progress.Output, numerator, denominator int, rollback bool) {
   259  	if rollback {
   260  		progressOut.WriteProgress(progress.Progress{
   261  			ID:     "overall progress",
   262  			Action: fmt.Sprintf("rolling back update: %d out of %d tasks", numerator, denominator),
   263  		})
   264  		return
   265  	}
   266  	progressOut.WriteProgress(progress.Progress{
   267  		ID:     "overall progress",
   268  		Action: fmt.Sprintf("%d out of %d tasks", numerator, denominator),
   269  	})
   270  }
   271  
   272  func truncError(errMsg string) string {
   273  	// Remove newlines from the error, which corrupt the output.
   274  	errMsg = strings.Replace(errMsg, "\n", " ", -1)
   275  
   276  	// Limit the length to 75 characters, so that even on narrow terminals
   277  	// this will not overflow to the next line.
   278  	if len(errMsg) > 75 {
   279  		errMsg = errMsg[:74] + "…"
   280  	}
   281  	return errMsg
   282  }
   283  
// replicatedProgressUpdater is the progressUpdater for replicated services.
type replicatedProgressUpdater struct {
	// progressOut receives all progress messages.
	progressOut progress.Output

	// used for mapping slots to a contiguous space
	// this also causes progress bars to appear in order
	slotMap map[int]int

	// initialized is set once the first call to update has drawn the initial
	// progress lines.
	initialized bool
	// done is set once all replicas were seen running; while it is set, the
	// per-task and overall progress lines are no longer rewritten.
	done bool
}
   294  
   295  func (u *replicatedProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) {
   296  	if service.Spec.Mode.Replicated == nil || service.Spec.Mode.Replicated.Replicas == nil {
   297  		return false, errors.New("no replica count")
   298  	}
   299  	replicas := *service.Spec.Mode.Replicated.Replicas
   300  
   301  	if !u.initialized {
   302  		u.slotMap = make(map[int]int)
   303  
   304  		// Draw progress bars in order
   305  		writeOverallProgress(u.progressOut, 0, int(replicas), rollback)
   306  
   307  		if replicas <= maxProgressBars {
   308  			for i := uint64(1); i <= replicas; i++ {
   309  				progress.Update(u.progressOut, fmt.Sprintf("%d/%d", i, replicas), " ")
   310  			}
   311  		}
   312  		u.initialized = true
   313  	}
   314  
   315  	tasksBySlot := u.tasksBySlot(tasks, activeNodes)
   316  
   317  	// If we had reached a converged state, check if we are still converged.
   318  	if u.done {
   319  		for _, task := range tasksBySlot {
   320  			if task.Status.State != swarm.TaskStateRunning {
   321  				u.done = false
   322  				break
   323  			}
   324  		}
   325  	}
   326  
   327  	running := uint64(0)
   328  
   329  	for _, task := range tasksBySlot {
   330  		mappedSlot := u.slotMap[task.Slot]
   331  		if mappedSlot == 0 {
   332  			mappedSlot = len(u.slotMap) + 1
   333  			u.slotMap[task.Slot] = mappedSlot
   334  		}
   335  
   336  		if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning {
   337  			running++
   338  		}
   339  
   340  		u.writeTaskProgress(task, mappedSlot, replicas)
   341  	}
   342  
   343  	if !u.done {
   344  		writeOverallProgress(u.progressOut, int(running), int(replicas), rollback)
   345  
   346  		if running == replicas {
   347  			u.done = true
   348  		}
   349  	}
   350  
   351  	return running == replicas, nil
   352  }
   353  
   354  func (u *replicatedProgressUpdater) tasksBySlot(tasks []swarm.Task, activeNodes map[string]struct{}) map[int]swarm.Task {
   355  	// If there are multiple tasks with the same slot number, favor the one
   356  	// with the *lowest* desired state. This can happen in restart
   357  	// scenarios.
   358  	tasksBySlot := make(map[int]swarm.Task)
   359  	for _, task := range tasks {
   360  		if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 {
   361  			continue
   362  		}
   363  		if existingTask, ok := tasksBySlot[task.Slot]; ok {
   364  			if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] {
   365  				continue
   366  			}
   367  			// If the desired states match, observed state breaks
   368  			// ties. This can happen with the "start first" service
   369  			// update mode.
   370  			if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] &&
   371  				numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] {
   372  				continue
   373  			}
   374  		}
   375  		if task.NodeID != "" {
   376  			if _, nodeActive := activeNodes[task.NodeID]; !nodeActive {
   377  				continue
   378  			}
   379  		}
   380  		tasksBySlot[task.Slot] = task
   381  	}
   382  
   383  	return tasksBySlot
   384  }
   385  
   386  func (u *replicatedProgressUpdater) writeTaskProgress(task swarm.Task, mappedSlot int, replicas uint64) {
   387  	if u.done || replicas > maxProgressBars || uint64(mappedSlot) > replicas {
   388  		return
   389  	}
   390  
   391  	if task.Status.Err != "" {
   392  		u.progressOut.WriteProgress(progress.Progress{
   393  			ID:     fmt.Sprintf("%d/%d", mappedSlot, replicas),
   394  			Action: truncError(task.Status.Err),
   395  		})
   396  		return
   397  	}
   398  
   399  	if !terminalState(task.DesiredState) && !terminalState(task.Status.State) {
   400  		u.progressOut.WriteProgress(progress.Progress{
   401  			ID:         fmt.Sprintf("%d/%d", mappedSlot, replicas),
   402  			Action:     fmt.Sprintf("%-[1]*s", longestState, task.Status.State),
   403  			Current:    numberedStates[task.Status.State],
   404  			Total:      maxProgress,
   405  			HideCounts: true,
   406  		})
   407  	}
   408  }
   409  
// globalProgressUpdater is the progressUpdater for global services.
type globalProgressUpdater struct {
	// progressOut receives all progress messages.
	progressOut progress.Output

	// initialized is set once update has seen at least one task and drawn the
	// initial overall progress line.
	initialized bool
	// done is set once all tasks were seen running; while it is set, the
	// per-task and overall progress lines are no longer rewritten.
	done bool
}
   416  
   417  func (u *globalProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) {
   418  	tasksByNode := u.tasksByNode(tasks)
   419  
   420  	// We don't have perfect knowledge of how many nodes meet the
   421  	// constraints for this service. But the orchestrator creates tasks
   422  	// for all eligible nodes at the same time, so we should see all those
   423  	// nodes represented among the up-to-date tasks.
   424  	nodeCount := len(tasksByNode)
   425  
   426  	if !u.initialized {
   427  		if nodeCount == 0 {
   428  			// Two possibilities: either the orchestrator hasn't created
   429  			// the tasks yet, or the service doesn't meet constraints for
   430  			// any node. Either way, we wait.
   431  			u.progressOut.WriteProgress(progress.Progress{
   432  				ID:     "overall progress",
   433  				Action: "waiting for new tasks",
   434  			})
   435  			return false, nil
   436  		}
   437  
   438  		writeOverallProgress(u.progressOut, 0, nodeCount, rollback)
   439  		u.initialized = true
   440  	}
   441  
   442  	// If we had reached a converged state, check if we are still converged.
   443  	if u.done {
   444  		for _, task := range tasksByNode {
   445  			if task.Status.State != swarm.TaskStateRunning {
   446  				u.done = false
   447  				break
   448  			}
   449  		}
   450  	}
   451  
   452  	running := 0
   453  
   454  	for _, task := range tasksByNode {
   455  		if _, nodeActive := activeNodes[task.NodeID]; nodeActive {
   456  			if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning {
   457  				running++
   458  			}
   459  
   460  			u.writeTaskProgress(task, nodeCount)
   461  		}
   462  	}
   463  
   464  	if !u.done {
   465  		writeOverallProgress(u.progressOut, running, nodeCount, rollback)
   466  
   467  		if running == nodeCount {
   468  			u.done = true
   469  		}
   470  	}
   471  
   472  	return running == nodeCount, nil
   473  }
   474  
   475  func (u *globalProgressUpdater) tasksByNode(tasks []swarm.Task) map[string]swarm.Task {
   476  	// If there are multiple tasks with the same node ID, favor the one
   477  	// with the *lowest* desired state. This can happen in restart
   478  	// scenarios.
   479  	tasksByNode := make(map[string]swarm.Task)
   480  	for _, task := range tasks {
   481  		if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 {
   482  			continue
   483  		}
   484  		if existingTask, ok := tasksByNode[task.NodeID]; ok {
   485  			if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] {
   486  				continue
   487  			}
   488  
   489  			// If the desired states match, observed state breaks
   490  			// ties. This can happen with the "start first" service
   491  			// update mode.
   492  			if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] &&
   493  				numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] {
   494  				continue
   495  			}
   496  
   497  		}
   498  		tasksByNode[task.NodeID] = task
   499  	}
   500  
   501  	return tasksByNode
   502  }
   503  
   504  func (u *globalProgressUpdater) writeTaskProgress(task swarm.Task, nodeCount int) {
   505  	if u.done || nodeCount > maxProgressBars {
   506  		return
   507  	}
   508  
   509  	if task.Status.Err != "" {
   510  		u.progressOut.WriteProgress(progress.Progress{
   511  			ID:     stringid.TruncateID(task.NodeID),
   512  			Action: truncError(task.Status.Err),
   513  		})
   514  		return
   515  	}
   516  
   517  	if !terminalState(task.DesiredState) && !terminalState(task.Status.State) {
   518  		u.progressOut.WriteProgress(progress.Progress{
   519  			ID:         stringid.TruncateID(task.NodeID),
   520  			Action:     fmt.Sprintf("%-[1]*s", longestState, task.Status.State),
   521  			Current:    numberedStates[task.Status.State],
   522  			Total:      maxProgress,
   523  			HideCounts: true,
   524  		})
   525  	}
   526  }
   527  
// replicatedJobProgressUpdater outputs the progress of a replicated job. This
// progress consists of a few main elements.
//
// The first is the progress bar for the job as a whole. This shows the number
// of completed out of total tasks for the job. Tasks that are currently
// running are not counted.
//
// The second is the status of the "active" tasks for the job. We count a task
// as "active" if it has any non-terminal state, not just running. This is
// shown as a fraction of the maximum concurrent tasks that can be running,
// which is the lesser of MaxConcurrent or TotalCompletions - completed tasks.
type replicatedJobProgressUpdater struct {
	// progressOut receives all progress messages.
	progressOut progress.Output

	// jobIteration is the service's job iteration, used to exclude tasks
	// belonging to earlier iterations.
	jobIteration uint64

	// concurrent is the value of MaxConcurrent as an int. That is, the maximum
	// number of tasks allowed to be run simultaneously.
	concurrent int

	// total is the value of TotalCompletions, the number of complete tasks
	// desired.
	total int

	// initialized is set to true after the first time update is called. The
	// first time update is called, the components of the progress UI are all
	// written out in an initial pass. This ensures that they will subsequently
	// be in order, no matter how they are updated.
	initialized bool

	// progressDigits is the number of digits in total, so that we know how
	// much to pad the job progress field with.
	//
	// when we're writing the number of completed over total tasks, we need to
	// pad the numerator with spaces, so that the bar doesn't jump around.
	// we'll compute that once on init, and then reuse it over and over.
	//
	// we compute this in the least clever way possible: convert to string
	// with strconv.Itoa, then take the len.
	progressDigits int

	// activeDigits is the same, but for active tasks, and it applies to both
	// the numerator and denominator.
	activeDigits int
}
   575  
   576  func newReplicatedJobProgressUpdater(service swarm.Service, progressOut progress.Output) *replicatedJobProgressUpdater {
   577  	u := &replicatedJobProgressUpdater{
   578  		progressOut:  progressOut,
   579  		concurrent:   int(*service.Spec.Mode.ReplicatedJob.MaxConcurrent),
   580  		total:        int(*service.Spec.Mode.ReplicatedJob.TotalCompletions),
   581  		jobIteration: service.JobStatus.JobIteration.Index,
   582  	}
   583  	u.progressDigits = len(strconv.Itoa(u.total))
   584  	u.activeDigits = len(strconv.Itoa(u.concurrent))
   585  
   586  	return u
   587  }
   588  
   589  // update writes out the progress of the replicated job.
   590  func (u *replicatedJobProgressUpdater) update(_ swarm.Service, tasks []swarm.Task, _ map[string]struct{}, _ bool) (bool, error) {
   591  	if !u.initialized {
   592  		u.writeOverallProgress(0, 0)
   593  
   594  		// only write out progress bars if there will be less than the maximum
   595  		if u.total <= maxProgressBars {
   596  			for i := 1; i <= u.total; i++ {
   597  				u.progressOut.WriteProgress(progress.Progress{
   598  					ID:     fmt.Sprintf("%d/%d", i, u.total),
   599  					Action: " ",
   600  				})
   601  			}
   602  		}
   603  		u.initialized = true
   604  	}
   605  
   606  	// tasksBySlot is a mapping of slot number to the task valid for that slot.
   607  	// it deduplicated tasks occupying the same numerical slot but in different
   608  	// states.
   609  	tasksBySlot := make(map[int]swarm.Task)
   610  	for _, task := range tasks {
   611  		// first, check if the task belongs to this service iteration. skip
   612  		// tasks belonging to other iterations.
   613  		if task.JobIteration == nil || task.JobIteration.Index != u.jobIteration {
   614  			continue
   615  		}
   616  
   617  		// then, if the task is in an unknown state, ignore it.
   618  		if numberedStates[task.DesiredState] == 0 ||
   619  			numberedStates[task.Status.State] == 0 {
   620  			continue
   621  		}
   622  
   623  		// finally, check if the task already exists in the map
   624  		if existing, ok := tasksBySlot[task.Slot]; ok {
   625  			// if so, use the task with the lower actual state
   626  			if numberedStates[existing.Status.State] > numberedStates[task.Status.State] {
   627  				tasksBySlot[task.Slot] = task
   628  			}
   629  		} else {
   630  			// otherwise, just add it to the map.
   631  			tasksBySlot[task.Slot] = task
   632  		}
   633  	}
   634  
   635  	activeTasks := 0
   636  	completeTasks := 0
   637  
   638  	for i := 0; i < len(tasksBySlot); i++ {
   639  		task := tasksBySlot[i]
   640  		u.writeTaskProgress(task)
   641  
   642  		if numberedStates[task.Status.State] < numberedStates[swarm.TaskStateComplete] {
   643  			activeTasks++
   644  		}
   645  
   646  		if task.Status.State == swarm.TaskStateComplete {
   647  			completeTasks++
   648  		}
   649  	}
   650  
   651  	u.writeOverallProgress(activeTasks, completeTasks)
   652  
   653  	return completeTasks == u.total, nil
   654  }
   655  
   656  func (u *replicatedJobProgressUpdater) writeOverallProgress(active, completed int) {
   657  	u.progressOut.WriteProgress(progress.Progress{
   658  		ID: "job progress",
   659  		Action: fmt.Sprintf(
   660  			// * means "use the next positional arg to compute padding"
   661  			"%*d out of %d complete", u.progressDigits, completed, u.total,
   662  		),
   663  		Current:    int64(completed),
   664  		Total:      int64(u.total),
   665  		HideCounts: true,
   666  	})
   667  
   668  	// actualDesired is the lesser of MaxConcurrent, or the remaining tasks
   669  	actualDesired := u.total - completed
   670  	if actualDesired > u.concurrent {
   671  		actualDesired = u.concurrent
   672  	}
   673  
   674  	u.progressOut.WriteProgress(progress.Progress{
   675  		ID: "active tasks",
   676  		Action: fmt.Sprintf(
   677  			// [n] notation lets us select a specific argument, 1-indexed
   678  			// putting the [1] before the star means "make the string this
   679  			// length". putting the [2] or the [3] means "use this argument
   680  			// here"
   681  			//
   682  			// we pad both the numerator and the denominator because, as the
   683  			// job reaches its conclusion, the number of possible concurrent
   684  			// tasks will go down, as fewer than MaxConcurrent tasks are needed
   685  			// to complete the job.
   686  			"%[1]*[2]d out of %[1]*[3]d tasks", u.activeDigits, active, actualDesired,
   687  		),
   688  	})
   689  }
   690  
   691  func (u *replicatedJobProgressUpdater) writeTaskProgress(task swarm.Task) {
   692  	if u.total > maxProgressBars {
   693  		return
   694  	}
   695  
   696  	if task.Status.Err != "" {
   697  		u.progressOut.WriteProgress(progress.Progress{
   698  			ID:     fmt.Sprintf("%d/%d", task.Slot+1, u.total),
   699  			Action: truncError(task.Status.Err),
   700  		})
   701  		return
   702  	}
   703  
   704  	u.progressOut.WriteProgress(progress.Progress{
   705  		ID:         fmt.Sprintf("%d/%d", task.Slot+1, u.total),
   706  		Action:     fmt.Sprintf("%-*s", longestState, task.Status.State),
   707  		Current:    numberedStates[task.Status.State],
   708  		Total:      maxJobProgress,
   709  		HideCounts: true,
   710  	})
   711  }
   712  
// globalJobProgressUpdater is the progressUpdater for GlobalJob-mode services.
// Because GlobalJob services are so much simpler than ReplicatedJob services,
// this updater is in turn simpler as well.
type globalJobProgressUpdater struct {
	// progressOut receives all progress messages.
	progressOut progress.Output

	// initialized is used to detect the first pass of update, and to perform
	// first time initialization logic at that time.
	initialized bool

	// total is the total number of tasks expected for this job
	total int

	// progressDigits is the number of spaces to pad the numerator of the job
	// progress field
	progressDigits int

	// taskNodes is the set of node IDs that had a task of the current job
	// iteration when the updater was initialized. Tasks on any other node
	// are ignored by update.
	taskNodes map[string]struct{}
}
   732  
   733  func (u *globalJobProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, _ bool) (bool, error) {
   734  	if !u.initialized {
   735  		// if there are not yet tasks, then return early.
   736  		if len(tasks) == 0 && len(activeNodes) != 0 {
   737  			u.progressOut.WriteProgress(progress.Progress{
   738  				ID:     "job progress",
   739  				Action: "waiting for tasks",
   740  			})
   741  			return false, nil
   742  		}
   743  
   744  		// when a global job starts, all of its tasks are created at once, so
   745  		// we can use len(tasks) to know how many we're expecting.
   746  		u.taskNodes = map[string]struct{}{}
   747  
   748  		for _, task := range tasks {
   749  			// skip any tasks not belonging to this job iteration.
   750  			if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index {
   751  				continue
   752  			}
   753  
   754  			// collect the list of all node IDs for this service.
   755  			//
   756  			// basically, global jobs will execute on any new nodes that join
   757  			// the cluster in the future. to avoid making things complicated,
   758  			// we will only check the progress of the initial set of nodes. if
   759  			// any new nodes come online during the operation, we will ignore
   760  			// them.
   761  			u.taskNodes[task.NodeID] = struct{}{}
   762  		}
   763  
   764  		u.total = len(u.taskNodes)
   765  		u.progressDigits = len(strconv.Itoa(u.total))
   766  
   767  		u.writeOverallProgress(0)
   768  		u.initialized = true
   769  	}
   770  
   771  	// tasksByNodeID maps a NodeID to the latest task for that Node ID. this
   772  	// lets us pick only the latest task for any given node.
   773  	tasksByNodeID := map[string]swarm.Task{}
   774  
   775  	for _, task := range tasks {
   776  		// skip any tasks not belonging to this job iteration
   777  		if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index {
   778  			continue
   779  		}
   780  
   781  		// if the task is not on one of the initial set of nodes, ignore it.
   782  		if _, ok := u.taskNodes[task.NodeID]; !ok {
   783  			continue
   784  		}
   785  
   786  		// if there is already a task recorded for this node, choose the one
   787  		// with the lower state
   788  		if oldtask, ok := tasksByNodeID[task.NodeID]; ok {
   789  			if numberedStates[oldtask.Status.State] > numberedStates[task.Status.State] {
   790  				tasksByNodeID[task.NodeID] = task
   791  			}
   792  		} else {
   793  			tasksByNodeID[task.NodeID] = task
   794  		}
   795  	}
   796  
   797  	complete := 0
   798  	for _, task := range tasksByNodeID {
   799  		u.writeTaskProgress(task)
   800  		if task.Status.State == swarm.TaskStateComplete {
   801  			complete++
   802  		}
   803  	}
   804  
   805  	u.writeOverallProgress(complete)
   806  	return complete == u.total, nil
   807  }
   808  
   809  func (u *globalJobProgressUpdater) writeTaskProgress(task swarm.Task) {
   810  	if u.total > maxProgressBars {
   811  		return
   812  	}
   813  
   814  	if task.Status.Err != "" {
   815  		u.progressOut.WriteProgress(progress.Progress{
   816  			ID:     task.NodeID,
   817  			Action: truncError(task.Status.Err),
   818  		})
   819  		return
   820  	}
   821  
   822  	u.progressOut.WriteProgress(progress.Progress{
   823  		ID:         task.NodeID,
   824  		Action:     fmt.Sprintf("%-*s", longestState, task.Status.State),
   825  		Current:    numberedStates[task.Status.State],
   826  		Total:      maxJobProgress,
   827  		HideCounts: true,
   828  	})
   829  }
   830  
   831  func (u *globalJobProgressUpdater) writeOverallProgress(complete int) {
   832  	// all tasks for a global job are active at once, so we only write out the
   833  	// total progress.
   834  	u.progressOut.WriteProgress(progress.Progress{
   835  		// see (*replicatedJobProgressUpdater).writeOverallProgress for an
   836  		// explanation fo the advanced fmt use in this function.
   837  		ID: "job progress",
   838  		Action: fmt.Sprintf(
   839  			"%*d out of %d complete", u.progressDigits, complete, u.total,
   840  		),
   841  		Current:    int64(complete),
   842  		Total:      int64(u.total),
   843  		HideCounts: true,
   844  	})
   845  }