github.com/ali-iotechsys/cli@v20.10.0+incompatible/cli/command/service/progress/progress.go

github.com/ali-iotechsys/cli@v20.10.0+incompatible/cli/command/service/progress/progress.go (about)

     1  package progress
     2  
import (
	"context"
	"errors"
	"fmt"
	"io"
	"os"
	"os/signal"
	"sort"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/filters"
	"github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/client"
	"github.com/docker/docker/pkg/progress"
	"github.com/docker/docker/pkg/streamformatter"
	"github.com/docker/docker/pkg/stringid"
)
    22  
var (
	// numberedStates assigns every task state known to this package a
	// strictly increasing rank. The rank is used both as the fill level of a
	// task's progress bar and to compare states against each other. A state
	// that is missing from this map looks up as 0, which callers in this file
	// treat as "unknown state".
	numberedStates = map[swarm.TaskState]int64{
		swarm.TaskStateNew:       1,
		swarm.TaskStateAllocated: 2,
		swarm.TaskStatePending:   3,
		swarm.TaskStateAssigned:  4,
		swarm.TaskStateAccepted:  5,
		swarm.TaskStatePreparing: 6,
		swarm.TaskStateReady:     7,
		swarm.TaskStateStarting:  8,
		swarm.TaskStateRunning:   9,

		// The following states are not actually shown in progress
		// output, but are used internally for ordering.
		swarm.TaskStateComplete: 10,
		swarm.TaskStateShutdown: 11,
		swarm.TaskStateFailed:   12,
		swarm.TaskStateRejected: 13,
	}

	// longestState is the length of the longest state name that may be
	// displayed. It is computed once in init and used as the padding width
	// when formatting state names.
	longestState int
)
    45  
const (
	// maxProgress is the Total of a per-task progress bar for replicated and
	// global services. It matches the rank of the "running" state in
	// numberedStates.
	maxProgress = 9
	// maxProgressBars is the largest number of tasks for which individual
	// per-task progress bars are drawn; above it only summary lines are
	// written.
	maxProgressBars = 20
	// maxJobProgress is the Total of a per-task progress bar for job
	// services. It matches the rank of the "complete" state in
	// numberedStates.
	maxJobProgress = 10
)
    51  
// progressUpdater is implemented by the per-service-mode progress renderers
// in this file. ServiceProgress calls update once per polling pass.
type progressUpdater interface {
	// update renders progress for the given snapshot of the service, its
	// tasks and the set of active node IDs. rollback indicates that a
	// rollback is in progress. The boolean result reports whether the
	// service is considered converged for this snapshot.
	update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error)
}
    55  
    56  func init() {
    57  	for state := range numberedStates {
    58  		// for jobs, we use the "complete" state, and so it should be factored
    59  		// in to the computation of the longest state.
    60  		if (!terminalState(state) || state == swarm.TaskStateComplete) && len(state) > longestState {
    61  			longestState = len(state)
    62  		}
    63  	}
    64  }
    65  
    66  func terminalState(state swarm.TaskState) bool {
    67  	return numberedStates[state] > numberedStates[swarm.TaskStateRunning]
    68  }
    69  
    70  func stateToProgress(state swarm.TaskState, rollback bool) int64 {
    71  	if !rollback {
    72  		return numberedStates[state]
    73  	}
    74  	return numberedStates[swarm.TaskStateRunning] - numberedStates[state]
    75  }
    76  
// ServiceProgress outputs progress information for convergence of a service.
//
// It polls the service, its up-to-date tasks and the node list roughly every
// 200ms and renders the result to progressWriter as JSON progress messages.
// progressWriter is closed when the function returns.
//
// The function returns nil when:
//   - the service has stayed converged for the monitor period (5 seconds by
//     default, or the service's UpdateConfig.Monitor when that is non-zero),
//   - a job service has converged,
//   - the update status is "completed" while the tasks are not converged, or
//   - an interrupt signal is received.
//
// It returns an error when any API call fails, when no updater exists for the
// service mode, when the update or the rollback is paused, or when a rollback
// completed while the tasks are not converged.
// nolint: gocyclo
func ServiceProgress(ctx context.Context, client client.APIClient, serviceID string, progressWriter io.WriteCloser) error {
	defer progressWriter.Close()

	progressOut := streamformatter.NewJSONProgressOutput(progressWriter, false)

	// Catch interrupts so that the loop can exit cleanly; the handler is
	// removed again when the function returns.
	sigint := make(chan os.Signal, 1)
	signal.Notify(sigint, os.Interrupt)
	defer signal.Stop(sigint)

	// Only tasks of this service that are flagged as up to date are listed.
	taskFilter := filters.NewArgs()
	taskFilter.Add("service", serviceID)
	taskFilter.Add("_up-to-date", "true")

	getUpToDateTasks := func() ([]swarm.Task, error) {
		return client.TaskList(ctx, types.TaskListOptions{Filters: taskFilter})
	}

	var (
		// updater is created lazily on the first pass, once the service
		// mode is known.
		updater progressUpdater
		// converged is the result of the most recent updater.update call.
		converged bool
		// convergedAt is when the current uninterrupted converged period
		// began; it is the zero time while not converged.
		convergedAt time.Time
		// monitor is how long the service must stay converged before
		// success is reported.
		monitor = 5 * time.Second
		// rollback is true while a rollback is in progress.
		rollback bool
	)

	for {
		service, _, err := client.ServiceInspectWithRaw(ctx, serviceID, types.ServiceInspectOptions{})
		if err != nil {
			return err
		}

		if service.Spec.UpdateConfig != nil && service.Spec.UpdateConfig.Monitor != 0 {
			monitor = service.Spec.UpdateConfig.Monitor
		}

		if updater == nil {
			updater, err = initializeUpdater(service, progressOut)
			if err != nil {
				return err
			}
		}

		if service.UpdateStatus != nil {
			switch service.UpdateStatus.State {
			case swarm.UpdateStateUpdating:
				rollback = false
			case swarm.UpdateStateCompleted:
				// converged still holds the result of the previous
				// pass at this point.
				if !converged {
					return nil
				}
			case swarm.UpdateStatePaused:
				return fmt.Errorf("service update paused: %s", service.UpdateStatus.Message)
			case swarm.UpdateStateRollbackStarted:
				// Print the rollback message only on the transition
				// into the rollback state.
				if !rollback && service.UpdateStatus.Message != "" {
					progressOut.WriteProgress(progress.Progress{
						ID:     "rollback",
						Action: service.UpdateStatus.Message,
					})
				}
				rollback = true
			case swarm.UpdateStateRollbackPaused:
				return fmt.Errorf("service rollback paused: %s", service.UpdateStatus.Message)
			case swarm.UpdateStateRollbackCompleted:
				if !converged {
					return fmt.Errorf("service rolled back: %s", service.UpdateStatus.Message)
				}
			}
		}
		// The service has been converged for the whole monitor period:
		// report success and stop.
		if converged && time.Since(convergedAt) >= monitor {
			progressOut.WriteProgress(progress.Progress{
				ID:     "verify",
				Action: "Service converged",
			})

			return nil
		}

		tasks, err := getUpToDateTasks()
		if err != nil {
			return err
		}

		activeNodes, err := getActiveNodes(ctx, client)
		if err != nil {
			return err
		}

		converged, err = updater.update(service, tasks, activeNodes, rollback)
		if err != nil {
			return err
		}
		if converged {
			// if the service is a job, there's no need to verify it. jobs
			// stay done once they're done. skip the verification and just
			// end the progress monitoring.
			//
			// only job services have a non-nil job status, which means we can
			// use the presence of this field to check if the service is a job
			// here.
			if service.JobStatus != nil {
				progress.Message(progressOut, "", "job complete")
				return nil
			}

			// Start the verification timer on the first converged pass.
			if convergedAt.IsZero() {
				convergedAt = time.Now()
			}
			wait := monitor - time.Since(convergedAt)
			if wait >= 0 {
				progressOut.WriteProgress(progress.Progress{
					// Ideally this would have no ID, but
					// the progress rendering code behaves
					// poorly on an "action" with no ID. It
					// returns the cursor to the beginning
					// of the line, so the first character
					// may be difficult to read. Then the
					// output is overwritten by the shell
					// prompt when the command finishes.
					ID:     "verify",
					Action: fmt.Sprintf("Waiting %d seconds to verify that tasks are stable...", wait/time.Second+1),
				})
			}
		} else {
			// Convergence was lost after having been reached: tell the
			// user and reset the verification timer.
			if !convergedAt.IsZero() {
				progressOut.WriteProgress(progress.Progress{
					ID:     "verify",
					Action: "Detected task failure",
				})
			}
			convergedAt = time.Time{}
		}

		// Wait before the next polling pass, or stop on interrupt.
		select {
		case <-time.After(200 * time.Millisecond):
		case <-sigint:
			if !converged {
				progress.Message(progressOut, "", "Operation continuing in background.")
				progress.Messagef(progressOut, "", "Use `docker service ps %s` to check progress.", serviceID)
			}
			return nil
		}
	}
}
   222  
   223  func getActiveNodes(ctx context.Context, client client.APIClient) (map[string]struct{}, error) {
   224  	nodes, err := client.NodeList(ctx, types.NodeListOptions{})
   225  	if err != nil {
   226  		return nil, err
   227  	}
   228  
   229  	activeNodes := make(map[string]struct{})
   230  	for _, n := range nodes {
   231  		if n.Status.State != swarm.NodeStateDown {
   232  			activeNodes[n.ID] = struct{}{}
   233  		}
   234  	}
   235  	return activeNodes, nil
   236  }
   237  
   238  func initializeUpdater(service swarm.Service, progressOut progress.Output) (progressUpdater, error) {
   239  	if service.Spec.Mode.Replicated != nil && service.Spec.Mode.Replicated.Replicas != nil {
   240  		return &replicatedProgressUpdater{
   241  			progressOut: progressOut,
   242  		}, nil
   243  	}
   244  	if service.Spec.Mode.Global != nil {
   245  		return &globalProgressUpdater{
   246  			progressOut: progressOut,
   247  		}, nil
   248  	}
   249  	if service.Spec.Mode.ReplicatedJob != nil {
   250  		return newReplicatedJobProgressUpdater(service, progressOut), nil
   251  	}
   252  	if service.Spec.Mode.GlobalJob != nil {
   253  		return &globalJobProgressUpdater{
   254  			progressOut: progressOut,
   255  		}, nil
   256  	}
   257  	return nil, errors.New("unrecognized service mode")
   258  }
   259  
   260  func writeOverallProgress(progressOut progress.Output, numerator, denominator int, rollback bool) {
   261  	if rollback {
   262  		progressOut.WriteProgress(progress.Progress{
   263  			ID:     "overall progress",
   264  			Action: fmt.Sprintf("rolling back update: %d out of %d tasks", numerator, denominator),
   265  		})
   266  		return
   267  	}
   268  	progressOut.WriteProgress(progress.Progress{
   269  		ID:     "overall progress",
   270  		Action: fmt.Sprintf("%d out of %d tasks", numerator, denominator),
   271  	})
   272  }
   273  
   274  func truncError(errMsg string) string {
   275  	// Remove newlines from the error, which corrupt the output.
   276  	errMsg = strings.Replace(errMsg, "\n", " ", -1)
   277  
   278  	// Limit the length to 75 characters, so that even on narrow terminals
   279  	// this will not overflow to the next line.
   280  	if len(errMsg) > 75 {
   281  		errMsg = errMsg[:74] + "…"
   282  	}
   283  	return errMsg
   284  }
   285  
// replicatedProgressUpdater is the progressUpdater for replicated services.
type replicatedProgressUpdater struct {
	// progressOut is where all progress lines are written.
	progressOut progress.Output

	// used for mapping slots to a contiguous space
	// this also causes progress bars to appear in order
	slotMap map[int]int

	// initialized is set once the first update call has created slotMap
	// and written the initial progress lines.
	initialized bool
	// done records that the service was seen converged; while it is set,
	// per-task and overall progress lines are no longer rewritten.
	done bool
}
   296  
   297  func (u *replicatedProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) {
   298  	if service.Spec.Mode.Replicated == nil || service.Spec.Mode.Replicated.Replicas == nil {
   299  		return false, errors.New("no replica count")
   300  	}
   301  	replicas := *service.Spec.Mode.Replicated.Replicas
   302  
   303  	if !u.initialized {
   304  		u.slotMap = make(map[int]int)
   305  
   306  		// Draw progress bars in order
   307  		writeOverallProgress(u.progressOut, 0, int(replicas), rollback)
   308  
   309  		if replicas <= maxProgressBars {
   310  			for i := uint64(1); i <= replicas; i++ {
   311  				progress.Update(u.progressOut, fmt.Sprintf("%d/%d", i, replicas), " ")
   312  			}
   313  		}
   314  		u.initialized = true
   315  	}
   316  
   317  	tasksBySlot := u.tasksBySlot(tasks, activeNodes)
   318  
   319  	// If we had reached a converged state, check if we are still converged.
   320  	if u.done {
   321  		for _, task := range tasksBySlot {
   322  			if task.Status.State != swarm.TaskStateRunning {
   323  				u.done = false
   324  				break
   325  			}
   326  		}
   327  	}
   328  
   329  	running := uint64(0)
   330  
   331  	for _, task := range tasksBySlot {
   332  		mappedSlot := u.slotMap[task.Slot]
   333  		if mappedSlot == 0 {
   334  			mappedSlot = len(u.slotMap) + 1
   335  			u.slotMap[task.Slot] = mappedSlot
   336  		}
   337  
   338  		if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning {
   339  			running++
   340  		}
   341  
   342  		u.writeTaskProgress(task, mappedSlot, replicas, rollback)
   343  	}
   344  
   345  	if !u.done {
   346  		writeOverallProgress(u.progressOut, int(running), int(replicas), rollback)
   347  
   348  		if running == replicas {
   349  			u.done = true
   350  		}
   351  	}
   352  
   353  	return running == replicas, nil
   354  }
   355  
   356  func (u *replicatedProgressUpdater) tasksBySlot(tasks []swarm.Task, activeNodes map[string]struct{}) map[int]swarm.Task {
   357  	// If there are multiple tasks with the same slot number, favor the one
   358  	// with the *lowest* desired state. This can happen in restart
   359  	// scenarios.
   360  	tasksBySlot := make(map[int]swarm.Task)
   361  	for _, task := range tasks {
   362  		if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 {
   363  			continue
   364  		}
   365  		if existingTask, ok := tasksBySlot[task.Slot]; ok {
   366  			if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] {
   367  				continue
   368  			}
   369  			// If the desired states match, observed state breaks
   370  			// ties. This can happen with the "start first" service
   371  			// update mode.
   372  			if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] &&
   373  				numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] {
   374  				continue
   375  			}
   376  		}
   377  		if task.NodeID != "" {
   378  			if _, nodeActive := activeNodes[task.NodeID]; !nodeActive {
   379  				continue
   380  			}
   381  		}
   382  		tasksBySlot[task.Slot] = task
   383  	}
   384  
   385  	return tasksBySlot
   386  }
   387  
   388  func (u *replicatedProgressUpdater) writeTaskProgress(task swarm.Task, mappedSlot int, replicas uint64, rollback bool) {
   389  	if u.done || replicas > maxProgressBars || uint64(mappedSlot) > replicas {
   390  		return
   391  	}
   392  
   393  	if task.Status.Err != "" {
   394  		u.progressOut.WriteProgress(progress.Progress{
   395  			ID:     fmt.Sprintf("%d/%d", mappedSlot, replicas),
   396  			Action: truncError(task.Status.Err),
   397  		})
   398  		return
   399  	}
   400  
   401  	if !terminalState(task.DesiredState) && !terminalState(task.Status.State) {
   402  		u.progressOut.WriteProgress(progress.Progress{
   403  			ID:         fmt.Sprintf("%d/%d", mappedSlot, replicas),
   404  			Action:     fmt.Sprintf("%-[1]*s", longestState, task.Status.State),
   405  			Current:    stateToProgress(task.Status.State, rollback),
   406  			Total:      maxProgress,
   407  			HideCounts: true,
   408  		})
   409  	}
   410  }
   411  
// globalProgressUpdater is the progressUpdater for global services.
type globalProgressUpdater struct {
	// progressOut is where all progress lines are written.
	progressOut progress.Output

	// initialized is set once the first update call that saw at least one
	// task has written the initial overall progress line.
	initialized bool
	// done records that the service was seen converged; while it is set,
	// per-task and overall progress lines are no longer rewritten.
	done bool
}
   418  
   419  func (u *globalProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) {
   420  	tasksByNode := u.tasksByNode(tasks)
   421  
   422  	// We don't have perfect knowledge of how many nodes meet the
   423  	// constraints for this service. But the orchestrator creates tasks
   424  	// for all eligible nodes at the same time, so we should see all those
   425  	// nodes represented among the up-to-date tasks.
   426  	nodeCount := len(tasksByNode)
   427  
   428  	if !u.initialized {
   429  		if nodeCount == 0 {
   430  			// Two possibilities: either the orchestrator hasn't created
   431  			// the tasks yet, or the service doesn't meet constraints for
   432  			// any node. Either way, we wait.
   433  			u.progressOut.WriteProgress(progress.Progress{
   434  				ID:     "overall progress",
   435  				Action: "waiting for new tasks",
   436  			})
   437  			return false, nil
   438  		}
   439  
   440  		writeOverallProgress(u.progressOut, 0, nodeCount, rollback)
   441  		u.initialized = true
   442  	}
   443  
   444  	// If we had reached a converged state, check if we are still converged.
   445  	if u.done {
   446  		for _, task := range tasksByNode {
   447  			if task.Status.State != swarm.TaskStateRunning {
   448  				u.done = false
   449  				break
   450  			}
   451  		}
   452  	}
   453  
   454  	running := 0
   455  
   456  	for _, task := range tasksByNode {
   457  		if _, nodeActive := activeNodes[task.NodeID]; nodeActive {
   458  			if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning {
   459  				running++
   460  			}
   461  
   462  			u.writeTaskProgress(task, nodeCount, rollback)
   463  		}
   464  	}
   465  
   466  	if !u.done {
   467  		writeOverallProgress(u.progressOut, running, nodeCount, rollback)
   468  
   469  		if running == nodeCount {
   470  			u.done = true
   471  		}
   472  	}
   473  
   474  	return running == nodeCount, nil
   475  }
   476  
   477  func (u *globalProgressUpdater) tasksByNode(tasks []swarm.Task) map[string]swarm.Task {
   478  	// If there are multiple tasks with the same node ID, favor the one
   479  	// with the *lowest* desired state. This can happen in restart
   480  	// scenarios.
   481  	tasksByNode := make(map[string]swarm.Task)
   482  	for _, task := range tasks {
   483  		if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 {
   484  			continue
   485  		}
   486  		if existingTask, ok := tasksByNode[task.NodeID]; ok {
   487  			if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] {
   488  				continue
   489  			}
   490  
   491  			// If the desired states match, observed state breaks
   492  			// ties. This can happen with the "start first" service
   493  			// update mode.
   494  			if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] &&
   495  				numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] {
   496  				continue
   497  			}
   498  
   499  		}
   500  		tasksByNode[task.NodeID] = task
   501  	}
   502  
   503  	return tasksByNode
   504  }
   505  
   506  func (u *globalProgressUpdater) writeTaskProgress(task swarm.Task, nodeCount int, rollback bool) {
   507  	if u.done || nodeCount > maxProgressBars {
   508  		return
   509  	}
   510  
   511  	if task.Status.Err != "" {
   512  		u.progressOut.WriteProgress(progress.Progress{
   513  			ID:     stringid.TruncateID(task.NodeID),
   514  			Action: truncError(task.Status.Err),
   515  		})
   516  		return
   517  	}
   518  
   519  	if !terminalState(task.DesiredState) && !terminalState(task.Status.State) {
   520  		u.progressOut.WriteProgress(progress.Progress{
   521  			ID:         stringid.TruncateID(task.NodeID),
   522  			Action:     fmt.Sprintf("%-[1]*s", longestState, task.Status.State),
   523  			Current:    stateToProgress(task.Status.State, rollback),
   524  			Total:      maxProgress,
   525  			HideCounts: true,
   526  		})
   527  	}
   528  }
   529  
// replicatedJobProgressUpdater outputs the progress of a replicated job. This
// progress consists of a few main elements.
//
// The first is the progress bar for the job as a whole. This shows the number
// of completed out of total tasks for the job. Tasks that are currently
// running are not counted.
//
// The second is the status of the "active" tasks for the job. We count a task
// as "active" if it has any non-terminal state, not just running. This is
// shown as a fraction of the maximum concurrent tasks that can be running,
// which is the lesser of MaxConcurrent or TotalCompletions - completed tasks.
type replicatedJobProgressUpdater struct {
	// progressOut is where all progress lines are written.
	progressOut progress.Output

	// jobIteration is the service's job iteration, used to exclude tasks
	// belonging to earlier iterations.
	jobIteration uint64

	// concurrent is the value of MaxConcurrent as an int. That is, the maximum
	// number of tasks allowed to be run simultaneously.
	concurrent int

	// total is the value of TotalCompletions, the number of complete tasks
	// desired.
	total int

	// initialized is set to true after the first time update is called. the
	// first time update is called, the components of the progress UI are all
	// written out in an initial pass. this ensures that they will subsequently
	// be in order, no matter how they are updated.
	initialized bool

	// progressDigits is the number of digits in total, so that we know how
	// much to pad the job progress field with.
	//
	// when we're writing the number of completed over total tasks, we need to
	// pad the numerator with spaces, so that the bar doesn't jump around.
	// we'll compute that once on init, and then reuse it over and over.
	//
	// we compute this in the least clever way possible: convert to string
	// with strconv.Itoa, then take the len.
	progressDigits int

	// activeDigits is the same, but for active tasks, and it applies to both
	// the numerator and denominator.
	activeDigits int
}
   577  
   578  func newReplicatedJobProgressUpdater(service swarm.Service, progressOut progress.Output) *replicatedJobProgressUpdater {
   579  	u := &replicatedJobProgressUpdater{
   580  		progressOut:  progressOut,
   581  		concurrent:   int(*service.Spec.Mode.ReplicatedJob.MaxConcurrent),
   582  		total:        int(*service.Spec.Mode.ReplicatedJob.TotalCompletions),
   583  		jobIteration: service.JobStatus.JobIteration.Index,
   584  	}
   585  	u.progressDigits = len(strconv.Itoa(u.total))
   586  	u.activeDigits = len(strconv.Itoa(u.concurrent))
   587  
   588  	return u
   589  }
   590  
   591  // update writes out the progress of the replicated job.
   592  func (u *replicatedJobProgressUpdater) update(_ swarm.Service, tasks []swarm.Task, _ map[string]struct{}, _ bool) (bool, error) {
   593  	if !u.initialized {
   594  		u.writeOverallProgress(0, 0)
   595  
   596  		// only write out progress bars if there will be less than the maximum
   597  		if u.total <= maxProgressBars {
   598  			for i := 1; i <= u.total; i++ {
   599  				u.progressOut.WriteProgress(progress.Progress{
   600  					ID:     fmt.Sprintf("%d/%d", i, u.total),
   601  					Action: " ",
   602  				})
   603  			}
   604  		}
   605  		u.initialized = true
   606  	}
   607  
   608  	// tasksBySlot is a mapping of slot number to the task valid for that slot.
   609  	// it deduplicated tasks occupying the same numerical slot but in different
   610  	// states.
   611  	tasksBySlot := make(map[int]swarm.Task)
   612  	for _, task := range tasks {
   613  		// first, check if the task belongs to this service iteration. skip
   614  		// tasks belonging to other iterations.
   615  		if task.JobIteration == nil || task.JobIteration.Index != u.jobIteration {
   616  			continue
   617  		}
   618  
   619  		// then, if the task is in an unknown state, ignore it.
   620  		if numberedStates[task.DesiredState] == 0 ||
   621  			numberedStates[task.Status.State] == 0 {
   622  			continue
   623  		}
   624  
   625  		// finally, check if the task already exists in the map
   626  		if existing, ok := tasksBySlot[task.Slot]; ok {
   627  			// if so, use the task with the lower actual state
   628  			if numberedStates[existing.Status.State] > numberedStates[task.Status.State] {
   629  				tasksBySlot[task.Slot] = task
   630  			}
   631  		} else {
   632  			// otherwise, just add it to the map.
   633  			tasksBySlot[task.Slot] = task
   634  		}
   635  	}
   636  
   637  	activeTasks := 0
   638  	completeTasks := 0
   639  
   640  	for i := 0; i < len(tasksBySlot); i++ {
   641  		task := tasksBySlot[i]
   642  		u.writeTaskProgress(task)
   643  
   644  		if numberedStates[task.Status.State] < numberedStates[swarm.TaskStateComplete] {
   645  			activeTasks++
   646  		}
   647  
   648  		if task.Status.State == swarm.TaskStateComplete {
   649  			completeTasks++
   650  		}
   651  	}
   652  
   653  	u.writeOverallProgress(activeTasks, completeTasks)
   654  
   655  	return completeTasks == u.total, nil
   656  }
   657  
   658  func (u *replicatedJobProgressUpdater) writeOverallProgress(active, completed int) {
   659  	u.progressOut.WriteProgress(progress.Progress{
   660  		ID: "job progress",
   661  		Action: fmt.Sprintf(
   662  			// * means "use the next positional arg to compute padding"
   663  			"%*d out of %d complete", u.progressDigits, completed, u.total,
   664  		),
   665  		Current:    int64(completed),
   666  		Total:      int64(u.total),
   667  		HideCounts: true,
   668  	})
   669  
   670  	// actualDesired is the lesser of MaxConcurrent, or the remaining tasks
   671  	actualDesired := u.total - completed
   672  	if actualDesired > u.concurrent {
   673  		actualDesired = u.concurrent
   674  	}
   675  
   676  	u.progressOut.WriteProgress(progress.Progress{
   677  		ID: "active tasks",
   678  		Action: fmt.Sprintf(
   679  			// [n] notation lets us select a specific argument, 1-indexed
   680  			// putting the [1] before the star means "make the string this
   681  			// length". putting the [2] or the [3] means "use this argument
   682  			// here"
   683  			//
   684  			// we pad both the numerator and the denominator because, as the
   685  			// job reaches its conclusion, the number of possible concurrent
   686  			// tasks will go down, as fewer than MaxConcurrent tasks are needed
   687  			// to complete the job.
   688  			"%[1]*[2]d out of %[1]*[3]d tasks", u.activeDigits, active, actualDesired,
   689  		),
   690  	})
   691  }
   692  
   693  func (u *replicatedJobProgressUpdater) writeTaskProgress(task swarm.Task) {
   694  	if u.total > maxProgressBars {
   695  		return
   696  	}
   697  
   698  	if task.Status.Err != "" {
   699  		u.progressOut.WriteProgress(progress.Progress{
   700  			ID:     fmt.Sprintf("%d/%d", task.Slot+1, u.total),
   701  			Action: truncError(task.Status.Err),
   702  		})
   703  		return
   704  	}
   705  
   706  	u.progressOut.WriteProgress(progress.Progress{
   707  		ID:         fmt.Sprintf("%d/%d", task.Slot+1, u.total),
   708  		Action:     fmt.Sprintf("%-*s", longestState, task.Status.State),
   709  		Current:    numberedStates[task.Status.State],
   710  		Total:      maxJobProgress,
   711  		HideCounts: true,
   712  	})
   713  }
   714  
// globalJobProgressUpdater is the progressUpdater for GlobalJob-mode services.
// Because GlobalJob services are so much simpler than ReplicatedJob services,
// this updater is in turn simpler as well.
type globalJobProgressUpdater struct {
	// progressOut is where all progress lines are written.
	progressOut progress.Output

	// initialized is used to detect the first pass of update, and to perform
	// first time initialization logic at that time.
	initialized bool

	// total is the total number of tasks expected for this job
	total int

	// progressDigits is the number of spaces to pad the numerator of the job
	// progress field
	progressDigits int

	// taskNodes is the set of node IDs that had a task of the current job
	// iteration when the updater was initialized. Tasks on any other node
	// are ignored by later updates.
	taskNodes map[string]struct{}
}
   734  
   735  func (u *globalJobProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, _ bool) (bool, error) {
   736  	if !u.initialized {
   737  		// if there are not yet tasks, then return early.
   738  		if len(tasks) == 0 && len(activeNodes) != 0 {
   739  			u.progressOut.WriteProgress(progress.Progress{
   740  				ID:     "job progress",
   741  				Action: "waiting for tasks",
   742  			})
   743  			return false, nil
   744  		}
   745  
   746  		// when a global job starts, all of its tasks are created at once, so
   747  		// we can use len(tasks) to know how many we're expecting.
   748  		u.taskNodes = map[string]struct{}{}
   749  
   750  		for _, task := range tasks {
   751  			// skip any tasks not belonging to this job iteration.
   752  			if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index {
   753  				continue
   754  			}
   755  
   756  			// collect the list of all node IDs for this service.
   757  			//
   758  			// basically, global jobs will execute on any new nodes that join
   759  			// the cluster in the future. to avoid making things complicated,
   760  			// we will only check the progress of the initial set of nodes. if
   761  			// any new nodes come online during the operation, we will ignore
   762  			// them.
   763  			u.taskNodes[task.NodeID] = struct{}{}
   764  		}
   765  
   766  		u.total = len(u.taskNodes)
   767  		u.progressDigits = len(strconv.Itoa(u.total))
   768  
   769  		u.writeOverallProgress(0)
   770  		u.initialized = true
   771  	}
   772  
   773  	// tasksByNodeID maps a NodeID to the latest task for that Node ID. this
   774  	// lets us pick only the latest task for any given node.
   775  	tasksByNodeID := map[string]swarm.Task{}
   776  
   777  	for _, task := range tasks {
   778  		// skip any tasks not belonging to this job iteration
   779  		if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index {
   780  			continue
   781  		}
   782  
   783  		// if the task is not on one of the initial set of nodes, ignore it.
   784  		if _, ok := u.taskNodes[task.NodeID]; !ok {
   785  			continue
   786  		}
   787  
   788  		// if there is already a task recorded for this node, choose the one
   789  		// with the lower state
   790  		if oldtask, ok := tasksByNodeID[task.NodeID]; ok {
   791  			if numberedStates[oldtask.Status.State] > numberedStates[task.Status.State] {
   792  				tasksByNodeID[task.NodeID] = task
   793  			}
   794  		} else {
   795  			tasksByNodeID[task.NodeID] = task
   796  		}
   797  	}
   798  
   799  	complete := 0
   800  	for _, task := range tasksByNodeID {
   801  		u.writeTaskProgress(task)
   802  		if task.Status.State == swarm.TaskStateComplete {
   803  			complete++
   804  		}
   805  	}
   806  
   807  	u.writeOverallProgress(complete)
   808  	return complete == u.total, nil
   809  }
   810  
   811  func (u *globalJobProgressUpdater) writeTaskProgress(task swarm.Task) {
   812  	if u.total > maxProgressBars {
   813  		return
   814  	}
   815  
   816  	if task.Status.Err != "" {
   817  		u.progressOut.WriteProgress(progress.Progress{
   818  			ID:     task.NodeID,
   819  			Action: truncError(task.Status.Err),
   820  		})
   821  		return
   822  	}
   823  
   824  	u.progressOut.WriteProgress(progress.Progress{
   825  		ID:         task.NodeID,
   826  		Action:     fmt.Sprintf("%-*s", longestState, task.Status.State),
   827  		Current:    numberedStates[task.Status.State],
   828  		Total:      maxJobProgress,
   829  		HideCounts: true,
   830  	})
   831  }
   832  
   833  func (u *globalJobProgressUpdater) writeOverallProgress(complete int) {
   834  	// all tasks for a global job are active at once, so we only write out the
   835  	// total progress.
   836  	u.progressOut.WriteProgress(progress.Progress{
   837  		// see (*replicatedJobProgressUpdater).writeOverallProgress for an
   838  		// explanation fo the advanced fmt use in this function.
   839  		ID: "job progress",
   840  		Action: fmt.Sprintf(
   841  			"%*d out of %d complete", u.progressDigits, complete, u.total,
   842  		),
   843  		Current:    int64(complete),
   844  		Total:      int64(u.total),
   845  		HideCounts: true,
   846  	})
   847  }