github.com/khulnasoft/cli@v0.0.0-20240402070845-01bcad7beefa/cli/command/service/progress/progress.go (about)

     1  package progress
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"os/signal"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/docker/docker/api/types"
    15  	"github.com/docker/docker/api/types/filters"
    16  	"github.com/docker/docker/api/types/swarm"
    17  	"github.com/docker/docker/client"
    18  	"github.com/docker/docker/pkg/progress"
    19  	"github.com/docker/docker/pkg/streamformatter"
    20  	"github.com/docker/docker/pkg/stringid"
    21  )
    22  
    23  var (
    24  	numberedStates = map[swarm.TaskState]int64{
    25  		swarm.TaskStateNew:       1,
    26  		swarm.TaskStateAllocated: 2,
    27  		swarm.TaskStatePending:   3,
    28  		swarm.TaskStateAssigned:  4,
    29  		swarm.TaskStateAccepted:  5,
    30  		swarm.TaskStatePreparing: 6,
    31  		swarm.TaskStateReady:     7,
    32  		swarm.TaskStateStarting:  8,
    33  		swarm.TaskStateRunning:   9,
    34  
    35  		// The following states are not actually shown in progress
    36  		// output, but are used internally for ordering.
    37  		swarm.TaskStateComplete: 10,
    38  		swarm.TaskStateShutdown: 11,
    39  		swarm.TaskStateFailed:   12,
    40  		swarm.TaskStateRejected: 13,
    41  	}
    42  
    43  	longestState int
    44  )
    45  
    46  const (
    47  	maxProgress     = 9
    48  	maxProgressBars = 20
    49  	maxJobProgress  = 10
    50  )
    51  
// progressUpdater renders the progress of one service mode. ServiceProgress
// calls update once per polling cycle with the freshly inspected service, its
// up-to-date tasks, the set of IDs of nodes that are not down, and whether a
// rollback is in progress. The boolean result reports whether the service is
// considered converged.
type progressUpdater interface {
	update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error)
}
    55  
    56  func init() {
    57  	for state := range numberedStates {
    58  		// for jobs, we use the "complete" state, and so it should be factored
    59  		// in to the computation of the longest state.
    60  		if (!terminalState(state) || state == swarm.TaskStateComplete) && len(state) > longestState {
    61  			longestState = len(state)
    62  		}
    63  	}
    64  }
    65  
    66  func terminalState(state swarm.TaskState) bool {
    67  	return numberedStates[state] > numberedStates[swarm.TaskStateRunning]
    68  }
    69  
// ServiceProgress outputs progress information for convergence of a service.
//
// It repeatedly inspects the service, lists its up-to-date tasks and the
// cluster nodes, and hands them to a mode-specific progressUpdater, pausing
// 200ms between iterations. Once the updater reports convergence, a job
// service returns immediately; any other service is watched for the monitor
// period (5 seconds unless Spec.UpdateConfig.Monitor is non-zero) before
// "converged" is reported.
//
// progressWriter is closed when the function returns. An error is returned
// when an API call fails, when the service mode is not recognized, or when
// the update or rollback is paused. Receiving os.Interrupt stops monitoring
// and returns nil.
//
//nolint:gocyclo
func ServiceProgress(ctx context.Context, apiClient client.APIClient, serviceID string, progressWriter io.WriteCloser) error {
	defer progressWriter.Close()

	progressOut := streamformatter.NewJSONProgressOutput(progressWriter, false)

	// Intercept interrupts for the lifetime of this call so that the loop
	// below can stop monitoring without failing.
	sigint := make(chan os.Signal, 1)
	signal.Notify(sigint, os.Interrupt)
	defer signal.Stop(sigint)

	taskFilter := filters.NewArgs()
	taskFilter.Add("service", serviceID)
	taskFilter.Add("_up-to-date", "true")

	getUpToDateTasks := func() ([]swarm.Task, error) {
		return apiClient.TaskList(ctx, types.TaskListOptions{Filters: taskFilter})
	}

	var (
		updater     progressUpdater
		converged   bool
		convergedAt time.Time
		monitor     = 5 * time.Second
		rollback    bool
		message     *progress.Progress
	)

	for {
		service, _, err := apiClient.ServiceInspectWithRaw(ctx, serviceID, types.ServiceInspectOptions{})
		if err != nil {
			return err
		}

		// A monitor period configured on the service overrides the default.
		if service.Spec.UpdateConfig != nil && service.Spec.UpdateConfig.Monitor != 0 {
			monitor = service.Spec.UpdateConfig.Monitor
		}

		// The updater is chosen once, from the mode seen on the first
		// iteration.
		if updater == nil {
			updater, err = initializeUpdater(service, progressOut)
			if err != nil {
				return err
			}
		}

		if service.UpdateStatus != nil {
			switch service.UpdateStatus.State {
			case swarm.UpdateStateUpdating:
				rollback = false
			case swarm.UpdateStateCompleted:
				if !converged {
					return nil
				}
			case swarm.UpdateStatePaused:
				return fmt.Errorf("service update paused: %s", service.UpdateStatus.Message)
			case swarm.UpdateStateRollbackStarted:
				// Announce the rollback only on the transition into it.
				if !rollback && service.UpdateStatus.Message != "" {
					progressOut.WriteProgress(progress.Progress{
						ID:     "rollback",
						Action: service.UpdateStatus.Message,
					})
				}
				rollback = true
			case swarm.UpdateStateRollbackPaused:
				return fmt.Errorf("service rollback paused: %s", service.UpdateStatus.Message)
			case swarm.UpdateStateRollbackCompleted:
				// Remember the message; it is written after the
				// "converged" line further down.
				if !converged {
					message = &progress.Progress{ID: "rollback", Message: service.UpdateStatus.Message}
				}
				rollback = true
			}
		}
		// The service has stayed converged for the whole monitor period.
		if converged && time.Since(convergedAt) >= monitor {
			progressOut.WriteProgress(progress.Progress{
				ID:     "verify",
				Action: fmt.Sprintf("Service %s converged", serviceID),
			})
			if message != nil {
				progressOut.WriteProgress(*message)
			}
			return nil
		}

		tasks, err := getUpToDateTasks()
		if err != nil {
			return err
		}

		activeNodes, err := getActiveNodes(ctx, apiClient)
		if err != nil {
			return err
		}

		converged, err = updater.update(service, tasks, activeNodes, rollback)
		if err != nil {
			return err
		}
		if converged {
			// if the service is a job, there's no need to verify it. jobs
			// stay done once they're done. skip the verification and just end
			// the progress monitoring.
			//
			// only job services have a non-nil job status, which means we can
			// use the presence of this field to check if the service is a job
			// here.
			if service.JobStatus != nil {
				progress.Message(progressOut, "", "job complete")
				return nil
			}

			// Start the verification clock on the first converged iteration.
			if convergedAt.IsZero() {
				convergedAt = time.Now()
			}
			wait := monitor - time.Since(convergedAt)
			if wait >= 0 {
				progressOut.WriteProgress(progress.Progress{
					// Ideally this would have no ID, but
					// the progress rendering code behaves
					// poorly on an "action" with no ID. It
					// returns the cursor to the beginning
					// of the line, so the first character
					// may be difficult to read. Then the
					// output is overwritten by the shell
					// prompt when the command finishes.
					ID:     "verify",
					Action: fmt.Sprintf("Waiting %d seconds to verify that tasks are stable...", wait/time.Second+1),
				})
			}
		} else {
			// Convergence was lost during verification: report it and
			// reset the clock.
			if !convergedAt.IsZero() {
				progressOut.WriteProgress(progress.Progress{
					ID:     "verify",
					Action: "Detected task failure",
				})
			}
			convergedAt = time.Time{}
		}

		// Pause before the next poll, or stop on interrupt.
		select {
		case <-time.After(200 * time.Millisecond):
		case <-sigint:
			if !converged {
				progress.Message(progressOut, "", "Operation continuing in background.")
				progress.Messagef(progressOut, "", "Use `docker service ps %s` to check progress.", serviceID)
			}
			return nil
		}
	}
}
   220  
   221  func getActiveNodes(ctx context.Context, apiClient client.APIClient) (map[string]struct{}, error) {
   222  	nodes, err := apiClient.NodeList(ctx, types.NodeListOptions{})
   223  	if err != nil {
   224  		return nil, err
   225  	}
   226  
   227  	activeNodes := make(map[string]struct{})
   228  	for _, n := range nodes {
   229  		if n.Status.State != swarm.NodeStateDown {
   230  			activeNodes[n.ID] = struct{}{}
   231  		}
   232  	}
   233  	return activeNodes, nil
   234  }
   235  
   236  func initializeUpdater(service swarm.Service, progressOut progress.Output) (progressUpdater, error) {
   237  	if service.Spec.Mode.Replicated != nil && service.Spec.Mode.Replicated.Replicas != nil {
   238  		return &replicatedProgressUpdater{
   239  			progressOut: progressOut,
   240  		}, nil
   241  	}
   242  	if service.Spec.Mode.Global != nil {
   243  		return &globalProgressUpdater{
   244  			progressOut: progressOut,
   245  		}, nil
   246  	}
   247  	if service.Spec.Mode.ReplicatedJob != nil {
   248  		return newReplicatedJobProgressUpdater(service, progressOut), nil
   249  	}
   250  	if service.Spec.Mode.GlobalJob != nil {
   251  		return &globalJobProgressUpdater{
   252  			progressOut: progressOut,
   253  		}, nil
   254  	}
   255  	return nil, errors.New("unrecognized service mode")
   256  }
   257  
   258  func writeOverallProgress(progressOut progress.Output, numerator, denominator int, rollback bool) {
   259  	if rollback {
   260  		progressOut.WriteProgress(progress.Progress{
   261  			ID:     "overall progress",
   262  			Action: fmt.Sprintf("rolling back update: %d out of %d tasks", numerator, denominator),
   263  		})
   264  		return
   265  	}
   266  	progressOut.WriteProgress(progress.Progress{
   267  		ID:     "overall progress",
   268  		Action: fmt.Sprintf("%d out of %d tasks", numerator, denominator),
   269  	})
   270  }
   271  
   272  func truncError(errMsg string) string {
   273  	// Remove newlines from the error, which corrupt the output.
   274  	errMsg = strings.ReplaceAll(errMsg, "\n", " ")
   275  
   276  	// Limit the length to 75 characters, so that even on narrow terminals
   277  	// this will not overflow to the next line.
   278  	if len(errMsg) > 75 {
   279  		errMsg = errMsg[:74] + "…"
   280  	}
   281  	return errMsg
   282  }
   283  
// replicatedProgressUpdater is the progressUpdater for replicated services.
type replicatedProgressUpdater struct {
	// progressOut receives all progress lines written by this updater.
	progressOut progress.Output

	// used for mapping slots to a contiguous space
	// this also causes progress bars to appear in order
	slotMap map[int]int

	// initialized is set once the first call to update has drawn the
	// initial progress lines and created slotMap.
	initialized bool
	// done records that the previous update saw every replica running;
	// while it is set, per-task and overall progress are not rewritten.
	done bool
}
   294  
   295  func (u *replicatedProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) {
   296  	if service.Spec.Mode.Replicated == nil || service.Spec.Mode.Replicated.Replicas == nil {
   297  		return false, errors.New("no replica count")
   298  	}
   299  	replicas := *service.Spec.Mode.Replicated.Replicas
   300  
   301  	if !u.initialized {
   302  		u.slotMap = make(map[int]int)
   303  
   304  		// Draw progress bars in order
   305  		writeOverallProgress(u.progressOut, 0, int(replicas), rollback)
   306  
   307  		if replicas <= maxProgressBars {
   308  			for i := uint64(1); i <= replicas; i++ {
   309  				progress.Update(u.progressOut, fmt.Sprintf("%d/%d", i, replicas), " ")
   310  			}
   311  		}
   312  		u.initialized = true
   313  	}
   314  
   315  	tasksBySlot := u.tasksBySlot(tasks, activeNodes)
   316  
   317  	// If we had reached a converged state, check if we are still converged.
   318  	if u.done {
   319  		for _, task := range tasksBySlot {
   320  			if task.Status.State != swarm.TaskStateRunning {
   321  				u.done = false
   322  				break
   323  			}
   324  		}
   325  	}
   326  
   327  	running := uint64(0)
   328  
   329  	for _, task := range tasksBySlot {
   330  		mappedSlot := u.slotMap[task.Slot]
   331  		if mappedSlot == 0 {
   332  			mappedSlot = len(u.slotMap) + 1
   333  			u.slotMap[task.Slot] = mappedSlot
   334  		}
   335  
   336  		if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning {
   337  			running++
   338  		}
   339  
   340  		u.writeTaskProgress(task, mappedSlot, replicas)
   341  	}
   342  
   343  	if !u.done {
   344  		writeOverallProgress(u.progressOut, int(running), int(replicas), rollback)
   345  
   346  		if running == replicas {
   347  			u.done = true
   348  		}
   349  	}
   350  
   351  	return running == replicas, nil
   352  }
   353  
   354  func (u *replicatedProgressUpdater) tasksBySlot(tasks []swarm.Task, activeNodes map[string]struct{}) map[int]swarm.Task {
   355  	// If there are multiple tasks with the same slot number, favor the one
   356  	// with the *lowest* desired state. This can happen in restart
   357  	// scenarios.
   358  	tasksBySlot := make(map[int]swarm.Task)
   359  	for _, task := range tasks {
   360  		if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 {
   361  			continue
   362  		}
   363  		if existingTask, ok := tasksBySlot[task.Slot]; ok {
   364  			if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] {
   365  				continue
   366  			}
   367  			// If the desired states match, observed state breaks
   368  			// ties. This can happen with the "start first" service
   369  			// update mode.
   370  			if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] &&
   371  				numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] {
   372  				continue
   373  			}
   374  		}
   375  		if task.NodeID != "" {
   376  			if _, nodeActive := activeNodes[task.NodeID]; !nodeActive {
   377  				continue
   378  			}
   379  		}
   380  		tasksBySlot[task.Slot] = task
   381  	}
   382  
   383  	return tasksBySlot
   384  }
   385  
   386  func (u *replicatedProgressUpdater) writeTaskProgress(task swarm.Task, mappedSlot int, replicas uint64) {
   387  	if u.done || replicas > maxProgressBars || uint64(mappedSlot) > replicas {
   388  		return
   389  	}
   390  
   391  	if task.Status.Err != "" {
   392  		u.progressOut.WriteProgress(progress.Progress{
   393  			ID:     fmt.Sprintf("%d/%d", mappedSlot, replicas),
   394  			Action: truncError(task.Status.Err),
   395  		})
   396  		return
   397  	}
   398  
   399  	if !terminalState(task.DesiredState) && !terminalState(task.Status.State) {
   400  		u.progressOut.WriteProgress(progress.Progress{
   401  			ID:         fmt.Sprintf("%d/%d", mappedSlot, replicas),
   402  			Action:     fmt.Sprintf("%-[1]*s", longestState, task.Status.State),
   403  			Current:    numberedStates[task.Status.State],
   404  			Total:      maxProgress,
   405  			HideCounts: true,
   406  		})
   407  	}
   408  }
   409  
// globalProgressUpdater is the progressUpdater for global services.
type globalProgressUpdater struct {
	// progressOut receives all progress lines written by this updater.
	progressOut progress.Output

	// initialized is set once update has seen at least one task and has
	// written the initial overall progress line.
	initialized bool
	// done records that the previous update saw a running task on every
	// node; while it is set, progress lines are not rewritten.
	done bool
}
   416  
   417  func (u *globalProgressUpdater) update(_ swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, rollback bool) (bool, error) {
   418  	tasksByNode := u.tasksByNode(tasks)
   419  
   420  	// We don't have perfect knowledge of how many nodes meet the
   421  	// constraints for this service. But the orchestrator creates tasks
   422  	// for all eligible nodes at the same time, so we should see all those
   423  	// nodes represented among the up-to-date tasks.
   424  	nodeCount := len(tasksByNode)
   425  
   426  	if !u.initialized {
   427  		if nodeCount == 0 {
   428  			// Two possibilities: either the orchestrator hasn't created
   429  			// the tasks yet, or the service doesn't meet constraints for
   430  			// any node. Either way, we wait.
   431  			u.progressOut.WriteProgress(progress.Progress{
   432  				ID:     "overall progress",
   433  				Action: "waiting for new tasks",
   434  			})
   435  			return false, nil
   436  		}
   437  
   438  		writeOverallProgress(u.progressOut, 0, nodeCount, rollback)
   439  		u.initialized = true
   440  	}
   441  
   442  	// If we had reached a converged state, check if we are still converged.
   443  	if u.done {
   444  		for _, task := range tasksByNode {
   445  			if task.Status.State != swarm.TaskStateRunning {
   446  				u.done = false
   447  				break
   448  			}
   449  		}
   450  	}
   451  
   452  	running := 0
   453  
   454  	for _, task := range tasksByNode {
   455  		if _, nodeActive := activeNodes[task.NodeID]; nodeActive {
   456  			if !terminalState(task.DesiredState) && task.Status.State == swarm.TaskStateRunning {
   457  				running++
   458  			}
   459  
   460  			u.writeTaskProgress(task, nodeCount)
   461  		}
   462  	}
   463  
   464  	if !u.done {
   465  		writeOverallProgress(u.progressOut, running, nodeCount, rollback)
   466  
   467  		if running == nodeCount {
   468  			u.done = true
   469  		}
   470  	}
   471  
   472  	return running == nodeCount, nil
   473  }
   474  
   475  func (u *globalProgressUpdater) tasksByNode(tasks []swarm.Task) map[string]swarm.Task {
   476  	// If there are multiple tasks with the same node ID, favor the one
   477  	// with the *lowest* desired state. This can happen in restart
   478  	// scenarios.
   479  	tasksByNode := make(map[string]swarm.Task)
   480  	for _, task := range tasks {
   481  		if numberedStates[task.DesiredState] == 0 || numberedStates[task.Status.State] == 0 {
   482  			continue
   483  		}
   484  		if existingTask, ok := tasksByNode[task.NodeID]; ok {
   485  			if numberedStates[existingTask.DesiredState] < numberedStates[task.DesiredState] {
   486  				continue
   487  			}
   488  
   489  			// If the desired states match, observed state breaks
   490  			// ties. This can happen with the "start first" service
   491  			// update mode.
   492  			if numberedStates[existingTask.DesiredState] == numberedStates[task.DesiredState] &&
   493  				numberedStates[existingTask.Status.State] <= numberedStates[task.Status.State] {
   494  				continue
   495  			}
   496  		}
   497  		tasksByNode[task.NodeID] = task
   498  	}
   499  
   500  	return tasksByNode
   501  }
   502  
   503  func (u *globalProgressUpdater) writeTaskProgress(task swarm.Task, nodeCount int) {
   504  	if u.done || nodeCount > maxProgressBars {
   505  		return
   506  	}
   507  
   508  	if task.Status.Err != "" {
   509  		u.progressOut.WriteProgress(progress.Progress{
   510  			ID:     stringid.TruncateID(task.NodeID),
   511  			Action: truncError(task.Status.Err),
   512  		})
   513  		return
   514  	}
   515  
   516  	if !terminalState(task.DesiredState) && !terminalState(task.Status.State) {
   517  		u.progressOut.WriteProgress(progress.Progress{
   518  			ID:         stringid.TruncateID(task.NodeID),
   519  			Action:     fmt.Sprintf("%-[1]*s", longestState, task.Status.State),
   520  			Current:    numberedStates[task.Status.State],
   521  			Total:      maxProgress,
   522  			HideCounts: true,
   523  		})
   524  	}
   525  }
   526  
// replicatedJobProgressUpdater outputs the progress of a replicated job. This
// progress consists of a few main elements.
//
// The first is the progress bar for the job as a whole. This shows the number
// of completed out of total tasks for the job. Tasks that are currently
// running are not counted.
//
// The second is the status of the "active" tasks for the job. We count a task
// as "active" if it has any non-terminal state, not just running. This is
// shown as a fraction of the maximum concurrent tasks that can be running,
// which is the lesser of MaxConcurrent or TotalCompletions - completed tasks.
type replicatedJobProgressUpdater struct {
	// progressOut receives all progress lines written by this updater.
	progressOut progress.Output

	// jobIteration is the service's job iteration, used to exclude tasks
	// belonging to earlier iterations.
	jobIteration uint64

	// concurrent is the value of MaxConcurrent as an int. That is, the maximum
	// number of tasks allowed to be run simultaneously.
	concurrent int

	// total is the value of TotalCompletions, the number of complete tasks
	// desired.
	total int

	// initialized is set to true after the first time update is called. the
	// first time update is called, the components of the progress UI are all
	// written out in an initial pass. this ensures that they will
	// subsequently be in order, no matter how they are updated.
	initialized bool

	// progressDigits is the number of digits in total, so that we know how
	// much to pad the job progress field with.
	//
	// when we're writing the number of completed over total tasks, we need to
	// pad the numerator with spaces, so that the bar doesn't jump around.
	// we'll compute that once on init, and then reuse it over and over.
	//
	// we compute this in the least clever way possible: convert to string
	// with strconv.Itoa, then take the len.
	progressDigits int

	// activeDigits is the same, but for active tasks, and it applies to both
	// the numerator and denominator.
	activeDigits int
}
   574  
   575  func newReplicatedJobProgressUpdater(service swarm.Service, progressOut progress.Output) *replicatedJobProgressUpdater {
   576  	u := &replicatedJobProgressUpdater{
   577  		progressOut:  progressOut,
   578  		concurrent:   int(*service.Spec.Mode.ReplicatedJob.MaxConcurrent),
   579  		total:        int(*service.Spec.Mode.ReplicatedJob.TotalCompletions),
   580  		jobIteration: service.JobStatus.JobIteration.Index,
   581  	}
   582  	u.progressDigits = len(strconv.Itoa(u.total))
   583  	u.activeDigits = len(strconv.Itoa(u.concurrent))
   584  
   585  	return u
   586  }
   587  
   588  // update writes out the progress of the replicated job.
   589  func (u *replicatedJobProgressUpdater) update(_ swarm.Service, tasks []swarm.Task, _ map[string]struct{}, _ bool) (bool, error) {
   590  	if !u.initialized {
   591  		u.writeOverallProgress(0, 0)
   592  
   593  		// only write out progress bars if there will be less than the maximum
   594  		if u.total <= maxProgressBars {
   595  			for i := 1; i <= u.total; i++ {
   596  				u.progressOut.WriteProgress(progress.Progress{
   597  					ID:     fmt.Sprintf("%d/%d", i, u.total),
   598  					Action: " ",
   599  				})
   600  			}
   601  		}
   602  		u.initialized = true
   603  	}
   604  
   605  	// tasksBySlot is a mapping of slot number to the task valid for that slot.
   606  	// it deduplicated tasks occupying the same numerical slot but in different
   607  	// states.
   608  	tasksBySlot := make(map[int]swarm.Task)
   609  	for _, task := range tasks {
   610  		// first, check if the task belongs to this service iteration. skip
   611  		// tasks belonging to other iterations.
   612  		if task.JobIteration == nil || task.JobIteration.Index != u.jobIteration {
   613  			continue
   614  		}
   615  
   616  		// then, if the task is in an unknown state, ignore it.
   617  		if numberedStates[task.DesiredState] == 0 ||
   618  			numberedStates[task.Status.State] == 0 {
   619  			continue
   620  		}
   621  
   622  		// finally, check if the task already exists in the map
   623  		if existing, ok := tasksBySlot[task.Slot]; ok {
   624  			// if so, use the task with the lower actual state
   625  			if numberedStates[existing.Status.State] > numberedStates[task.Status.State] {
   626  				tasksBySlot[task.Slot] = task
   627  			}
   628  		} else {
   629  			// otherwise, just add it to the map.
   630  			tasksBySlot[task.Slot] = task
   631  		}
   632  	}
   633  
   634  	activeTasks := 0
   635  	completeTasks := 0
   636  
   637  	for i := 0; i < len(tasksBySlot); i++ {
   638  		task := tasksBySlot[i]
   639  		u.writeTaskProgress(task)
   640  
   641  		if numberedStates[task.Status.State] < numberedStates[swarm.TaskStateComplete] {
   642  			activeTasks++
   643  		}
   644  
   645  		if task.Status.State == swarm.TaskStateComplete {
   646  			completeTasks++
   647  		}
   648  	}
   649  
   650  	u.writeOverallProgress(activeTasks, completeTasks)
   651  
   652  	return completeTasks == u.total, nil
   653  }
   654  
   655  func (u *replicatedJobProgressUpdater) writeOverallProgress(active, completed int) {
   656  	u.progressOut.WriteProgress(progress.Progress{
   657  		ID: "job progress",
   658  		Action: fmt.Sprintf(
   659  			// * means "use the next positional arg to compute padding"
   660  			"%*d out of %d complete", u.progressDigits, completed, u.total,
   661  		),
   662  		Current:    int64(completed),
   663  		Total:      int64(u.total),
   664  		HideCounts: true,
   665  	})
   666  
   667  	// actualDesired is the lesser of MaxConcurrent, or the remaining tasks
   668  	actualDesired := u.total - completed
   669  	if actualDesired > u.concurrent {
   670  		actualDesired = u.concurrent
   671  	}
   672  
   673  	u.progressOut.WriteProgress(progress.Progress{
   674  		ID: "active tasks",
   675  		Action: fmt.Sprintf(
   676  			// [n] notation lets us select a specific argument, 1-indexed
   677  			// putting the [1] before the star means "make the string this
   678  			// length". putting the [2] or the [3] means "use this argument
   679  			// here"
   680  			//
   681  			// we pad both the numerator and the denominator because, as the
   682  			// job reaches its conclusion, the number of possible concurrent
   683  			// tasks will go down, as fewer than MaxConcurrent tasks are needed
   684  			// to complete the job.
   685  			"%[1]*[2]d out of %[1]*[3]d tasks", u.activeDigits, active, actualDesired,
   686  		),
   687  	})
   688  }
   689  
   690  func (u *replicatedJobProgressUpdater) writeTaskProgress(task swarm.Task) {
   691  	if u.total > maxProgressBars {
   692  		return
   693  	}
   694  
   695  	if task.Status.Err != "" {
   696  		u.progressOut.WriteProgress(progress.Progress{
   697  			ID:     fmt.Sprintf("%d/%d", task.Slot+1, u.total),
   698  			Action: truncError(task.Status.Err),
   699  		})
   700  		return
   701  	}
   702  
   703  	u.progressOut.WriteProgress(progress.Progress{
   704  		ID:         fmt.Sprintf("%d/%d", task.Slot+1, u.total),
   705  		Action:     fmt.Sprintf("%-*s", longestState, task.Status.State),
   706  		Current:    numberedStates[task.Status.State],
   707  		Total:      maxJobProgress,
   708  		HideCounts: true,
   709  	})
   710  }
   711  
// globalJobProgressUpdater is the progressUpdater for GlobalJob-mode services.
// Because GlobalJob services are so much simpler than ReplicatedJob services,
// this updater is in turn simpler as well.
type globalJobProgressUpdater struct {
	// progressOut receives all progress lines written by this updater.
	progressOut progress.Output

	// initialized is used to detect the first pass of update, and to perform
	// first time initialization logic at that time.
	initialized bool

	// total is the total number of tasks expected for this job
	total int

	// progressDigits is the number of spaces to pad the numerator of the job
	// progress field
	progressDigits int

	// taskNodes is the set of node IDs that had a task of the current job
	// iteration when the updater was initialized. Tasks on any other node
	// are ignored by update.
	taskNodes map[string]struct{}
}
   731  
   732  func (u *globalJobProgressUpdater) update(service swarm.Service, tasks []swarm.Task, activeNodes map[string]struct{}, _ bool) (bool, error) {
   733  	if !u.initialized {
   734  		// if there are not yet tasks, then return early.
   735  		if len(tasks) == 0 && len(activeNodes) != 0 {
   736  			u.progressOut.WriteProgress(progress.Progress{
   737  				ID:     "job progress",
   738  				Action: "waiting for tasks",
   739  			})
   740  			return false, nil
   741  		}
   742  
   743  		// when a global job starts, all of its tasks are created at once, so
   744  		// we can use len(tasks) to know how many we're expecting.
   745  		u.taskNodes = map[string]struct{}{}
   746  
   747  		for _, task := range tasks {
   748  			// skip any tasks not belonging to this job iteration.
   749  			if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index {
   750  				continue
   751  			}
   752  
   753  			// collect the list of all node IDs for this service.
   754  			//
   755  			// basically, global jobs will execute on any new nodes that join
   756  			// the cluster in the future. to avoid making things complicated,
   757  			// we will only check the progress of the initial set of nodes. if
   758  			// any new nodes come online during the operation, we will ignore
   759  			// them.
   760  			u.taskNodes[task.NodeID] = struct{}{}
   761  		}
   762  
   763  		u.total = len(u.taskNodes)
   764  		u.progressDigits = len(strconv.Itoa(u.total))
   765  
   766  		u.writeOverallProgress(0)
   767  		u.initialized = true
   768  	}
   769  
   770  	// tasksByNodeID maps a NodeID to the latest task for that Node ID. this
   771  	// lets us pick only the latest task for any given node.
   772  	tasksByNodeID := map[string]swarm.Task{}
   773  
   774  	for _, task := range tasks {
   775  		// skip any tasks not belonging to this job iteration
   776  		if task.JobIteration == nil || task.JobIteration.Index != service.JobStatus.JobIteration.Index {
   777  			continue
   778  		}
   779  
   780  		// if the task is not on one of the initial set of nodes, ignore it.
   781  		if _, ok := u.taskNodes[task.NodeID]; !ok {
   782  			continue
   783  		}
   784  
   785  		// if there is already a task recorded for this node, choose the one
   786  		// with the lower state
   787  		if oldtask, ok := tasksByNodeID[task.NodeID]; ok {
   788  			if numberedStates[oldtask.Status.State] > numberedStates[task.Status.State] {
   789  				tasksByNodeID[task.NodeID] = task
   790  			}
   791  		} else {
   792  			tasksByNodeID[task.NodeID] = task
   793  		}
   794  	}
   795  
   796  	complete := 0
   797  	for _, task := range tasksByNodeID {
   798  		u.writeTaskProgress(task)
   799  		if task.Status.State == swarm.TaskStateComplete {
   800  			complete++
   801  		}
   802  	}
   803  
   804  	u.writeOverallProgress(complete)
   805  	return complete == u.total, nil
   806  }
   807  
   808  func (u *globalJobProgressUpdater) writeTaskProgress(task swarm.Task) {
   809  	if u.total > maxProgressBars {
   810  		return
   811  	}
   812  
   813  	if task.Status.Err != "" {
   814  		u.progressOut.WriteProgress(progress.Progress{
   815  			ID:     task.NodeID,
   816  			Action: truncError(task.Status.Err),
   817  		})
   818  		return
   819  	}
   820  
   821  	u.progressOut.WriteProgress(progress.Progress{
   822  		ID:         task.NodeID,
   823  		Action:     fmt.Sprintf("%-*s", longestState, task.Status.State),
   824  		Current:    numberedStates[task.Status.State],
   825  		Total:      maxJobProgress,
   826  		HideCounts: true,
   827  	})
   828  }
   829  
   830  func (u *globalJobProgressUpdater) writeOverallProgress(complete int) {
   831  	// all tasks for a global job are active at once, so we only write out the
   832  	// total progress.
   833  	u.progressOut.WriteProgress(progress.Progress{
   834  		// see (*replicatedJobProgressUpdater).writeOverallProgress for an
   835  		// explanation fo the advanced fmt use in this function.
   836  		ID: "job progress",
   837  		Action: fmt.Sprintf(
   838  			"%*d out of %d complete", u.progressDigits, complete, u.total,
   839  		),
   840  		Current:    int64(complete),
   841  		Total:      int64(u.total),
   842  		HideCounts: true,
   843  	})
   844  }