github.com/kaisenlinux/docker@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/taskinit/init.go (about)

     1  package taskinit
     2  
     3  import (
     4  	"context"
     5  	"sort"
     6  	"time"
     7  
     8  	"github.com/docker/swarmkit/api"
     9  	"github.com/docker/swarmkit/api/defaults"
    10  	"github.com/docker/swarmkit/log"
    11  	"github.com/docker/swarmkit/manager/orchestrator"
    12  	"github.com/docker/swarmkit/manager/orchestrator/restart"
    13  	"github.com/docker/swarmkit/manager/state/store"
    14  	gogotypes "github.com/gogo/protobuf/types"
    15  )
    16  
    17  // InitHandler defines orchestrator's action to fix tasks at start.
    18  type InitHandler interface {
    19  	IsRelatedService(service *api.Service) bool
    20  	FixTask(ctx context.Context, batch *store.Batch, t *api.Task)
    21  	SlotTuple(t *api.Task) orchestrator.SlotTuple
    22  }
    23  
    24  // CheckTasks fixes tasks in the store before orchestrator runs. The previous leader might
    25  // not have finished processing their updates and left them in an inconsistent state.
    26  func CheckTasks(ctx context.Context, s *store.MemoryStore, readTx store.ReadTx, initHandler InitHandler, startSupervisor restart.SupervisorInterface) error {
    27  	instances := make(map[orchestrator.SlotTuple][]*api.Task)
    28  	err := s.Batch(func(batch *store.Batch) error {
    29  		tasks, err := store.FindTasks(readTx, store.All)
    30  		if err != nil {
    31  			return err
    32  		}
    33  		for _, t := range tasks {
    34  			if t.ServiceID == "" {
    35  				continue
    36  			}
    37  
    38  			// TODO(aluzzardi): We should NOT retrieve the service here.
    39  			service := store.GetService(readTx, t.ServiceID)
    40  			if service == nil {
    41  				// Service was deleted
    42  				err := batch.Update(func(tx store.Tx) error {
    43  					return store.DeleteTask(tx, t.ID)
    44  				})
    45  				if err != nil {
    46  					log.G(ctx).WithError(err).Error("failed to delete task")
    47  				}
    48  				continue
    49  			}
    50  			if !initHandler.IsRelatedService(service) {
    51  				continue
    52  			}
    53  
    54  			tuple := initHandler.SlotTuple(t)
    55  			instances[tuple] = append(instances[tuple], t)
    56  
    57  			// handle task updates from agent which should have been triggered by task update events
    58  			initHandler.FixTask(ctx, batch, t)
    59  
    60  			// desired state ready is a transient state that it should be started.
    61  			// however previous leader may not have started it, retry start here
    62  			if t.DesiredState != api.TaskStateReady || t.Status.State > api.TaskStateCompleted {
    63  				continue
    64  			}
    65  			restartDelay, _ := gogotypes.DurationFromProto(defaults.Service.Task.Restart.Delay)
    66  			if t.Spec.Restart != nil && t.Spec.Restart.Delay != nil {
    67  				var err error
    68  				restartDelay, err = gogotypes.DurationFromProto(t.Spec.Restart.Delay)
    69  				if err != nil {
    70  					log.G(ctx).WithError(err).Error("invalid restart delay")
    71  					restartDelay, _ = gogotypes.DurationFromProto(defaults.Service.Task.Restart.Delay)
    72  				}
    73  			}
    74  			if restartDelay != 0 {
    75  				var timestamp time.Time
    76  				if t.Status.AppliedAt != nil {
    77  					timestamp, err = gogotypes.TimestampFromProto(t.Status.AppliedAt)
    78  				} else {
    79  					timestamp, err = gogotypes.TimestampFromProto(t.Status.Timestamp)
    80  				}
    81  				if err == nil {
    82  					restartTime := timestamp.Add(restartDelay)
    83  					calculatedRestartDelay := time.Until(restartTime)
    84  					if calculatedRestartDelay < restartDelay {
    85  						restartDelay = calculatedRestartDelay
    86  					}
    87  					if restartDelay > 0 {
    88  						_ = batch.Update(func(tx store.Tx) error {
    89  							t := store.GetTask(tx, t.ID)
    90  							// TODO(aluzzardi): This is shady as well. We should have a more generic condition.
    91  							if t == nil || t.DesiredState != api.TaskStateReady {
    92  								return nil
    93  							}
    94  							startSupervisor.DelayStart(ctx, tx, nil, t.ID, restartDelay, true)
    95  							return nil
    96  						})
    97  						continue
    98  					}
    99  				} else {
   100  					log.G(ctx).WithError(err).Error("invalid status timestamp")
   101  				}
   102  			}
   103  
   104  			// Start now
   105  			err := batch.Update(func(tx store.Tx) error {
   106  				return startSupervisor.StartNow(tx, t.ID)
   107  			})
   108  			if err != nil {
   109  				log.G(ctx).WithError(err).WithField("task.id", t.ID).Error("moving task out of delayed state failed")
   110  			}
   111  		}
   112  		return nil
   113  	})
   114  	if err != nil {
   115  		return err
   116  	}
   117  
   118  	for tuple, instance := range instances {
   119  		// Find the most current spec version. That's the only one
   120  		// we care about for the purpose of reconstructing restart
   121  		// history.
   122  		maxVersion := uint64(0)
   123  		for _, t := range instance {
   124  			if t.SpecVersion != nil && t.SpecVersion.Index > maxVersion {
   125  				maxVersion = t.SpecVersion.Index
   126  			}
   127  		}
   128  
   129  		// Create a new slice with just the current spec version tasks.
   130  		var upToDate []*api.Task
   131  		for _, t := range instance {
   132  			if t.SpecVersion != nil && t.SpecVersion.Index == maxVersion {
   133  				upToDate = append(upToDate, t)
   134  			}
   135  		}
   136  
   137  		// Sort by creation timestamp
   138  		sort.Sort(tasksByCreationTimestamp(upToDate))
   139  
   140  		// All up-to-date tasks in this instance except the first one
   141  		// should be considered restarted.
   142  		if len(upToDate) < 2 {
   143  			continue
   144  		}
   145  		for _, t := range upToDate[1:] {
   146  			startSupervisor.RecordRestartHistory(tuple, t)
   147  		}
   148  	}
   149  	return nil
   150  }
   151  
   152  type tasksByCreationTimestamp []*api.Task
   153  
   154  func (t tasksByCreationTimestamp) Len() int {
   155  	return len(t)
   156  }
   157  func (t tasksByCreationTimestamp) Swap(i, j int) {
   158  	t[i], t[j] = t[j], t[i]
   159  }
   160  func (t tasksByCreationTimestamp) Less(i, j int) bool {
   161  	if t[i].Meta.CreatedAt == nil {
   162  		return true
   163  	}
   164  	if t[j].Meta.CreatedAt == nil {
   165  		return false
   166  	}
   167  	if t[i].Meta.CreatedAt.Seconds < t[j].Meta.CreatedAt.Seconds {
   168  		return true
   169  	}
   170  	if t[i].Meta.CreatedAt.Seconds > t[j].Meta.CreatedAt.Seconds {
   171  		return false
   172  	}
   173  	return t[i].Meta.CreatedAt.Nanos < t[j].Meta.CreatedAt.Nanos
   174  }