github.com/kaisenlinux/docker@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/taskinit/init.go (about) 1 package taskinit 2 3 import ( 4 "context" 5 "sort" 6 "time" 7 8 "github.com/docker/swarmkit/api" 9 "github.com/docker/swarmkit/api/defaults" 10 "github.com/docker/swarmkit/log" 11 "github.com/docker/swarmkit/manager/orchestrator" 12 "github.com/docker/swarmkit/manager/orchestrator/restart" 13 "github.com/docker/swarmkit/manager/state/store" 14 gogotypes "github.com/gogo/protobuf/types" 15 ) 16 17 // InitHandler defines orchestrator's action to fix tasks at start. 18 type InitHandler interface { 19 IsRelatedService(service *api.Service) bool 20 FixTask(ctx context.Context, batch *store.Batch, t *api.Task) 21 SlotTuple(t *api.Task) orchestrator.SlotTuple 22 } 23 24 // CheckTasks fixes tasks in the store before orchestrator runs. The previous leader might 25 // not have finished processing their updates and left them in an inconsistent state. 26 func CheckTasks(ctx context.Context, s *store.MemoryStore, readTx store.ReadTx, initHandler InitHandler, startSupervisor restart.SupervisorInterface) error { 27 instances := make(map[orchestrator.SlotTuple][]*api.Task) 28 err := s.Batch(func(batch *store.Batch) error { 29 tasks, err := store.FindTasks(readTx, store.All) 30 if err != nil { 31 return err 32 } 33 for _, t := range tasks { 34 if t.ServiceID == "" { 35 continue 36 } 37 38 // TODO(aluzzardi): We should NOT retrieve the service here. 39 service := store.GetService(readTx, t.ServiceID) 40 if service == nil { 41 // Service was deleted 42 err := batch.Update(func(tx store.Tx) error { 43 return store.DeleteTask(tx, t.ID) 44 }) 45 if err != nil { 46 log.G(ctx).WithError(err).Error("failed to delete task") 47 } 48 continue 49 } 50 if !initHandler.IsRelatedService(service) { 51 continue 52 } 53 54 tuple := initHandler.SlotTuple(t) 55 instances[tuple] = append(instances[tuple], t) 56 57 // handle task updates from agent which should have been triggered by task update events 58 initHandler.FixTask(ctx, batch, t) 59 60 // desired state ready is a transient state that it should be started. 61 // however previous leader may not have started it, retry start here 62 if t.DesiredState != api.TaskStateReady || t.Status.State > api.TaskStateCompleted { 63 continue 64 } 65 restartDelay, _ := gogotypes.DurationFromProto(defaults.Service.Task.Restart.Delay) 66 if t.Spec.Restart != nil && t.Spec.Restart.Delay != nil { 67 var err error 68 restartDelay, err = gogotypes.DurationFromProto(t.Spec.Restart.Delay) 69 if err != nil { 70 log.G(ctx).WithError(err).Error("invalid restart delay") 71 restartDelay, _ = gogotypes.DurationFromProto(defaults.Service.Task.Restart.Delay) 72 } 73 } 74 if restartDelay != 0 { 75 var timestamp time.Time 76 if t.Status.AppliedAt != nil { 77 timestamp, err = gogotypes.TimestampFromProto(t.Status.AppliedAt) 78 } else { 79 timestamp, err = gogotypes.TimestampFromProto(t.Status.Timestamp) 80 } 81 if err == nil { 82 restartTime := timestamp.Add(restartDelay) 83 calculatedRestartDelay := time.Until(restartTime) 84 if calculatedRestartDelay < restartDelay { 85 restartDelay = calculatedRestartDelay 86 } 87 if restartDelay > 0 { 88 _ = batch.Update(func(tx store.Tx) error { 89 t := store.GetTask(tx, t.ID) 90 // TODO(aluzzardi): This is shady as well. We should have a more generic condition. 91 if t == nil || t.DesiredState != api.TaskStateReady { 92 return nil 93 } 94 startSupervisor.DelayStart(ctx, tx, nil, t.ID, restartDelay, true) 95 return nil 96 }) 97 continue 98 } 99 } else { 100 log.G(ctx).WithError(err).Error("invalid status timestamp") 101 } 102 } 103 104 // Start now 105 err := batch.Update(func(tx store.Tx) error { 106 return startSupervisor.StartNow(tx, t.ID) 107 }) 108 if err != nil { 109 log.G(ctx).WithError(err).WithField("task.id", t.ID).Error("moving task out of delayed state failed") 110 } 111 } 112 return nil 113 }) 114 if err != nil { 115 return err 116 } 117 118 for tuple, instance := range instances { 119 // Find the most current spec version. That's the only one 120 // we care about for the purpose of reconstructing restart 121 // history. 122 maxVersion := uint64(0) 123 for _, t := range instance { 124 if t.SpecVersion != nil && t.SpecVersion.Index > maxVersion { 125 maxVersion = t.SpecVersion.Index 126 } 127 } 128 129 // Create a new slice with just the current spec version tasks. 130 var upToDate []*api.Task 131 for _, t := range instance { 132 if t.SpecVersion != nil && t.SpecVersion.Index == maxVersion { 133 upToDate = append(upToDate, t) 134 } 135 } 136 137 // Sort by creation timestamp 138 sort.Sort(tasksByCreationTimestamp(upToDate)) 139 140 // All up-to-date tasks in this instance except the first one 141 // should be considered restarted. 142 if len(upToDate) < 2 { 143 continue 144 } 145 for _, t := range upToDate[1:] { 146 startSupervisor.RecordRestartHistory(tuple, t) 147 } 148 } 149 return nil 150 } 151 152 type tasksByCreationTimestamp []*api.Task 153 154 func (t tasksByCreationTimestamp) Len() int { 155 return len(t) 156 } 157 func (t tasksByCreationTimestamp) Swap(i, j int) { 158 t[i], t[j] = t[j], t[i] 159 } 160 func (t tasksByCreationTimestamp) Less(i, j int) bool { 161 if t[i].Meta.CreatedAt == nil { 162 return true 163 } 164 if t[j].Meta.CreatedAt == nil { 165 return false 166 } 167 if t[i].Meta.CreatedAt.Seconds < t[j].Meta.CreatedAt.Seconds { 168 return true 169 } 170 if t[i].Meta.CreatedAt.Seconds > t[j].Meta.CreatedAt.Seconds { 171 return false 172 } 173 return t[i].Meta.CreatedAt.Nanos < t[j].Meta.CreatedAt.Nanos 174 }