github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/jobs/replicated/reconciler.go

package replicated

import (
	"context"
	"fmt"

	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/orchestrator"
	"github.com/docker/swarmkit/manager/state/store"
)

// restartSupervisor is an interface representing the methods from the
// restart.SupervisorInterface that are actually needed by the reconciler. This
// more limited interface allows us to write a less ugly fake for unit testing.
type restartSupervisor interface {
	Restart(context.Context, store.Tx, *api.Cluster, *api.Service, api.Task) error
}

// Reconciler is an object that manages reconciliation of replicated jobs. It
// is blocking and non-asynchronous, for ease of testing. It implements two
// interfaces. The first is the Reconciler interface of the orchestrator
// package above this one. The second is the taskinit.InitHandler interface.
type Reconciler struct {
	// we need the store, of course, to do updates
	store *store.MemoryStore

	restart restartSupervisor
}

// NewReconciler creates a new reconciler object
func NewReconciler(store *store.MemoryStore, restart restartSupervisor) *Reconciler {
	return &Reconciler{
		store:   store,
		restart: restart,
	}
}

// ReconcileService reconciles the replicated job service with the given ID by
// checking to see if new replicas should be created. ReconcileService returns
// an error if something prevents it from correctly reconciling the service.
func (r *Reconciler) ReconcileService(id string) error {
	var (
		service *api.Service
		tasks   []*api.Task
		cluster *api.Cluster
		viewErr error
	)
	// first, get the service and all of its tasks
	r.store.View(func(tx store.ReadTx) {
		service = store.GetService(tx, id)

		tasks, viewErr = store.FindTasks(tx, store.ByServiceID(id))

		// there should only ever be 1 cluster object, but for reasons
		// forgotten by me, it needs to be retrieved in a rather roundabout way
		// from the store
		var clusters []*api.Cluster
		clusters, viewErr = store.FindClusters(tx, store.All)
		if len(clusters) == 1 {
			cluster = clusters[0]
		} else if len(clusters) > 1 {
			// this should never happen, and indicates that the system is
			// broken.
			panic("there should never be more than one cluster object")
		}
	})

	// errors during view should only happen in a few rather catastrophic
	// cases, but here it's not unreasonable to just return an error anyway.
	if viewErr != nil {
		return viewErr
	}

	// if the service has already been deleted, there's nothing to do here.
	if service == nil {
		return nil
	}

	// if this is the first iteration of the service, it may not yet have a
	// JobStatus, so we should create one if so. this won't actually be
	// committed, though.
	if service.JobStatus == nil {
		service.JobStatus = &api.JobStatus{}
	}

	// Jobs can be run in multiple iterations. The JobStatus of the service
	// indicates which Version of iteration we're on. We should only be looking
	// at tasks of the latest Version

	jobVersion := service.JobStatus.JobIteration.Index

	// now, check how many tasks we need and how many we have running. note
	// that some of these Running tasks may complete before we even finish this
	// code block, and so we might have to immediately re-enter reconciliation,
	// so this number is not 100% definitive, but it is accurate for this
	// particular moment in time, and it won't result in us going OVER the
	// needed task count
	//
	// importantly, we are computing only how many _new_ tasks are needed. Some
	// tasks may need to be restarted as well, but we don't do this directly;
	// restarting tasks is under the purview of the restartSupervisor.
	//
	// also also, for the math later, we need these values to be of type uint64.
	runningTasks := uint64(0)
	completeTasks := uint64(0)
	restartTasks := []string{}
	removeTasks := []string{}
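
	// As a rough illustration of the bucketing done by the loop below (the
	// task states here are hypothetical examples, not an exhaustive list):
	//   - iteration == jobVersion, state Completed, desired Completed -> completeTasks
	//   - iteration == jobVersion, state Running,   desired Completed -> runningTasks
	//   - iteration == jobVersion, state Failed,    desired Completed -> runningTasks, and queued for restart
	//   - iteration <  jobVersion, state Running,   desired Running   -> queued for removal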

	// for replicated jobs, each task will get a different slot number, so that
	// when the job has completed, there will be one Completed task in every
	// slot number [0, TotalCompletions-1].
	//
	// By assigning each task to a unique slot, we simplify handling of
	// restarting failed tasks through the restart manager.
	slots := map[uint64]bool{}
	for _, task := range tasks {
		// we only care about tasks from this job iteration. tasks from the
		// previous job iteration are not important
		if task.JobIteration != nil {
			if task.JobIteration.Index == jobVersion {
				if task.Status.State == api.TaskStateCompleted {
					completeTasks++
					slots[task.Slot] = true
				}

				// the Restart Manager may put a task in the desired state Ready,
				// so we should match not only tasks in desired state Completed,
				// but also those in any valid running state.
				if task.Status.State != api.TaskStateCompleted && task.DesiredState <= api.TaskStateCompleted {
					runningTasks++
					slots[task.Slot] = true

					// if the task is in a terminal state, we might need to restart
					// it. throw it on the pile if so. this is still counted as a
					// running task for the purpose of determining how many new
					// tasks to create.
					if task.Status.State > api.TaskStateCompleted {
						restartTasks = append(restartTasks, task.ID)
					}
				}
			} else {
				// tasks belonging to a previous iteration of the job may
				// exist. if any such tasks exist, they should have their task
				// state set to Remove
				if task.Status.State <= api.TaskStateRunning && task.DesiredState != api.TaskStateRemove {
					removeTasks = append(removeTasks, task.ID)
				}
			}
		}
	}

	// now that we have our counts, we need to see how many new tasks to
	// create. this number can never exceed MaxConcurrent, but also should not
	// result in us exceeding TotalCompletions. first, get these numbers out of
	// the service spec.
	rj := service.Spec.GetReplicatedJob()

	// possibleNewTasks gives us the upper bound for how many tasks we'll
	// create. also, ugh, subtracting uints. there's no way this can ever go
	// wrong.
	possibleNewTasks := rj.MaxConcurrent - runningTasks

	// allowedNewTasks is how many tasks we could create, if there were no
	// restriction on maximum concurrency. This is the total number of tasks
	// we want completed, minus the tasks that are already completed, minus
	// the tasks that are in progress.
	//
	// seriously, ugh, subtracting unsigned ints. totally a fine and not at all
	// risky operation, with no possibility for catastrophe
	allowedNewTasks := rj.TotalCompletions - completeTasks - runningTasks

	// the lower of allowedNewTasks and possibleNewTasks is how many we
	// can create. we'll just use an if statement instead of some fancy floor
	// function.
	actualNewTasks := allowedNewTasks
	if possibleNewTasks < allowedNewTasks {
		actualNewTasks = possibleNewTasks
	}

	// this check might seem odd, but it protects us from an underflow of the
	// above subtractions, which, again, is a totally impossible thing that can
	// never happen, ever, obviously.
	if actualNewTasks > rj.TotalCompletions {
		return fmt.Errorf(
			"uint64 underflow, we're not going to create %v tasks",
			actualNewTasks,
		)
	}
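
	// To make the arithmetic above concrete (the numbers are hypothetical,
	// not taken from any particular spec): with TotalCompletions = 10,
	// MaxConcurrent = 3, completeTasks = 4, and runningTasks = 2,
	// possibleNewTasks is 3-2 = 1 and allowedNewTasks is 10-4-2 = 4, so
	// actualNewTasks is 1: we stay at the concurrency cap without
	// overshooting the remaining completions.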

	// finally, we can create these tasks. do this in a batch operation, to
	// avoid exceeding transaction size limits
	err := r.store.Batch(func(batch *store.Batch) error {
		for i := uint64(0); i < actualNewTasks; i++ {
			if err := batch.Update(func(tx store.Tx) error {
				var slot uint64
				// each task will go into a unique slot, and at the end, there
				// should be the same number of slots as there are desired
				// total completions. We could simplify this logic by simply
				// assuming that slots are filled in order, but it's a more
				// robust solution to not assume that, and instead ensure that
				// the slot is unoccupied.
				for s := uint64(0); s < rj.TotalCompletions; s++ {
					// when we're iterating through, if the service has slots
					// that haven't been used yet (for example, if this is the
					// first time we're running this iteration), then doing
					// a map lookup for the number will return the 0-value
					// (false) even if the number doesn't exist in the map.
					if !slots[s] {
						slot = s
						// once we've found a slot, mark it as occupied, so we
						// don't double assign in subsequent iterations.
						slots[slot] = true
						break
					}
				}

				task := orchestrator.NewTask(cluster, service, slot, "")
				// when we create the task, we also need to set the
				// JobIteration.
				task.JobIteration = &api.Version{Index: jobVersion}
				task.DesiredState = api.TaskStateCompleted

				// finally, create the task in the store.
				return store.CreateTask(tx, task)
			}); err != nil {
				return err
			}
		}

		for _, taskID := range restartTasks {
			if err := batch.Update(func(tx store.Tx) error {
				t := store.GetTask(tx, taskID)
				if t == nil {
					return nil
				}

				if t.DesiredState > api.TaskStateCompleted {
					return nil
				}

				// TODO(dperny): pass in context from above
				return r.restart.Restart(context.Background(), tx, cluster, service, *t)
			}); err != nil {
				return err
			}
		}

		for _, taskID := range removeTasks {
			if err := batch.Update(func(tx store.Tx) error {
				t := store.GetTask(tx, taskID)
				if t == nil {
					return nil
				}

				// don't do unnecessary updates
				if t.DesiredState == api.TaskStateRemove {
					return nil
				}
				t.DesiredState = api.TaskStateRemove
				return store.UpdateTask(tx, t)
			}); err != nil {
				return err
			}
		}

		return nil
	})

	return err
}

// IsRelatedService returns true if the given service is a replicated job. This
// method fulfills the taskinit.InitHandler interface. Because it is just a
// wrapper around a well-tested function call, it has no tests of its own.
func (r *Reconciler) IsRelatedService(service *api.Service) bool {
	return orchestrator.IsReplicatedJob(service)
}

// FixTask ostensibly validates that a task is compliant with the rest of the
// cluster state. However, in the replicated jobs case, the only action we
// can take with a noncompliant task is to restart it. Because the replicated
// jobs orchestrator reconciles the whole service at once, any tasks that
// need to be restarted will be handled when we make the reconciliation pass
// over all services. Therefore, in this instance, FixTask does nothing except
// implement the FixTask method of the taskinit.InitHandler interface.
func (r *Reconciler) FixTask(_ context.Context, _ *store.Batch, _ *api.Task) {}

// SlotTuple returns an orchestrator.SlotTuple object for this task. It
// implements the taskinit.InitHandler interface
func (r *Reconciler) SlotTuple(t *api.Task) orchestrator.SlotTuple {
	return orchestrator.SlotTuple{
		ServiceID: t.ServiceID,
		Slot:      t.Slot,
	}
}
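
// The sketch below is not part of the original file; it is a minimal,
// illustrative example of how the pieces above fit together, assuming
// store.NewMemoryStore(nil) yields a usable in-memory store (as it does in
// swarmkit's own tests). It shows why the narrow restartSupervisor interface
// keeps test fakes small: a no-op implementation is a single method.

// noopRestartSupervisor is a hypothetical fake that satisfies
// restartSupervisor without doing any work.
type noopRestartSupervisor struct{}

func (noopRestartSupervisor) Restart(context.Context, store.Tx, *api.Cluster, *api.Service, api.Task) error {
	return nil
}

// exampleReconcile is a hypothetical helper showing how a Reconciler is
// constructed and driven for a single service ID.
func exampleReconcile(serviceID string) error {
	s := store.NewMemoryStore(nil)
	defer s.Close()

	r := NewReconciler(s, noopRestartSupervisor{})
	return r.ReconcileService(serviceID)
}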