github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/orchestrator/replicated/tasks.go (about) 1 package replicated 2 3 import ( 4 "context" 5 6 "github.com/docker/go-events" 7 "github.com/docker/swarmkit/api" 8 "github.com/docker/swarmkit/log" 9 "github.com/docker/swarmkit/manager/orchestrator" 10 "github.com/docker/swarmkit/manager/orchestrator/taskinit" 11 "github.com/docker/swarmkit/manager/state/store" 12 ) 13 14 // This file provides task-level orchestration. It observes changes to task 15 // and node state and kills/recreates tasks if necessary. This is distinct from 16 // service-level reconciliation, which observes changes to services and creates 17 // and/or kills tasks to match the service definition. 18 19 func (r *Orchestrator) initTasks(ctx context.Context, readTx store.ReadTx) error { 20 return taskinit.CheckTasks(ctx, r.store, readTx, r, r.restarts) 21 } 22 23 func (r *Orchestrator) handleTaskEvent(ctx context.Context, event events.Event) { 24 switch v := event.(type) { 25 case api.EventDeleteNode: 26 r.restartTasksByNodeID(ctx, v.Node.ID) 27 case api.EventCreateNode: 28 r.handleNodeChange(ctx, v.Node) 29 case api.EventUpdateNode: 30 r.handleNodeChange(ctx, v.Node) 31 case api.EventDeleteTask: 32 if v.Task.DesiredState <= api.TaskStateRunning { 33 service := r.resolveService(ctx, v.Task) 34 if !orchestrator.IsReplicatedService(service) { 35 return 36 } 37 r.reconcileServices[service.ID] = service 38 } 39 r.restarts.Cancel(v.Task.ID) 40 case api.EventUpdateTask: 41 r.handleTaskChange(ctx, v.Task) 42 case api.EventCreateTask: 43 r.handleTaskChange(ctx, v.Task) 44 } 45 } 46 47 func (r *Orchestrator) tickTasks(ctx context.Context) { 48 if len(r.restartTasks) > 0 { 49 err := r.store.Batch(func(batch *store.Batch) error { 50 for taskID := range r.restartTasks { 51 err := batch.Update(func(tx store.Tx) error { 52 // TODO(aaronl): optimistic update? 53 t := store.GetTask(tx, taskID) 54 if t != nil { 55 if t.DesiredState > api.TaskStateRunning { 56 return nil 57 } 58 59 service := store.GetService(tx, t.ServiceID) 60 if !orchestrator.IsReplicatedService(service) { 61 return nil 62 } 63 64 // Restart task if applicable 65 if err := r.restarts.Restart(ctx, tx, r.cluster, service, *t); err != nil { 66 return err 67 } 68 } 69 return nil 70 }) 71 if err != nil { 72 log.G(ctx).WithError(err).Errorf("Orchestrator task reaping transaction failed") 73 } 74 } 75 return nil 76 }) 77 78 if err != nil { 79 log.G(ctx).WithError(err).Errorf("orchestrator task removal batch failed") 80 } 81 82 r.restartTasks = make(map[string]struct{}) 83 } 84 } 85 86 func (r *Orchestrator) restartTasksByNodeID(ctx context.Context, nodeID string) { 87 var err error 88 r.store.View(func(tx store.ReadTx) { 89 var tasks []*api.Task 90 tasks, err = store.FindTasks(tx, store.ByNodeID(nodeID)) 91 if err != nil { 92 return 93 } 94 95 for _, t := range tasks { 96 if t.DesiredState > api.TaskStateRunning { 97 continue 98 } 99 service := store.GetService(tx, t.ServiceID) 100 if orchestrator.IsReplicatedService(service) { 101 r.restartTasks[t.ID] = struct{}{} 102 } 103 } 104 }) 105 if err != nil { 106 log.G(ctx).WithError(err).Errorf("failed to list tasks to remove") 107 } 108 } 109 110 func (r *Orchestrator) handleNodeChange(ctx context.Context, n *api.Node) { 111 if !orchestrator.InvalidNode(n) { 112 return 113 } 114 115 r.restartTasksByNodeID(ctx, n.ID) 116 } 117 118 // handleTaskChange defines what orchestrator does when a task is updated by agent. 119 func (r *Orchestrator) handleTaskChange(ctx context.Context, t *api.Task) { 120 // If we already set the desired state past TaskStateRunning, there is no 121 // further action necessary. 122 if t.DesiredState > api.TaskStateRunning { 123 return 124 } 125 126 var ( 127 n *api.Node 128 service *api.Service 129 ) 130 r.store.View(func(tx store.ReadTx) { 131 if t.NodeID != "" { 132 n = store.GetNode(tx, t.NodeID) 133 } 134 if t.ServiceID != "" { 135 service = store.GetService(tx, t.ServiceID) 136 } 137 }) 138 139 if !orchestrator.IsReplicatedService(service) { 140 return 141 } 142 143 if t.Status.State > api.TaskStateRunning || 144 (t.NodeID != "" && orchestrator.InvalidNode(n)) { 145 r.restartTasks[t.ID] = struct{}{} 146 } 147 } 148 149 // FixTask validates a task with the current cluster settings, and takes 150 // action to make it conformant. it's called at orchestrator initialization. 151 func (r *Orchestrator) FixTask(ctx context.Context, batch *store.Batch, t *api.Task) { 152 // If we already set the desired state past TaskStateRunning, there is no 153 // further action necessary. 154 if t.DesiredState > api.TaskStateRunning { 155 return 156 } 157 158 var ( 159 n *api.Node 160 service *api.Service 161 ) 162 batch.Update(func(tx store.Tx) error { 163 if t.NodeID != "" { 164 n = store.GetNode(tx, t.NodeID) 165 } 166 if t.ServiceID != "" { 167 service = store.GetService(tx, t.ServiceID) 168 } 169 return nil 170 }) 171 172 if !orchestrator.IsReplicatedService(service) { 173 return 174 } 175 176 if t.Status.State > api.TaskStateRunning || 177 (t.NodeID != "" && orchestrator.InvalidNode(n)) { 178 r.restartTasks[t.ID] = struct{}{} 179 return 180 } 181 }