github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/agent/task.go (about) 1 package agent 2 3 import ( 4 "context" 5 "sync" 6 "time" 7 8 "github.com/docker/swarmkit/agent/exec" 9 "github.com/docker/swarmkit/api" 10 "github.com/docker/swarmkit/api/equality" 11 "github.com/docker/swarmkit/log" 12 ) 13 14 // taskManager manages all aspects of task execution and reporting for an agent 15 // through state management. 16 type taskManager struct { 17 task *api.Task 18 ctlr exec.Controller 19 reporter StatusReporter 20 21 updateq chan *api.Task 22 23 shutdown chan struct{} 24 shutdownOnce sync.Once 25 closed chan struct{} 26 closeOnce sync.Once 27 } 28 29 func newTaskManager(ctx context.Context, task *api.Task, ctlr exec.Controller, reporter StatusReporter) *taskManager { 30 t := &taskManager{ 31 task: task.Copy(), 32 ctlr: ctlr, 33 reporter: reporter, 34 updateq: make(chan *api.Task), 35 shutdown: make(chan struct{}), 36 closed: make(chan struct{}), 37 } 38 go t.run(ctx) 39 return t 40 } 41 42 // Update the task data. 43 func (tm *taskManager) Update(ctx context.Context, task *api.Task) error { 44 select { 45 case tm.updateq <- task: 46 return nil 47 case <-tm.closed: 48 return ErrClosed 49 case <-ctx.Done(): 50 return ctx.Err() 51 } 52 } 53 54 // Close shuts down the task manager, blocking until it is closed. 55 func (tm *taskManager) Close() error { 56 tm.shutdownOnce.Do(func() { 57 close(tm.shutdown) 58 }) 59 60 <-tm.closed 61 62 return nil 63 } 64 65 func (tm *taskManager) Logs(ctx context.Context, options api.LogSubscriptionOptions, publisher exec.LogPublisher) { 66 ctx = log.WithModule(ctx, "taskmanager") 67 68 logCtlr, ok := tm.ctlr.(exec.ControllerLogs) 69 if !ok { 70 return // no logs available 71 } 72 if err := logCtlr.Logs(ctx, publisher, options); err != nil { 73 log.G(ctx).WithError(err).Errorf("logs call failed") 74 } 75 } 76 77 func (tm *taskManager) run(ctx context.Context) { 78 ctx, cancelAll := context.WithCancel(ctx) 79 defer cancelAll() // cancel all child operations on exit. 80 81 ctx = log.WithModule(ctx, "taskmanager") 82 83 var ( 84 opctx context.Context 85 cancel context.CancelFunc 86 run = make(chan struct{}, 1) 87 statusq = make(chan *api.TaskStatus) 88 errs = make(chan error) 89 shutdown = tm.shutdown 90 updated bool // true if the task was updated. 91 ) 92 93 defer func() { 94 // closure picks up current value of cancel. 95 if cancel != nil { 96 cancel() 97 } 98 }() 99 100 run <- struct{}{} // prime the pump 101 for { 102 select { 103 case <-run: 104 // always check for shutdown before running. 105 select { 106 case <-tm.shutdown: 107 shutdown = tm.shutdown // a little questionable 108 continue // ignore run request and handle shutdown 109 case <-tm.closed: 110 continue 111 default: 112 } 113 114 opctx, cancel = context.WithCancel(ctx) 115 116 // Several variables need to be snapshotted for the closure below. 117 opcancel := cancel // fork for the closure 118 running := tm.task.Copy() // clone the task before dispatch 119 statusqLocal := statusq 120 updatedLocal := updated // capture state of update for goroutine 121 updated = false 122 go runctx(ctx, tm.closed, errs, func(ctx context.Context) error { 123 defer opcancel() 124 125 if updatedLocal { 126 // before we do anything, update the task for the controller. 127 // always update the controller before running. 128 if err := tm.ctlr.Update(opctx, running); err != nil { 129 log.G(ctx).WithError(err).Error("updating task controller failed") 130 return err 131 } 132 } 133 134 status, err := exec.Do(opctx, running, tm.ctlr) 135 if status != nil { 136 // always report the status if we get one back. This 137 // returns to the manager loop, then reports the status 138 // upstream. 139 select { 140 case statusqLocal <- status: 141 case <-ctx.Done(): // not opctx, since that may have been cancelled. 142 } 143 144 if err := tm.reporter.UpdateTaskStatus(ctx, running.ID, status); err != nil { 145 log.G(ctx).WithError(err).Error("task manager failed to report status to agent") 146 } 147 } 148 149 return err 150 }) 151 case err := <-errs: 152 // This branch is always executed when an operations completes. The 153 // goal is to decide whether or not we re-dispatch the operation. 154 cancel = nil 155 156 select { 157 case <-tm.shutdown: 158 shutdown = tm.shutdown // re-enable the shutdown branch 159 continue // no dispatch if we are in shutdown. 160 default: 161 } 162 163 switch err { 164 case exec.ErrTaskNoop: 165 if !updated { 166 continue // wait till getting pumped via update. 167 } 168 case exec.ErrTaskRetry: 169 // TODO(stevvooe): Add exponential backoff with random jitter 170 // here. For now, this backoff is enough to keep the task 171 // manager from running away with the CPU. 172 time.AfterFunc(time.Second, func() { 173 errs <- nil // repump this branch, with no err 174 }) 175 continue 176 case nil, context.Canceled, context.DeadlineExceeded: 177 // no log in this case 178 default: 179 log.G(ctx).WithError(err).Error("task operation failed") 180 } 181 182 select { 183 case run <- struct{}{}: 184 default: 185 } 186 case status := <-statusq: 187 tm.task.Status = *status 188 case task := <-tm.updateq: 189 if equality.TasksEqualStable(task, tm.task) { 190 continue // ignore the update 191 } 192 193 if task.ID != tm.task.ID { 194 log.G(ctx).WithField("task.update.id", task.ID).Error("received update for incorrect task") 195 continue 196 } 197 198 if task.DesiredState < tm.task.DesiredState { 199 log.G(ctx).WithField("task.update.desiredstate", task.DesiredState). 200 Error("ignoring task update with invalid desired state") 201 continue 202 } 203 204 task = task.Copy() 205 task.Status = tm.task.Status // overwrite our status, as it is canonical. 206 tm.task = task 207 updated = true 208 209 // we have accepted the task update 210 if cancel != nil { 211 cancel() // cancel outstanding if necessary. 212 } else { 213 // If this channel op fails, it means there is already a 214 // message on the run queue. 215 select { 216 case run <- struct{}{}: 217 default: 218 } 219 } 220 case <-shutdown: 221 if cancel != nil { 222 // cancel outstanding operation. 223 cancel() 224 225 // subtle: after a cancellation, we want to avoid busy wait 226 // here. this gets renabled in the errs branch and we'll come 227 // back around and try shutdown again. 228 shutdown = nil // turn off this branch until op proceeds 229 continue // wait until operation actually exits. 230 } 231 232 // disable everything, and prepare for closing. 233 statusq = nil 234 errs = nil 235 shutdown = nil 236 tm.closeOnce.Do(func() { 237 close(tm.closed) 238 }) 239 case <-tm.closed: 240 return 241 case <-ctx.Done(): 242 tm.closeOnce.Do(func() { 243 close(tm.closed) 244 }) 245 return 246 } 247 } 248 }