github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/agent/exec/controller.go (about) 1 package exec 2 3 import ( 4 "context" 5 "fmt" 6 "time" 7 8 "github.com/docker/swarmkit/api" 9 "github.com/docker/swarmkit/api/equality" 10 "github.com/docker/swarmkit/log" 11 "github.com/docker/swarmkit/protobuf/ptypes" 12 "github.com/pkg/errors" 13 "github.com/sirupsen/logrus" 14 ) 15 16 // Controller controls execution of a task. 17 type Controller interface { 18 // Update the task definition seen by the controller. Will return 19 // ErrTaskUpdateFailed if the provided task definition changes fields that 20 // cannot be changed. 21 // 22 // Will be ignored if the task has exited. 23 Update(ctx context.Context, t *api.Task) error 24 25 // Prepare the task for execution. This should ensure that all resources 26 // are created such that a call to start should execute immediately. 27 Prepare(ctx context.Context) error 28 29 // Start the target and return when it has started successfully. 30 Start(ctx context.Context) error 31 32 // Wait blocks until the target has exited. 33 Wait(ctx context.Context) error 34 35 // Shutdown requests to exit the target gracefully. 36 Shutdown(ctx context.Context) error 37 38 // Terminate the target. 39 Terminate(ctx context.Context) error 40 41 // Remove all resources allocated by the controller. 42 Remove(ctx context.Context) error 43 44 // Close closes any ephemeral resources associated with controller instance. 45 Close() error 46 } 47 48 // ControllerLogs defines a component that makes logs accessible. 49 // 50 // Can usually be accessed on a controller instance via type assertion. 51 type ControllerLogs interface { 52 // Logs will write publisher until the context is cancelled or an error 53 // occurs. 54 Logs(ctx context.Context, publisher LogPublisher, options api.LogSubscriptionOptions) error 55 } 56 57 // LogPublisher defines the protocol for receiving a log message. 58 type LogPublisher interface { 59 Publish(ctx context.Context, message api.LogMessage) error 60 } 61 62 // LogPublisherFunc implements publisher with just a function. 63 type LogPublisherFunc func(ctx context.Context, message api.LogMessage) error 64 65 // Publish calls the wrapped function. 66 func (fn LogPublisherFunc) Publish(ctx context.Context, message api.LogMessage) error { 67 return fn(ctx, message) 68 } 69 70 // LogPublisherProvider defines the protocol for receiving a log publisher 71 type LogPublisherProvider interface { 72 Publisher(ctx context.Context, subscriptionID string) (LogPublisher, func(), error) 73 } 74 75 // ContainerStatuser reports status of a container. 76 // 77 // This can be implemented by controllers or error types. 78 type ContainerStatuser interface { 79 // ContainerStatus returns the status of the target container, if 80 // available. When the container is not available, the status will be nil. 81 ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) 82 } 83 84 // PortStatuser reports status of ports which are allocated by the executor 85 type PortStatuser interface { 86 // PortStatus returns the status on a list of PortConfigs 87 // which are managed at the host level by the controller. 88 PortStatus(ctx context.Context) (*api.PortStatus, error) 89 } 90 91 // Resolve attempts to get a controller from the executor and reports the 92 // correct status depending on the tasks current state according to the result. 93 // 94 // Unlike Do, if an error is returned, the status should still be reported. The 95 // error merely reports the failure at getting the controller. 96 func Resolve(ctx context.Context, task *api.Task, executor Executor) (Controller, *api.TaskStatus, error) { 97 status := task.Status.Copy() 98 99 defer func() { 100 logStateChange(ctx, task.DesiredState, task.Status.State, status.State) 101 }() 102 103 ctlr, err := executor.Controller(task) 104 105 // depending on the tasks state, a failed controller resolution has varying 106 // impact. The following expresses that impact. 107 if err != nil { 108 status.Message = "resolving controller failed" 109 status.Err = err.Error() 110 // before the task has been started, we consider it a rejection. 111 // if task is running, consider the task has failed 112 // otherwise keep the existing state 113 if task.Status.State < api.TaskStateStarting { 114 status.State = api.TaskStateRejected 115 } else if task.Status.State <= api.TaskStateRunning { 116 status.State = api.TaskStateFailed 117 } 118 } else if task.Status.State < api.TaskStateAccepted { 119 // we always want to proceed to accepted when we resolve the controller 120 status.Message = "accepted" 121 status.State = api.TaskStateAccepted 122 status.Err = "" 123 } 124 125 return ctlr, status, err 126 } 127 128 // Do progresses the task state using the controller performing a single 129 // operation on the controller. The return TaskStatus should be marked as the 130 // new state of the task. 131 // 132 // The returned status should be reported and placed back on to task 133 // before the next call. The operation can be cancelled by creating a 134 // cancelling context. 135 // 136 // Errors from the task controller will reported on the returned status. Any 137 // errors coming from this function should not be reported as related to the 138 // individual task. 139 // 140 // If ErrTaskNoop is returned, it means a second call to Do will result in no 141 // change. If ErrTaskDead is returned, calls to Do will no longer result in any 142 // action. 143 func Do(ctx context.Context, task *api.Task, ctlr Controller) (*api.TaskStatus, error) { 144 status := task.Status.Copy() 145 146 // stay in the current state. 147 noop := func(errs ...error) (*api.TaskStatus, error) { 148 return status, ErrTaskNoop 149 } 150 151 retry := func() (*api.TaskStatus, error) { 152 // while we retry on all errors, this allows us to explicitly declare 153 // retry cases. 154 return status, ErrTaskRetry 155 } 156 157 // transition moves the task to the next state. 158 transition := func(state api.TaskState, msg string) (*api.TaskStatus, error) { 159 current := status.State 160 status.State = state 161 status.Message = msg 162 status.Err = "" 163 164 if current > state { 165 panic("invalid state transition") 166 } 167 return status, nil 168 } 169 170 // containerStatus exitCode keeps track of whether or not we've set it in 171 // this particular method. Eventually, we assemble this as part of a defer. 172 var ( 173 containerStatus *api.ContainerStatus 174 portStatus *api.PortStatus 175 exitCode int 176 ) 177 178 // returned when a fatal execution of the task is fatal. In this case, we 179 // proceed to a terminal error state and set the appropriate fields. 180 // 181 // Common checks for the nature of an error should be included here. If the 182 // error is determined not to be fatal for the task, 183 fatal := func(err error) (*api.TaskStatus, error) { 184 if err == nil { 185 panic("err must not be nil when fatal") 186 } 187 188 if cs, ok := err.(ContainerStatuser); ok { 189 var err error 190 containerStatus, err = cs.ContainerStatus(ctx) 191 if err != nil && !contextDoneError(err) { 192 log.G(ctx).WithError(err).Error("error resolving container status on fatal") 193 } 194 } 195 196 // make sure we've set the *correct* exit code 197 if ec, ok := err.(ExitCoder); ok { 198 exitCode = ec.ExitCode() 199 } 200 201 if cause := errors.Cause(err); cause == context.DeadlineExceeded || cause == context.Canceled { 202 return retry() 203 } 204 205 status.Err = err.Error() // still reported on temporary 206 if IsTemporary(err) { 207 return retry() 208 } 209 210 // only at this point do we consider the error fatal to the task. 211 log.G(ctx).WithError(err).Error("fatal task error") 212 213 // NOTE(stevvooe): The following switch dictates the terminal failure 214 // state based on the state in which the failure was encountered. 215 switch { 216 case status.State < api.TaskStateStarting: 217 status.State = api.TaskStateRejected 218 case status.State >= api.TaskStateStarting: 219 status.State = api.TaskStateFailed 220 } 221 222 return status, nil 223 } 224 225 // below, we have several callbacks that are run after the state transition 226 // is completed. 227 defer func() { 228 logStateChange(ctx, task.DesiredState, task.Status.State, status.State) 229 230 if !equality.TaskStatusesEqualStable(status, &task.Status) { 231 status.Timestamp = ptypes.MustTimestampProto(time.Now()) 232 } 233 }() 234 235 // extract the container status from the container, if supported. 236 defer func() { 237 // only do this if in an active state 238 if status.State < api.TaskStateStarting { 239 return 240 } 241 242 if containerStatus == nil { 243 // collect this, if we haven't 244 cctlr, ok := ctlr.(ContainerStatuser) 245 if !ok { 246 return 247 } 248 249 var err error 250 containerStatus, err = cctlr.ContainerStatus(ctx) 251 if err != nil && !contextDoneError(err) { 252 log.G(ctx).WithError(err).Error("container status unavailable") 253 } 254 255 // at this point, things have gone fairly wrong. Remain positive 256 // and let's get something out the door. 257 if containerStatus == nil { 258 containerStatus = new(api.ContainerStatus) 259 containerStatusTask := task.Status.GetContainer() 260 if containerStatusTask != nil { 261 *containerStatus = *containerStatusTask // copy it over. 262 } 263 } 264 } 265 266 // at this point, we *must* have a containerStatus. 267 if exitCode != 0 { 268 containerStatus.ExitCode = int32(exitCode) 269 } 270 271 status.RuntimeStatus = &api.TaskStatus_Container{ 272 Container: containerStatus, 273 } 274 275 if portStatus == nil { 276 pctlr, ok := ctlr.(PortStatuser) 277 if !ok { 278 return 279 } 280 281 var err error 282 portStatus, err = pctlr.PortStatus(ctx) 283 if err != nil && !contextDoneError(err) { 284 log.G(ctx).WithError(err).Error("container port status unavailable") 285 } 286 } 287 288 status.PortStatus = portStatus 289 }() 290 291 // this branch bounds the largest state achievable in the agent as SHUTDOWN, which 292 // is exactly the correct behavior for the agent. 293 if task.DesiredState >= api.TaskStateShutdown { 294 if status.State >= api.TaskStateCompleted { 295 return noop() 296 } 297 298 if err := ctlr.Shutdown(ctx); err != nil { 299 return fatal(err) 300 } 301 302 return transition(api.TaskStateShutdown, "shutdown") 303 } 304 305 if status.State > task.DesiredState { 306 return noop() // way beyond desired state, pause 307 } 308 309 // the following states may proceed past desired state. 310 switch status.State { 311 case api.TaskStatePreparing: 312 if err := ctlr.Prepare(ctx); err != nil && err != ErrTaskPrepared { 313 return fatal(err) 314 } 315 316 return transition(api.TaskStateReady, "prepared") 317 case api.TaskStateStarting: 318 if err := ctlr.Start(ctx); err != nil && err != ErrTaskStarted { 319 return fatal(err) 320 } 321 322 return transition(api.TaskStateRunning, "started") 323 case api.TaskStateRunning: 324 if err := ctlr.Wait(ctx); err != nil { 325 return fatal(err) 326 } 327 328 return transition(api.TaskStateCompleted, "finished") 329 } 330 331 // The following represent "pause" states. We can only proceed when the 332 // desired state is beyond our current state. 333 if status.State >= task.DesiredState { 334 return noop() 335 } 336 337 switch status.State { 338 case api.TaskStateNew, api.TaskStatePending, api.TaskStateAssigned: 339 return transition(api.TaskStateAccepted, "accepted") 340 case api.TaskStateAccepted: 341 return transition(api.TaskStatePreparing, "preparing") 342 case api.TaskStateReady: 343 return transition(api.TaskStateStarting, "starting") 344 default: // terminal states 345 return noop() 346 } 347 } 348 349 func logStateChange(ctx context.Context, desired, previous, next api.TaskState) { 350 if previous != next { 351 fields := logrus.Fields{ 352 "state.transition": fmt.Sprintf("%v->%v", previous, next), 353 "state.desired": desired, 354 } 355 log.G(ctx).WithFields(fields).Debug("state changed") 356 } 357 } 358 359 func contextDoneError(err error) bool { 360 cause := errors.Cause(err) 361 return cause == context.Canceled || cause == context.DeadlineExceeded 362 }