github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/agent/exec/controller.go (about)

     1  package exec
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"time"
     7  
     8  	"github.com/docker/swarmkit/api"
     9  	"github.com/docker/swarmkit/api/equality"
    10  	"github.com/docker/swarmkit/log"
    11  	"github.com/docker/swarmkit/protobuf/ptypes"
    12  	"github.com/pkg/errors"
    13  	"github.com/sirupsen/logrus"
    14  )
    15  
// Controller controls execution of a task.
//
// Each method operates on the single task the controller was created for.
type Controller interface {
	// Update the task definition seen by the controller. Will return
	// ErrTaskUpdateFailed if the provided task definition changes fields that
	// cannot be changed.
	//
	// Will be ignored if the task has exited.
	Update(ctx context.Context, t *api.Task) error

	// Prepare the task for execution. This should ensure that all resources
	// are created such that a subsequent call to Start executes immediately.
	Prepare(ctx context.Context) error

	// Start the target and return when it has started successfully.
	Start(ctx context.Context) error

	// Wait blocks until the target has exited.
	Wait(ctx context.Context) error

	// Shutdown requests that the target exit gracefully.
	Shutdown(ctx context.Context) error

	// Terminate the target.
	Terminate(ctx context.Context) error

	// Remove all resources allocated by the controller.
	Remove(ctx context.Context) error

	// Close closes any ephemeral resources associated with the controller
	// instance.
	Close() error
}
    47  
// ControllerLogs defines a component that makes logs accessible.
//
// Can usually be accessed on a controller instance via type assertion.
type ControllerLogs interface {
	// Logs writes log messages to publisher until the context is cancelled or
	// an error occurs.
	Logs(ctx context.Context, publisher LogPublisher, options api.LogSubscriptionOptions) error
}
    56  
// LogPublisher defines the protocol for receiving a log message.
type LogPublisher interface {
	// Publish hands a single log message to the publisher.
	Publish(ctx context.Context, message api.LogMessage) error
}
    61  
    62  // LogPublisherFunc implements publisher with just a function.
    63  type LogPublisherFunc func(ctx context.Context, message api.LogMessage) error
    64  
    65  // Publish calls the wrapped function.
    66  func (fn LogPublisherFunc) Publish(ctx context.Context, message api.LogMessage) error {
    67  	return fn(ctx, message)
    68  }
    69  
// LogPublisherProvider defines the protocol for receiving a log publisher.
type LogPublisherProvider interface {
	// Publisher returns a LogPublisher for the subscription identified by
	// subscriptionID, along with a func().
	//
	// NOTE(review): the returned func() is presumably a cancel/cleanup
	// callback to invoke once publishing is done — confirm against
	// implementations.
	Publisher(ctx context.Context, subscriptionID string) (LogPublisher, func(), error)
}
    74  
// ContainerStatuser reports the status of a container.
//
// This can be implemented by controllers or by error types.
type ContainerStatuser interface {
	// ContainerStatus returns the status of the target container, if
	// available. When the container is not available, the status will be nil.
	ContainerStatus(ctx context.Context) (*api.ContainerStatus, error)
}
    83  
// PortStatuser reports the status of ports which are allocated by the
// executor.
type PortStatuser interface {
	// PortStatus returns the status of the list of PortConfigs
	// which are managed at the host level by the controller.
	PortStatus(ctx context.Context) (*api.PortStatus, error)
}
    90  
    91  // Resolve attempts to get a controller from the executor and reports the
    92  // correct status depending on the tasks current state according to the result.
    93  //
    94  // Unlike Do, if an error is returned, the status should still be reported. The
    95  // error merely reports the failure at getting the controller.
    96  func Resolve(ctx context.Context, task *api.Task, executor Executor) (Controller, *api.TaskStatus, error) {
    97  	status := task.Status.Copy()
    98  
    99  	defer func() {
   100  		logStateChange(ctx, task.DesiredState, task.Status.State, status.State)
   101  	}()
   102  
   103  	ctlr, err := executor.Controller(task)
   104  
   105  	// depending on the tasks state, a failed controller resolution has varying
   106  	// impact. The following expresses that impact.
   107  	if err != nil {
   108  		status.Message = "resolving controller failed"
   109  		status.Err = err.Error()
   110  		// before the task has been started, we consider it a rejection.
   111  		// if task is running, consider the task has failed
   112  		// otherwise keep the existing state
   113  		if task.Status.State < api.TaskStateStarting {
   114  			status.State = api.TaskStateRejected
   115  		} else if task.Status.State <= api.TaskStateRunning {
   116  			status.State = api.TaskStateFailed
   117  		}
   118  	} else if task.Status.State < api.TaskStateAccepted {
   119  		// we always want to proceed to accepted when we resolve the controller
   120  		status.Message = "accepted"
   121  		status.State = api.TaskStateAccepted
   122  		status.Err = ""
   123  	}
   124  
   125  	return ctlr, status, err
   126  }
   127  
// Do progresses the task state using the controller, performing a single
// operation on the controller. The returned TaskStatus should be marked as the
// new state of the task.
//
// The returned status should be reported and placed back on to task
// before the next call. The operation can be cancelled by cancelling the
// provided context.
//
// Errors from the task controller will be reported on the returned status. Any
// errors coming from this function should not be reported as related to the
// individual task.
//
// If ErrTaskNoop is returned, it means a second call to Do will result in no
// change. If ErrTaskDead is returned, calls to Do will no longer result in any
// action. ErrTaskRetry is returned when the controller operation failed with a
// cancelled or expired context, or with a temporary error; the task state is
// left as it was in that case.
func Do(ctx context.Context, task *api.Task, ctlr Controller) (*api.TaskStatus, error) {
	// work on a copy; the caller's task is only read.
	status := task.Status.Copy()

	// stay in the current state. The errs argument is accepted but unused.
	noop := func(errs ...error) (*api.TaskStatus, error) {
		return status, ErrTaskNoop
	}

	retry := func() (*api.TaskStatus, error) {
		// while we retry on all errors, this allows us to explicitly declare
		// retry cases.
		return status, ErrTaskRetry
	}

	// transition moves the task to the next state, clearing any previous
	// error. Moving to a state lower than the current one is a programming
	// error and panics.
	transition := func(state api.TaskState, msg string) (*api.TaskStatus, error) {
		current := status.State
		status.State = state
		status.Message = msg
		status.Err = ""

		if current > state {
			panic("invalid state transition")
		}
		return status, nil
	}

	// containerStatus, portStatus and exitCode keep track of whether or not
	// we've set them in this particular call. They are assembled into the
	// returned status by the deferred function below.
	var (
		containerStatus *api.ContainerStatus
		portStatus      *api.PortStatus
		exitCode        int
	)

	// fatal is called when a controller operation returns an error. For a
	// genuinely fatal error we proceed to a terminal error state and set the
	// appropriate fields.
	//
	// Common checks for the nature of an error should be included here. If the
	// error is determined not to be fatal for the task (context cancellation
	// or deadline, or a temporary error), a retry is requested instead.
	fatal := func(err error) (*api.TaskStatus, error) {
		if err == nil {
			panic("err must not be nil when fatal")
		}

		// the error itself may be able to report the container status.
		if cs, ok := err.(ContainerStatuser); ok {
			// deliberately shadows the outer err so that a failure to
			// resolve the status does not replace the error being handled.
			var err error
			containerStatus, err = cs.ContainerStatus(ctx)
			if err != nil && !contextDoneError(err) {
				log.G(ctx).WithError(err).Error("error resolving container status on fatal")
			}
		}

		// make sure we've set the *correct* exit code
		if ec, ok := err.(ExitCoder); ok {
			exitCode = ec.ExitCode()
		}

		// a cancelled or expired context is never fatal to the task and is
		// not recorded on the status.
		if cause := errors.Cause(err); cause == context.DeadlineExceeded || cause == context.Canceled {
			return retry()
		}

		status.Err = err.Error() // still reported on temporary
		if IsTemporary(err) {
			return retry()
		}

		// only at this point do we consider the error fatal to the task.
		log.G(ctx).WithError(err).Error("fatal task error")

		// NOTE(stevvooe): The following switch dictates the terminal failure
		// state based on the state in which the failure was encountered.
		switch {
		case status.State < api.TaskStateStarting:
			status.State = api.TaskStateRejected
		case status.State >= api.TaskStateStarting:
			status.State = api.TaskStateFailed
		}

		return status, nil
	}

	// below, we have several callbacks that are run after the state transition
	// is completed. Deferred functions run in reverse order, so this one runs
	// last, after the container and port status have been filled in.
	defer func() {
		logStateChange(ctx, task.DesiredState, task.Status.State, status.State)

		// only bump the timestamp when the status differs from the one on
		// the task, as judged by TaskStatusesEqualStable.
		if !equality.TaskStatusesEqualStable(status, &task.Status) {
			status.Timestamp = ptypes.MustTimestampProto(time.Now())
		}
	}()

	// extract the container status from the container, if supported.
	defer func() {
		// only do this if in an active state
		if status.State < api.TaskStateStarting {
			return
		}

		if containerStatus == nil {
			// collect this, if we haven't
			cctlr, ok := ctlr.(ContainerStatuser)
			if !ok {
				return
			}

			var err error
			containerStatus, err = cctlr.ContainerStatus(ctx)
			if err != nil && !contextDoneError(err) {
				log.G(ctx).WithError(err).Error("container status unavailable")
			}

			// at this point, things have gone fairly wrong. Remain positive
			// and let's get something out the door: fall back to whatever
			// container status the task already carried, or an empty one.
			if containerStatus == nil {
				containerStatus = new(api.ContainerStatus)
				containerStatusTask := task.Status.GetContainer()
				if containerStatusTask != nil {
					*containerStatus = *containerStatusTask // copy it over.
				}
			}
		}

		// at this point, we *must* have a containerStatus. An exit code
		// recorded by fatal overrides the one in the container status.
		if exitCode != 0 {
			containerStatus.ExitCode = int32(exitCode)
		}

		status.RuntimeStatus = &api.TaskStatus_Container{
			Container: containerStatus,
		}

		if portStatus == nil {
			// controllers that do not report port status leave
			// status.PortStatus untouched.
			pctlr, ok := ctlr.(PortStatuser)
			if !ok {
				return
			}

			var err error
			portStatus, err = pctlr.PortStatus(ctx)
			if err != nil && !contextDoneError(err) {
				log.G(ctx).WithError(err).Error("container port status unavailable")
			}
		}

		status.PortStatus = portStatus
	}()

	// this branch bounds the largest state achievable in the agent as SHUTDOWN, which
	// is exactly the correct behavior for the agent.
	if task.DesiredState >= api.TaskStateShutdown {
		// already completed or beyond: nothing left to shut down.
		if status.State >= api.TaskStateCompleted {
			return noop()
		}

		if err := ctlr.Shutdown(ctx); err != nil {
			return fatal(err)
		}

		return transition(api.TaskStateShutdown, "shutdown")
	}

	if status.State > task.DesiredState {
		return noop() // way beyond desired state, pause
	}

	// the following states may proceed past desired state.
	switch status.State {
	case api.TaskStatePreparing:
		// ErrTaskPrepared is not treated as a failure.
		if err := ctlr.Prepare(ctx); err != nil && err != ErrTaskPrepared {
			return fatal(err)
		}

		return transition(api.TaskStateReady, "prepared")
	case api.TaskStateStarting:
		// ErrTaskStarted is not treated as a failure.
		if err := ctlr.Start(ctx); err != nil && err != ErrTaskStarted {
			return fatal(err)
		}

		return transition(api.TaskStateRunning, "started")
	case api.TaskStateRunning:
		if err := ctlr.Wait(ctx); err != nil {
			return fatal(err)
		}

		return transition(api.TaskStateCompleted, "finished")
	}

	// The following represent "pause" states. We can only proceed when the
	// desired state is beyond our current state.
	if status.State >= task.DesiredState {
		return noop()
	}

	switch status.State {
	case api.TaskStateNew, api.TaskStatePending, api.TaskStateAssigned:
		return transition(api.TaskStateAccepted, "accepted")
	case api.TaskStateAccepted:
		return transition(api.TaskStatePreparing, "preparing")
	case api.TaskStateReady:
		return transition(api.TaskStateStarting, "starting")
	default: // terminal states
		return noop()
	}
}
   348  
   349  func logStateChange(ctx context.Context, desired, previous, next api.TaskState) {
   350  	if previous != next {
   351  		fields := logrus.Fields{
   352  			"state.transition": fmt.Sprintf("%v->%v", previous, next),
   353  			"state.desired":    desired,
   354  		}
   355  		log.G(ctx).WithFields(fields).Debug("state changed")
   356  	}
   357  }
   358  
   359  func contextDoneError(err error) bool {
   360  	cause := errors.Cause(err)
   361  	return cause == context.Canceled || cause == context.DeadlineExceeded
   362  }