github.com/kaisenlinux/docker@v0.0.0-20230510090727-ea55db55fac7/swarmkit/agent/exec/dockerapi/controller.go

     1  package dockerapi
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"context"
     7  	"encoding/binary"
     8  	"fmt"
     9  	"io"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/docker/docker/api/types"
    15  	"github.com/docker/docker/api/types/events"
    16  	engineapi "github.com/docker/docker/client"
    17  	"github.com/docker/go-connections/nat"
    18  	"github.com/docker/swarmkit/agent/exec"
    19  	"github.com/docker/swarmkit/api"
    20  	"github.com/docker/swarmkit/log"
    21  	gogotypes "github.com/gogo/protobuf/types"
    22  	"github.com/pkg/errors"
    23  	"golang.org/x/time/rate"
    24  )
    25  
    26  // controller implements agent.Controller against docker's API.
    27  //
    28  // Most operations against docker's API are done through the container name,
    29  // which is unique to the task.
    30  type controller struct {
    31  	task    *api.Task
    32  	adapter *containerAdapter
    33  	closed  chan struct{}
    34  	err     error
    35  
    36  	pulled     chan struct{} // closed after pull
    37  	cancelPull func()        // cancels pull context if not nil
    38  	pullErr    error         // pull error, protected by close of pulled
    39  }
    40  
    41  var _ exec.Controller = &controller{}
    42  
    43  // newController returns a docker exec controller for the provided task.
    44  func newController(client engineapi.APIClient, nodeDescription *api.NodeDescription, task *api.Task, secrets exec.SecretGetter) (exec.Controller, error) {
    45  	adapter, err := newContainerAdapter(client, nodeDescription, task, secrets)
    46  	if err != nil {
    47  		return nil, err
    48  	}
    49  
    50  	return &controller{
    51  		task:    task,
    52  		adapter: adapter,
    53  		closed:  make(chan struct{}),
    54  	}, nil
    55  }
    56  
    57  // ContainerStatus returns the container-specific status for the task.
    58  func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
    59  	ctnr, err := r.adapter.inspect(ctx)
    60  	if err != nil {
    61  		if isUnknownContainer(err) {
    62  			return nil, nil
    63  		}
    64  
    65  		return nil, err
    66  	}
    67  	return parseContainerStatus(ctnr)
    68  }
    69  
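         // PortStatus returns the host-published ports for the task's container, if any.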
    70  func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
    71  	ctnr, err := r.adapter.inspect(ctx)
    72  	if err != nil {
    73  		if isUnknownContainer(err) {
    74  			return nil, nil
    75  		}
    76  
    77  		return nil, err
    78  	}
    79  
    80  	return parsePortStatus(ctnr)
    81  }
    82  
    83  // Update takes a recent task update and applies it to the container.
    84  func (r *controller) Update(ctx context.Context, t *api.Task) error {
    85  	log.G(ctx).Warnf("task updates not yet supported")
    86  	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
    87  	// updates of metadata, such as labelling, as well as any other properties
    88  	// that make sense.
    89  	return nil
    90  }
    91  
    92  // Prepare creates a container and ensures the image is pulled.
    93  //
     94  // If the container has already been created, exec.ErrTaskPrepared is returned.
    95  func (r *controller) Prepare(ctx context.Context) error {
    96  	if err := r.checkClosed(); err != nil {
    97  		return err
    98  	}
    99  
   100  	// Make sure all the networks that the task needs are created.
   101  	if err := r.adapter.createNetworks(ctx); err != nil {
   102  		return err
   103  	}
   104  
   105  	// Make sure all the volumes that the task needs are created.
   106  	if err := r.adapter.createVolumes(ctx); err != nil {
   107  		return err
   108  	}
   109  
   110  	if r.pulled == nil {
    111  		// Launch a re-entrant pull operation associated with the controller,
    112  		// dissociating its context from the caller's context. This allows the
    113  		// pull to resume from the same point if Prepare is called again after
    114  		// a cancellation.
   115  		var pctx context.Context
   116  
   117  		r.pulled = make(chan struct{})
   118  		pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.
   119  
   120  		go func() {
   121  			defer close(r.pulled)
   122  			r.pullErr = r.adapter.pullImage(pctx)
   123  		}()
   124  	}
   125  
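         	// Whether the pull was just started here or was already in flight from an
         	// earlier Prepare call, wait below for it to finish or for cancellation.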
   126  	select {
   127  	case <-ctx.Done():
   128  		return ctx.Err()
   129  	case <-r.pulled:
   130  		if r.pullErr != nil {
    131  			// NOTE(stevvooe): We always try to pull the image to make sure we have
    132  			// the most up-to-date version. If the pull fails, we only log the
    133  			// error here. If the image truly doesn't exist, the create below will
    134  			// error out.
   135  			//
   136  			// This gives us some nice behavior where we use up to date versions of
   137  			// mutable tags, but will still run if the old image is available but a
   138  			// registry is down.
   139  			//
   140  			// If you don't want this behavior, lock down your image to an
   141  			// immutable tag or digest.
   142  			log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
   143  		}
   144  	}
   145  
   146  	if err := r.adapter.create(ctx); err != nil {
   147  		if isContainerCreateNameConflict(err) {
   148  			if _, err := r.adapter.inspect(ctx); err != nil {
   149  				return err
   150  			}
   151  
   152  			// container is already created. success!
   153  			return exec.ErrTaskPrepared
   154  		}
   155  
   156  		return err
   157  	}
   158  
   159  	return nil
   160  }
   161  
   162  // Start the container. An error will be returned if the container is already started.
   163  func (r *controller) Start(ctx context.Context) error {
   164  	if err := r.checkClosed(); err != nil {
   165  		return err
   166  	}
   167  
   168  	ctnr, err := r.adapter.inspect(ctx)
   169  	if err != nil {
   170  		return err
   171  	}
   172  
   173  	// Detect whether the container has *ever* been started. If so, we don't
   174  	// issue the start.
   175  	//
    176  	// TODO(stevvooe): This is very racy. While reading the inspect result,
    177  	// another caller could start the container and we could end up starting it twice.
   178  	if ctnr.State.Status != "created" {
   179  		return exec.ErrTaskStarted
   180  	}
   181  
   182  	if err := r.adapter.start(ctx); err != nil {
   183  		return errors.Wrap(err, "starting container failed")
   184  	}
   185  
   186  	// no health check
   187  	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil {
   188  		return nil
   189  	}
   190  
   191  	healthCmd := ctnr.Config.Healthcheck.Test
   192  
   193  	if len(healthCmd) == 0 {
    194  		// This field should be filled in, even when inherited from the image.
    195  		// If it's empty, the health check would stay in the starting status
    196  		// forever, so treat it as no health check and return directly.
   197  		return nil
   198  	}
   199  
   200  	// health check is disabled
   201  	if healthCmd[0] == "NONE" {
   202  		return nil
   203  	}
   204  
    205  	// Wait for the container to report healthy (or a terminal state) via the event stream.
   206  	eventq, closed, err := r.adapter.events(ctx)
   207  	if err != nil {
   208  		return err
   209  	}
   210  	for {
   211  		select {
   212  		case event := <-eventq:
   213  			if !r.matchevent(event) {
   214  				continue
   215  			}
   216  
   217  			switch event.Action {
   218  			case "die": // exit on terminal events
   219  				ctnr, err := r.adapter.inspect(ctx)
   220  				if err != nil {
   221  					return errors.Wrap(err, "die event received")
   222  				}
   223  
   224  				return makeExitError(ctnr)
   225  			case "destroy":
    226  				// If we get here, something has gone wrong, but we want to exit
    227  				// and report anyway.
   228  				return ErrContainerDestroyed
   229  
   230  			case "health_status: unhealthy":
    231  				// In this case, stop the container and report an unhealthy status.
    232  				// TODO(runshenzhu): double check whether this can cause a deadlock here
   233  				if err := r.Shutdown(ctx); err != nil {
   234  					return errors.Wrap(err, "unhealthy container shutdown failed")
   235  				}
   236  				return ErrContainerUnhealthy
   237  
   238  			case "health_status: healthy":
   239  				return nil
   240  			}
   241  		case <-closed:
    242  			// The event stream was closed; resubscribe and keep waiting.
   243  			eventq, closed, err = r.adapter.events(ctx)
   244  			if err != nil {
   245  				return err
   246  			}
   247  		case <-ctx.Done():
   248  			return ctx.Err()
   249  		case <-r.closed:
   250  			return r.err
   251  		}
   252  	}
   253  }
   254  
   255  // Wait on the container to exit.
   256  func (r *controller) Wait(ctx context.Context) error {
   257  	if err := r.checkClosed(); err != nil {
   258  		return err
   259  	}
   260  
   261  	// check the initial state and report that.
   262  	ctnr, err := r.adapter.inspect(ctx)
   263  	if err != nil {
   264  		return errors.Wrap(err, "inspecting container failed")
   265  	}
   266  
   267  	switch ctnr.State.Status {
   268  	case "exited", "dead":
   269  		// TODO(stevvooe): Treating container status dead as exited. There may
   270  		// be more to do if we have dead containers. Note that this is not the
   271  		// same as task state DEAD, which means the container is completely
   272  		// freed on a node.
   273  
   274  		return makeExitError(ctnr)
   275  	}
   276  
   277  	eventq, closed, err := r.adapter.events(ctx)
   278  	if err != nil {
   279  		return err
   280  	}
   281  
   282  	for {
   283  		select {
   284  		case event := <-eventq:
   285  			if !r.matchevent(event) {
   286  				continue
   287  			}
   288  
   289  			switch event.Action {
   290  			case "die": // exit on terminal events
   291  				ctnr, err := r.adapter.inspect(ctx)
   292  				if err != nil {
   293  					return errors.Wrap(err, "die event received")
   294  				}
   295  
   296  				return makeExitError(ctnr)
   297  			case "destroy":
    298  				// If we get here, something has gone wrong, but we want to exit
    299  				// and report anyway.
   300  				return ErrContainerDestroyed
   301  
   302  			case "health_status: unhealthy":
    303  				// In this case, stop the container and report an unhealthy status.
    304  				// TODO(runshenzhu): double check whether this can cause a deadlock here
   305  				if err := r.Shutdown(ctx); err != nil {
   306  					return errors.Wrap(err, "unhealthy container shutdown failed")
   307  				}
   308  				return ErrContainerUnhealthy
   309  			}
   310  		case <-closed:
    311  			// The event stream was closed; resubscribe and keep waiting.
   312  			eventq, closed, err = r.adapter.events(ctx)
   313  			if err != nil {
   314  				return err
   315  			}
   316  		case <-ctx.Done():
   317  			return ctx.Err()
   318  		case <-r.closed:
   319  			return r.err
   320  		}
   321  	}
   322  }
   323  
   324  // Shutdown the container cleanly.
   325  func (r *controller) Shutdown(ctx context.Context) error {
   326  	if err := r.checkClosed(); err != nil {
   327  		return err
   328  	}
   329  
   330  	if r.cancelPull != nil {
   331  		r.cancelPull()
   332  	}
   333  
   334  	if err := r.adapter.shutdown(ctx); err != nil {
   335  		if isUnknownContainer(err) || isStoppedContainer(err) {
   336  			return nil
   337  		}
   338  
   339  		return err
   340  	}
   341  
   342  	return nil
   343  }
   344  
   345  // Terminate the container, with force.
   346  func (r *controller) Terminate(ctx context.Context) error {
   347  	if err := r.checkClosed(); err != nil {
   348  		return err
   349  	}
   350  
   351  	if r.cancelPull != nil {
   352  		r.cancelPull()
   353  	}
   354  
   355  	if err := r.adapter.terminate(ctx); err != nil {
   356  		if isUnknownContainer(err) {
   357  			return nil
   358  		}
   359  
   360  		return err
   361  	}
   362  
   363  	return nil
   364  }
   365  
   366  // Remove the container and its resources.
   367  func (r *controller) Remove(ctx context.Context) error {
   368  	if err := r.checkClosed(); err != nil {
   369  		return err
   370  	}
   371  
   372  	if r.cancelPull != nil {
   373  		r.cancelPull()
   374  	}
   375  
   376  	// It may be necessary to shut down the task before removing it.
   377  	if err := r.Shutdown(ctx); err != nil {
   378  		if isUnknownContainer(err) {
   379  			return nil
   380  		}
   381  
   382  		// This may fail if the task was already shut down.
   383  		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
   384  	}
   385  
    386  	// Try removing the networks referenced by this task, in case this
    387  	// task is the last one referencing them.
   388  	if err := r.adapter.removeNetworks(ctx); err != nil {
   389  		if isUnknownContainer(err) {
   390  			return nil
   391  		}
   392  
   393  		return err
   394  	}
   395  
   396  	if err := r.adapter.remove(ctx); err != nil {
   397  		if isUnknownContainer(err) {
   398  			return nil
   399  		}
   400  
   401  		return err
   402  	}
   403  
   404  	return nil
   405  }
   406  
    407  // waitReady waits for a container to be "ready".
    408  // Ready means it has at least been started (running, exited, or dead).
   409  func (r *controller) waitReady(pctx context.Context) error {
   410  	if err := r.checkClosed(); err != nil {
   411  		return err
   412  	}
   413  
   414  	ctx, cancel := context.WithCancel(pctx)
   415  	defer cancel()
   416  
   417  	eventq, closed, err := r.adapter.events(ctx)
   418  	if err != nil {
   419  		return err
   420  	}
   421  
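         	// Events were subscribed above before this inspect, so a "start" that lands
         	// in between is still delivered below; if the container has already moved
         	// past "created", return immediately.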
   422  	ctnr, err := r.adapter.inspect(ctx)
   423  	if err != nil {
   424  		if !isUnknownContainer(err) {
   425  			return errors.Wrap(err, "inspect container failed")
   426  		}
   427  	} else {
   428  		switch ctnr.State.Status {
   429  		case "running", "exited", "dead":
   430  			return nil
   431  		}
   432  	}
   433  
   434  	for {
   435  		select {
   436  		case event := <-eventq:
   437  			if !r.matchevent(event) {
   438  				continue
   439  			}
   440  
   441  			switch event.Action {
   442  			case "start":
   443  				return nil
   444  			}
   445  		case <-closed:
    446  			// The event stream was closed; resubscribe and keep waiting.
   447  			eventq, closed, err = r.adapter.events(ctx)
   448  			if err != nil {
   449  				return err
   450  			}
   451  		case <-ctx.Done():
   452  			return ctx.Err()
   453  		case <-r.closed:
   454  			return r.err
   455  		}
   456  	}
   457  }
   458  
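         // Logs streams the container's log output to the given publisher, decoding the
         // engine's multiplexed log frames and their leading timestamps.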
   459  func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
   460  	if err := r.checkClosed(); err != nil {
   461  		return err
   462  	}
   463  
   464  	if err := r.waitReady(ctx); err != nil {
   465  		return errors.Wrap(err, "container not ready for logs")
   466  	}
   467  
   468  	rc, err := r.adapter.logs(ctx, options)
   469  	if err != nil {
   470  		return errors.Wrap(err, "failed getting container logs")
   471  	}
   472  	defer rc.Close()
   473  
   474  	var (
    475  		// Use a rate limiter to keep things under control, but also to provide
    476  		// some ability to coalesce messages.
    477  		limiter = rate.NewLimiter(rate.Limit(10<<20), 10<<20) // 10 MB/s, 10 MB burst
   478  		msgctx  = api.LogContext{
   479  			NodeID:    r.task.NodeID,
   480  			ServiceID: r.task.ServiceID,
   481  			TaskID:    r.task.ID,
   482  		}
   483  	)
   484  
   485  	brd := bufio.NewReader(rc)
   486  	for {
    487  		// The message header is 8 bytes; treat it as a uint64 and pull the stream type off the MSB.
   488  		var header uint64
   489  		if err := binary.Read(brd, binary.BigEndian, &header); err != nil {
   490  			if err == io.EOF {
   491  				return nil
   492  			}
   493  
   494  			return errors.Wrap(err, "failed reading log header")
   495  		}
   496  
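         		// In the engine's multiplexed log format (used for containers without a TTY),
         		// byte 0 of the header carries the stream type and bytes 4-7 carry the payload
         		// size as a big-endian uint32. Having read the header as one big-endian uint64,
         		// isolate the top byte for the stream and mask it off to recover the size.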
   497  		stream, size := (header>>(7<<3))&0xFF, header & ^(uint64(0xFF)<<(7<<3))
   498  
   499  		// limit here to decrease allocation back pressure.
   500  		if err := limiter.WaitN(ctx, int(size)); err != nil {
   501  			return errors.Wrap(err, "failed rate limiter")
   502  		}
   503  
   504  		buf := make([]byte, size)
   505  		_, err := io.ReadFull(brd, buf)
   506  		if err != nil {
   507  			return errors.Wrap(err, "failed reading buffer")
   508  		}
   509  
    510  		// The timestamp is RFC3339Nano followed by a single space. Split it off, parse it, and publish the rest.
   511  		parts := bytes.SplitN(buf, []byte(" "), 2)
   512  		if len(parts) != 2 {
   513  			return fmt.Errorf("invalid timestamp in log message: %v", buf)
   514  		}
   515  
   516  		ts, err := time.Parse(time.RFC3339Nano, string(parts[0]))
   517  		if err != nil {
   518  			return errors.Wrap(err, "failed to parse timestamp")
   519  		}
   520  
   521  		tsp, err := gogotypes.TimestampProto(ts)
   522  		if err != nil {
   523  			return errors.Wrap(err, "failed to convert timestamp")
   524  		}
   525  
   526  		if err := publisher.Publish(ctx, api.LogMessage{
   527  			Context:   msgctx,
   528  			Timestamp: tsp,
   529  			Stream:    api.LogStream(stream),
   530  
   531  			Data: parts[1],
   532  		}); err != nil {
   533  			return errors.Wrap(err, "failed to publish log message")
   534  		}
   535  	}
   536  }
   537  
   538  // Close the controller and clean up any ephemeral resources.
   539  func (r *controller) Close() error {
   540  	select {
   541  	case <-r.closed:
   542  		return r.err
   543  	default:
   544  		if r.cancelPull != nil {
   545  			r.cancelPull()
   546  		}
   547  
   548  		r.err = exec.ErrControllerClosed
   549  		close(r.closed)
   550  	}
   551  	return nil
   552  }
   553  
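         // matchevent reports whether an engine event belongs to this task's container,
         // matching on the container name attribute of the event's actor.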
   554  func (r *controller) matchevent(event events.Message) bool {
   555  	if event.Type != events.ContainerEventType {
   556  		return false
   557  	}
   558  
   559  	// TODO(stevvooe): Filter based on ID matching, in addition to name.
   560  
   561  	// Make sure the events are for this container.
   562  	if event.Actor.Attributes["name"] != r.adapter.container.name() {
   563  		return false
   564  	}
   565  
   566  	return true
   567  }
   568  
   569  func (r *controller) checkClosed() error {
   570  	select {
   571  	case <-r.closed:
   572  		return r.err
   573  	default:
   574  		return nil
   575  	}
   576  }
   577  
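         // exitError carries a container's non-zero exit code, its cause, and the
         // container status, so callers can surface the exit code of a failed task.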
   578  type exitError struct {
   579  	code            int
   580  	cause           error
   581  	containerStatus *api.ContainerStatus
   582  }
   583  
   584  func (e *exitError) Error() string {
   585  	if e.cause != nil {
   586  		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
   587  	}
   588  
   589  	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
   590  }
   591  
   592  func (e *exitError) ExitCode() int {
   593  	return int(e.containerStatus.ExitCode)
   594  }
   595  
   596  func (e *exitError) Cause() error {
   597  	return e.cause
   598  }
   599  
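         // makeExitError returns an exitError describing a non-zero container exit,
         // wrapping any engine-reported error as the cause; it returns nil if the
         // container exited cleanly.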
   600  func makeExitError(ctnr types.ContainerJSON) error {
   601  	if ctnr.State.ExitCode != 0 {
   602  		var cause error
   603  		if ctnr.State.Error != "" {
   604  			cause = errors.New(ctnr.State.Error)
   605  		}
   606  
   607  		cstatus, _ := parseContainerStatus(ctnr)
   608  		return &exitError{
   609  			code:            ctnr.State.ExitCode,
   610  			cause:           cause,
   611  			containerStatus: cstatus,
   612  		}
   613  	}
   614  
   615  	return nil
   616  
   617  }
   618  
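         // parseContainerStatus builds the task's ContainerStatus (container ID, PID,
         // and exit code) from an engine inspect result.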
   619  func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
   620  	status := &api.ContainerStatus{
   621  		ContainerID: ctnr.ID,
   622  		PID:         int32(ctnr.State.Pid),
   623  		ExitCode:    int32(ctnr.State.ExitCode),
   624  	}
   625  
   626  	return status, nil
   627  }
   628  
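         // parsePortStatus extracts the host-published ports, if any, from the
         // inspected container's network settings.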
   629  func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
   630  	status := &api.PortStatus{}
   631  
   632  	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
   633  		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
   634  		if err != nil {
   635  			return nil, err
   636  		}
   637  		status.Ports = exposedPorts
   638  	}
   639  
   640  	return status, nil
   641  }
   642  
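         // parsePortMap converts the engine's port map (keyed by "port/protocol"
         // entries such as "80/tcp") into PortConfig records, one per host binding.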
   643  func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
   644  	exposedPorts := make([]*api.PortConfig, 0, len(portMap))
   645  
   646  	for portProtocol, mapping := range portMap {
   647  		parts := strings.SplitN(string(portProtocol), "/", 2)
   648  		if len(parts) != 2 {
   649  			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
   650  		}
   651  
   652  		port, err := strconv.ParseUint(parts[0], 10, 16)
   653  		if err != nil {
   654  			return nil, err
   655  		}
   656  
   657  		var protocol api.PortConfig_Protocol
   658  		switch strings.ToLower(parts[1]) {
   659  		case "tcp":
   660  			protocol = api.ProtocolTCP
   661  		case "udp":
   662  			protocol = api.ProtocolUDP
   663  		case "sctp":
   664  			protocol = api.ProtocolSCTP
   665  		default:
   666  			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
   667  		}
   668  
   669  		for _, binding := range mapping {
   670  			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
   671  			if err != nil {
   672  				return nil, err
   673  			}
   674  
   675  			// TODO(aluzzardi): We're losing the port `name` here since
   676  			// there's no way to retrieve it back from the Engine.
   677  			exposedPorts = append(exposedPorts, &api.PortConfig{
   678  				PublishMode:   api.PublishModeHost,
   679  				Protocol:      protocol,
   680  				TargetPort:    uint32(port),
   681  				PublishedPort: uint32(hostPort),
   682  			})
   683  		}
   684  	}
   685  
   686  	return exposedPorts, nil
   687  }