github.com/olljanat/moby@v1.13.1/daemon/cluster/executor/container/controller.go

     1  package container
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/docker/docker/api/types"
    15  	"github.com/docker/docker/api/types/events"
    16  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    17  	"github.com/docker/go-connections/nat"
    18  	"github.com/docker/libnetwork"
    19  	"github.com/docker/swarmkit/agent/exec"
    20  	"github.com/docker/swarmkit/api"
    21  	"github.com/docker/swarmkit/log"
    22  	"github.com/docker/swarmkit/protobuf/ptypes"
    23  	"github.com/pkg/errors"
    24  	"golang.org/x/net/context"
    25  	"golang.org/x/time/rate"
    26  )
    27  
    28  // controller implements exec.Controller against docker's API.
    29  //
    30  // Most operations against docker's API are done through the container name,
    31  // which is unique to the task.
    32  type controller struct {
    33  	task    *api.Task
    34  	adapter *containerAdapter
    35  	closed  chan struct{}
    36  	err     error
    37  
    38  	pulled     chan struct{} // closed after pull
    39  	cancelPull func()        // cancels pull context if not nil
    40  	pullErr    error         // pull error, only read after pulled closed
    41  }
    42  
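        // Compile-time assertion that *controller satisfies swarmkit's
        // exec.Controller interface; the blank identifier discards the value.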
    43  var _ exec.Controller = &controller{}
    44  
    45  // newController returns a docker exec runner for the provided task.
    46  func newController(b executorpkg.Backend, task *api.Task, secrets exec.SecretGetter) (*controller, error) {
    47  	adapter, err := newContainerAdapter(b, task, secrets)
    48  	if err != nil {
    49  		return nil, err
    50  	}
    51  
    52  	return &controller{
    53  		task:    task,
    54  		adapter: adapter,
    55  		closed:  make(chan struct{}),
    56  	}, nil
    57  }
    58  
    59  func (r *controller) Task() (*api.Task, error) {
    60  	return r.task, nil
    61  }
    62  
    63  // ContainerStatus returns the container-specific status for the task.
    64  func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
    65  	ctnr, err := r.adapter.inspect(ctx)
    66  	if err != nil {
    67  		if isUnknownContainer(err) {
    68  			return nil, nil
    69  		}
    70  		return nil, err
    71  	}
    72  	return parseContainerStatus(ctnr)
    73  }
    74  
    75  func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
    76  	ctnr, err := r.adapter.inspect(ctx)
    77  	if err != nil {
    78  		if isUnknownContainer(err) {
    79  			return nil, nil
    80  		}
    81  
    82  		return nil, err
    83  	}
    84  
    85  	return parsePortStatus(ctnr)
    86  }
    87  
    88  // Update takes a recent task update and applies it to the container.
    89  func (r *controller) Update(ctx context.Context, t *api.Task) error {
    90  	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
    91  	// updates of metadata, such as labelling, as well as any other properties
    92  	// that make sense.
    93  	return nil
    94  }
    95  
    96  // Prepare creates a container and ensures the image is pulled.
    97  //
    98  // If the container has already been created, exec.ErrTaskPrepared is returned.
    99  func (r *controller) Prepare(ctx context.Context) error {
   100  	if err := r.checkClosed(); err != nil {
   101  		return err
   102  	}
   103  
   104  	// Make sure all the networks that the task needs are created.
   105  	if err := r.adapter.createNetworks(ctx); err != nil {
   106  		return err
   107  	}
   108  
   109  	// Make sure all the volumes that the task needs are created.
   110  	if err := r.adapter.createVolumes(ctx); err != nil {
   111  		return err
   112  	}
   113  
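        	// When DOCKER_SERVICE_PREFER_OFFLINE_IMAGE=1, the pull below is skipped
        	// entirely and the create relies on whatever image is already present
        	// locally; otherwise the image is (re)pulled on every Prepare.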
   114  	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
   115  		if r.pulled == nil {
   116  			// Fork the pull to a different context to allow pull to continue
   117  			// on re-entrant calls to Prepare. This ensures that Prepare can be
   118  			// idempotent and not incur the extra cost of pulling when
   119  			// cancelled on updates.
   120  			var pctx context.Context
   121  
   122  			r.pulled = make(chan struct{})
   123  			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.
   124  
   125  			go func() {
   126  				defer close(r.pulled)
   127  				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
   128  			}()
   129  		}
   130  
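        		// Reading r.pullErr below is safe once r.pulled is closed: the
        		// goroutine writes pullErr before close(r.pulled), and the channel
        		// close establishes the happens-before needed by readers.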
   131  		select {
   132  		case <-ctx.Done():
   133  			return ctx.Err()
   134  		case <-r.pulled:
   135  			if r.pullErr != nil {
   136  				// NOTE(stevvooe): We always try to pull the image to make sure we have
   137  				// the most up to date version. This will return an error, but we only
   138  				// log it. If the image truly doesn't exist, the create below will
   139  				// error out.
   140  				//
   141  				// This gives us some nice behavior where we use up to date versions of
   142  				// mutable tags, but will still run if the old image is available but a
   143  				// registry is down.
   144  				//
   145  				// If you don't want this behavior, lock down your image to an
   146  				// immutable tag or digest.
   147  				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
   148  			}
   149  		}
   150  	}
   151  
   152  	if err := r.adapter.create(ctx); err != nil {
   153  		if isContainerCreateNameConflict(err) {
   154  			if _, err := r.adapter.inspect(ctx); err != nil {
   155  				return err
   156  			}
   157  
   158  			// container is already created. success!
   159  			return exec.ErrTaskPrepared
   160  		}
   161  
   162  		return err
   163  	}
   164  
   165  	return nil
   166  }
   167  
   168  // Start the container. An error will be returned if the container is already started.
   169  func (r *controller) Start(ctx context.Context) error {
   170  	if err := r.checkClosed(); err != nil {
   171  		return err
   172  	}
   173  
   174  	ctnr, err := r.adapter.inspect(ctx)
   175  	if err != nil {
   176  		return err
   177  	}
   178  
   179  	// Detect whether the container has *ever* been started. If so, we don't
   180  	// issue the start.
   181  	//
   182  	// TODO(stevvooe): This is very racy. While reading the inspect result,
   183  	// another caller could start the container and we could end up starting it twice.
   184  	if ctnr.State.Status != "created" {
   185  		return exec.ErrTaskStarted
   186  	}
   187  
   188  	for {
   189  		if err := r.adapter.start(ctx); err != nil {
   190  			if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
   191  				// Retry network creation if the start
   192  				// failed because some of the networks
   193  				// were not found.
   194  				if err := r.adapter.createNetworks(ctx); err != nil {
   195  					return err
   196  				}
   197  
   198  				continue
   199  			}
   200  
   201  			return errors.Wrap(err, "starting container failed")
   202  		}
   203  
   204  		break
   205  	}
   206  
   207  	// no health check
   208  	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil {
   209  		if err := r.adapter.activateServiceBinding(); err != nil {
   210  			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
   211  			return err
   212  		}
   213  		return nil
   214  	}
   215  
   216  	healthCmd := ctnr.Config.Healthcheck.Test
   217  
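        	// The first element of the healthcheck test is its mode ("NONE", "CMD",
        	// "CMD-SHELL"); "NONE", like an empty command, means health checking is
        	// disabled and there will be no healthy event to wait for.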
   218  	if len(healthCmd) == 0 || healthCmd[0] == "NONE" {
   219  		return nil
   220  	}
   221  
   222  	// wait for container to be healthy
   223  	eventq := r.adapter.events(ctx)
   224  
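        	// Consume daemon events for this container until one of them decides the
        	// outcome: "die" and "destroy" are terminal, while the health_status
        	// events either activate the service binding or trigger a shutdown.
        	// healthErr remembers an unhealthy verdict so that the eventual "die"
        	// can report it as the cause of the non-zero exit.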
   225  	var healthErr error
   226  	for {
   227  		select {
   228  		case event := <-eventq:
   229  			if !r.matchevent(event) {
   230  				continue
   231  			}
   232  
   233  			switch event.Action {
   234  			case "die": // exit on terminal events
   235  				ctnr, err := r.adapter.inspect(ctx)
   236  				if err != nil {
   237  					return errors.Wrap(err, "die event received")
   238  				} else if ctnr.State.ExitCode != 0 {
   239  					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
   240  				}
   241  
   242  				return nil
   243  			case "destroy":
   244  				// If we get here, something has gone wrong but we want to exit
   245  				// and report anyway.
   246  				return ErrContainerDestroyed
   247  			case "health_status: unhealthy":
   248  				// in this case, we stop the container and report unhealthy status
   249  				if err := r.Shutdown(ctx); err != nil {
   250  					return errors.Wrap(err, "unhealthy container shutdown failed")
   251  				}
   252  				// set health check error, and wait for container to fully exit ("die" event)
   253  				healthErr = ErrContainerUnhealthy
   254  			case "health_status: healthy":
   255  				if err := r.adapter.activateServiceBinding(); err != nil {
   256  					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
   257  					return err
   258  				}
   259  				return nil
   260  			}
   261  		case <-ctx.Done():
   262  			return ctx.Err()
   263  		case <-r.closed:
   264  			return r.err
   265  		}
   266  	}
   267  }
   268  
   269  // Wait on the container to exit.
   270  func (r *controller) Wait(pctx context.Context) error {
   271  	if err := r.checkClosed(); err != nil {
   272  		return err
   273  	}
   274  
   275  	ctx, cancel := context.WithCancel(pctx)
   276  	defer cancel()
   277  
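        	// Watch for an unhealthy event in a side goroutine and shut the container
        	// down if one arrives; the buffered healthErr channel hands that verdict
        	// to the error handling below so it can be attached as the exit cause.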
   278  	healthErr := make(chan error, 1)
   279  	go func() {
   280  		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
   281  		defer cancel()
   282  		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
   283  			healthErr <- ErrContainerUnhealthy
   284  			if err := r.Shutdown(ectx); err != nil {
   285  				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
   286  			}
   287  		}
   288  	}()
   289  
   290  	err := r.adapter.wait(ctx)
   291  	if ctx.Err() != nil {
   292  		return ctx.Err()
   293  	}
   294  
   295  	if err != nil {
   296  		ee := &exitError{}
   297  		if ec, ok := err.(exec.ExitCoder); ok {
   298  			ee.code = ec.ExitCode()
   299  		}
   300  		select {
   301  		case e := <-healthErr:
   302  			ee.cause = e
   303  		default:
   304  			if err.Error() != "" {
   305  				ee.cause = err
   306  			}
   307  		}
   308  		return ee
   309  	}
   310  
   311  	return nil
   312  }
   313  
   314  // Shutdown the container cleanly.
   315  func (r *controller) Shutdown(ctx context.Context) error {
   316  	if err := r.checkClosed(); err != nil {
   317  		return err
   318  	}
   319  
   320  	if r.cancelPull != nil {
   321  		r.cancelPull()
   322  	}
   323  
   324  	// remove container from service binding
   325  	if err := r.adapter.deactivateServiceBinding(); err != nil {
   326  		log.G(ctx).WithError(err).Errorf("failed to deactivate service binding for container %s", r.adapter.container.name())
   327  		return err
   328  	}
   329  
   330  	if err := r.adapter.shutdown(ctx); err != nil {
   331  		if isUnknownContainer(err) || isStoppedContainer(err) {
   332  			return nil
   333  		}
   334  
   335  		return err
   336  	}
   337  
   338  	return nil
   339  }
   340  
   341  // Terminate the container, with force.
   342  func (r *controller) Terminate(ctx context.Context) error {
   343  	if err := r.checkClosed(); err != nil {
   344  		return err
   345  	}
   346  
   347  	if r.cancelPull != nil {
   348  		r.cancelPull()
   349  	}
   350  
   351  	if err := r.adapter.terminate(ctx); err != nil {
   352  		if isUnknownContainer(err) {
   353  			return nil
   354  		}
   355  
   356  		return err
   357  	}
   358  
   359  	return nil
   360  }
   361  
   362  // Remove the container and its resources.
   363  func (r *controller) Remove(ctx context.Context) error {
   364  	if err := r.checkClosed(); err != nil {
   365  		return err
   366  	}
   367  
   368  	if r.cancelPull != nil {
   369  		r.cancelPull()
   370  	}
   371  
   372  	// It may be necessary to shut down the task before removing it.
   373  	if err := r.Shutdown(ctx); err != nil {
   374  		if isUnknownContainer(err) {
   375  			return nil
   376  		}
   377  		// This may fail if the task was already shut down.
   378  		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
   379  	}
   380  
   381  	// Try removing the networks referenced by this task in case this
   382  	// task is the last one referencing them.
   383  	if err := r.adapter.removeNetworks(ctx); err != nil {
   384  		if isUnknownContainer(err) {
   385  			return nil
   386  		}
   387  		return err
   388  	}
   389  
   390  	if err := r.adapter.remove(ctx); err != nil {
   391  		if isUnknownContainer(err) {
   392  			return nil
   393  		}
   394  
   395  		return err
   396  	}
   397  	return nil
   398  }
   399  
   400  // waitReady waits for a container to be "ready".
   401  // Ready means it's past the started state.
   402  func (r *controller) waitReady(pctx context.Context) error {
   403  	if err := r.checkClosed(); err != nil {
   404  		return err
   405  	}
   406  
   407  	ctx, cancel := context.WithCancel(pctx)
   408  	defer cancel()
   409  
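        	// Subscribe to events before inspecting so that a "start" occurring
        	// between the two calls is still observed by the loop below.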
   410  	eventq := r.adapter.events(ctx)
   411  
   412  	ctnr, err := r.adapter.inspect(ctx)
   413  	if err != nil {
   414  		if !isUnknownContainer(err) {
   415  			return errors.Wrap(err, "inspect container failed")
   416  		}
   417  	} else {
   418  		switch ctnr.State.Status {
   419  		case "running", "exited", "dead":
   420  			return nil
   421  		}
   422  	}
   423  
   424  	for {
   425  		select {
   426  		case event := <-eventq:
   427  			if !r.matchevent(event) {
   428  				continue
   429  			}
   430  
   431  			switch event.Action {
   432  			case "start":
   433  				return nil
   434  			}
   435  		case <-ctx.Done():
   436  			return ctx.Err()
   437  		case <-r.closed:
   438  			return r.err
   439  		}
   440  	}
   441  }
   442  
   443  func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
   444  	if err := r.checkClosed(); err != nil {
   445  		return err
   446  	}
   447  
   448  	if err := r.waitReady(ctx); err != nil {
   449  		return errors.Wrap(err, "container not ready for logs")
   450  	}
   451  
   452  	rc, err := r.adapter.logs(ctx, options)
   453  	if err != nil {
   454  		return errors.Wrap(err, "failed getting container logs")
   455  	}
   456  	defer rc.Close()
   457  
   458  	var (
   459  		// use a rate limiter to keep things under control, but also to provide
   460  		// some ability to coalesce messages.
   461  		limiter = rate.NewLimiter(rate.Every(time.Second), 10<<20) // 10 MB/s
   462  		msgctx  = api.LogContext{
   463  			NodeID:    r.task.NodeID,
   464  			ServiceID: r.task.ServiceID,
   465  			TaskID:    r.task.ID,
   466  		}
   467  	)
   468  
   469  	brd := bufio.NewReader(rc)
   470  	for {
   471  		// The message header is 8 bytes: treat it as a big-endian uint64 and pull the stream type off the most significant byte.
   472  		var header uint64
   473  		if err := binary.Read(brd, binary.BigEndian, &header); err != nil {
   474  			if err == io.EOF {
   475  				return nil
   476  			}
   477  
   478  			return errors.Wrap(err, "failed reading log header")
   479  		}
   480  
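        		// The 8-byte header is the engine's stdcopy multiplexing format:
        		//
        		//   [0]    stream type (0 stdin, 1 stdout, 2 stderr)
        		//   [1:4]  unused
        		//   [4:8]  big-endian payload size
        		//
        		// 7<<3 is 56, so header>>(7<<3) isolates the top byte (the stream
        		// type) and masking that byte out leaves the payload size.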
   481  		stream, size := (header>>(7<<3))&0xFF, header & ^(uint64(0xFF)<<(7<<3))
   482  
   483  		// limit here to decrease allocation back pressure.
   484  		if err := limiter.WaitN(ctx, int(size)); err != nil {
   485  			return errors.Wrap(err, "failed rate limiter")
   486  		}
   487  
   488  		buf := make([]byte, size)
   489  		_, err := io.ReadFull(brd, buf)
   490  		if err != nil {
   491  			return errors.Wrap(err, "failed reading buffer")
   492  		}
   493  
   494  		// The timestamp is RFC3339Nano followed by a single space. Split it off, parse it, and publish the rest.
   495  		parts := bytes.SplitN(buf, []byte(" "), 2)
   496  		if len(parts) != 2 {
   497  			return fmt.Errorf("invalid timestamp in log message: %v", buf)
   498  		}
   499  
   500  		ts, err := time.Parse(time.RFC3339Nano, string(parts[0]))
   501  		if err != nil {
   502  			return errors.Wrap(err, "failed to parse timestamp")
   503  		}
   504  
   505  		tsp, err := ptypes.TimestampProto(ts)
   506  		if err != nil {
   507  			return errors.Wrap(err, "failed to convert timestamp")
   508  		}
   509  
   510  		if err := publisher.Publish(ctx, api.LogMessage{
   511  			Context:   msgctx,
   512  			Timestamp: tsp,
   513  			Stream:    api.LogStream(stream),
   514  
   515  			Data: parts[1],
   516  		}); err != nil {
   517  			return errors.Wrap(err, "failed to publish log message")
   518  		}
   519  	}
   520  }
   521  
   522  // Close the runner and clean up any ephemeral resources.
   523  func (r *controller) Close() error {
   524  	select {
   525  	case <-r.closed:
   526  		return r.err
   527  	default:
   528  		if r.cancelPull != nil {
   529  			r.cancelPull()
   530  		}
   531  
   532  		r.err = exec.ErrControllerClosed
   533  		close(r.closed)
   534  	}
   535  	return nil
   536  }
   537  
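        // matchevent reports whether a daemon event refers to this task's container,
        // matching on the container name attribute.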
   538  func (r *controller) matchevent(event events.Message) bool {
   539  	if event.Type != events.ContainerEventType {
   540  		return false
   541  	}
   542  
   543  	// TODO(stevvooe): Filter based on ID matching, in addition to name.
   544  
   545  	// Make sure the events are for this container.
   546  	if event.Actor.Attributes["name"] != r.adapter.container.name() {
   547  		return false
   548  	}
   549  
   550  	return true
   551  }
   552  
   553  func (r *controller) checkClosed() error {
   554  	select {
   555  	case <-r.closed:
   556  		return r.err
   557  	default:
   558  		return nil
   559  	}
   560  }
   561  
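        // parseContainerStatus lifts the container ID, PID, and exit code from an
        // inspect result into a swarm ContainerStatus.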
   562  func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
   563  	status := &api.ContainerStatus{
   564  		ContainerID: ctnr.ID,
   565  		PID:         int32(ctnr.State.Pid),
   566  		ExitCode:    int32(ctnr.State.ExitCode),
   567  	}
   568  
   569  	return status, nil
   570  }
   571  
   572  func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
   573  	status := &api.PortStatus{}
   574  
   575  	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
   576  		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
   577  		if err != nil {
   578  			return nil, err
   579  		}
   580  		status.Ports = exposedPorts
   581  	}
   582  
   583  	return status, nil
   584  }
   585  
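        // parsePortMap converts an engine port map into swarm PortConfig entries,
        // one per host binding, published in host mode. Keys look like "8080/tcp"
        // and each binding carries the host port it was published on; for example
        // (illustrative values only):
        //
        //   "8080/tcp" -> [{HostIP: "0.0.0.0", HostPort: "32768"}]
        //
        // would yield a PortConfig with TargetPort 8080 and PublishedPort 32768.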
   586  func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
   587  	exposedPorts := make([]*api.PortConfig, 0, len(portMap))
   588  
   589  	for portProtocol, mapping := range portMap {
   590  		parts := strings.SplitN(string(portProtocol), "/", 2)
   591  		if len(parts) != 2 {
   592  			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
   593  		}
   594  
   595  		port, err := strconv.ParseUint(parts[0], 10, 16)
   596  		if err != nil {
   597  			return nil, err
   598  		}
   599  
   600  		protocol := api.ProtocolTCP
   601  		switch strings.ToLower(parts[1]) {
   602  		case "tcp":
   603  			protocol = api.ProtocolTCP
   604  		case "udp":
   605  			protocol = api.ProtocolUDP
   606  		default:
   607  			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
   608  		}
   609  
   610  		for _, binding := range mapping {
   611  			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
   612  			if err != nil {
   613  				return nil, err
   614  			}
   615  
   616  			// TODO(aluzzardi): We're losing the port `name` here since
   617  			// there's no way to retrieve it back from the Engine.
   618  			exposedPorts = append(exposedPorts, &api.PortConfig{
   619  				PublishMode:   api.PublishModeHost,
   620  				Protocol:      protocol,
   621  				TargetPort:    uint32(port),
   622  				PublishedPort: uint32(hostPort),
   623  			})
   624  		}
   625  	}
   626  
   627  	return exposedPorts, nil
   628  }
   629  
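        // exitError pairs a non-zero container exit code with an optional underlying
        // cause (for example a failed health check) so that callers can recover the
        // code via its ExitCode method.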
   630  type exitError struct {
   631  	code  int
   632  	cause error
   633  }
   634  
   635  func (e *exitError) Error() string {
   636  	if e.cause != nil {
   637  		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
   638  	}
   639  
   640  	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
   641  }
   642  
   643  func (e *exitError) ExitCode() int {
   644  	return int(e.code)
   645  }
   646  
   647  func (e *exitError) Cause() error {
   648  	return e.cause
   649  }
   650  
   651  // checkHealth blocks until an unhealthy container is detected or the context is done.
   652  func (r *controller) checkHealth(ctx context.Context) error {
   653  	eventq := r.adapter.events(ctx)
   654  
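        	// A nil return here means "no verdict": only an explicit unhealthy event
        	// from the daemon produces an error.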
   655  	for {
   656  		select {
   657  		case <-ctx.Done():
   658  			return nil
   659  		case <-r.closed:
   660  			return nil
   661  		case event := <-eventq:
   662  			if !r.matchevent(event) {
   663  				continue
   664  			}
   665  
   666  			switch event.Action {
   667  			case "health_status: unhealthy":
   668  				return ErrContainerUnhealthy
   669  			}
   670  		}
   671  	}
   672  }