github.com/sams1990/dockerrepo@v17.12.1-ce-rc2+incompatible/daemon/cluster/executor/container/controller.go

     1  package container
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"strconv"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/docker/docker/api/types"
    11  	"github.com/docker/docker/api/types/events"
    12  	executorpkg "github.com/docker/docker/daemon/cluster/executor"
    13  	"github.com/docker/go-connections/nat"
    14  	"github.com/docker/libnetwork"
    15  	"github.com/docker/swarmkit/agent/exec"
    16  	"github.com/docker/swarmkit/api"
    17  	"github.com/docker/swarmkit/log"
    18  	gogotypes "github.com/gogo/protobuf/types"
    19  	"github.com/pkg/errors"
    20  	"golang.org/x/net/context"
    21  	"golang.org/x/time/rate"
    22  )
    23  
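        // defaultGossipConvergeDelay is how long Shutdown waits, after deactivating a
        // service binding, for the removal to propagate through the gossip network.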
    24  const defaultGossipConvergeDelay = 2 * time.Second
    25  
    26  // controller implements exec.Controller against docker's API.
    27  //
    28  // Most operations against docker's API are done through the container name,
    29  // which is unique to the task.
    30  type controller struct {
    31  	task       *api.Task
    32  	adapter    *containerAdapter
    33  	closed     chan struct{}
    34  	err        error
    35  	pulled     chan struct{} // closed after pull
    36  	cancelPull func()        // cancels pull context if not nil
    37  	pullErr    error         // pull error, only read after pulled closed
    38  }
    39  
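        // compile-time assertion that *controller implements exec.Controller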
    40  var _ exec.Controller = &controller{}
    41  
    42  // newController returns a docker exec runner for the provided task.
    43  func newController(b executorpkg.Backend, task *api.Task, node *api.NodeDescription, dependencies exec.DependencyGetter) (*controller, error) {
    44  	adapter, err := newContainerAdapter(b, task, node, dependencies)
    45  	if err != nil {
    46  		return nil, err
    47  	}
    48  
    49  	return &controller{
    50  		task:    task,
    51  		adapter: adapter,
    52  		closed:  make(chan struct{}),
    53  	}, nil
    54  }
    55  
    56  func (r *controller) Task() (*api.Task, error) {
    57  	return r.task, nil
    58  }
    59  
    60  // ContainerStatus returns the container-specific status for the task.
    61  func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
    62  	ctnr, err := r.adapter.inspect(ctx)
    63  	if err != nil {
    64  		if isUnknownContainer(err) {
    65  			return nil, nil
    66  		}
    67  		return nil, err
    68  	}
    69  	return parseContainerStatus(ctnr)
    70  }
    71  
    72  func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
    73  	ctnr, err := r.adapter.inspect(ctx)
    74  	if err != nil {
    75  		if isUnknownContainer(err) {
    76  			return nil, nil
    77  		}
    78  
    79  		return nil, err
    80  	}
    81  
    82  	return parsePortStatus(ctnr)
    83  }
    84  
    85  // Update takes a recent task update and applies it to the container.
    86  func (r *controller) Update(ctx context.Context, t *api.Task) error {
    87  	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
    88  	// updates of metadata, such as labelling, as well as any other properties
    89  	// that make sense.
    90  	return nil
    91  }
    92  
    93  // Prepare creates a container and ensures the image is pulled.
    94  //
    95  // If the container has already been created, exec.ErrTaskPrepared is returned.
    96  func (r *controller) Prepare(ctx context.Context) error {
    97  	if err := r.checkClosed(); err != nil {
    98  		return err
    99  	}
   100  
   101  	// Make sure all the networks that the task needs are created.
   102  	if err := r.adapter.createNetworks(ctx); err != nil {
   103  		return err
   104  	}
   105  
   106  	// Make sure all the volumes that the task needs are created.
   107  	if err := r.adapter.createVolumes(ctx); err != nil {
   108  		return err
   109  	}
   110  
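        	// When DOCKER_SERVICE_PREFER_OFFLINE_IMAGE=1, the pull below is skipped and a
        	// locally available image is used; if no local image exists, the create call
        	// further down will fail.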
   111  	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
   112  		if r.pulled == nil {
   113  			// Fork the pull to a different context to allow pull to continue
   114  			// on re-entrant calls to Prepare. This ensures that Prepare can be
   115  			// idempotent and not incur the extra cost of pulling when
   116  			// cancelled on updates.
   117  			var pctx context.Context
   118  
   119  			r.pulled = make(chan struct{})
   120  			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.
   121  
   122  			go func() {
   123  				defer close(r.pulled)
   124  				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
   125  			}()
   126  		}
   127  
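        		// Block until the pull finishes or the caller's context ends. The pull itself
        		// runs under a background context, so a canceled Prepare leaves it running and
        		// a later re-entrant call can pick up the result.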
   128  		select {
   129  		case <-ctx.Done():
   130  			return ctx.Err()
   131  		case <-r.pulled:
   132  			if r.pullErr != nil {
   133  				// NOTE(stevvooe): We always try to pull the image to make sure we have
   134  				// the most up to date version. This will return an error, but we only
   135  				// log it. If the image truly doesn't exist, the create below will
   136  				// error out.
   137  				//
   138  				// This gives us some nice behavior where we use up to date versions of
   139  				// mutable tags, but will still run if the old image is available but a
   140  				// registry is down.
   141  				//
   142  				// If you don't want this behavior, lock down your image to an
   143  				// immutable tag or digest.
   144  				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
   145  			}
   146  		}
   147  	}
   148  	if err := r.adapter.create(ctx); err != nil {
   149  		if isContainerCreateNameConflict(err) {
   150  			if _, err := r.adapter.inspect(ctx); err != nil {
   151  				return err
   152  			}
   153  
   154  			// container is already created. success!
   155  			return exec.ErrTaskPrepared
   156  		}
   157  
   158  		return err
   159  	}
   160  
   161  	return nil
   162  }
   163  
   164  // Start the container. An error will be returned if the container is already started.
   165  func (r *controller) Start(ctx context.Context) error {
   166  	if err := r.checkClosed(); err != nil {
   167  		return err
   168  	}
   169  
   170  	ctnr, err := r.adapter.inspect(ctx)
   171  	if err != nil {
   172  		return err
   173  	}
   174  
   175  	// Detect whether the container has *ever* been started. If so, we don't
   176  	// issue the start.
   177  	//
   178  	// TODO(stevvooe): This is very racy. While reading inspect, another caller
   179  	// could start the process and we could end up starting it twice.
   180  	if ctnr.State.Status != "created" {
   181  		return exec.ErrTaskStarted
   182  	}
   183  
   184  	for {
   185  		if err := r.adapter.start(ctx); err != nil {
   186  			if _, ok := errors.Cause(err).(libnetwork.ErrNoSuchNetwork); ok {
   187  				// Retry network creation again if we
   188  				// failed because some of the networks
   189  				// were not found.
   190  				if err := r.adapter.createNetworks(ctx); err != nil {
   191  					return err
   192  				}
   193  
   194  				continue
   195  			}
   196  
   197  			return errors.Wrap(err, "starting container failed")
   198  		}
   199  
   200  		break
   201  	}
   202  
   203  	// no health check
   204  	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
   205  		if err := r.adapter.activateServiceBinding(); err != nil {
   206  			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
   207  			return err
   208  		}
   209  		return nil
   210  	}
   211  
   212  	// wait for container to be healthy
   213  	eventq := r.adapter.events(ctx)
   214  
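        	// The loop below exits on the first terminal event for this container: "die"
        	// reports the exit status, "destroy" means the container is gone, and a healthy
        	// status activates the service binding. An unhealthy status triggers a shutdown
        	// and then waits for the resulting "die" event.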
   215  	var healthErr error
   216  	for {
   217  		select {
   218  		case event := <-eventq:
   219  			if !r.matchevent(event) {
   220  				continue
   221  			}
   222  
   223  			switch event.Action {
   224  			case "die": // exit on terminal events
   225  				ctnr, err := r.adapter.inspect(ctx)
   226  				if err != nil {
   227  					return errors.Wrap(err, "die event received")
   228  				} else if ctnr.State.ExitCode != 0 {
   229  					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
   230  				}
   231  
   232  				return nil
   233  			case "destroy":
   234  				// If we get here, something has gone wrong but we want to exit
   235  				// and report anyway.
   236  				return ErrContainerDestroyed
   237  			case "health_status: unhealthy":
   238  				// in this case, we stop the container and report unhealthy status
   239  				if err := r.Shutdown(ctx); err != nil {
   240  					return errors.Wrap(err, "unhealthy container shutdown failed")
   241  				}
   242  				// set health check error, and wait for container to fully exit ("die" event)
   243  				healthErr = ErrContainerUnhealthy
   244  			case "health_status: healthy":
   245  				if err := r.adapter.activateServiceBinding(); err != nil {
   246  					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
   247  					return err
   248  				}
   249  				return nil
   250  			}
   251  		case <-ctx.Done():
   252  			return ctx.Err()
   253  		case <-r.closed:
   254  			return r.err
   255  		}
   256  	}
   257  }
   258  
   259  // Wait on the container to exit.
   260  func (r *controller) Wait(pctx context.Context) error {
   261  	if err := r.checkClosed(); err != nil {
   262  		return err
   263  	}
   264  
   265  	ctx, cancel := context.WithCancel(pctx)
   266  	defer cancel()
   267  
   268  	healthErr := make(chan error, 1)
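        	// The buffered channel lets the health-check goroutine record the cause without
        	// blocking, even if Wait has already stopped listening.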
   269  	go func() {
   270  		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
   271  		defer cancel()
   272  		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
   273  			healthErr <- ErrContainerUnhealthy
   274  			if err := r.Shutdown(ectx); err != nil {
   275  				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
   276  			}
   277  		}
   278  	}()
   279  
   280  	waitC, err := r.adapter.wait(ctx)
   281  	if err != nil {
   282  		return err
   283  	}
   284  
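        	// A non-zero exit is wrapped in an exitError. If the health-check goroutine
        	// already flagged the container as unhealthy, that is reported as the cause;
        	// otherwise any error carried by the wait status is used.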
   285  	if status := <-waitC; status.ExitCode() != 0 {
   286  		exitErr := &exitError{
   287  			code: status.ExitCode(),
   288  		}
   289  
   290  		// Set the cause if it is knowable.
   291  		select {
   292  		case e := <-healthErr:
   293  			exitErr.cause = e
   294  		default:
   295  			if status.Err() != nil {
   296  				exitErr.cause = status.Err()
   297  			}
   298  		}
   299  
   300  		return exitErr
   301  	}
   302  
   303  	return nil
   304  }
   305  
   306  func (r *controller) hasServiceBinding() bool {
   307  	if r.task == nil {
   308  		return false
   309  	}
   310  
   311  	// the task has a service binding if it is attached to any network other than the default bridge
   312  	for _, na := range r.task.Networks {
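        		// Note: && binds tighter than ||, so the last clause only matches networks
        		// whose driver and name are both "bridge".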
   313  		if na.Network == nil ||
   314  			na.Network.DriverState == nil ||
   315  			na.Network.DriverState.Name == "bridge" && na.Network.Spec.Annotations.Name == "bridge" {
   316  			continue
   317  		}
   318  		return true
   319  	}
   320  
   321  	return false
   322  }
   323  
   324  // Shutdown the container cleanly.
   325  func (r *controller) Shutdown(ctx context.Context) error {
   326  	if err := r.checkClosed(); err != nil {
   327  		return err
   328  	}
   329  
   330  	if r.cancelPull != nil {
   331  		r.cancelPull()
   332  	}
   333  
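        	// Remove the container from its service binding first and give that change time
        	// to propagate through gossip before actually stopping the container.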
   334  	if r.hasServiceBinding() {
   335  		// remove container from service binding
   336  		if err := r.adapter.deactivateServiceBinding(); err != nil {
   337  			log.G(ctx).WithError(err).Warningf("failed to deactivate service binding for container %s", r.adapter.container.name())
   338  			// Don't return an error here, because failure to deactivate
   339  			// the service binding is expected if the container was never
   340  			// started.
   341  		}
   342  
   343  		// add a delay for gossip converge
   344  	// TODO(dongluochen): this delay should be configurable to fit different cluster sizes and network delays.
   345  		time.Sleep(defaultGossipConvergeDelay)
   346  	}
   347  
   348  	if err := r.adapter.shutdown(ctx); err != nil {
   349  		if isUnknownContainer(err) || isStoppedContainer(err) {
   350  			return nil
   351  		}
   352  
   353  		return err
   354  	}
   355  
   356  	return nil
   357  }
   358  
   359  // Terminate the container, with force.
   360  func (r *controller) Terminate(ctx context.Context) error {
   361  	if err := r.checkClosed(); err != nil {
   362  		return err
   363  	}
   364  
   365  	if r.cancelPull != nil {
   366  		r.cancelPull()
   367  	}
   368  
   369  	if err := r.adapter.terminate(ctx); err != nil {
   370  		if isUnknownContainer(err) {
   371  			return nil
   372  		}
   373  
   374  		return err
   375  	}
   376  
   377  	return nil
   378  }
   379  
   380  // Remove the container and its resources.
   381  func (r *controller) Remove(ctx context.Context) error {
   382  	if err := r.checkClosed(); err != nil {
   383  		return err
   384  	}
   385  
   386  	if r.cancelPull != nil {
   387  		r.cancelPull()
   388  	}
   389  
   390  	// It may be necessary to shut down the task before removing it.
   391  	if err := r.Shutdown(ctx); err != nil {
   392  		if isUnknownContainer(err) {
   393  			return nil
   394  		}
   395  		// This may fail if the task was already shut down.
   396  		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
   397  	}
   398  
   399  	// Try removing networks referenced in this task in case this
   400  	// task is the last one referencing them.
   401  	if err := r.adapter.removeNetworks(ctx); err != nil {
   402  		if isUnknownContainer(err) {
   403  			return nil
   404  		}
   405  		return err
   406  	}
   407  
   408  	if err := r.adapter.remove(ctx); err != nil {
   409  		if isUnknownContainer(err) {
   410  			return nil
   411  		}
   412  
   413  		return err
   414  	}
   415  	return nil
   416  }
   417  
   418  // waitReady waits for a container to be "ready".
   419  // Ready means the container is past the created state: running, exited, or dead.
   420  func (r *controller) waitReady(pctx context.Context) error {
   421  	if err := r.checkClosed(); err != nil {
   422  		return err
   423  	}
   424  
   425  	ctx, cancel := context.WithCancel(pctx)
   426  	defer cancel()
   427  
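        	// Subscribe to events before inspecting so that a container which starts in
        	// between is still observed as a "start" event in the loop below.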
   428  	eventq := r.adapter.events(ctx)
   429  
   430  	ctnr, err := r.adapter.inspect(ctx)
   431  	if err != nil {
   432  		if !isUnknownContainer(err) {
   433  			return errors.Wrap(err, "inspect container failed")
   434  		}
   435  	} else {
   436  		switch ctnr.State.Status {
   437  		case "running", "exited", "dead":
   438  			return nil
   439  		}
   440  	}
   441  
   442  	for {
   443  		select {
   444  		case event := <-eventq:
   445  			if !r.matchevent(event) {
   446  				continue
   447  			}
   448  
   449  			switch event.Action {
   450  			case "start":
   451  				return nil
   452  			}
   453  		case <-ctx.Done():
   454  			return ctx.Err()
   455  		case <-r.closed:
   456  			return r.err
   457  		}
   458  	}
   459  }
   460  
   461  func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
   462  	if err := r.checkClosed(); err != nil {
   463  		return err
   464  	}
   465  
   466  	// if we're following, wait for this container to be ready. there is a
   467  	// problem here: if the container will never be ready (for example, it has
   468  	// been totally deleted) then this will wait forever. however, this doesn't
   469  	// actually cause any UI issues, and shouldn't be a problem. the stuck wait
   470  	// will go away when the follow (context) is canceled.
   471  	if options.Follow {
   472  		if err := r.waitReady(ctx); err != nil {
   473  			return errors.Wrap(err, "container not ready for logs")
   474  		}
   475  	}
   476  	// if we're not following, we're not gonna wait for the container to be
   477  	// ready. just call logs. if the container isn't ready, the call will fail
   478  	// and return an error. no big deal, we don't care, we only want the logs
   479  	// we can get RIGHT NOW with no follow
   480  
   481  	logsContext, cancel := context.WithCancel(ctx)
   482  	msgs, err := r.adapter.logs(logsContext, options)
   483  	defer cancel()
   484  	if err != nil {
   485  		return errors.Wrap(err, "failed getting container logs")
   486  	}
   487  
   488  	var (
   489  		// use a rate limiter to keep things under control but also provide some
   490  		// ability to coalesce messages.
   491  		limiter = rate.NewLimiter(rate.Every(time.Second), 10<<20) // 10 MB/s
   492  		msgctx  = api.LogContext{
   493  			NodeID:    r.task.NodeID,
   494  			ServiceID: r.task.ServiceID,
   495  			TaskID:    r.task.ID,
   496  		}
   497  	)
   498  
   499  	for {
   500  		msg, ok := <-msgs
   501  		if !ok {
   502  			// we're done here, no more messages
   503  			return nil
   504  		}
   505  
   506  		if msg.Err != nil {
   507  			// the deferred cancel closes the adapter's log stream
   508  			return msg.Err
   509  		}
   510  
   511  		// wait here for the limiter to catch up
   512  		if err := limiter.WaitN(ctx, len(msg.Line)); err != nil {
   513  			return errors.Wrap(err, "failed rate limiter")
   514  		}
   515  		tsp, err := gogotypes.TimestampProto(msg.Timestamp)
   516  		if err != nil {
   517  			return errors.Wrap(err, "failed to convert timestamp")
   518  		}
   519  		var stream api.LogStream
   520  		if msg.Source == "stdout" {
   521  			stream = api.LogStreamStdout
   522  		} else if msg.Source == "stderr" {
   523  			stream = api.LogStreamStderr
   524  		}
   525  
   526  		// parse the details out of the Attrs map
   527  		var attrs []api.LogAttr
   528  		if len(msg.Attrs) != 0 {
   529  			attrs = make([]api.LogAttr, 0, len(msg.Attrs))
   530  			for _, attr := range msg.Attrs {
   531  				attrs = append(attrs, api.LogAttr{Key: attr.Key, Value: attr.Value})
   532  			}
   533  		}
   534  
   535  		if err := publisher.Publish(ctx, api.LogMessage{
   536  			Context:   msgctx,
   537  			Timestamp: tsp,
   538  			Stream:    stream,
   539  			Attrs:     attrs,
   540  			Data:      msg.Line,
   541  		}); err != nil {
   542  			return errors.Wrap(err, "failed to publish log message")
   543  		}
   544  	}
   545  }
   546  
   547  // Close the runner and clean up any ephemeral resources.
   548  func (r *controller) Close() error {
   549  	select {
   550  	case <-r.closed:
   551  		return r.err
   552  	default:
   553  		if r.cancelPull != nil {
   554  			r.cancelPull()
   555  		}
   556  
   557  		r.err = exec.ErrControllerClosed
   558  		close(r.closed)
   559  	}
   560  	return nil
   561  }
   562  
   563  func (r *controller) matchevent(event events.Message) bool {
   564  	if event.Type != events.ContainerEventType {
   565  		return false
   566  	}
   567  	// we can't filter using id since doing so is very likely to introduce a deadlock. see #33377.
   568  	return event.Actor.Attributes["name"] == r.adapter.container.name()
   569  }
   570  
   571  func (r *controller) checkClosed() error {
   572  	select {
   573  	case <-r.closed:
   574  		return r.err
   575  	default:
   576  		return nil
   577  	}
   578  }
   579  
   580  func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
   581  	status := &api.ContainerStatus{
   582  		ContainerID: ctnr.ID,
   583  		PID:         int32(ctnr.State.Pid),
   584  		ExitCode:    int32(ctnr.State.ExitCode),
   585  	}
   586  
   587  	return status, nil
   588  }
   589  
   590  func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
   591  	status := &api.PortStatus{}
   592  
   593  	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
   594  		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
   595  		if err != nil {
   596  			return nil, err
   597  		}
   598  		status.Ports = exposedPorts
   599  	}
   600  
   601  	return status, nil
   602  }
   603  
   604  func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
   605  	exposedPorts := make([]*api.PortConfig, 0, len(portMap))
   606  
   607  	for portProtocol, mapping := range portMap {
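        		// nat.PortMap keys have the form "port/protocol", e.g. "8080/tcp".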
   608  		parts := strings.SplitN(string(portProtocol), "/", 2)
   609  		if len(parts) != 2 {
   610  			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
   611  		}
   612  
   613  		port, err := strconv.ParseUint(parts[0], 10, 16)
   614  		if err != nil {
   615  			return nil, err
   616  		}
   617  
   618  		protocol := api.ProtocolTCP
   619  		switch strings.ToLower(parts[1]) {
   620  		case "tcp":
   621  			protocol = api.ProtocolTCP
   622  		case "udp":
   623  			protocol = api.ProtocolUDP
   624  		default:
   625  			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
   626  		}
   627  
   628  		for _, binding := range mapping {
   629  			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
   630  			if err != nil {
   631  				return nil, err
   632  			}
   633  
   634  			// TODO(aluzzardi): We're losing the port `name` here since
   635  			// there's no way to retrieve it from the Engine.
   636  			exposedPorts = append(exposedPorts, &api.PortConfig{
   637  				PublishMode:   api.PublishModeHost,
   638  				Protocol:      protocol,
   639  				TargetPort:    uint32(port),
   640  				PublishedPort: uint32(hostPort),
   641  			})
   642  		}
   643  	}
   644  
   645  	return exposedPorts, nil
   646  }
   647  
   648  type exitError struct {
   649  	code  int
   650  	cause error
   651  }
   652  
   653  func (e *exitError) Error() string {
   654  	if e.cause != nil {
   655  		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
   656  	}
   657  
   658  	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
   659  }
   660  
   661  func (e *exitError) ExitCode() int {
   662  	return e.code
   663  }
   664  
   665  func (e *exitError) Cause() error {
   666  	return e.cause
   667  }
   668  
   669  // checkHealth blocks until an unhealthy container is detected or ctx exits.
   670  func (r *controller) checkHealth(ctx context.Context) error {
   671  	eventq := r.adapter.events(ctx)
   672  
   673  	for {
   674  		select {
   675  		case <-ctx.Done():
   676  			return nil
   677  		case <-r.closed:
   678  			return nil
   679  		case event := <-eventq:
   680  			if !r.matchevent(event) {
   681  				continue
   682  			}
   683  
   684  			switch event.Action {
   685  			case "health_status: unhealthy":
   686  				return ErrContainerUnhealthy
   687  			}
   688  		}
   689  	}
   690  }