github.com/jiasir/docker@v1.3.3-0.20170609024000-252e610103e7/daemon/cluster/executor/container/controller.go

package container

import (
	"fmt"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/events"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/go-connections/nat"
	"github.com/docker/libnetwork"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	gogotypes "github.com/gogo/protobuf/types"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"golang.org/x/time/rate"
)

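// defaultGossipConvergeDelay is how long Shutdown waits after deactivating a
// service binding, giving the removal time to propagate through network gossip.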
const defaultGossipConvergeDelay = 2 * time.Second

// controller implements agent.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task    *api.Task
	adapter *containerAdapter
	closed  chan struct{}
	err     error

	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, only read after pulled closed
}

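// Compile-time assertion that controller satisfies exec.Controller.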
var _ exec.Controller = &controller{}

// newController returns a docker exec runner for the provided task.
func newController(b executorpkg.Backend, task *api.Task, dependencies exec.DependencyGetter) (*controller, error) {
	adapter, err := newContainerAdapter(b, task, dependencies)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}

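// Task returns the task backing this controller.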
func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

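// PortStatus returns the port status for the task's container, derived from
// the engine's inspect output.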
func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

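	// Pull the image first unless the operator has asked to prefer images
	// already present on the node (DOCKER_SERVICE_PREFER_OFFLINE_IMAGE=1).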
	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare. This ensures that Prepare can be
			// idempotent and not incur the extra cost of pulling when
			// cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. This will return an error, but we only
				// log it. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}

	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}

// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While reading inspect, another could
	// start the process and we could end up starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	for {
		if err := r.adapter.start(ctx); err != nil {
			if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
				// Retry network creation if we failed because
				// some of the networks were not found.
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// no health check
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	// wait for container to be healthy
	eventq := r.adapter.events(ctx)

	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyway.
				return ErrContainerDestroyed
			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// set health check error, and wait for container to fully exit ("die" event)
				healthErr = ErrContainerUnhealthy
			case "health_status: healthy":
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

// Wait on the container to exit.
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	waitC, err := r.adapter.wait(ctx)
	if err != nil {
		return err
	}

	if status := <-waitC; status.ExitCode() != 0 {
		exitErr := &exitError{
			code: status.ExitCode(),
		}

		// Set the cause if it is knowable.
		select {
		case e := <-healthErr:
			exitErr.cause = e
		default:
			if status.Err() != nil {
				exitErr.cause = status.Err()
			}
		}

		return exitErr
	}

	return nil
}

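// hasServiceBinding reports whether the task participates in a service
// binding, i.e. it is attached to at least one network other than the
// default bridge.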
func (r *controller) hasServiceBinding() bool {
	if r.task == nil {
		return false
	}

	// service is attached to a network besides the default bridge
	for _, na := range r.task.Networks {
		if na.Network == nil ||
			na.Network.DriverState == nil ||
			na.Network.DriverState.Name == "bridge" && na.Network.Spec.Annotations.Name == "bridge" {
			continue
		}
		return true
	}

	return false
}

// Shutdown the container cleanly.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if r.hasServiceBinding() {
		// remove container from service binding
		if err := r.adapter.deactivateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Warningf("failed to deactivate service binding for container %s", r.adapter.container.name())
			// Don't return an error here, because failure to deactivate
			// the service binding is expected if the container was never
			// started.
		}

		// add a delay for gossip convergence
		// TODO(dongluochen): this delay should be configurable to fit different cluster sizes and network delays.
		time.Sleep(defaultGossipConvergeDelay)
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if isUnknownContainer(err) || isStoppedContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	// Try removing the networks referenced in this task in case this
	// task is the last one referencing them.
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		return err
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}
	return nil
}

// waitReady waits for a container to be "ready".
// Ready means it's past the started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

	eventq := r.adapter.events(ctx)

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

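// Logs publishes the container's log messages to the provided publisher,
// subject to the given subscription options.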
func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// if we're following, wait for this container to be ready. there is a
	// problem here: if the container will never be ready (for example, it has
	// been totally deleted) then this will wait forever. however, this doesn't
	// actually cause any UI issues, and shouldn't be a problem. the stuck wait
	// will go away when the follow (context) is canceled.
	if options.Follow {
		if err := r.waitReady(ctx); err != nil {
			return errors.Wrap(err, "container not ready for logs")
		}
	}
	// if we're not following, we're not gonna wait for the container to be
	// ready. just call logs. if the container isn't ready, the call will fail
	// and return an error. no big deal, we don't care, we only want the logs
	// we can get RIGHT NOW with no follow

	logsContext, cancel := context.WithCancel(ctx)
	msgs, err := r.adapter.logs(logsContext, options)
	defer cancel()
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}

	var (
		// use a rate limiter to keep things under control but also provide some
		// ability to coalesce messages.
		limiter = rate.NewLimiter(rate.Limit(10<<20), 10<<20) // 10 MB/s, with a 10 MB burst
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	for {
		msg, ok := <-msgs
		if !ok {
			// we're done here, no more messages
			return nil
		}

		if msg.Err != nil {
			// the deferred cancel closes the adapter's log stream
			return msg.Err
		}

		// wait here for the limiter to catch up
		if err := limiter.WaitN(ctx, len(msg.Line)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}
		tsp, err := gogotypes.TimestampProto(msg.Timestamp)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}
		var stream api.LogStream
		if msg.Source == "stdout" {
			stream = api.LogStreamStdout
		} else if msg.Source == "stderr" {
			stream = api.LogStreamStderr
		}
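		// any other source leaves stream at its zero value, LogStreamUnknown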

		// parse the details out of the Attrs map
		attrs := []api.LogAttr{}
		for k, v := range msg.Attrs {
			attr := api.LogAttr{Key: k, Value: v}
			attrs = append(attrs, attr)
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    stream,
			Attrs:     attrs,
			Data:      msg.Line,
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}

// Close the runner and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}

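// matchevent reports whether an engine event refers to this task's container.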
func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}

	// TODO(stevvooe): Filter based on ID matching, in addition to name.

	// Make sure the events are for this container.
	if event.Actor.Attributes["name"] != r.adapter.container.name() {
		return false
	}

	return true
}

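// checkClosed returns the controller's terminal error if it has been closed,
// or nil if the controller is still usable.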
func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

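// parseContainerStatus converts an engine inspect result into an
// api.ContainerStatus.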
func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

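// parsePortStatus extracts the published ports, if any, from an engine
// inspect result.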
func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}

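// parsePortMap converts the engine's nat.PortMap into swarm PortConfig
// entries. For example, the map entry "80/tcp" -> {HostPort: "8080"} becomes
// a host-mode PortConfig with TargetPort 80 and PublishedPort 8080.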
func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

	for portProtocol, mapping := range portMap {
		parts := strings.SplitN(string(portProtocol), "/", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(parts[0], 10, 16)
		if err != nil {
			return nil, err
		}

		protocol := api.ProtocolTCP
		switch strings.ToLower(parts[1]) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}

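// exitError reports a non-zero exit from the task's container, optionally
// carrying the underlying cause (for example a failed health check).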
type exitError struct {
	code  int
	cause error
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return e.code
}

func (e *exitError) Cause() error {
	return e.cause
}

// checkHealth blocks until the container is reported unhealthy, ctx is
// canceled, or the controller is closed.
func (r *controller) checkHealth(ctx context.Context) error {
	eventq := r.adapter.events(ctx)

	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.closed:
			return nil
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "health_status: unhealthy":
				return ErrContainerUnhealthy
			}
		}
	}
}