github.com/yogeshlonkar/moby@v1.13.2-0.20201203103638-c0b64beaea94/daemon/cluster/executor/container/controller.go

package container

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
	"os"
	"strconv"
	"strings"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/events"
	executorpkg "github.com/docker/docker/daemon/cluster/executor"
	"github.com/docker/go-connections/nat"
	"github.com/docker/libnetwork"
	"github.com/docker/swarmkit/agent/exec"
	"github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/log"
	"github.com/docker/swarmkit/protobuf/ptypes"
	"github.com/pkg/errors"
	"golang.org/x/net/context"
	"golang.org/x/time/rate"
)

// controller implements exec.Controller against docker's API.
//
// Most operations against docker's API are done through the container name,
// which is unique to the task.
type controller struct {
	task    *api.Task
	adapter *containerAdapter
	closed  chan struct{}
	err     error

	pulled     chan struct{} // closed after pull
	cancelPull func()        // cancels pull context if not nil
	pullErr    error         // pull error, only read after pulled closed
}

var _ exec.Controller = &controller{}

// newController returns a docker exec runner for the provided task.
func newController(b executorpkg.Backend, task *api.Task, secrets exec.SecretGetter) (*controller, error) {
	adapter, err := newContainerAdapter(b, task, secrets)
	if err != nil {
		return nil, err
	}

	return &controller{
		task:    task,
		adapter: adapter,
		closed:  make(chan struct{}),
	}, nil
}

func (r *controller) Task() (*api.Task, error) {
	return r.task, nil
}

// ContainerStatus returns the container-specific status for the task.
func (r *controller) ContainerStatus(ctx context.Context) (*api.ContainerStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}
		return nil, err
	}
	return parseContainerStatus(ctnr)
}

func (r *controller) PortStatus(ctx context.Context) (*api.PortStatus, error) {
	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if isUnknownContainer(err) {
			return nil, nil
		}

		return nil, err
	}

	return parsePortStatus(ctnr)
}

// Update takes a recent task update and applies it to the container.
func (r *controller) Update(ctx context.Context, t *api.Task) error {
	// TODO(stevvooe): While assignment of tasks is idempotent, we do allow
	// updates of metadata, such as labelling, as well as any other properties
	// that make sense.
	return nil
}

// Prepare creates a container and ensures the image is pulled.
//
// If the container has already been created, exec.ErrTaskPrepared is returned.
func (r *controller) Prepare(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	// Make sure all the networks that the task needs are created.
	if err := r.adapter.createNetworks(ctx); err != nil {
		return err
	}

	// Make sure all the volumes that the task needs are created.
	if err := r.adapter.createVolumes(ctx); err != nil {
		return err
	}

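	// Unless the operator prefers an already-present image (opted into via the
	// DOCKER_SERVICE_PREFER_OFFLINE_IMAGE environment variable), kick off an
	// image pull, or join one already started by an earlier Prepare call.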
	if os.Getenv("DOCKER_SERVICE_PREFER_OFFLINE_IMAGE") != "1" {
		if r.pulled == nil {
			// Fork the pull to a different context to allow pull to continue
			// on re-entrant calls to Prepare. This ensures that Prepare can be
			// idempotent and not incur the extra cost of pulling when
			// cancelled on updates.
			var pctx context.Context

			r.pulled = make(chan struct{})
			pctx, r.cancelPull = context.WithCancel(context.Background()) // TODO(stevvooe): Bind a context to the entire controller.

			go func() {
				defer close(r.pulled)
				r.pullErr = r.adapter.pullImage(pctx) // protected by closing r.pulled
			}()
		}

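		// Wait for the pull to finish. The pull itself runs on the background
		// context created above, so cancelling this Prepare only abandons the
		// wait; a later call can pick the result up from r.pulled.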
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-r.pulled:
			if r.pullErr != nil {
				// NOTE(stevvooe): We always try to pull the image to make sure we have
				// the most up to date version. This will return an error, but we only
				// log it. If the image truly doesn't exist, the create below will
				// error out.
				//
				// This gives us some nice behavior where we use up to date versions of
				// mutable tags, but will still run if the old image is available but a
				// registry is down.
				//
				// If you don't want this behavior, lock down your image to an
				// immutable tag or digest.
				log.G(ctx).WithError(r.pullErr).Error("pulling image failed")
			}
		}
	}

	if err := r.adapter.create(ctx); err != nil {
		if isContainerCreateNameConflict(err) {
			if _, err := r.adapter.inspect(ctx); err != nil {
				return err
			}

			// container is already created. success!
			return exec.ErrTaskPrepared
		}

		return err
	}

	return nil
}

// Start the container. An error will be returned if the container is already started.
func (r *controller) Start(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		return err
	}

	// Detect whether the container has *ever* been started. If so, we don't
	// issue the start.
	//
	// TODO(stevvooe): This is very racy. While reading inspect, another could
	// start the process and we could end up starting it twice.
	if ctnr.State.Status != "created" {
		return exec.ErrTaskStarted
	}

	for {
		if err := r.adapter.start(ctx); err != nil {
			if _, ok := err.(libnetwork.ErrNoSuchNetwork); ok {
				// Retry network creation again if we
				// failed because some of the networks
				// were not found.
				if err := r.adapter.createNetworks(ctx); err != nil {
					return err
				}

				continue
			}

			return errors.Wrap(err, "starting container failed")
		}

		break
	}

	// no health check configured (or explicitly disabled)
	if ctnr.Config == nil || ctnr.Config.Healthcheck == nil || len(ctnr.Config.Healthcheck.Test) == 0 || ctnr.Config.Healthcheck.Test[0] == "NONE" {
		if err := r.adapter.activateServiceBinding(); err != nil {
			log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s which has no healthcheck config", r.adapter.container.name())
			return err
		}
		return nil
	}

	// wait for container to be healthy
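	// The daemon emits "health_status: ..." container events for containers
	// that carry a healthcheck; the first healthy event completes Start, while
	// die/destroy events are treated as terminal.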
	eventq := r.adapter.events(ctx)

	var healthErr error
	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "die": // exit on terminal events
				ctnr, err := r.adapter.inspect(ctx)
				if err != nil {
					return errors.Wrap(err, "die event received")
				} else if ctnr.State.ExitCode != 0 {
					return &exitError{code: ctnr.State.ExitCode, cause: healthErr}
				}

				return nil
			case "destroy":
				// If we get here, something has gone wrong but we want to exit
				// and report anyway.
				return ErrContainerDestroyed
			case "health_status: unhealthy":
				// in this case, we stop the container and report unhealthy status
				if err := r.Shutdown(ctx); err != nil {
					return errors.Wrap(err, "unhealthy container shutdown failed")
				}
				// set health check error, and wait for container to fully exit ("die" event)
				healthErr = ErrContainerUnhealthy
			case "health_status: healthy":
				if err := r.adapter.activateServiceBinding(); err != nil {
					log.G(ctx).WithError(err).Errorf("failed to activate service binding for container %s after healthy event", r.adapter.container.name())
					return err
				}
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

// Wait on the container to exit.
func (r *controller) Wait(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

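	// Watch for an unhealthy status in the background. If the container turns
	// unhealthy we shut it down so the wait below returns, and record the
	// cause so it can be attached to the resulting exit error.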
	healthErr := make(chan error, 1)
	go func() {
		ectx, cancel := context.WithCancel(ctx) // cancel event context on first event
		defer cancel()
		if err := r.checkHealth(ectx); err == ErrContainerUnhealthy {
			healthErr <- ErrContainerUnhealthy
			if err := r.Shutdown(ectx); err != nil {
				log.G(ectx).WithError(err).Debug("shutdown failed on unhealthy")
			}
		}
	}()

	err := r.adapter.wait(ctx)
	if ctx.Err() != nil {
		return ctx.Err()
	}

	if err != nil {
		ee := &exitError{}
		if ec, ok := err.(exec.ExitCoder); ok {
			ee.code = ec.ExitCode()
		}
		select {
		case e := <-healthErr:
			ee.cause = e
		default:
			if err.Error() != "" {
				ee.cause = err
			}
		}
		return ee
	}

	return nil
}

// Shutdown the container cleanly.
func (r *controller) Shutdown(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

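	// Cancel any in-flight image pull; there is no point finishing it for a
	// task that is shutting down.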
	if r.cancelPull != nil {
		r.cancelPull()
	}

	// remove container from service binding
	if err := r.adapter.deactivateServiceBinding(); err != nil {
		log.G(ctx).WithError(err).Warningf("failed to deactivate service binding for container %s", r.adapter.container.name())
		// Don't return an error here, because failure to deactivate
		// the service binding is expected if the container was never
		// started.
	}

	if err := r.adapter.shutdown(ctx); err != nil {
		if isUnknownContainer(err) || isStoppedContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Terminate the container, with force.
func (r *controller) Terminate(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	if err := r.adapter.terminate(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}

	return nil
}

// Remove the container and its resources.
func (r *controller) Remove(ctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if r.cancelPull != nil {
		r.cancelPull()
	}

	// It may be necessary to shut down the task before removing it.
	if err := r.Shutdown(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		// This may fail if the task was already shut down.
		log.G(ctx).WithError(err).Debug("shutdown failed on removal")
	}

	// Try removing the networks referenced by this task in case this
	// task is the last one referencing them.
	if err := r.adapter.removeNetworks(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}
		return err
	}

	if err := r.adapter.remove(ctx); err != nil {
		if isUnknownContainer(err) {
			return nil
		}

		return err
	}
	return nil
}

// waitReady waits for a container to be "ready".
// Ready means it's past the started state.
func (r *controller) waitReady(pctx context.Context) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	ctx, cancel := context.WithCancel(pctx)
	defer cancel()

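	// Subscribe to events before inspecting so that a start happening between
	// the inspect and the subscription is not missed.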
	eventq := r.adapter.events(ctx)

	ctnr, err := r.adapter.inspect(ctx)
	if err != nil {
		if !isUnknownContainer(err) {
			return errors.Wrap(err, "inspect container failed")
		}
	} else {
		switch ctnr.State.Status {
		case "running", "exited", "dead":
			return nil
		}
	}

	for {
		select {
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "start":
				return nil
			}
		case <-ctx.Done():
			return ctx.Err()
		case <-r.closed:
			return r.err
		}
	}
}

func (r *controller) Logs(ctx context.Context, publisher exec.LogPublisher, options api.LogSubscriptionOptions) error {
	if err := r.checkClosed(); err != nil {
		return err
	}

	if err := r.waitReady(ctx); err != nil {
		return errors.Wrap(err, "container not ready for logs")
	}

	rc, err := r.adapter.logs(ctx, options)
	if err != nil {
		return errors.Wrap(err, "failed getting container logs")
	}
	defer rc.Close()

	var (
		// use a rate limiter to keep things under control but also provide
		// some ability to coalesce messages.
		limiter = rate.NewLimiter(rate.Every(time.Second), 10<<20) // 10 MB/s
		msgctx  = api.LogContext{
			NodeID:    r.task.NodeID,
			ServiceID: r.task.ServiceID,
			TaskID:    r.task.ID,
		}
	)

	brd := bufio.NewReader(rc)
	for {
		// The message header is 8 bytes; treat it as a uint64 and pull the
		// stream ID off the most significant byte.
		var header uint64
		if err := binary.Read(brd, binary.BigEndian, &header); err != nil {
			if err == io.EOF {
				return nil
			}

			return errors.Wrap(err, "failed reading log header")
		}

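		// The frame header written by the daemon's stdcopy multiplexer is
		// [stream, 0, 0, 0, size (4 bytes, big-endian)]. Reading it as one
		// big-endian uint64 puts the stream ID in the top byte and, because
		// the padding bytes are zero, the payload size in the remaining bits.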
		stream, size := (header>>(7<<3))&0xFF, header & ^(uint64(0xFF)<<(7<<3))

		// limit here to decrease allocation back pressure.
		if err := limiter.WaitN(ctx, int(size)); err != nil {
			return errors.Wrap(err, "failed rate limiter")
		}

		buf := make([]byte, size)
		_, err := io.ReadFull(brd, buf)
		if err != nil {
			return errors.Wrap(err, "failed reading buffer")
		}

		// The timestamp is RFC3339Nano followed by a single space. Split it
		// off, parse it, and publish the rest.
		parts := bytes.SplitN(buf, []byte(" "), 2)
		if len(parts) != 2 {
			return fmt.Errorf("invalid timestamp in log message: %v", buf)
		}

		ts, err := time.Parse(time.RFC3339Nano, string(parts[0]))
		if err != nil {
			return errors.Wrap(err, "failed to parse timestamp")
		}

		tsp, err := ptypes.TimestampProto(ts)
		if err != nil {
			return errors.Wrap(err, "failed to convert timestamp")
		}

		if err := publisher.Publish(ctx, api.LogMessage{
			Context:   msgctx,
			Timestamp: tsp,
			Stream:    api.LogStream(stream),

			Data: parts[1],
		}); err != nil {
			return errors.Wrap(err, "failed to publish log message")
		}
	}
}

// Close the runner and clean up any ephemeral resources.
func (r *controller) Close() error {
	select {
	case <-r.closed:
		return r.err
	default:
		if r.cancelPull != nil {
			r.cancelPull()
		}

		r.err = exec.ErrControllerClosed
		close(r.closed)
	}
	return nil
}

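// matchevent reports whether an engine event was emitted by this task's container.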
func (r *controller) matchevent(event events.Message) bool {
	if event.Type != events.ContainerEventType {
		return false
	}

	// TODO(stevvooe): Filter based on ID matching, in addition to name.

	// Make sure the events are for this container.
	if event.Actor.Attributes["name"] != r.adapter.container.name() {
		return false
	}

	return true
}

func (r *controller) checkClosed() error {
	select {
	case <-r.closed:
		return r.err
	default:
		return nil
	}
}

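// parseContainerStatus converts an Engine inspect result into the swarmkit
// ContainerStatus reported for the task.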
func parseContainerStatus(ctnr types.ContainerJSON) (*api.ContainerStatus, error) {
	status := &api.ContainerStatus{
		ContainerID: ctnr.ID,
		PID:         int32(ctnr.State.Pid),
		ExitCode:    int32(ctnr.State.ExitCode),
	}

	return status, nil
}

func parsePortStatus(ctnr types.ContainerJSON) (*api.PortStatus, error) {
	status := &api.PortStatus{}

	if ctnr.NetworkSettings != nil && len(ctnr.NetworkSettings.Ports) > 0 {
		exposedPorts, err := parsePortMap(ctnr.NetworkSettings.Ports)
		if err != nil {
			return nil, err
		}
		status.Ports = exposedPorts
	}

	return status, nil
}

func parsePortMap(portMap nat.PortMap) ([]*api.PortConfig, error) {
	exposedPorts := make([]*api.PortConfig, 0, len(portMap))

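	// Keys in a nat.PortMap have the form "port/proto" (e.g. "8080/tcp");
	// each value lists the host-side bindings for that container port.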
	for portProtocol, mapping := range portMap {
		parts := strings.SplitN(string(portProtocol), "/", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("invalid port mapping: %s", portProtocol)
		}

		port, err := strconv.ParseUint(parts[0], 10, 16)
		if err != nil {
			return nil, err
		}

		protocol := api.ProtocolTCP
		switch strings.ToLower(parts[1]) {
		case "tcp":
			protocol = api.ProtocolTCP
		case "udp":
			protocol = api.ProtocolUDP
		default:
			return nil, fmt.Errorf("invalid protocol: %s", parts[1])
		}

		for _, binding := range mapping {
			hostPort, err := strconv.ParseUint(binding.HostPort, 10, 16)
			if err != nil {
				return nil, err
			}

			// TODO(aluzzardi): We're losing the port `name` here since
			// there's no way to retrieve it back from the Engine.
			exposedPorts = append(exposedPorts, &api.PortConfig{
				PublishMode:   api.PublishModeHost,
				Protocol:      protocol,
				TargetPort:    uint32(port),
				PublishedPort: uint32(hostPort),
			})
		}
	}

	return exposedPorts, nil
}

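// exitError reports a non-zero container exit, optionally carrying the
// health-check failure that caused it.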
type exitError struct {
	code  int
	cause error
}

func (e *exitError) Error() string {
	if e.cause != nil {
		return fmt.Sprintf("task: non-zero exit (%v): %v", e.code, e.cause)
	}

	return fmt.Sprintf("task: non-zero exit (%v)", e.code)
}

func (e *exitError) ExitCode() int {
	return int(e.code)
}

func (e *exitError) Cause() error {
	return e.cause
}

// checkHealth blocks until the container is reported unhealthy or the context is done.
func (r *controller) checkHealth(ctx context.Context) error {
	eventq := r.adapter.events(ctx)

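	// A nil return simply means no unhealthy event was observed before the
	// context or the controller was closed.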
	for {
		select {
		case <-ctx.Done():
			return nil
		case <-r.closed:
			return nil
		case event := <-eventq:
			if !r.matchevent(event) {
				continue
			}

			switch event.Action {
			case "health_status: unhealthy":
				return ErrContainerUnhealthy
			}
		}
	}
}