github.com/bigcommerce/nomad@v0.9.3-bc/drivers/docker/driver.go

     1  package docker
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net"
     7  	"os"
     8  	"path/filepath"
     9  	"runtime"
    10  	"strconv"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	docker "github.com/fsouza/go-dockerclient"
    16  	"github.com/hashicorp/consul-template/signals"
    17  	hclog "github.com/hashicorp/go-hclog"
    18  	multierror "github.com/hashicorp/go-multierror"
    19  	plugin "github.com/hashicorp/go-plugin"
    20  	"github.com/hashicorp/nomad/client/taskenv"
    21  	"github.com/hashicorp/nomad/drivers/docker/docklog"
    22  	"github.com/hashicorp/nomad/drivers/shared/eventer"
    23  	nstructs "github.com/hashicorp/nomad/nomad/structs"
    24  	"github.com/hashicorp/nomad/plugins/base"
    25  	"github.com/hashicorp/nomad/plugins/drivers"
    26  	"github.com/hashicorp/nomad/plugins/shared/structs"
    27  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    28  )
    29  
    30  var (
    31  	// createClientsLock is a lock that protects reading/writing global client
    32  	// variables
    33  	createClientsLock sync.Mutex
    34  
    35  	// client is a docker client with a timeout of 5 minutes. This is used for
    36  	// all operations with the docker daemon that are not long running, such as
    37  	// creating and killing containers.
    38  	client *docker.Client
    39  
    40  	// waitClient is a docker client with no timeouts. This is used for long
    41  	// running operations such as waiting on containers and collecting stats.
    42  	waitClient *docker.Client
    43  
    44  	// recoverableErrTimeouts returns a recoverable error if the error was due
    45  	// to timeouts
    46  	recoverableErrTimeouts = func(err error) error {
    47  		r := false
    48  		if strings.Contains(err.Error(), "Client.Timeout exceeded while awaiting headers") ||
    49  			strings.Contains(err.Error(), "EOF") {
    50  			r = true
    51  		}
    52  		return nstructs.NewRecoverableError(err, r)
    53  	}
    54  
    55  	// taskHandleVersion is the version of the task handle that this driver
    56  	// sets and knows how to decode driver state from
    57  	taskHandleVersion = 1
    58  
    59  	// Nvidia-container-runtime environment variable names
    60  	nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
    61  )
    62  
    63  type Driver struct {
    64  	// eventer is used to handle multiplexing of TaskEvents calls such that an
    65  	// event can be broadcast to all callers
    66  	eventer *eventer.Eventer
    67  
    68  	// config contains the runtime configuration for the driver set by the
    69  	// SetConfig RPC
    70  	config *DriverConfig
    71  
    72  	// clientConfig contains a driver specific subset of the Nomad client
    73  	// configuration
    74  	clientConfig *base.ClientDriverConfig
    75  
    76  	// ctx is the context for the driver. It is passed to other subsystems to
    77  	// coordinate shutdown
    78  	ctx context.Context
    79  
    80  	// signalShutdown is called when the driver is shutting down and cancels the
    81  	// ctx passed to any subsystems
    82  	signalShutdown context.CancelFunc
    83  
    84  	// tasks is the in memory datastore mapping taskIDs to taskHandles
    85  	tasks *taskStore
    86  
    87  	// coordinator is what tracks multiple image pulls against the same docker image
    88  	coordinator *dockerCoordinator
    89  
    90  	// logger will log to the Nomad agent
    91  	logger hclog.Logger
    92  
    93  	// gpuRuntime indicates nvidia-docker runtime availability
    94  	gpuRuntime bool
    95  
    96  	// A tri-state boolean indicating whether fingerprinting has run and, if
    97  	// so, whether it was successful
    98  	fingerprintSuccess *bool
    99  	fingerprintLock    sync.RWMutex
   100  
   101  	// A boolean to know if the docker driver has ever been correctly detected
   102  	// for use during fingerprinting.
   103  	detected     bool
   104  	detectedLock sync.RWMutex
   105  }
   106  
   107  // NewDockerDriver returns a docker implementation of a driver plugin
   108  func NewDockerDriver(logger hclog.Logger) drivers.DriverPlugin {
   109  	ctx, cancel := context.WithCancel(context.Background())
   110  	logger = logger.Named(pluginName)
   111  	return &Driver{
   112  		eventer:        eventer.NewEventer(ctx, logger),
   113  		config:         &DriverConfig{},
   114  		tasks:          newTaskStore(),
   115  		ctx:            ctx,
   116  		signalShutdown: cancel,
   117  		logger:         logger,
   118  	}
   119  }
   120  
   121  func (d *Driver) reattachToDockerLogger(reattachConfig *structs.ReattachConfig) (docklog.DockerLogger, *plugin.Client, error) {
   122  	reattach, err := pstructs.ReattachConfigToGoPlugin(reattachConfig)
   123  	if err != nil {
   124  		return nil, nil, err
   125  	}
   126  
   127  	dlogger, dloggerPluginClient, err := docklog.ReattachDockerLogger(reattach)
   128  	if err != nil {
   129  		return nil, nil, fmt.Errorf("failed to reattach to docker logger process: %v", err)
   130  	}
   131  
   132  	return dlogger, dloggerPluginClient, nil
   133  }
   134  
   135  func (d *Driver) setupNewDockerLogger(container *docker.Container, cfg *drivers.TaskConfig, startTime time.Time) (docklog.DockerLogger, *plugin.Client, error) {
   136  	dlogger, pluginClient, err := docklog.LaunchDockerLogger(d.logger)
   137  	if err != nil {
   138  		if pluginClient != nil {
   139  			pluginClient.Kill()
   140  		}
   141  		return nil, nil, fmt.Errorf("failed to launch docker logger plugin: %v", err)
   142  	}
   143  
   144  	if err := dlogger.Start(&docklog.StartOpts{
   145  		Endpoint:    d.config.Endpoint,
   146  		ContainerID: container.ID,
   147  		TTY:         container.Config.Tty,
   148  		Stdout:      cfg.StdoutPath,
   149  		Stderr:      cfg.StderrPath,
   150  		TLSCert:     d.config.TLS.Cert,
   151  		TLSKey:      d.config.TLS.Key,
   152  		TLSCA:       d.config.TLS.CA,
   153  		StartTime:   startTime.Unix(),
   154  	}); err != nil {
   155  		pluginClient.Kill()
   156  		return nil, nil, fmt.Errorf("failed to launch docker logger process %s: %v", container.ID, err)
   157  	}
   158  
   159  	return dlogger, pluginClient, nil
   160  }
   161  
   162  func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
   163  	if _, ok := d.tasks.Get(handle.Config.ID); ok {
   164  		return nil
   165  	}
   166  
   167  	// COMPAT(0.10): pre 0.9 upgrade path check
   168  	if handle.Version == 0 {
   169  		return d.recoverPre09Task(handle)
   170  	}
   171  
   172  	var handleState taskHandleState
   173  	if err := handle.GetDriverState(&handleState); err != nil {
   174  		return fmt.Errorf("failed to decode driver task state: %v", err)
   175  	}
   176  
   177  	client, _, err := d.dockerClients()
   178  	if err != nil {
   179  		return fmt.Errorf("failed to get docker client: %v", err)
   180  	}
   181  
   182  	container, err := client.InspectContainer(handleState.ContainerID)
   183  	if err != nil {
   184  		return fmt.Errorf("failed to inspect container for id %q: %v", handleState.ContainerID, err)
   185  	}
   186  
   187  	h := &taskHandle{
   188  		client:                client,
   189  		waitClient:            waitClient,
   190  		logger:                d.logger.With("container_id", container.ID),
   191  		task:                  handle.Config,
   192  		containerID:           container.ID,
   193  		containerImage:        container.Image,
   194  		doneCh:                make(chan bool),
   195  		waitCh:                make(chan struct{}),
   196  		removeContainerOnExit: d.config.GC.Container,
   197  		net:                   handleState.DriverNetwork,
   198  	}
   199  
   200  	h.dlogger, h.dloggerPluginClient, err = d.reattachToDockerLogger(handleState.ReattachConfig)
   201  	if err != nil {
   202  		d.logger.Warn("failed to reattach to docker logger process", "error", err)
   203  
   204  		h.dlogger, h.dloggerPluginClient, err = d.setupNewDockerLogger(container, handle.Config, time.Now())
   205  		if err != nil {
   206  			if err := client.StopContainer(handleState.ContainerID, 0); err != nil {
   207  				d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err)
   208  			}
   209  			return fmt.Errorf("failed to setup replacement docker logger: %v", err)
   210  		}
   211  
   212  		if err := handle.SetDriverState(h.buildState()); err != nil {
   213  			if err := client.StopContainer(handleState.ContainerID, 0); err != nil {
   214  				d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err)
   215  			}
   216  			return fmt.Errorf("failed to store driver state: %v", err)
   217  		}
   218  	}
   219  
   220  	d.tasks.Set(handle.Config.ID, h)
   221  	go h.run()
   222  
   223  	return nil
   224  }
   225  
   226  func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
   227  	if _, ok := d.tasks.Get(cfg.ID); ok {
   228  		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
   229  	}
   230  
   231  	var driverConfig TaskConfig
   232  
   233  	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
   234  		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
   235  	}
   236  
   237  	if driverConfig.Image == "" {
   238  		return nil, nil, fmt.Errorf("image name required for docker driver")
   239  	}
   240  
    241  	// Strip the https:// prefix from the image name if present
   242  	if strings.HasPrefix(driverConfig.Image, "https://") {
   243  		driverConfig.Image = strings.Replace(driverConfig.Image, "https://", "", 1)
   244  	}
   245  
   246  	handle := drivers.NewTaskHandle(taskHandleVersion)
   247  	handle.Config = cfg
   248  
   249  	// Initialize docker API clients
   250  	client, _, err := d.dockerClients()
   251  	if err != nil {
   252  		return nil, nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
   253  	}
   254  
   255  	id, err := d.createImage(cfg, &driverConfig, client)
   256  	if err != nil {
   257  		return nil, nil, err
   258  	}
   259  
   260  	containerCfg, err := d.createContainerConfig(cfg, &driverConfig, driverConfig.Image)
   261  	if err != nil {
   262  		d.logger.Error("failed to create container configuration", "image_name", driverConfig.Image,
   263  			"image_id", id, "error", err)
   264  		return nil, nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", driverConfig.Image, id, err)
   265  	}
   266  
   267  	startAttempts := 0
   268  CREATE:
   269  	container, err := d.createContainer(client, containerCfg, &driverConfig)
   270  	if err != nil {
   271  		d.logger.Error("failed to create container", "error", err)
   272  		return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to create container: %v", err), err)
   273  	}
   274  
   275  	d.logger.Info("created container", "container_id", container.ID)
   276  
    277  	// We don't need to start the container if it is already running, since
    278  	// createContainer returns any matching container that is already present
    279  	// and running on the host rather than creating a duplicate.
   280  	if !container.State.Running {
   281  		// Start the container
   282  		if err := d.startContainer(container); err != nil {
   283  			d.logger.Error("failed to start container", "container_id", container.ID, "error", err)
   284  			client.RemoveContainer(docker.RemoveContainerOptions{
   285  				ID:    container.ID,
   286  				Force: true,
   287  			})
   288  			// Some sort of docker race bug, recreating the container usually works
   289  			if strings.Contains(err.Error(), "OCI runtime create failed: container with id exists:") && startAttempts < 5 {
   290  				startAttempts++
    291  				d.logger.Debug("reattempting container create/start sequence", "attempt", startAttempts, "image_id", id)
   292  				goto CREATE
   293  			}
   294  			return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("Failed to start container %s: %s", container.ID, err), err)
   295  		}
   296  
   297  		// InspectContainer to get all of the container metadata as
   298  		// much of the metadata (eg networking) isn't populated until
   299  		// the container is started
   300  		runningContainer, err := client.InspectContainer(container.ID)
   301  		if err != nil {
   302  			msg := "failed to inspect started container"
   303  			d.logger.Error(msg, "error", err)
   304  			return nil, nil, nstructs.NewRecoverableError(fmt.Errorf("%s %s: %s", msg, container.ID, err), true)
   305  		}
   306  		container = runningContainer
   307  		d.logger.Info("started container", "container_id", container.ID)
   308  	} else {
   309  		d.logger.Debug("re-attaching to container", "container_id",
   310  			container.ID, "container_state", container.State.String())
   311  	}
   312  
   313  	dlogger, pluginClient, err := d.setupNewDockerLogger(container, cfg, time.Unix(0, 0))
   314  	if err != nil {
   315  		d.logger.Error("an error occurred after container startup, terminating container", "container_id", container.ID)
   316  		client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true})
   317  		return nil, nil, err
   318  	}
   319  
   320  	// Detect container address
   321  	ip, autoUse := d.detectIP(container, &driverConfig)
   322  
   323  	net := &drivers.DriverNetwork{
   324  		PortMap:       driverConfig.PortMap,
   325  		IP:            ip,
   326  		AutoAdvertise: autoUse,
   327  	}
   328  
   329  	// Return a driver handle
   330  	h := &taskHandle{
   331  		client:                client,
   332  		waitClient:            waitClient,
   333  		dlogger:               dlogger,
   334  		dloggerPluginClient:   pluginClient,
   335  		logger:                d.logger.With("container_id", container.ID),
   336  		task:                  cfg,
   337  		containerID:           container.ID,
   338  		containerImage:        container.Image,
   339  		doneCh:                make(chan bool),
   340  		waitCh:                make(chan struct{}),
   341  		removeContainerOnExit: d.config.GC.Container,
   342  		net:                   net,
   343  	}
   344  
   345  	if err := handle.SetDriverState(h.buildState()); err != nil {
   346  		d.logger.Error("error encoding container occurred after startup, terminating container", "container_id", container.ID, "error", err)
   347  		dlogger.Stop()
   348  		pluginClient.Kill()
   349  		client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true})
   350  		return nil, nil, err
   351  	}
   352  
   353  	d.tasks.Set(cfg.ID, h)
   354  	go h.run()
   355  
   356  	return handle, net, nil
   357  }
   358  
   359  // createContainerClient is the subset of Docker Client methods used by the
   360  // createContainer method to ease testing subtle error conditions.
   361  type createContainerClient interface {
   362  	CreateContainer(docker.CreateContainerOptions) (*docker.Container, error)
   363  	InspectContainer(id string) (*docker.Container, error)
   364  	ListContainers(docker.ListContainersOptions) ([]docker.APIContainers, error)
   365  	RemoveContainer(opts docker.RemoveContainerOptions) error
   366  }
   367  
   368  // createContainer creates the container given the passed configuration. It
   369  // attempts to handle any transient Docker errors.
   370  func (d *Driver) createContainer(client createContainerClient, config docker.CreateContainerOptions,
   371  	driverConfig *TaskConfig) (*docker.Container, error) {
   372  	// Create a container
   373  	attempted := 0
   374  CREATE:
   375  	container, createErr := client.CreateContainer(config)
   376  	if createErr == nil {
   377  		return container, nil
   378  	}
   379  
   380  	d.logger.Debug("failed to create container", "container_name",
   381  		config.Name, "image_name", driverConfig.Image, "image_id", config.Config.Image,
   382  		"attempt", attempted+1, "error", createErr)
   383  
   384  	// Volume management tools like Portworx may not have detached a volume
   385  	// from a previous node before Nomad started a task replacement task.
   386  	// Treat these errors as recoverable so we retry.
   387  	if strings.Contains(strings.ToLower(createErr.Error()), "volume is attached on another node") {
   388  		return nil, nstructs.NewRecoverableError(createErr, true)
   389  	}
   390  
   391  	// If the container already exists determine whether it's already
   392  	// running or if it's dead and needs to be recreated.
   393  	if strings.Contains(strings.ToLower(createErr.Error()), "container already exists") {
   394  		containers, err := client.ListContainers(docker.ListContainersOptions{
   395  			All: true,
   396  		})
   397  		if err != nil {
   398  			d.logger.Error("failed to query list of containers matching name", "container_name", config.Name)
   399  			return nil, recoverableErrTimeouts(fmt.Errorf("Failed to query list of containers: %s", err))
   400  		}
   401  
    402  		// Delete matching containers.
    403  		// A "/" is added in front of the container name since Docker returns
    404  		// container names with a "/" prepended to the Nomad-generated names.
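        		//
        		// For example, a Nomad-created container named "redis-<alloc-id>"
        		// is reported by ListContainers as "/redis-<alloc-id>".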
   405  		containerName := "/" + config.Name
   406  		d.logger.Debug("searching for container to purge", "container_name", containerName)
   407  		for _, shimContainer := range containers {
   408  			d.logger.Debug("listed container", "names", hclog.Fmt("%+v", shimContainer.Names))
   409  			found := false
   410  			for _, name := range shimContainer.Names {
   411  				if name == containerName {
    412  					d.logger.Debug("found matching container", "container_name", containerName, "container_id", shimContainer.ID)
   413  					found = true
   414  					break
   415  				}
   416  			}
   417  
   418  			if !found {
   419  				continue
   420  			}
   421  
   422  			// Inspect the container and if the container isn't dead then return
   423  			// the container
   424  			container, err := client.InspectContainer(shimContainer.ID)
   425  			if err != nil {
   426  				err = fmt.Errorf("Failed to inspect container %s: %s", shimContainer.ID, err)
   427  
   428  				// This error is always recoverable as it could
   429  				// be caused by races between listing
   430  				// containers and this container being removed.
   431  				// See #2802
   432  				return nil, nstructs.NewRecoverableError(err, true)
   433  			}
   434  			if container != nil && container.State.Running {
   435  				return container, nil
   436  			}
   437  
   438  			err = client.RemoveContainer(docker.RemoveContainerOptions{
   439  				ID:    container.ID,
   440  				Force: true,
   441  			})
    442  			if err != nil {
    443  				d.logger.Error("failed to purge container", "container_id", container.ID)
    444  				return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err))
    445  			}
    446  
    447  			d.logger.Info("purged container", "container_id", container.ID)
   448  		}
   449  
   450  		if attempted < 5 {
   451  			attempted++
   452  			time.Sleep(1 * time.Second)
   453  			goto CREATE
   454  		}
   455  	} else if strings.Contains(strings.ToLower(createErr.Error()), "no such image") {
   456  		// There is still a very small chance this is possible even with the
   457  		// coordinator so retry.
   458  		return nil, nstructs.NewRecoverableError(createErr, true)
   459  	}
   460  
   461  	return nil, recoverableErrTimeouts(createErr)
   462  }
   463  
   464  // startContainer starts the passed container. It attempts to handle any
   465  // transient Docker errors.
   466  func (d *Driver) startContainer(c *docker.Container) error {
   467  	// Start a container
   468  	attempted := 0
   469  START:
   470  	startErr := client.StartContainer(c.ID, c.HostConfig)
   471  	if startErr == nil {
   472  		return nil
   473  	}
   474  
   475  	d.logger.Debug("failed to start container", "container_id", c.ID, "attempt", attempted+1, "error", startErr)
   476  
   477  	// If it is a 500 error it is likely we can retry and be successful
   478  	if strings.Contains(startErr.Error(), "API error (500)") {
   479  		if attempted < 5 {
   480  			attempted++
   481  			time.Sleep(1 * time.Second)
   482  			goto START
   483  		}
   484  		return nstructs.NewRecoverableError(startErr, true)
   485  	}
   486  
   487  	return recoverableErrTimeouts(startErr)
   488  }
   489  
   490  // createImage creates a docker image either by pulling it from a registry or by
   491  // loading it from the file system
   492  func (d *Driver) createImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (string, error) {
   493  	image := driverConfig.Image
   494  	repo, tag := parseDockerImage(image)
   495  
   496  	callerID := fmt.Sprintf("%s-%s", task.ID, task.Name)
   497  
   498  	// We're going to check whether the image is already downloaded. If the tag
   499  	// is "latest", or ForcePull is set, we have to check for a new version every time so we don't
   500  	// bother to check and cache the id here. We'll download first, then cache.
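        	//
        	// For example, an image pinned as "redis:3.2" that is already present
        	// locally is reused and its reference count incremented, whereas
        	// "redis:latest" (or any image with force_pull set) is pulled again.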
   501  	if driverConfig.ForcePull {
   502  		d.logger.Debug("force pulling image instead of inspecting local", "image_ref", dockerImageRef(repo, tag))
   503  	} else if tag != "latest" {
   504  		if dockerImage, _ := client.InspectImage(image); dockerImage != nil {
   505  			// Image exists so just increment its reference count
   506  			d.coordinator.IncrementImageReference(dockerImage.ID, image, callerID)
   507  			return dockerImage.ID, nil
   508  		}
   509  	}
   510  
   511  	// Load the image if specified
   512  	if driverConfig.LoadImage != "" {
   513  		return d.loadImage(task, driverConfig, client)
   514  	}
   515  
   516  	// Download the image
   517  	return d.pullImage(task, driverConfig, client, repo, tag)
   518  }
   519  
   520  // pullImage creates an image by pulling it from a docker registry
   521  func (d *Driver) pullImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client, repo, tag string) (id string, err error) {
   522  	authOptions, err := d.resolveRegistryAuthentication(driverConfig, repo)
   523  	if err != nil {
   524  		if driverConfig.AuthSoftFail {
   525  			d.logger.Warn("Failed to find docker repo auth", "repo", repo, "error", err)
   526  		} else {
   527  			return "", fmt.Errorf("Failed to find docker auth for repo %q: %v", repo, err)
   528  		}
   529  	}
   530  
   531  	if authIsEmpty(authOptions) {
   532  		d.logger.Debug("did not find docker auth for repo", "repo", repo)
   533  	}
   534  
   535  	d.eventer.EmitEvent(&drivers.TaskEvent{
   536  		TaskID:    task.ID,
   537  		AllocID:   task.AllocID,
   538  		TaskName:  task.Name,
   539  		Timestamp: time.Now(),
   540  		Message:   "Downloading image",
   541  		Annotations: map[string]string{
   542  			"image": dockerImageRef(repo, tag),
   543  		},
   544  	})
   545  
   546  	return d.coordinator.PullImage(driverConfig.Image, authOptions, task.ID, d.emitEventFunc(task))
   547  }
   548  
   549  func (d *Driver) emitEventFunc(task *drivers.TaskConfig) LogEventFn {
   550  	return func(msg string, annotations map[string]string) {
   551  		d.eventer.EmitEvent(&drivers.TaskEvent{
   552  			TaskID:      task.ID,
   553  			AllocID:     task.AllocID,
   554  			TaskName:    task.Name,
   555  			Timestamp:   time.Now(),
   556  			Message:     msg,
   557  			Annotations: annotations,
   558  		})
   559  	}
   560  }
   561  
   562  // authBackend encapsulates a function that resolves registry credentials.
   563  type authBackend func(string) (*docker.AuthConfiguration, error)
   564  
   565  // resolveRegistryAuthentication attempts to retrieve auth credentials for the
   566  // repo, trying all authentication-backends possible.
   567  func (d *Driver) resolveRegistryAuthentication(driverConfig *TaskConfig, repo string) (*docker.AuthConfiguration, error) {
   568  	return firstValidAuth(repo, []authBackend{
   569  		authFromTaskConfig(driverConfig),
   570  		authFromDockerConfig(d.config.Auth.Config),
   571  		authFromHelper(d.config.Auth.Helper),
   572  	})
   573  }
   574  
   575  // loadImage creates an image by loading it from the file system
   576  func (d *Driver) loadImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (id string, err error) {
   577  
   578  	archive := filepath.Join(task.TaskDir().LocalDir, driverConfig.LoadImage)
   579  	d.logger.Debug("loading image from disk", "archive", archive)
   580  
    581  	f, err := os.Open(archive)
    582  	if err != nil {
    583  		return "", fmt.Errorf("unable to open image archive: %v", err)
    584  	}
    585  	defer f.Close()
    586  
    587  	if err := client.LoadImage(docker.LoadImageOptions{InputStream: f}); err != nil {
    588  		return "", err
    589  	}
   590  
   591  	dockerImage, err := client.InspectImage(driverConfig.Image)
   592  	if err != nil {
   593  		return "", recoverableErrTimeouts(err)
   594  	}
   595  
   596  	d.coordinator.IncrementImageReference(dockerImage.ID, driverConfig.Image, task.ID)
   597  	return dockerImage.ID, nil
   598  }
   599  
   600  func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConfig) ([]string, error) {
   601  
   602  	allocDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SharedAllocDir, task.Env[taskenv.AllocDir])
   603  	taskLocalBind := fmt.Sprintf("%s:%s", task.TaskDir().LocalDir, task.Env[taskenv.TaskLocalDir])
   604  	secretDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SecretsDir, task.Env[taskenv.SecretsDir])
   605  	binds := []string{allocDirBind, taskLocalBind, secretDirBind}
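        	// These default binds typically look like "<shared alloc dir>:/alloc",
        	// "<task dir>/local:/local" and "<task dir>/secrets:/secrets"; the
        	// container-side paths come from the task environment and may differ.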
   606  
   607  	taskLocalBindVolume := driverConfig.VolumeDriver == ""
   608  
   609  	if !d.config.Volumes.Enabled && !taskLocalBindVolume {
   610  		return nil, fmt.Errorf("volumes are not enabled; cannot use volume driver %q", driverConfig.VolumeDriver)
   611  	}
   612  
   613  	for _, userbind := range driverConfig.Volumes {
    614  		// This assumes the host OS matches the container OS, which will no
    615  		// longer hold once Linux containers on Windows are supported.
   616  		src, dst, mode, err := parseVolumeSpec(userbind, runtime.GOOS)
   617  		if err != nil {
   618  			return nil, fmt.Errorf("invalid docker volume %q: %v", userbind, err)
   619  		}
   620  
    621  		// Paths inside the task dir are always allowed when using the default
    622  		// driver, and relative paths are always allowed as they mount within
    623  		// the container. When a VolumeDriver is set, we assume we receive a
    624  		// binding in the format volume-name:container-dest. Otherwise, we
    625  		// assume we receive a relative path binding in the format
    626  		// relative/to/task:/also/in/container.
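        		//
        		// For example, with the default driver a user bind of
        		// "data:/srv/data" expands to "<task dir>/data:/srv/data", while an
        		// absolute bind such as "/host/path:/srv:ro" is only permitted when
        		// volumes are enabled on the agent.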
   627  		if taskLocalBindVolume {
   628  			src = expandPath(task.TaskDir().Dir, src)
   629  		} else {
   630  			// Resolve dotted path segments
   631  			src = filepath.Clean(src)
   632  		}
   633  
   634  		if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, src) {
   635  			return nil, fmt.Errorf("volumes are not enabled; cannot mount host paths: %+q", userbind)
   636  		}
   637  
   638  		bind := src + ":" + dst
   639  		if mode != "" {
   640  			bind += ":" + mode
   641  		}
   642  		binds = append(binds, bind)
   643  	}
   644  
   645  	if selinuxLabel := d.config.Volumes.SelinuxLabel; selinuxLabel != "" {
   646  		// Apply SELinux Label to each volume
   647  		for i := range binds {
   648  			binds[i] = fmt.Sprintf("%s:%s", binds[i], selinuxLabel)
   649  		}
   650  	}
   651  
   652  	return binds, nil
   653  }
   654  
   655  func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig,
   656  	imageID string) (docker.CreateContainerOptions, error) {
   657  
   658  	logger := d.logger.With("task_name", task.Name)
   659  	var c docker.CreateContainerOptions
   660  	if task.Resources == nil {
   661  		// Guard against missing resources. We should never have been able to
   662  		// schedule a job without specifying this.
   663  		logger.Error("task.Resources is empty")
   664  		return c, fmt.Errorf("task.Resources is empty")
   665  	}
   666  
   667  	binds, err := d.containerBinds(task, driverConfig)
   668  	if err != nil {
   669  		return c, err
   670  	}
   671  	logger.Trace("binding volumes", "volumes", binds)
   672  
   673  	// create the config block that will later be consumed by go-dockerclient
   674  	config := &docker.Config{
   675  		Image:      imageID,
   676  		Entrypoint: driverConfig.Entrypoint,
   677  		Hostname:   driverConfig.Hostname,
   678  		User:       task.User,
   679  		Tty:        driverConfig.TTY,
   680  		OpenStdin:  driverConfig.Interactive,
   681  	}
   682  
   683  	if driverConfig.WorkDir != "" {
   684  		config.WorkingDir = driverConfig.WorkDir
   685  	}
   686  
   687  	hostConfig := &docker.HostConfig{
   688  		Memory:    task.Resources.LinuxResources.MemoryLimitBytes,
   689  		CPUShares: task.Resources.LinuxResources.CPUShares,
   690  
   691  		// Binds are used to mount a host volume into the container. We mount a
   692  		// local directory for storage and a shared alloc directory that can be
   693  		// used to share data between different tasks in the same task group.
   694  		Binds: binds,
   695  
   696  		StorageOpt:   driverConfig.StorageOpt,
   697  		VolumeDriver: driverConfig.VolumeDriver,
   698  
   699  		PidsLimit: driverConfig.PidsLimit,
   700  	}
   701  
   702  	if _, ok := task.DeviceEnv[nvidiaVisibleDevices]; ok {
   703  		if !d.gpuRuntime {
   704  			return c, fmt.Errorf("requested docker-runtime %q was not found", d.config.GPURuntimeName)
   705  		}
   706  		hostConfig.Runtime = d.config.GPURuntimeName
   707  	}
   708  
   709  	// Calculate CPU Quota
   710  	// cfs_quota_us is the time per core, so we must
   711  	// multiply the time by the number of cores available
   712  	// See https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu
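        	//
        	// For example (hypothetical numbers): with cpu_cfs_period = 100000us,
        	// 4 host cores, and PercentTicks = 0.5, the quota becomes
        	// int64(0.5 * 100000) * 4 = 200000us of CPU time per period.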
   713  	if driverConfig.CPUHardLimit {
   714  		numCores := runtime.NumCPU()
   715  		if driverConfig.CPUCFSPeriod < 0 || driverConfig.CPUCFSPeriod > 1000000 {
   716  			return c, fmt.Errorf("invalid value for cpu_cfs_period")
   717  		}
   718  		if driverConfig.CPUCFSPeriod == 0 {
   719  			driverConfig.CPUCFSPeriod = task.Resources.LinuxResources.CPUPeriod
   720  		}
   721  		hostConfig.CPUPeriod = driverConfig.CPUCFSPeriod
   722  		hostConfig.CPUQuota = int64(task.Resources.LinuxResources.PercentTicks*float64(driverConfig.CPUCFSPeriod)) * int64(numCores)
   723  	}
   724  
   725  	// Windows does not support MemorySwap/MemorySwappiness #2193
   726  	if runtime.GOOS == "windows" {
   727  		hostConfig.MemorySwap = 0
   728  		hostConfig.MemorySwappiness = -1
   729  	} else {
   730  		hostConfig.MemorySwap = task.Resources.LinuxResources.MemoryLimitBytes // MemorySwap is memory + swap.
   731  	}
   732  
   733  	loggingDriver := driverConfig.Logging.Type
   734  	if loggingDriver == "" {
   735  		loggingDriver = driverConfig.Logging.Driver
   736  	}
   737  
   738  	hostConfig.LogConfig = docker.LogConfig{
   739  		Type:   loggingDriver,
   740  		Config: driverConfig.Logging.Config,
   741  	}
   742  
   743  	logger.Debug("configured resources", "memory", hostConfig.Memory,
   744  		"cpu_shares", hostConfig.CPUShares, "cpu_quota", hostConfig.CPUQuota,
   745  		"cpu_period", hostConfig.CPUPeriod)
   746  	logger.Debug("binding directories", "binds", hclog.Fmt("%#v", hostConfig.Binds))
   747  
   748  	//  set privileged mode
   749  	if driverConfig.Privileged && !d.config.AllowPrivileged {
   750  		return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`)
   751  	}
   752  	hostConfig.Privileged = driverConfig.Privileged
   753  
   754  	// set capabilities
   755  	hostCapsWhitelistConfig := d.config.AllowCaps
   756  	hostCapsWhitelist := make(map[string]struct{})
   757  	for _, cap := range hostCapsWhitelistConfig {
   758  		cap = strings.ToLower(strings.TrimSpace(cap))
   759  		hostCapsWhitelist[cap] = struct{}{}
   760  	}
   761  
   762  	if _, ok := hostCapsWhitelist["all"]; !ok {
   763  		effectiveCaps, err := tweakCapabilities(
   764  			strings.Split(dockerBasicCaps, ","),
   765  			driverConfig.CapAdd,
   766  			driverConfig.CapDrop,
   767  		)
   768  		if err != nil {
   769  			return c, err
   770  		}
   771  		var missingCaps []string
   772  		for _, cap := range effectiveCaps {
   773  			cap = strings.ToLower(cap)
   774  			if _, ok := hostCapsWhitelist[cap]; !ok {
   775  				missingCaps = append(missingCaps, cap)
   776  			}
   777  		}
   778  		if len(missingCaps) > 0 {
   779  			return c, fmt.Errorf("Docker driver doesn't have the following caps whitelisted on this Nomad agent: %s", missingCaps)
   780  		}
   781  	}
   782  
   783  	hostConfig.CapAdd = driverConfig.CapAdd
   784  	hostConfig.CapDrop = driverConfig.CapDrop
   785  
   786  	// set SHM size
   787  	if driverConfig.ShmSize != 0 {
   788  		hostConfig.ShmSize = driverConfig.ShmSize
   789  	}
   790  
   791  	// set DNS servers
   792  	for _, ip := range driverConfig.DNSServers {
   793  		if net.ParseIP(ip) != nil {
   794  			hostConfig.DNS = append(hostConfig.DNS, ip)
   795  		} else {
   796  			logger.Error("invalid ip address for container dns server", "ip", ip)
   797  		}
   798  	}
   799  
   800  	// Setup devices
   801  	for _, device := range driverConfig.Devices {
   802  		dd, err := device.toDockerDevice()
   803  		if err != nil {
   804  			return c, err
   805  		}
   806  		hostConfig.Devices = append(hostConfig.Devices, dd)
   807  	}
   808  	for _, device := range task.Devices {
   809  		hostConfig.Devices = append(hostConfig.Devices, docker.Device{
   810  			PathOnHost:        device.HostPath,
   811  			PathInContainer:   device.TaskPath,
   812  			CgroupPermissions: device.Permissions,
   813  		})
   814  	}
   815  
   816  	// Setup mounts
   817  	for _, m := range driverConfig.Mounts {
   818  		hm, err := m.toDockerHostMount()
   819  		if err != nil {
   820  			return c, err
   821  		}
   822  
   823  		if hm.Type == "bind" {
   824  			hm.Source = expandPath(task.TaskDir().Dir, hm.Source)
   825  
   826  			// paths inside alloc dir are always allowed as they mount within a container, and treated as relative to task dir
   827  			if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, hm.Source) {
   828  				return c, fmt.Errorf("volumes are not enabled; cannot mount host path: %q %q", hm.Source, task.AllocDir)
   829  			}
   830  		}
   831  
   832  		hostConfig.Mounts = append(hostConfig.Mounts, hm)
   833  	}
   834  	for _, m := range task.Mounts {
   835  		hostConfig.Mounts = append(hostConfig.Mounts, docker.HostMount{
   836  			Type:     "bind",
   837  			Target:   m.TaskPath,
   838  			Source:   m.HostPath,
   839  			ReadOnly: m.Readonly,
   840  		})
   841  	}
   842  
   843  	// set DNS search domains and extra hosts
   844  	hostConfig.DNSSearch = driverConfig.DNSSearchDomains
   845  	hostConfig.DNSOptions = driverConfig.DNSOptions
   846  	hostConfig.ExtraHosts = driverConfig.ExtraHosts
   847  
   848  	hostConfig.IpcMode = driverConfig.IPCMode
   849  	hostConfig.PidMode = driverConfig.PidMode
   850  	hostConfig.UTSMode = driverConfig.UTSMode
   851  	hostConfig.UsernsMode = driverConfig.UsernsMode
   852  	hostConfig.SecurityOpt = driverConfig.SecurityOpt
   853  	hostConfig.Sysctls = driverConfig.Sysctl
   854  
   855  	ulimits, err := sliceMergeUlimit(driverConfig.Ulimit)
   856  	if err != nil {
   857  		return c, fmt.Errorf("failed to parse ulimit configuration: %v", err)
   858  	}
   859  	hostConfig.Ulimits = ulimits
   860  
   861  	hostConfig.ReadonlyRootfs = driverConfig.ReadonlyRootfs
   862  
   863  	hostConfig.NetworkMode = driverConfig.NetworkMode
   864  	if hostConfig.NetworkMode == "" {
   865  		// docker default
   866  		logger.Debug("networking mode not specified; using default", "network_mode", defaultNetworkMode)
   867  		hostConfig.NetworkMode = defaultNetworkMode
   868  	}
   869  
   870  	// Setup port mapping and exposed ports
   871  	if len(task.Resources.NomadResources.Networks) == 0 {
   872  		logger.Debug("no network interfaces are available")
   873  		if len(driverConfig.PortMap) > 0 {
   874  			return c, fmt.Errorf("Trying to map ports but no network interface is available")
   875  		}
   876  	} else {
   877  		// TODO add support for more than one network
   878  		network := task.Resources.NomadResources.Networks[0]
   879  		publishedPorts := map[docker.Port][]docker.PortBinding{}
   880  		exposedPorts := map[docker.Port]struct{}{}
   881  
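        		// For example (hypothetical label/values): a reserved port with
        		// label "http" and value 8080 combined with port_map { http = 80 }
        		// publishes host port 8080 to container port 80 for both tcp and
        		// udp, and exposes 80/tcp and 80/udp on the container.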
   882  		for _, port := range network.ReservedPorts {
   883  			// By default we will map the allocated port 1:1 to the container
   884  			containerPortInt := port.Value
   885  
   886  			// If the user has mapped a port using port_map we'll change it here
   887  			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
   888  				containerPortInt = mapped
   889  			}
   890  
   891  			hostPortStr := strconv.Itoa(port.Value)
   892  			containerPort := docker.Port(strconv.Itoa(containerPortInt))
   893  
   894  			publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr)
   895  			publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr)
   896  			logger.Debug("allocated static port", "ip", network.IP, "port", port.Value)
   897  
   898  			exposedPorts[containerPort+"/tcp"] = struct{}{}
   899  			exposedPorts[containerPort+"/udp"] = struct{}{}
   900  			logger.Debug("exposed port", "port", port.Value)
   901  		}
   902  
   903  		for _, port := range network.DynamicPorts {
   904  			// By default we will map the allocated port 1:1 to the container
   905  			containerPortInt := port.Value
   906  
   907  			// If the user has mapped a port using port_map we'll change it here
   908  			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
   909  				containerPortInt = mapped
   910  			}
   911  
   912  			hostPortStr := strconv.Itoa(port.Value)
   913  			containerPort := docker.Port(strconv.Itoa(containerPortInt))
   914  
   915  			publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr)
   916  			publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr)
   917  			logger.Debug("allocated mapped port", "ip", network.IP, "port", port.Value)
   918  
   919  			exposedPorts[containerPort+"/tcp"] = struct{}{}
   920  			exposedPorts[containerPort+"/udp"] = struct{}{}
   921  			logger.Debug("exposed port", "port", containerPort)
   922  		}
   923  
   924  		hostConfig.PortBindings = publishedPorts
   925  		config.ExposedPorts = exposedPorts
   926  	}
   927  
   928  	// If the user specified a custom command to run, we'll inject it here.
   929  	if driverConfig.Command != "" {
   930  		// Validate command
   931  		if err := validateCommand(driverConfig.Command, "args"); err != nil {
   932  			return c, err
   933  		}
   934  
   935  		cmd := []string{driverConfig.Command}
   936  		if len(driverConfig.Args) != 0 {
   937  			cmd = append(cmd, driverConfig.Args...)
   938  		}
   939  		logger.Debug("setting container startup command", "command", strings.Join(cmd, " "))
   940  		config.Cmd = cmd
   941  	} else if len(driverConfig.Args) != 0 {
   942  		config.Cmd = driverConfig.Args
   943  	}
   944  
   945  	if len(driverConfig.Labels) > 0 {
   946  		config.Labels = driverConfig.Labels
   947  		logger.Debug("applied labels on the container", "labels", config.Labels)
   948  	}
   949  
   950  	config.Env = task.EnvList()
   951  
   952  	containerName := fmt.Sprintf("%s-%s", strings.Replace(task.Name, "/", "_", -1), task.AllocID)
   953  	logger.Debug("setting container name", "container_name", containerName)
   954  
   955  	var networkingConfig *docker.NetworkingConfig
   956  	if len(driverConfig.NetworkAliases) > 0 || driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" {
   957  		networkingConfig = &docker.NetworkingConfig{
   958  			EndpointsConfig: map[string]*docker.EndpointConfig{
   959  				hostConfig.NetworkMode: {},
   960  			},
   961  		}
   962  	}
   963  
   964  	if len(driverConfig.NetworkAliases) > 0 {
   965  		networkingConfig.EndpointsConfig[hostConfig.NetworkMode].Aliases = driverConfig.NetworkAliases
   966  		logger.Debug("setting container network aliases", "network_mode", hostConfig.NetworkMode,
   967  			"network_aliases", strings.Join(driverConfig.NetworkAliases, ", "))
   968  	}
   969  
   970  	if driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" {
   971  		networkingConfig.EndpointsConfig[hostConfig.NetworkMode].IPAMConfig = &docker.EndpointIPAMConfig{
   972  			IPv4Address: driverConfig.IPv4Address,
   973  			IPv6Address: driverConfig.IPv6Address,
   974  		}
   975  		logger.Debug("setting container network configuration", "network_mode", hostConfig.NetworkMode,
   976  			"ipv4_address", driverConfig.IPv4Address, "ipv6_address", driverConfig.IPv6Address)
   977  	}
   978  
   979  	if driverConfig.MacAddress != "" {
   980  		config.MacAddress = driverConfig.MacAddress
   981  		logger.Debug("setting container mac address", "mac_address", config.MacAddress)
   982  	}
   983  
   984  	return docker.CreateContainerOptions{
   985  		Name:             containerName,
   986  		Config:           config,
   987  		HostConfig:       hostConfig,
   988  		NetworkingConfig: networkingConfig,
   989  	}, nil
   990  }
   991  
   992  // detectIP of Docker container. Returns the first IP found as well as true if
   993  // the IP should be advertised (bridge network IPs return false). Returns an
   994  // empty string and false if no IP could be found.
   995  func (d *Driver) detectIP(c *docker.Container, driverConfig *TaskConfig) (string, bool) {
   996  	if c.NetworkSettings == nil {
   997  		// This should only happen if there's been a coding error (such
   998  		// as not calling InspectContainer after CreateContainer). Code
   999  		// defensively in case the Docker API changes subtly.
  1000  		d.logger.Error("no network settings for container", "container_id", c.ID)
  1001  		return "", false
  1002  	}
  1003  
  1004  	ip, ipName := "", ""
  1005  	auto := false
  1006  	for name, net := range c.NetworkSettings.Networks {
  1007  		if net.IPAddress == "" {
  1008  			// Ignore networks without an IP address
  1009  			continue
  1010  		}
  1011  
  1012  		ip = net.IPAddress
  1013  		if driverConfig.AdvertiseIPv6Addr {
  1014  			ip = net.GlobalIPv6Address
  1015  			auto = true
  1016  		}
  1017  		ipName = name
  1018  
  1019  		// Don't auto-advertise IPs for default networks (bridge on
  1020  		// Linux, nat on Windows)
  1021  		if name != "bridge" && name != "nat" {
  1022  			auto = true
  1023  		}
  1024  
  1025  		break
  1026  	}
  1027  
  1028  	if n := len(c.NetworkSettings.Networks); n > 1 {
  1029  		d.logger.Warn("multiple Docker networks for container found but Nomad only supports 1",
  1030  			"total_networks", n,
  1031  			"container_id", c.ID,
  1032  			"container_network", ipName)
  1033  	}
  1034  
  1035  	return ip, auto
  1036  }
  1037  
  1038  // validateCommand validates that the command only has a single value and
  1039  // returns a user friendly error message telling them to use the passed
  1040  // argField.
  1041  func validateCommand(command, argField string) error {
  1042  	trimmed := strings.TrimSpace(command)
  1043  	if len(trimmed) == 0 {
  1044  		return fmt.Errorf("command empty: %q", command)
  1045  	}
  1046  
  1047  	if len(trimmed) != len(command) {
  1048  		return fmt.Errorf("command contains extra white space: %q", command)
  1049  	}
  1050  
  1051  	return nil
  1052  }
  1053  
  1054  func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
  1055  	h, ok := d.tasks.Get(taskID)
  1056  	if !ok {
  1057  		return nil, drivers.ErrTaskNotFound
  1058  	}
  1059  	ch := make(chan *drivers.ExitResult)
  1060  	go d.handleWait(ctx, ch, h)
  1061  	return ch, nil
  1062  }
  1063  
  1064  func (d *Driver) handleWait(ctx context.Context, ch chan *drivers.ExitResult, h *taskHandle) {
  1065  	defer close(ch)
  1066  	select {
  1067  	case <-h.waitCh:
  1068  		ch <- h.ExitResult()
  1069  	case <-ctx.Done():
  1070  		ch <- &drivers.ExitResult{
  1071  			Err: ctx.Err(),
  1072  		}
  1073  	}
  1074  }
  1075  
  1076  func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
  1077  	h, ok := d.tasks.Get(taskID)
  1078  	if !ok {
  1079  		return drivers.ErrTaskNotFound
  1080  	}
  1081  
  1082  	if signal == "" {
  1083  		signal = "SIGINT"
  1084  	}
  1085  
  1086  	// Windows Docker daemon does not support SIGINT, SIGTERM is the semantic equivalent that
  1087  	// allows for graceful shutdown before being followed up by a SIGKILL.
  1088  	// Supported signals:
  1089  	//   https://github.com/moby/moby/blob/0111ee70874a4947d93f64b672f66a2a35071ee2/pkg/signal/signal_windows.go#L17-L26
  1090  	if runtime.GOOS == "windows" && signal == "SIGINT" {
  1091  		signal = "SIGTERM"
  1092  	}
  1093  
  1094  	sig, err := signals.Parse(signal)
  1095  	if err != nil {
  1096  		return fmt.Errorf("failed to parse signal: %v", err)
  1097  	}
  1098  
  1099  	return h.Kill(timeout, sig)
  1100  }
  1101  
  1102  func (d *Driver) DestroyTask(taskID string, force bool) error {
  1103  	h, ok := d.tasks.Get(taskID)
  1104  	if !ok {
  1105  		return drivers.ErrTaskNotFound
  1106  	}
  1107  
  1108  	c, err := h.client.InspectContainer(h.containerID)
  1109  	if err != nil {
  1110  		switch err.(type) {
  1111  		case *docker.NoSuchContainer:
  1112  			h.logger.Info("container was removed out of band, will proceed with DestroyTask",
  1113  				"error", err)
  1114  		default:
  1115  			return fmt.Errorf("failed to inspect container state: %v", err)
  1116  		}
  1117  	} else {
  1118  		if c.State.Running {
  1119  			if !force {
  1120  				return fmt.Errorf("must call StopTask for the given task before Destroy or set force to true")
  1121  			}
  1122  			if err := h.client.StopContainer(h.containerID, 0); err != nil {
  1123  				h.logger.Warn("failed to stop container during destroy", "error", err)
  1124  			}
  1125  		}
  1126  
  1127  		if h.removeContainerOnExit {
  1128  			if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, RemoveVolumes: true, Force: true}); err != nil {
  1129  				h.logger.Error("error removing container", "error", err)
  1130  			}
  1131  		} else {
  1132  			h.logger.Debug("not removing container due to config")
  1133  		}
  1134  	}
  1135  
  1136  	if err := d.cleanupImage(h); err != nil {
  1137  		h.logger.Error("failed to cleanup image after destroying container",
  1138  			"error", err)
  1139  	}
  1140  
  1141  	d.tasks.Delete(taskID)
  1142  	return nil
  1143  }
  1144  
  1145  // cleanupImage removes a Docker image. No error is returned if the image
  1146  // doesn't exist or is still in use. Requires the global client to already be
  1147  // initialized.
  1148  func (d *Driver) cleanupImage(handle *taskHandle) error {
  1149  	if !d.config.GC.Image {
  1150  		return nil
  1151  	}
  1152  
  1153  	d.coordinator.RemoveImage(handle.containerImage, handle.task.ID)
  1154  
  1155  	return nil
  1156  }
  1157  
  1158  func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
  1159  	h, ok := d.tasks.Get(taskID)
  1160  	if !ok {
  1161  		return nil, drivers.ErrTaskNotFound
  1162  	}
  1163  
  1164  	container, err := client.InspectContainer(h.containerID)
  1165  	if err != nil {
  1166  		return nil, fmt.Errorf("failed to inspect container %q: %v", h.containerID, err)
  1167  	}
  1168  	status := &drivers.TaskStatus{
  1169  		ID:          h.task.ID,
  1170  		Name:        h.task.Name,
  1171  		StartedAt:   container.State.StartedAt,
  1172  		CompletedAt: container.State.FinishedAt,
  1173  		DriverAttributes: map[string]string{
  1174  			"container_id": container.ID,
  1175  		},
  1176  		NetworkOverride: h.net,
  1177  		ExitResult:      h.ExitResult(),
  1178  	}
  1179  
  1180  	status.State = drivers.TaskStateUnknown
  1181  	if container.State.Running {
  1182  		status.State = drivers.TaskStateRunning
  1183  	}
  1184  	if container.State.Dead {
  1185  		status.State = drivers.TaskStateExited
  1186  	}
  1187  
  1188  	return status, nil
  1189  }
  1190  
  1191  func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
  1192  	h, ok := d.tasks.Get(taskID)
  1193  	if !ok {
  1194  		return nil, drivers.ErrTaskNotFound
  1195  	}
  1196  
  1197  	return h.Stats(ctx, interval)
  1198  }
  1199  
  1200  func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
  1201  	return d.eventer.TaskEvents(ctx)
  1202  }
  1203  
  1204  func (d *Driver) SignalTask(taskID string, signal string) error {
  1205  	h, ok := d.tasks.Get(taskID)
  1206  	if !ok {
  1207  		return drivers.ErrTaskNotFound
  1208  	}
  1209  
  1210  	sig, err := signals.Parse(signal)
  1211  	if err != nil {
  1212  		return fmt.Errorf("failed to parse signal: %v", err)
  1213  	}
  1214  
  1215  	return h.Signal(sig)
  1216  }
  1217  
  1218  func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
  1219  	h, ok := d.tasks.Get(taskID)
  1220  	if !ok {
  1221  		return nil, drivers.ErrTaskNotFound
  1222  	}
  1223  
  1224  	if len(cmd) == 0 {
  1225  		return nil, fmt.Errorf("cmd is required, but was empty")
  1226  	}
  1227  
  1228  	ctx, cancel := context.WithTimeout(context.Background(), timeout)
  1229  	defer cancel()
  1230  
  1231  	return h.Exec(ctx, cmd[0], cmd[1:])
  1232  }
  1233  
  1234  var _ drivers.ExecTaskStreamingDriver = (*Driver)(nil)
  1235  
  1236  func (d *Driver) ExecTaskStreaming(ctx context.Context, taskID string, opts *drivers.ExecOptions) (*drivers.ExitResult, error) {
  1237  	defer opts.Stdout.Close()
  1238  	defer opts.Stderr.Close()
  1239  
  1240  	done := make(chan interface{})
  1241  	defer close(done)
  1242  
  1243  	h, ok := d.tasks.Get(taskID)
  1244  	if !ok {
  1245  		return nil, drivers.ErrTaskNotFound
  1246  	}
  1247  
  1248  	if len(opts.Command) == 0 {
  1249  		return nil, fmt.Errorf("command is required but was empty")
  1250  	}
  1251  
  1252  	createExecOpts := docker.CreateExecOptions{
  1253  		AttachStdin:  true,
  1254  		AttachStdout: true,
  1255  		AttachStderr: true,
  1256  		Tty:          opts.Tty,
  1257  		Cmd:          opts.Command,
  1258  		Container:    h.containerID,
  1259  		Context:      ctx,
  1260  	}
  1261  	exec, err := h.client.CreateExec(createExecOpts)
  1262  	if err != nil {
  1263  		return nil, fmt.Errorf("failed to create exec object: %v", err)
  1264  	}
  1265  
  1266  	go func() {
  1267  		for {
  1268  			select {
  1269  			case <-ctx.Done():
  1270  				return
  1271  			case <-done:
  1272  				return
  1273  			case s, ok := <-opts.ResizeCh:
  1274  				if !ok {
  1275  					return
  1276  				}
  1277  				client.ResizeExecTTY(exec.ID, s.Height, s.Width)
  1278  			}
  1279  		}
  1280  	}()
  1281  
  1282  	startOpts := docker.StartExecOptions{
  1283  		Detach: false,
  1284  
  1285  		// When running in TTY, we must use a raw terminal.
  1286  		// If not, we set RawTerminal to false to allow docker client
  1287  		// to interpret special stdout/stderr messages
  1288  		Tty:         opts.Tty,
  1289  		RawTerminal: opts.Tty,
  1290  
  1291  		InputStream:  opts.Stdin,
  1292  		OutputStream: opts.Stdout,
  1293  		ErrorStream:  opts.Stderr,
  1294  		Context:      ctx,
  1295  	}
  1296  	if err := client.StartExec(exec.ID, startOpts); err != nil {
  1297  		return nil, fmt.Errorf("failed to start exec: %v", err)
  1298  	}
  1299  
   1300  	// StartExec returns after the process completes, but InspectExec seems
   1301  	// to have a delay in reporting the exit status code
  1302  
  1303  	const execTerminatingTimeout = 3 * time.Second
  1304  	start := time.Now()
  1305  	var res *docker.ExecInspect
   1306  	for (res == nil || res.Running) && time.Since(start) <= execTerminatingTimeout {
  1307  		res, err = client.InspectExec(exec.ID)
  1308  		if err != nil {
  1309  			return nil, fmt.Errorf("failed to inspect exec result: %v", err)
  1310  		}
  1311  		time.Sleep(50 * time.Millisecond)
  1312  	}
  1313  
  1314  	if res == nil || res.Running {
  1315  		return nil, fmt.Errorf("failed to retrieve exec result")
  1316  	}
  1317  
  1318  	return &drivers.ExitResult{
  1319  		ExitCode: res.ExitCode,
  1320  	}, nil
  1321  }
  1322  
  1323  // dockerClients creates two *docker.Client, one for long running operations and
  1324  // the other for shorter operations. In test / dev mode we can use ENV vars to
  1325  // connect to the docker daemon. In production mode we will read docker.endpoint
  1326  // from the config file.
  1327  func (d *Driver) dockerClients() (*docker.Client, *docker.Client, error) {
  1328  	createClientsLock.Lock()
  1329  	defer createClientsLock.Unlock()
  1330  
  1331  	if client != nil && waitClient != nil {
  1332  		return client, waitClient, nil
  1333  	}
  1334  
  1335  	var err error
  1336  
   1337  	// Only initialize the client if it hasn't yet been done
  1338  	if client == nil {
  1339  		client, err = d.newDockerClient(dockerTimeout)
  1340  		if err != nil {
  1341  			return nil, nil, err
  1342  		}
  1343  	}
  1344  
  1345  	// Only initialize the waitClient if it hasn't yet been done
  1346  	if waitClient == nil {
  1347  		waitClient, err = d.newDockerClient(0 * time.Minute)
  1348  		if err != nil {
  1349  			return nil, nil, err
  1350  		}
  1351  	}
  1352  
  1353  	return client, waitClient, nil
  1354  }
  1355  
  1356  // newDockerClient creates a new *docker.Client with a configurable timeout
  1357  func (d *Driver) newDockerClient(timeout time.Duration) (*docker.Client, error) {
  1358  	var err error
  1359  	var merr multierror.Error
  1360  	var newClient *docker.Client
  1361  
  1362  	// Default to using whatever is configured in docker.endpoint. If this is
  1363  	// not specified we'll fall back on NewClientFromEnv which reads config from
  1364  	// the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and
  1365  	// DOCKER_CERT_PATH. This allows us to lock down the config in production
  1366  	// but also accept the standard ENV configs for dev and test.
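        	//
        	// For example, DOCKER_HOST=unix:///var/run/docker.sock for a local
        	// daemon, or DOCKER_HOST=tcp://127.0.0.1:2376 with DOCKER_TLS_VERIFY=1
        	// and DOCKER_CERT_PATH pointing at the client certificates.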
  1367  	dockerEndpoint := d.config.Endpoint
  1368  	if dockerEndpoint != "" {
  1369  		cert := d.config.TLS.Cert
  1370  		key := d.config.TLS.Key
  1371  		ca := d.config.TLS.CA
  1372  
  1373  		if cert+key+ca != "" {
  1374  			d.logger.Debug("using TLS client connection", "endpoint", dockerEndpoint)
  1375  			newClient, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca)
  1376  			if err != nil {
  1377  				merr.Errors = append(merr.Errors, err)
  1378  			}
  1379  		} else {
  1380  			d.logger.Debug("using standard client connection", "endpoint", dockerEndpoint)
  1381  			newClient, err = docker.NewClient(dockerEndpoint)
  1382  			if err != nil {
  1383  				merr.Errors = append(merr.Errors, err)
  1384  			}
  1385  		}
  1386  	} else {
  1387  		d.logger.Debug("using client connection initialized from environment")
  1388  		newClient, err = docker.NewClientFromEnv()
  1389  		if err != nil {
  1390  			merr.Errors = append(merr.Errors, err)
  1391  		}
  1392  	}
  1393  
  1394  	if timeout != 0 && newClient != nil {
  1395  		newClient.SetTimeout(timeout)
  1396  	}
  1397  	return newClient, merr.ErrorOrNil()
  1398  }
  1399  
  1400  func sliceMergeUlimit(ulimitsRaw map[string]string) ([]docker.ULimit, error) {
  1401  	var ulimits []docker.ULimit
  1402  
  1403  	for name, ulimitRaw := range ulimitsRaw {
  1404  		if len(ulimitRaw) == 0 {
  1405  			return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %q, cannot be empty", name, ulimitRaw)
  1406  		}
  1407  		// hard limit is optional
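        		// e.g. "2048" is treated as "2048:2048" (soft == hard), while
        		// "2048:4096" sets a soft limit of 2048 and a hard limit of 4096.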
   1408  		if !strings.Contains(ulimitRaw, ":") {
  1409  			ulimitRaw = ulimitRaw + ":" + ulimitRaw
  1410  		}
  1411  
  1412  		splitted := strings.SplitN(ulimitRaw, ":", 2)
  1413  		if len(splitted) < 2 {
  1414  			return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %v", name, ulimitRaw)
  1415  		}
  1416  		soft, err := strconv.Atoi(splitted[0])
  1417  		if err != nil {
  1418  			return []docker.ULimit{}, fmt.Errorf("Malformed soft ulimit %v: %v", name, ulimitRaw)
  1419  		}
  1420  		hard, err := strconv.Atoi(splitted[1])
  1421  		if err != nil {
  1422  			return []docker.ULimit{}, fmt.Errorf("Malformed hard ulimit %v: %v", name, ulimitRaw)
  1423  		}
  1424  
  1425  		ulimit := docker.ULimit{
  1426  			Name: name,
  1427  			Soft: int64(soft),
  1428  			Hard: int64(hard),
  1429  		}
  1430  		ulimits = append(ulimits, ulimit)
  1431  	}
  1432  	return ulimits, nil
  1433  }
  1434  
  1435  func (d *Driver) Shutdown() {
  1436  	d.signalShutdown()
  1437  }