github.com/ilhicas/nomad@v0.11.8/drivers/docker/driver.go

     1  package docker
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/json"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"net"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"strconv"
    14  	"strings"
    15  	"sync"
    16  	"time"
    17  
    18  	docker "github.com/fsouza/go-dockerclient"
    19  	"github.com/hashicorp/consul-template/signals"
    20  	hclog "github.com/hashicorp/go-hclog"
    21  	multierror "github.com/hashicorp/go-multierror"
    22  	plugin "github.com/hashicorp/go-plugin"
    23  	"github.com/hashicorp/nomad/client/taskenv"
    24  	"github.com/hashicorp/nomad/drivers/docker/docklog"
    25  	"github.com/hashicorp/nomad/drivers/shared/eventer"
    26  	nstructs "github.com/hashicorp/nomad/nomad/structs"
    27  	"github.com/hashicorp/nomad/plugins/base"
    28  	"github.com/hashicorp/nomad/plugins/drivers"
    29  	"github.com/hashicorp/nomad/plugins/shared/structs"
    30  	pstructs "github.com/hashicorp/nomad/plugins/shared/structs"
    31  )
    32  
    33  var (
    34  	// createClientsLock is a lock that protects reading/writing global client
    35  	// variables
    36  	createClientsLock sync.Mutex
    37  
    38  	// client is a docker client with a timeout of 5 minutes. This is used for
    39  	// all operations against the docker daemon that are not long running, such
    40  	// as creating and killing containers.
    41  	client *docker.Client
    42  
    43  	// waitClient is a docker client with no timeouts. This is used for long
    44  	// running operations such as waiting on containers and collecting stats
    45  	waitClient *docker.Client
    46  
    47  	dockerTransientErrs = []string{
    48  		"Client.Timeout exceeded while awaiting headers",
    49  		"EOF",
    50  		"API error (500)",
    51  	}
    52  
    53  	// recoverableErrTimeouts returns a recoverable error if the error was due
    54  	// to timeouts
    55  	recoverableErrTimeouts = func(err error) error {
    56  		r := false
    57  		if strings.Contains(err.Error(), "Client.Timeout exceeded while awaiting headers") ||
    58  			strings.Contains(err.Error(), "EOF") {
    59  			r = true
    60  		}
    61  		return nstructs.NewRecoverableError(err, r)
    62  	}
    63  
    64  	// taskHandleVersion is the version of task handle that this driver sets and
    65  	// knows how to decode driver state from
    66  	taskHandleVersion = 1
    67  
    68  	// Nvidia-container-runtime environment variable names
    69  	nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
    70  )
    71  
    72  const (
    73  	dockerLabelAllocID = "com.hashicorp.nomad.alloc_id"
    74  )
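
        // createContainerConfig below sets this label on every container the driver
        // creates, so a container started for an allocation can be recognized from
        // its labels. Illustrative (not real) alloc ID:
        //
        //   "Labels": {
        //       "com.hashicorp.nomad.alloc_id": "b1b87abd-example-alloc-id"
        //   }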
    75  
    76  type Driver struct {
    77  	// eventer is used to handle multiplexing of TaskEvents calls such that an
    78  	// event can be broadcast to all callers
    79  	eventer *eventer.Eventer
    80  
    81  	// config contains the runtime configuration for the driver set by the
    82  	// SetConfig RPC
    83  	config *DriverConfig
    84  
    85  	// clientConfig contains a driver specific subset of the Nomad client
    86  	// configuration
    87  	clientConfig *base.ClientDriverConfig
    88  
    89  	// ctx is the context for the driver. It is passed to other subsystems to
    90  	// coordinate shutdown
    91  	ctx context.Context
    92  
    93  	// tasks is the in memory datastore mapping taskIDs to taskHandles
    94  	tasks *taskStore
    95  
    96  	// coordinator is what tracks multiple image pulls against the same docker image
    97  	coordinator *dockerCoordinator
    98  
    99  	// logger will log to the Nomad agent
   100  	logger hclog.Logger
   101  
   102  	// gpuRuntime indicates nvidia-docker runtime availability
   103  	gpuRuntime bool
   104  
   105  	// A tri-state boolean to know if the fingerprinting has happened and
   106  	// whether it has been successful
   107  	fingerprintSuccess *bool
   108  	fingerprintLock    sync.RWMutex
   109  
   110  	// A boolean to know if the docker driver has ever been correctly detected
   111  	// for use during fingerprinting.
   112  	detected     bool
   113  	detectedLock sync.RWMutex
   114  
   115  	reconciler *containerReconciler
   116  }
   117  
   118  // NewDockerDriver returns a docker implementation of a driver plugin
   119  func NewDockerDriver(ctx context.Context, logger hclog.Logger) drivers.DriverPlugin {
   120  	logger = logger.Named(pluginName)
   121  	return &Driver{
   122  		eventer: eventer.NewEventer(ctx, logger),
   123  		config:  &DriverConfig{},
   124  		tasks:   newTaskStore(),
   125  		ctx:     ctx,
   126  		logger:  logger,
   127  	}
   128  }
   129  
   130  func (d *Driver) reattachToDockerLogger(reattachConfig *structs.ReattachConfig) (docklog.DockerLogger, *plugin.Client, error) {
   131  	reattach, err := pstructs.ReattachConfigToGoPlugin(reattachConfig)
   132  	if err != nil {
   133  		return nil, nil, err
   134  	}
   135  
   136  	dlogger, dloggerPluginClient, err := docklog.ReattachDockerLogger(reattach)
   137  	if err != nil {
   138  		return nil, nil, fmt.Errorf("failed to reattach to docker logger process: %v", err)
   139  	}
   140  
   141  	return dlogger, dloggerPluginClient, nil
   142  }
   143  
   144  func (d *Driver) setupNewDockerLogger(container *docker.Container, cfg *drivers.TaskConfig, startTime time.Time) (docklog.DockerLogger, *plugin.Client, error) {
   145  	dlogger, pluginClient, err := docklog.LaunchDockerLogger(d.logger)
   146  	if err != nil {
   147  		if pluginClient != nil {
   148  			pluginClient.Kill()
   149  		}
   150  		return nil, nil, fmt.Errorf("failed to launch docker logger plugin: %v", err)
   151  	}
   152  
   153  	if err := dlogger.Start(&docklog.StartOpts{
   154  		Endpoint:    d.config.Endpoint,
   155  		ContainerID: container.ID,
   156  		TTY:         container.Config.Tty,
   157  		Stdout:      cfg.StdoutPath,
   158  		Stderr:      cfg.StderrPath,
   159  		TLSCert:     d.config.TLS.Cert,
   160  		TLSKey:      d.config.TLS.Key,
   161  		TLSCA:       d.config.TLS.CA,
   162  		StartTime:   startTime.Unix(),
   163  	}); err != nil {
   164  		pluginClient.Kill()
   165  		return nil, nil, fmt.Errorf("failed to launch docker logger process %s: %v", container.ID, err)
   166  	}
   167  
   168  	return dlogger, pluginClient, nil
   169  }
   170  
   171  func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error {
   172  	if _, ok := d.tasks.Get(handle.Config.ID); ok {
   173  		return nil
   174  	}
   175  
   176  	// COMPAT(0.10): pre 0.9 upgrade path check
   177  	if handle.Version == 0 {
   178  		return d.recoverPre09Task(handle)
   179  	}
   180  
   181  	var handleState taskHandleState
   182  	if err := handle.GetDriverState(&handleState); err != nil {
   183  		return fmt.Errorf("failed to decode driver task state: %v", err)
   184  	}
   185  
   186  	client, _, err := d.dockerClients()
   187  	if err != nil {
   188  		return fmt.Errorf("failed to get docker client: %v", err)
   189  	}
   190  
   191  	container, err := client.InspectContainer(handleState.ContainerID)
   192  	if err != nil {
   193  		return fmt.Errorf("failed to inspect container for id %q: %v", handleState.ContainerID, err)
   194  	}
   195  
   196  	h := &taskHandle{
   197  		client:                client,
   198  		waitClient:            waitClient,
   199  		logger:                d.logger.With("container_id", container.ID),
   200  		task:                  handle.Config,
   201  		containerID:           container.ID,
   202  		containerImage:        container.Image,
   203  		doneCh:                make(chan bool),
   204  		waitCh:                make(chan struct{}),
   205  		removeContainerOnExit: d.config.GC.Container,
   206  		net:                   handleState.DriverNetwork,
   207  	}
   208  
   209  	if !d.config.DisableLogCollection {
   210  		h.dlogger, h.dloggerPluginClient, err = d.reattachToDockerLogger(handleState.ReattachConfig)
   211  		if err != nil {
   212  			d.logger.Warn("failed to reattach to docker logger process", "error", err)
   213  
   214  			h.dlogger, h.dloggerPluginClient, err = d.setupNewDockerLogger(container, handle.Config, time.Now())
   215  			if err != nil {
   216  				if err := client.StopContainer(handleState.ContainerID, 0); err != nil {
   217  					d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err)
   218  				}
   219  				return fmt.Errorf("failed to setup replacement docker logger: %v", err)
   220  			}
   221  
   222  			if err := handle.SetDriverState(h.buildState()); err != nil {
   223  				if err := client.StopContainer(handleState.ContainerID, 0); err != nil {
   224  					d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err)
   225  				}
   226  				return fmt.Errorf("failed to store driver state: %v", err)
   227  			}
   228  		}
   229  	}
   230  
   231  	d.tasks.Set(handle.Config.ID, h)
   232  	go h.run()
   233  
   234  	return nil
   235  }
   236  
   237  func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) {
   238  	if _, ok := d.tasks.Get(cfg.ID); ok {
   239  		return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID)
   240  	}
   241  
   242  	var driverConfig TaskConfig
   243  
   244  	if err := cfg.DecodeDriverConfig(&driverConfig); err != nil {
   245  		return nil, nil, fmt.Errorf("failed to decode driver config: %v", err)
   246  	}
   247  
   248  	if driverConfig.Image == "" {
   249  		return nil, nil, fmt.Errorf("image name required for docker driver")
   250  	}
   251  
   252  	// Remove any https:// prefix from the image name
   253  	if strings.HasPrefix(driverConfig.Image, "https://") {
   254  		driverConfig.Image = strings.Replace(driverConfig.Image, "https://", "", 1)
   255  	}
   256  
   257  	handle := drivers.NewTaskHandle(taskHandleVersion)
   258  	handle.Config = cfg
   259  
   260  	// Initialize docker API clients
   261  	client, _, err := d.dockerClients()
   262  	if err != nil {
   263  		return nil, nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
   264  	}
   265  
   266  	id, err := d.createImage(cfg, &driverConfig, client)
   267  	if err != nil {
   268  		return nil, nil, err
   269  	}
   270  
   271  	if runtime.GOOS == "windows" {
   272  		err = d.convertAllocPathsForWindowsLCOW(cfg, driverConfig.Image)
   273  		if err != nil {
   274  			return nil, nil, err
   275  		}
   276  	}
   277  
   278  	containerCfg, err := d.createContainerConfig(cfg, &driverConfig, driverConfig.Image)
   279  	if err != nil {
   280  		d.logger.Error("failed to create container configuration", "image_name", driverConfig.Image,
   281  			"image_id", id, "error", err)
   282  		return nil, nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", driverConfig.Image, id, err)
   283  	}
   284  
   285  	startAttempts := 0
   286  CREATE:
   287  	container, err := d.createContainer(client, containerCfg, driverConfig.Image)
   288  	if err != nil {
   289  		d.logger.Error("failed to create container", "error", err)
   290  		client.RemoveContainer(docker.RemoveContainerOptions{
   291  			ID:    containerCfg.Name,
   292  			Force: true,
   293  		})
   294  		return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to create container: %v", err), err)
   295  	}
   296  
   297  	d.logger.Info("created container", "container_id", container.ID)
   298  
   299  	// We don't need to start the container if it is already running, since we
   300  	// reuse containers that are already present on the host and running rather
   301  	// than creating new ones
   302  	if !container.State.Running {
   303  		// Start the container
   304  		if err := d.startContainer(container); err != nil {
   305  			d.logger.Error("failed to start container", "container_id", container.ID, "error", err)
   306  			client.RemoveContainer(docker.RemoveContainerOptions{
   307  				ID:    container.ID,
   308  				Force: true,
   309  			})
   310  			// Works around some sort of docker race bug; recreating the container usually works
   311  			if strings.Contains(err.Error(), "OCI runtime create failed: container with id exists:") && startAttempts < 5 {
   312  				startAttempts++
   313  				d.logger.Debug("reattempting container create/start sequence", "attempt", startAttempts, "image_id", id)
   314  				goto CREATE
   315  			}
   316  			return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("Failed to start container %s: %s", container.ID, err), err)
   317  		}
   318  
   319  		// InspectContainer to get all of the container metadata as
   320  		// much of the metadata (eg networking) isn't populated until
   321  		// the container is started
   322  		runningContainer, err := client.InspectContainer(container.ID)
   323  		if err != nil {
   328  			msg := "failed to inspect started container"
   329  			d.logger.Error(msg, "error", err)
   330  			client.RemoveContainer(docker.RemoveContainerOptions{
   331  				ID:    container.ID,
   332  				Force: true,
   333  			})
   334  			return nil, nil, nstructs.NewRecoverableError(fmt.Errorf("%s %s: %s", msg, container.ID, err), true)
   335  		}
   336  		container = runningContainer
   337  		d.logger.Info("started container", "container_id", container.ID)
   338  	} else {
   339  		d.logger.Debug("re-attaching to container", "container_id",
   340  			container.ID, "container_state", container.State.String())
   341  	}
   342  
   343  	collectingLogs := !d.config.DisableLogCollection
   344  
   345  	var dlogger docklog.DockerLogger
   346  	var pluginClient *plugin.Client
   347  
   348  	if collectingLogs {
   349  		dlogger, pluginClient, err = d.setupNewDockerLogger(container, cfg, time.Unix(0, 0))
   350  		if err != nil {
   351  			d.logger.Error("an error occurred after container startup, terminating container", "container_id", container.ID)
   352  			client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true})
   353  			return nil, nil, err
   354  		}
   355  	}
   356  
   357  	// Detect container address
   358  	ip, autoUse := d.detectIP(container, &driverConfig)
   359  
   360  	net := &drivers.DriverNetwork{
   361  		PortMap:       driverConfig.PortMap,
   362  		IP:            ip,
   363  		AutoAdvertise: autoUse,
   364  	}
   365  
   366  	// Return a driver handle
   367  	h := &taskHandle{
   368  		client:                client,
   369  		waitClient:            waitClient,
   370  		dlogger:               dlogger,
   371  		dloggerPluginClient:   pluginClient,
   372  		logger:                d.logger.With("container_id", container.ID),
   373  		task:                  cfg,
   374  		containerID:           container.ID,
   375  		containerImage:        container.Image,
   376  		doneCh:                make(chan bool),
   377  		waitCh:                make(chan struct{}),
   378  		removeContainerOnExit: d.config.GC.Container,
   379  		net:                   net,
   380  	}
   381  
   382  	if err := handle.SetDriverState(h.buildState()); err != nil {
   383  		d.logger.Error("error encoding container occurred after startup, terminating container", "container_id", container.ID, "error", err)
   384  		if collectingLogs {
   385  			dlogger.Stop()
   386  			pluginClient.Kill()
   387  		}
   388  		client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true})
   389  		return nil, nil, err
   390  	}
   391  
   392  	d.tasks.Set(cfg.ID, h)
   393  	go h.run()
   394  
   395  	return handle, net, nil
   396  }
   397  
   398  // createContainerClient is the subset of Docker Client methods used by the
   399  // createContainer method to ease testing subtle error conditions.
   400  type createContainerClient interface {
   401  	CreateContainer(docker.CreateContainerOptions) (*docker.Container, error)
   402  	InspectContainer(id string) (*docker.Container, error)
   403  	ListContainers(docker.ListContainersOptions) ([]docker.APIContainers, error)
   404  	RemoveContainer(opts docker.RemoveContainerOptions) error
   405  }
   406  
   407  // createContainer creates the container given the passed configuration. It
   408  // attempts to handle any transient Docker errors.
   409  func (d *Driver) createContainer(client createContainerClient, config docker.CreateContainerOptions,
   410  	image string) (*docker.Container, error) {
   411  	// Create a container
   412  	attempted := 0
   413  CREATE:
   414  	container, createErr := client.CreateContainer(config)
   415  	if createErr == nil {
   416  		return container, nil
   417  	}
   418  
   419  	d.logger.Debug("failed to create container", "container_name",
   420  		config.Name, "image_name", image, "image_id", config.Config.Image,
   421  		"attempt", attempted+1, "error", createErr)
   422  
   423  	// Volume management tools like Portworx may not have detached a volume
   424  	// from a previous node before Nomad started a replacement task.
   425  	// Treat these errors as recoverable so we retry.
   426  	if strings.Contains(strings.ToLower(createErr.Error()), "volume is attached on another node") {
   427  		return nil, nstructs.NewRecoverableError(createErr, true)
   428  	}
   429  
   430  	// If the container already exists determine whether it's already
   431  	// running or if it's dead and needs to be recreated.
   432  	if strings.Contains(strings.ToLower(createErr.Error()), "container already exists") {
   433  
   434  		container, err := d.containerByName(config.Name)
   435  		if err != nil {
   436  			return nil, err
   437  		}
   438  
   439  		if container != nil && container.State.Running {
   440  			return container, nil
   441  		}
   442  
   443  		// Purge conflicting container if found.
   444  		// If container is nil here, the conflicting container was already
   445  		// deleted by the time we checked, so simply retry.
   446  		if container != nil {
   447  			// Delete matching containers
   448  			err = client.RemoveContainer(docker.RemoveContainerOptions{
   449  				ID:    container.ID,
   450  				Force: true,
   451  			})
   452  			if err != nil {
   453  				d.logger.Error("failed to purge container", "container_id", container.ID)
   454  				return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err))
   455  			} else {
   456  				d.logger.Info("purged container", "container_id", container.ID)
   457  			}
   458  		}
   459  
   460  		if attempted < 5 {
   461  			attempted++
   462  			time.Sleep(nextBackoff(attempted))
   463  			goto CREATE
   464  		}
   465  	} else if strings.Contains(strings.ToLower(createErr.Error()), "no such image") {
   466  		// There is still a very small chance of this happening even with the
   467  		// image coordinator, so retry.
   468  		return nil, nstructs.NewRecoverableError(createErr, true)
   469  	} else if isDockerTransientError(createErr) && attempted < 5 {
   470  		attempted++
   471  		time.Sleep(nextBackoff(attempted))
   472  		goto CREATE
   473  	}
   474  
   475  	return nil, recoverableErrTimeouts(createErr)
   476  }
   477  
   478  // startContainer starts the passed container. It attempts to handle any
   479  // transient Docker errors.
   480  func (d *Driver) startContainer(c *docker.Container) error {
   481  	// Start a container
   482  	attempted := 0
   483  START:
   484  	startErr := client.StartContainer(c.ID, c.HostConfig)
   485  	if startErr == nil || strings.Contains(startErr.Error(), "Container already running") {
   486  		return nil
   487  	}
   488  
   489  	d.logger.Debug("failed to start container", "container_id", c.ID, "attempt", attempted+1, "error", startErr)
   490  
   491  	if isDockerTransientError(startErr) {
   492  		if attempted < 5 {
   493  			attempted++
   494  			time.Sleep(nextBackoff(attempted))
   495  			goto START
   496  		}
   497  		return nstructs.NewRecoverableError(startErr, true)
   498  	}
   499  
   500  	return recoverableErrTimeouts(startErr)
   501  }
   502  
   503  // nextBackoff returns an appropriate docker backoff duration after the given number of attempts.
   504  func nextBackoff(attempted int) time.Duration {
   505  	// attempts in 200ms, 800ms, 3.2s, 12.8s, 51.2s
   506  	// TODO: add randomization factor and extract to a helper
   507  	return 1 << (2 * uint64(attempted)) * 50 * time.Millisecond
   508  }
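
        // A rough sketch of the schedule this formula produces:
        //
        //   nextBackoff(1) == 200 * time.Millisecond
        //   nextBackoff(2) == 800 * time.Millisecond
        //   nextBackoff(3) == 3200 * time.Millisecond  // 3.2s
        //   nextBackoff(4) == 12800 * time.Millisecond // 12.8s
        //   nextBackoff(5) == 51200 * time.Millisecond // 51.2s
        //
        // Callers in this file retry at most 5 times, so the worst case waits roughly
        // 68 seconds in total across all attempts.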
   509  
   510  // createImage creates a docker image either by pulling it from a registry or by
   511  // loading it from the file system
   512  func (d *Driver) createImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (string, error) {
   513  	image := driverConfig.Image
   514  	repo, tag := parseDockerImage(image)
   515  
   516  	// We're going to check whether the image is already downloaded. If the tag
   517  	// is "latest", or ForcePull is set, we have to check for a new version every time so we don't
   518  	// bother to check and cache the id here. We'll download first, then cache.
   519  	if driverConfig.ForcePull {
   520  		d.logger.Debug("force pulling image instead of inspecting local", "image_ref", dockerImageRef(repo, tag))
   521  	} else if tag != "latest" {
   522  		if dockerImage, _ := client.InspectImage(image); dockerImage != nil {
   523  			// Image exists so just increment its reference count
   524  			d.coordinator.IncrementImageReference(dockerImage.ID, image, task.ID)
   525  			return dockerImage.ID, nil
   526  		}
   527  	}
   528  
   529  	// Load the image if specified
   530  	if driverConfig.LoadImage != "" {
   531  		return d.loadImage(task, driverConfig, client)
   532  	}
   533  
   534  	// Download the image
   535  	return d.pullImage(task, driverConfig, client, repo, tag)
   536  }
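
        // A sketch with an illustrative image name: for image = "redis:3.2" without
        // force_pull, a locally present image is reused and only its reference count
        // is incremented; for "redis:latest" or with force_pull = true the local
        // check is skipped and pullImage is always consulted.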
   537  
   538  // pullImage creates an image by pulling it from a docker registry
   539  func (d *Driver) pullImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client, repo, tag string) (id string, err error) {
   540  	authOptions, err := d.resolveRegistryAuthentication(driverConfig, repo)
   541  	if err != nil {
   542  		if driverConfig.AuthSoftFail {
   543  			d.logger.Warn("Failed to find docker repo auth", "repo", repo, "error", err)
   544  		} else {
   545  			return "", fmt.Errorf("Failed to find docker auth for repo %q: %v", repo, err)
   546  		}
   547  	}
   548  
   549  	if authIsEmpty(authOptions) {
   550  		d.logger.Debug("did not find docker auth for repo", "repo", repo)
   551  	}
   552  
   553  	d.eventer.EmitEvent(&drivers.TaskEvent{
   554  		TaskID:    task.ID,
   555  		AllocID:   task.AllocID,
   556  		TaskName:  task.Name,
   557  		Timestamp: time.Now(),
   558  		Message:   "Downloading image",
   559  		Annotations: map[string]string{
   560  			"image": dockerImageRef(repo, tag),
   561  		},
   562  	})
   563  
   564  	return d.coordinator.PullImage(driverConfig.Image, authOptions, task.ID, d.emitEventFunc(task), d.config.pullActivityTimeoutDuration)
   565  }
   566  
   567  func (d *Driver) emitEventFunc(task *drivers.TaskConfig) LogEventFn {
   568  	return func(msg string, annotations map[string]string) {
   569  		d.eventer.EmitEvent(&drivers.TaskEvent{
   570  			TaskID:      task.ID,
   571  			AllocID:     task.AllocID,
   572  			TaskName:    task.Name,
   573  			Timestamp:   time.Now(),
   574  			Message:     msg,
   575  			Annotations: annotations,
   576  		})
   577  	}
   578  }
   579  
   580  // authBackend encapsulates a function that resolves registry credentials.
   581  type authBackend func(string) (*docker.AuthConfiguration, error)
   582  
   583  // resolveRegistryAuthentication attempts to retrieve auth credentials for the
   584  // repo, trying all authentication-backends possible.
   585  func (d *Driver) resolveRegistryAuthentication(driverConfig *TaskConfig, repo string) (*docker.AuthConfiguration, error) {
   586  	return firstValidAuth(repo, []authBackend{
   587  		authFromTaskConfig(driverConfig),
   588  		authFromDockerConfig(d.config.Auth.Config),
   589  		authFromHelper(d.config.Auth.Helper),
   590  	})
   591  }
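
        // Restating the order above: firstValidAuth returns the first backend that
        // yields credentials, starting with auth set in the task config, then the
        // config file referenced by the plugin's auth.config option, then the
        // credential helper named by auth.helper.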
   592  
   593  // loadImage creates an image by loading it from the file system
   594  func (d *Driver) loadImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (id string, err error) {
   595  
   596  	archive := filepath.Join(task.TaskDir().LocalDir, driverConfig.LoadImage)
   597  	d.logger.Debug("loading image from disk", "archive", archive)
   598  
   599  	f, err := os.Open(archive)
   600  	if err != nil {
   601  		return "", fmt.Errorf("unable to open image archive: %v", err)
   602  	}
   603  	defer f.Close() // close the archive even if LoadImage fails
   604  
   605  	if err := client.LoadImage(docker.LoadImageOptions{InputStream: f}); err != nil {
   606  		return "", err
   607  	}
   608  
   609  	dockerImage, err := client.InspectImage(driverConfig.Image)
   610  	if err != nil {
   611  		return "", recoverableErrTimeouts(err)
   612  	}
   613  
   614  	d.coordinator.IncrementImageReference(dockerImage.ID, driverConfig.Image, task.ID)
   615  	return dockerImage.ID, nil
   616  }
   617  
   618  func (d *Driver) convertAllocPathsForWindowsLCOW(task *drivers.TaskConfig, image string) error {
   619  	imageConfig, err := client.InspectImage(image)
   620  	if err != nil {
   621  		return fmt.Errorf("the image does not exist: %v", err)
   622  	}
   623  	// LCOW: if we are running a Linux container on Windows, we need to rewrite the alloc paths, as c:\ does not exist inside the Linux container
   624  	if imageConfig.OS == "linux" {
   625  		a := []rune(task.Env[taskenv.AllocDir])
   626  		task.Env[taskenv.AllocDir] = strings.ReplaceAll(string(a[2:]), "\\", "/")
   627  		l := []rune(task.Env[taskenv.TaskLocalDir])
   628  		task.Env[taskenv.TaskLocalDir] = strings.ReplaceAll(string(l[2:]), "\\", "/")
   629  		s := []rune(task.Env[taskenv.SecretsDir])
   630  		task.Env[taskenv.SecretsDir] = strings.ReplaceAll(string(s[2:]), "\\", "/")
   631  	}
   632  	return nil
   633  }
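
        // A sketch with illustrative values: an alloc dir env value of
        // `c:\nomad\alloc\<id>\alloc` becomes `/nomad/alloc/<id>/alloc`; the two
        // leading runes (the drive prefix "c:") are dropped and backslashes are
        // replaced with forward slashes.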
   634  
   635  func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConfig) ([]string, error) {
   636  	allocDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SharedAllocDir, task.Env[taskenv.AllocDir])
   637  	taskLocalBind := fmt.Sprintf("%s:%s", task.TaskDir().LocalDir, task.Env[taskenv.TaskLocalDir])
   638  	secretDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SecretsDir, task.Env[taskenv.SecretsDir])
   639  	binds := []string{allocDirBind, taskLocalBind, secretDirBind}
   640  
   641  	taskLocalBindVolume := driverConfig.VolumeDriver == ""
   642  
   643  	if !d.config.Volumes.Enabled && !taskLocalBindVolume {
   644  		return nil, fmt.Errorf("volumes are not enabled; cannot use volume driver %q", driverConfig.VolumeDriver)
   645  	}
   646  
   647  	for _, userbind := range driverConfig.Volumes {
   648  		// This assumes host OS = docker container OS, which will not hold
   649  		// once we support Linux containers on Windows.
   650  		src, dst, mode, err := parseVolumeSpec(userbind, runtime.GOOS)
   651  		if err != nil {
   652  			return nil, fmt.Errorf("invalid docker volume %q: %v", userbind, err)
   653  		}
   654  
   655  		// Paths inside the task dir are always allowed when using the default
   656  		// driver, and relative paths are always allowed as they mount within the
   657  		// container. When a VolumeDriver is set, we assume we receive a binding in
   658  		// the format volume-name:container-dest.
   659  		// Otherwise, we assume we receive a relative path binding in the format
   660  		// relative/to/task:/also/in/container.
   661  		if taskLocalBindVolume {
   662  			src = expandPath(task.TaskDir().Dir, src)
   663  		} else {
   664  			// Resolve dotted path segments
   665  			src = filepath.Clean(src)
   666  		}
   667  
   668  		if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, src) {
   669  			return nil, fmt.Errorf("volumes are not enabled; cannot mount host paths: %+q", userbind)
   670  		}
   671  
   672  		bind := src + ":" + dst
   673  		if mode != "" {
   674  			bind += ":" + mode
   675  		}
   676  		binds = append(binds, bind)
   677  	}
   678  
   679  	if selinuxLabel := d.config.Volumes.SelinuxLabel; selinuxLabel != "" {
   680  		// Apply SELinux Label to each volume
   681  		for i := range binds {
   682  			binds[i] = fmt.Sprintf("%s:%s", binds[i], selinuxLabel)
   683  		}
   684  	}
   685  
   686  	return binds, nil
   687  }
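
        // A sketch of the default binds, assuming illustrative host paths and the
        // usual in-container destinations of /alloc, /local and /secrets:
        //
        //   /var/nomad/alloc/<alloc-id>/alloc:/alloc
        //   /var/nomad/alloc/<alloc-id>/<task>/local:/local
        //   /var/nomad/alloc/<alloc-id>/<task>/secrets:/secrets
        //
        // plus one entry per user volume, optionally suffixed with ":<mode>" and,
        // when volumes.selinuxlabel is configured, with the SELinux label.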
   688  
   689  var userMountToUnixMount = map[string]string{
   690  	// Empty string maps to `rprivate` for backwards compatibility in restored
   691  	// older tasks, where mount propagation will not be present.
   692  	"":                                     "rprivate",
   693  	nstructs.VolumeMountPropagationPrivate: "rprivate",
   694  	nstructs.VolumeMountPropagationHostToTask:    "rslave",
   695  	nstructs.VolumeMountPropagationBidirectional: "rshared",
   696  }
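
        // In other words: an unset or private propagation mode becomes docker's
        // "rprivate", the host-to-task mode becomes "rslave", and the bidirectional
        // mode becomes "rshared". createContainerConfig below applies this mapping
        // to task mounts on Linux only.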
   697  
   698  // parseSecurityOpts reads a local seccomp profile file so its contents can be
   699  // sent to the daemon. This code is modified slightly from the docker CLI code:
   700  // https://github.com/docker/cli/blob/8ef8547eb6934b28497d309d21e280bcd25145f5/cli/command/container/opts.go#L840
   701  func parseSecurityOpts(securityOpts []string) ([]string, error) {
   702  	for key, opt := range securityOpts {
   703  		con := strings.SplitN(opt, "=", 2)
   704  		if len(con) == 1 && con[0] != "no-new-privileges" {
   705  			if strings.Contains(opt, ":") {
   706  				con = strings.SplitN(opt, ":", 2)
   707  			} else {
   708  				return securityOpts, fmt.Errorf("invalid security_opt: %q", opt)
   709  			}
   710  		}
   711  		if con[0] == "seccomp" && con[1] != "unconfined" {
   712  			f, err := ioutil.ReadFile(con[1])
   713  			if err != nil {
   714  				return securityOpts, fmt.Errorf("opening seccomp profile (%s) failed: %v", con[1], err)
   715  			}
   716  			b := bytes.NewBuffer(nil)
   717  			if err := json.Compact(b, f); err != nil {
   718  				return securityOpts, fmt.Errorf("compacting json for seccomp profile (%s) failed: %v", con[1], err)
   719  			}
   720  			securityOpts[key] = fmt.Sprintf("seccomp=%s", b.Bytes())
   721  		}
   722  	}
   723  
   724  	return securityOpts, nil
   725  }
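
        // A sketch with an illustrative profile path: given
        //
        //   security_opt = ["no-new-privileges", "seccomp=/local/seccomp.json"]
        //
        // the first entry passes through unchanged, while the seccomp entry is
        // rewritten to "seccomp=<compacted JSON contents of /local/seccomp.json>".
        // "seccomp=unconfined" is also passed through as-is.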
   726  
   727  // memoryLimits computes the memory and memory_reservation values passed along to
   728  // the docker host config. These fields represent hard and soft memory limits from
   729  // docker's perspective, respectively.
   730  //
   731  // The memory field on the task configuration can be interpreted as a hard or soft
   732  // limit. Before Nomad v0.11.3, it was always a hard limit. Now, it is interpreted
   733  // as a soft limit if the memory_hard_limit value is configured on the docker
   734  // task driver configuration. When memory_hard_limit is set, the docker host
   735  // config is configured such that the memory field is equal to memory_hard_limit
   736  // value, and the memory_reservation field is set to the task driver memory value.
   737  //
   738  // If memory_hard_limit is not set (i.e. zero value), then the memory field of
   739  // the task resource config is interpreted as a hard limit. In this case both the
   740  // memory is set to the task resource memory value and memory_reservation is left
   741  // unset.
   742  //
   743  // Returns (memory (hard), memory_reservation (soft)) values in bytes.
   744  func (_ *Driver) memoryLimits(driverHardLimitMB, taskMemoryLimitBytes int64) (int64, int64) {
   745  	if driverHardLimitMB <= 0 {
   746  		return taskMemoryLimitBytes, 0
   747  	}
   748  	return driverHardLimitMB * 1024 * 1024, taskMemoryLimitBytes
   749  }
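
        // A worked example with illustrative values: memory_hard_limit = 1024 (MB)
        // and a task memory limit of 512 MiB (536870912 bytes) returns
        // (1073741824, 536870912), i.e. a 1 GiB hard limit with a 512 MiB soft
        // reservation. With memory_hard_limit unset this returns (536870912, 0) and
        // the task memory becomes the hard limit.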
   750  
   751  func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig,
   752  	imageID string) (docker.CreateContainerOptions, error) {
   753  
   754  	// ensure that PortMap variables are populated early on
   755  	task.Env = taskenv.SetPortMapEnvs(task.Env, driverConfig.PortMap)
   756  
   757  	logger := d.logger.With("task_name", task.Name)
   758  	var c docker.CreateContainerOptions
   759  	if task.Resources == nil {
   760  		// Guard against missing resources. We should never have been able to
   761  		// schedule a job without specifying this.
   762  		logger.Error("task.Resources is empty")
   763  		return c, fmt.Errorf("task.Resources is empty")
   764  	}
   765  	binds, err := d.containerBinds(task, driverConfig)
   766  	if err != nil {
   767  		return c, err
   768  	}
   769  	logger.Trace("binding volumes", "volumes", binds)
   770  
   771  	// create the config block that will later be consumed by go-dockerclient
   772  	config := &docker.Config{
   773  		Image:      imageID,
   774  		Entrypoint: driverConfig.Entrypoint,
   775  		Hostname:   driverConfig.Hostname,
   776  		User:       task.User,
   777  		Tty:        driverConfig.TTY,
   778  		OpenStdin:  driverConfig.Interactive,
   779  	}
   780  
   781  	if driverConfig.WorkDir != "" {
   782  		config.WorkingDir = driverConfig.WorkDir
   783  	}
   784  
   785  	containerRuntime := driverConfig.Runtime
   786  	if _, ok := task.DeviceEnv[nvidiaVisibleDevices]; ok {
   787  		if !d.gpuRuntime {
   788  			return c, fmt.Errorf("requested docker runtime %q was not found", d.config.GPURuntimeName)
   789  		}
   790  		if containerRuntime != "" && containerRuntime != d.config.GPURuntimeName {
   791  			return c, fmt.Errorf("conflicting runtime requests: gpu runtime %q conflicts with task runtime %q", d.config.GPURuntimeName, containerRuntime)
   792  		}
   793  		containerRuntime = d.config.GPURuntimeName
   794  	}
   795  	if _, ok := d.config.allowRuntimes[containerRuntime]; !ok && containerRuntime != "" {
   796  		return c, fmt.Errorf("requested runtime %q is not allowed", containerRuntime)
   797  	}
   798  
   799  	memory, memoryReservation := d.memoryLimits(driverConfig.MemoryHardLimit, task.Resources.LinuxResources.MemoryLimitBytes)
   800  
   801  	hostConfig := &docker.HostConfig{
   802  		Memory:            memory,            // hard limit
   803  		MemoryReservation: memoryReservation, // soft limit
   804  
   805  		CPUShares: task.Resources.LinuxResources.CPUShares,
   806  
   807  		// Binds are used to mount a host volume into the container. We mount a
   808  		// local directory for storage and a shared alloc directory that can be
   809  		// used to share data between different tasks in the same task group.
   810  		Binds: binds,
   811  
   812  		StorageOpt:   driverConfig.StorageOpt,
   813  		VolumeDriver: driverConfig.VolumeDriver,
   814  
   815  		PidsLimit: &driverConfig.PidsLimit,
   816  
   817  		Runtime: containerRuntime,
   818  	}
   819  
   820  	// Calculate CPU Quota
   821  	// cfs_quota_us is the time per core, so we must
   822  	// multiply the time by the number of cores available
   823  	// See https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu
   824  	if driverConfig.CPUHardLimit {
   825  		numCores := runtime.NumCPU()
   826  		if driverConfig.CPUCFSPeriod < 0 || driverConfig.CPUCFSPeriod > 1000000 {
   827  			return c, fmt.Errorf("invalid value for cpu_cfs_period")
   828  		}
   829  		if driverConfig.CPUCFSPeriod == 0 {
   830  			driverConfig.CPUCFSPeriod = task.Resources.LinuxResources.CPUPeriod
   831  		}
   832  		hostConfig.CPUPeriod = driverConfig.CPUCFSPeriod
   833  		hostConfig.CPUQuota = int64(task.Resources.LinuxResources.PercentTicks*float64(driverConfig.CPUCFSPeriod)) * int64(numCores)
   834  	}
   835  
   836  	// Windows does not support MemorySwap/MemorySwappiness #2193
   837  	if runtime.GOOS == "windows" {
   838  		hostConfig.MemorySwap = 0
   839  		hostConfig.MemorySwappiness = nil
   840  	} else {
   841  		hostConfig.MemorySwap = task.Resources.LinuxResources.MemoryLimitBytes // MemorySwap is memory + swap.
   842  
   843  		// disable swap explicitly in non-Windows environments
   844  		var swappiness int64 = 0
   845  		hostConfig.MemorySwappiness = &swappiness
   846  
   847  	}
   848  
   849  	loggingDriver := driverConfig.Logging.Type
   850  	if loggingDriver == "" {
   851  		loggingDriver = driverConfig.Logging.Driver
   852  	}
   853  
   854  	hostConfig.LogConfig = docker.LogConfig{
   855  		Type:   loggingDriver,
   856  		Config: driverConfig.Logging.Config,
   857  	}
   858  
   859  	if hostConfig.LogConfig.Type == "" && hostConfig.LogConfig.Config == nil {
   860  		logger.Trace("no docker log driver provided, defaulting to json-file")
   861  		hostConfig.LogConfig.Type = "json-file"
   862  		hostConfig.LogConfig.Config = map[string]string{
   863  			"max-file": "2",
   864  			"max-size": "2m",
   865  		}
   866  	}
   867  
   868  	logger.Debug("configured resources",
   869  		"memory", hostConfig.Memory, "memory_reservation", hostConfig.MemoryReservation,
   870  		"cpu_shares", hostConfig.CPUShares, "cpu_quota", hostConfig.CPUQuota,
   871  		"cpu_period", hostConfig.CPUPeriod)
   872  
   873  	logger.Debug("binding directories", "binds", hclog.Fmt("%#v", hostConfig.Binds))
   874  
   875  	//  set privileged mode
   876  	if driverConfig.Privileged && !d.config.AllowPrivileged {
   877  		return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`)
   878  	}
   879  	hostConfig.Privileged = driverConfig.Privileged
   880  
   881  	// set capabilities
   882  	hostCapsWhitelistConfig := d.config.AllowCaps
   883  	hostCapsWhitelist := make(map[string]struct{})
   884  	for _, cap := range hostCapsWhitelistConfig {
   885  		cap = strings.ToLower(strings.TrimSpace(cap))
   886  		hostCapsWhitelist[cap] = struct{}{}
   887  	}
   888  
   889  	if _, ok := hostCapsWhitelist["all"]; !ok {
   890  		effectiveCaps, err := tweakCapabilities(
   891  			strings.Split(dockerBasicCaps, ","),
   892  			driverConfig.CapAdd,
   893  			driverConfig.CapDrop,
   894  		)
   895  		if err != nil {
   896  			return c, err
   897  		}
   898  		var missingCaps []string
   899  		for _, cap := range effectiveCaps {
   900  			cap = strings.ToLower(cap)
   901  			if _, ok := hostCapsWhitelist[cap]; !ok {
   902  				missingCaps = append(missingCaps, cap)
   903  			}
   904  		}
   905  		if len(missingCaps) > 0 {
   906  			return c, fmt.Errorf("Docker driver doesn't have the following caps whitelisted on this Nomad agent: %s", missingCaps)
   907  		}
   908  	}
   909  
   910  	hostConfig.CapAdd = driverConfig.CapAdd
   911  	hostConfig.CapDrop = driverConfig.CapDrop
   912  
   913  	// set SHM size
   914  	if driverConfig.ShmSize != 0 {
   915  		hostConfig.ShmSize = driverConfig.ShmSize
   916  	}
   917  
   918  	// set DNS servers
   919  	for _, ip := range driverConfig.DNSServers {
   920  		if net.ParseIP(ip) != nil {
   921  			hostConfig.DNS = append(hostConfig.DNS, ip)
   922  		} else {
   923  			logger.Error("invalid ip address for container dns server", "ip", ip)
   924  		}
   925  	}
   926  
   927  	// Setup devices
   928  	for _, device := range driverConfig.Devices {
   929  		dd, err := device.toDockerDevice()
   930  		if err != nil {
   931  			return c, err
   932  		}
   933  		hostConfig.Devices = append(hostConfig.Devices, dd)
   934  	}
   935  	for _, device := range task.Devices {
   936  		hostConfig.Devices = append(hostConfig.Devices, docker.Device{
   937  			PathOnHost:        device.HostPath,
   938  			PathInContainer:   device.TaskPath,
   939  			CgroupPermissions: device.Permissions,
   940  		})
   941  	}
   942  
   943  	// Setup mounts
   944  	for _, m := range driverConfig.Mounts {
   945  		hm, err := m.toDockerHostMount()
   946  		if err != nil {
   947  			return c, err
   948  		}
   949  
   950  		switch hm.Type {
   951  		case "bind":
   952  			hm.Source = expandPath(task.TaskDir().Dir, hm.Source)
   953  
   954  			// paths inside alloc dir are always allowed as they mount within
   955  			// a container, and treated as relative to task dir
   956  			if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, hm.Source) {
   957  				return c, fmt.Errorf(
   958  					"volumes are not enabled; cannot mount host path: %q %q",
   959  					hm.Source, task.AllocDir)
   960  			}
   961  		case "tmpfs":
   962  			// no source, so no sandbox check required
   963  		default: // "volume", but also any new thing that comes along
   964  			if !d.config.Volumes.Enabled {
   965  				return c, fmt.Errorf(
   966  					"volumes are not enabled; cannot mount volume: %q", hm.Source)
   967  			}
   968  		}
   969  
   970  		hostConfig.Mounts = append(hostConfig.Mounts, hm)
   971  	}
   972  
   973  	for _, m := range task.Mounts {
   974  		hm := docker.HostMount{
   975  			Type:     "bind",
   976  			Target:   m.TaskPath,
   977  			Source:   m.HostPath,
   978  			ReadOnly: m.Readonly,
   979  		}
   980  
   981  		// MountPropagation is only supported by Docker on Linux:
   982  		// https://docs.docker.com/storage/bind-mounts/#configure-bind-propagation
   983  		if runtime.GOOS == "linux" {
   984  			hm.BindOptions = &docker.BindOptions{
   985  				Propagation: userMountToUnixMount[m.PropagationMode],
   986  			}
   987  		}
   988  
   989  		hostConfig.Mounts = append(hostConfig.Mounts, hm)
   990  	}
   991  
   992  	// set DNS search domains and extra hosts
   993  	hostConfig.DNSSearch = driverConfig.DNSSearchDomains
   994  	hostConfig.DNSOptions = driverConfig.DNSOptions
   995  	hostConfig.ExtraHosts = driverConfig.ExtraHosts
   996  
   997  	hostConfig.IpcMode = driverConfig.IPCMode
   998  	hostConfig.PidMode = driverConfig.PidMode
   999  	hostConfig.UTSMode = driverConfig.UTSMode
  1000  	hostConfig.UsernsMode = driverConfig.UsernsMode
  1001  	hostConfig.SecurityOpt = driverConfig.SecurityOpt
  1002  	hostConfig.Sysctls = driverConfig.Sysctl
  1003  
  1004  	hostConfig.SecurityOpt, err = parseSecurityOpts(driverConfig.SecurityOpt)
  1005  	if err != nil {
  1006  		return c, fmt.Errorf("failed to parse security_opt configuration: %v", err)
  1007  	}
  1008  
  1009  	ulimits, err := sliceMergeUlimit(driverConfig.Ulimit)
  1010  	if err != nil {
  1011  		return c, fmt.Errorf("failed to parse ulimit configuration: %v", err)
  1012  	}
  1013  	hostConfig.Ulimits = ulimits
  1014  
  1015  	hostConfig.ReadonlyRootfs = driverConfig.ReadonlyRootfs
  1016  
  1017  	// set the docker network mode
  1018  	hostConfig.NetworkMode = driverConfig.NetworkMode
  1019  
  1020  	// if the driver config does not specify a network mode then try to use the
  1021  	// shared alloc network
  1022  	if hostConfig.NetworkMode == "" {
  1023  		if task.NetworkIsolation != nil && task.NetworkIsolation.Path != "" {
  1024  			// find the previously created parent container to join networks with
  1025  			netMode := fmt.Sprintf("container:%s", task.NetworkIsolation.Labels[dockerNetSpecLabelKey])
  1026  			logger.Debug("configuring network mode for task group", "network_mode", netMode)
  1027  			hostConfig.NetworkMode = netMode
  1028  		} else {
  1029  			// docker default
  1030  			logger.Debug("networking mode not specified; using default")
  1031  			hostConfig.NetworkMode = "default"
  1032  		}
  1033  	}
  1034  
  1035  	// Setup port mapping and exposed ports
  1036  	if len(task.Resources.NomadResources.Networks) == 0 {
  1037  		if len(driverConfig.PortMap) > 0 {
  1038  			return c, fmt.Errorf("Trying to map ports but no network interface is available")
  1039  		}
  1040  	} else {
  1041  		// TODO add support for more than one network
  1042  		network := task.Resources.NomadResources.Networks[0]
  1043  		publishedPorts := map[docker.Port][]docker.PortBinding{}
  1044  		exposedPorts := map[docker.Port]struct{}{}
  1045  
  1046  		for _, port := range network.ReservedPorts {
  1047  			// By default we will map the allocated port 1:1 to the container
  1048  			containerPortInt := port.Value
  1049  
  1050  			// If the user has mapped a port using port_map we'll change it here
  1051  			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
  1052  				containerPortInt = mapped
  1053  			}
  1054  
  1055  			hostPortStr := strconv.Itoa(port.Value)
  1056  			containerPort := docker.Port(strconv.Itoa(containerPortInt))
  1057  
  1058  			publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr)
  1059  			publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr)
  1060  			logger.Debug("allocated static port", "ip", network.IP, "port", port.Value)
  1061  
  1062  			exposedPorts[containerPort+"/tcp"] = struct{}{}
  1063  			exposedPorts[containerPort+"/udp"] = struct{}{}
  1064  			logger.Debug("exposed port", "port", port.Value)
  1065  		}
  1066  
  1067  		for _, port := range network.DynamicPorts {
  1068  			// By default we will map the allocated port 1:1 to the container
  1069  			containerPortInt := port.Value
  1070  
  1071  			// If the user has mapped a port using port_map we'll change it here
  1072  			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
  1073  				containerPortInt = mapped
  1074  			}
  1075  
  1076  			hostPortStr := strconv.Itoa(port.Value)
  1077  			containerPort := docker.Port(strconv.Itoa(containerPortInt))
  1078  
  1079  			publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr)
  1080  			publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr)
  1081  			logger.Debug("allocated mapped port", "ip", network.IP, "port", port.Value)
  1082  
  1083  			exposedPorts[containerPort+"/tcp"] = struct{}{}
  1084  			exposedPorts[containerPort+"/udp"] = struct{}{}
  1085  			logger.Debug("exposed port", "port", containerPort)
  1086  		}
  1087  
  1088  		hostConfig.PortBindings = publishedPorts
  1089  		config.ExposedPorts = exposedPorts
  1090  	}
  1091  
  1092  	// If the user specified a custom command to run, we'll inject it here.
  1093  	if driverConfig.Command != "" {
  1094  		// Validate command
  1095  		if err := validateCommand(driverConfig.Command, "args"); err != nil {
  1096  			return c, err
  1097  		}
  1098  
  1099  		cmd := []string{driverConfig.Command}
  1100  		if len(driverConfig.Args) != 0 {
  1101  			cmd = append(cmd, driverConfig.Args...)
  1102  		}
  1103  		logger.Debug("setting container startup command", "command", strings.Join(cmd, " "))
  1104  		config.Cmd = cmd
  1105  	} else if len(driverConfig.Args) != 0 {
  1106  		config.Cmd = driverConfig.Args
  1107  	}
  1108  
  1109  	if len(driverConfig.Labels) > 0 {
  1110  		config.Labels = driverConfig.Labels
  1111  	}
  1112  
  1113  	labels := make(map[string]string, len(driverConfig.Labels)+1)
  1114  	for k, v := range driverConfig.Labels {
  1115  		labels[k] = v
  1116  	}
  1117  	labels[dockerLabelAllocID] = task.AllocID
  1118  	config.Labels = labels
  1119  	logger.Debug("applied labels on the container", "labels", config.Labels)
  1120  
  1121  	config.Env = task.EnvList()
  1122  
  1123  	containerName := fmt.Sprintf("%s-%s", strings.Replace(task.Name, "/", "_", -1), task.AllocID)
  1124  	logger.Debug("setting container name", "container_name", containerName)
  1125  
  1126  	var networkingConfig *docker.NetworkingConfig
  1127  	if len(driverConfig.NetworkAliases) > 0 || driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" {
  1128  		networkingConfig = &docker.NetworkingConfig{
  1129  			EndpointsConfig: map[string]*docker.EndpointConfig{
  1130  				hostConfig.NetworkMode: {},
  1131  			},
  1132  		}
  1133  	}
  1134  
  1135  	if len(driverConfig.NetworkAliases) > 0 {
  1136  		networkingConfig.EndpointsConfig[hostConfig.NetworkMode].Aliases = driverConfig.NetworkAliases
  1137  		logger.Debug("setting container network aliases", "network_mode", hostConfig.NetworkMode,
  1138  			"network_aliases", strings.Join(driverConfig.NetworkAliases, ", "))
  1139  	}
  1140  
  1141  	if driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" {
  1142  		networkingConfig.EndpointsConfig[hostConfig.NetworkMode].IPAMConfig = &docker.EndpointIPAMConfig{
  1143  			IPv4Address: driverConfig.IPv4Address,
  1144  			IPv6Address: driverConfig.IPv6Address,
  1145  		}
  1146  		logger.Debug("setting container network configuration", "network_mode", hostConfig.NetworkMode,
  1147  			"ipv4_address", driverConfig.IPv4Address, "ipv6_address", driverConfig.IPv6Address)
  1148  	}
  1149  
  1150  	if driverConfig.MacAddress != "" {
  1151  		config.MacAddress = driverConfig.MacAddress
  1152  		logger.Debug("setting container mac address", "mac_address", config.MacAddress)
  1153  	}
  1154  
  1155  	return docker.CreateContainerOptions{
  1156  		Name:             containerName,
  1157  		Config:           config,
  1158  		HostConfig:       hostConfig,
  1159  		NetworkingConfig: networkingConfig,
  1160  	}, nil
  1161  }
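
        // A sketch of the port handling above, with illustrative values: a network
        // with a dynamic port labeled "http" that was allocated host port 23456,
        // combined with port_map { http = 8080 }, exposes 8080/tcp and 8080/udp on
        // the container and publishes both back to <network.IP>:23456 on the host.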
  1162  
  1163  // detectIP of Docker container. Returns the first IP found as well as true if
  1164  // the IP should be advertised (bridge network IPs return false). Returns an
  1165  // empty string and false if no IP could be found.
  1166  func (d *Driver) detectIP(c *docker.Container, driverConfig *TaskConfig) (string, bool) {
  1167  	if c.NetworkSettings == nil {
  1168  		// This should only happen if there's been a coding error (such
  1169  		// as not calling InspectContainer after CreateContainer). Code
  1170  		// defensively in case the Docker API changes subtly.
  1171  		d.logger.Error("no network settings for container", "container_id", c.ID)
  1172  		return "", false
  1173  	}
  1174  
  1175  	ip, ipName := "", ""
  1176  	auto := false
  1177  	for name, net := range c.NetworkSettings.Networks {
  1178  		if net.IPAddress == "" {
  1179  			// Ignore networks without an IP address
  1180  			continue
  1181  		}
  1182  
  1183  		ip = net.IPAddress
  1184  		if driverConfig.AdvertiseIPv6Addr {
  1185  			ip = net.GlobalIPv6Address
  1186  			auto = true
  1187  		}
  1188  		ipName = name
  1189  
  1190  		// Don't auto-advertise IPs for default networks (bridge on
  1191  		// Linux, nat on Windows)
  1192  		if name != "bridge" && name != "nat" {
  1193  			auto = true
  1194  		}
  1195  
  1196  		break
  1197  	}
  1198  
  1199  	if n := len(c.NetworkSettings.Networks); n > 1 {
  1200  		d.logger.Warn("multiple Docker networks for container found but Nomad only supports 1",
  1201  			"total_networks", n,
  1202  			"container_id", c.ID,
  1203  			"container_network", ipName)
  1204  	}
  1205  
  1206  	return ip, auto
  1207  }
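
        // For example, with illustrative addresses: a container attached only to the
        // default "bridge" network with IPAddress 172.17.0.2 yields
        // ("172.17.0.2", false), while a container on a user-defined network returns
        // its address with true so it can be auto-advertised. When
        // advertise_ipv6_address is set the GlobalIPv6Address is returned instead and
        // advertising is always enabled.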
  1208  
  1209  // containerByName finds a running container by name, and returns an error
  1210  // if the container is dead or can't be found.
  1211  func (d *Driver) containerByName(name string) (*docker.Container, error) {
  1212  
  1213  	client, _, err := d.dockerClients()
  1214  	if err != nil {
  1215  		return nil, err
  1216  	}
  1217  	containers, err := client.ListContainers(docker.ListContainersOptions{
  1218  		All: true,
  1219  	})
  1220  	if err != nil {
  1221  		d.logger.Error("failed to query list of containers matching name",
  1222  			"container_name", name)
  1223  		return nil, recoverableErrTimeouts(
  1224  			fmt.Errorf("Failed to query list of containers: %s", err))
  1225  	}
  1226  
  1227  	// Docker prepends a "/" to the container names it lists, so prepend it to the Nomad generated name before comparing
  1228  	containerName := "/" + name
  1229  	var (
  1230  		shimContainer docker.APIContainers
  1231  		found         bool
  1232  	)
  1233  OUTER:
  1234  	for _, shimContainer = range containers {
  1235  		d.logger.Trace("listed container", "names", hclog.Fmt("%+v", shimContainer.Names))
  1236  		for _, name := range shimContainer.Names {
  1237  			if name == containerName {
  1238  				d.logger.Trace("Found container",
  1239  					"container_name", containerName, "container_id", shimContainer.ID)
  1240  				found = true
  1241  				break OUTER
  1242  			}
  1243  		}
  1244  	}
  1245  	if !found {
  1246  		return nil, nil
  1247  	}
  1248  
  1249  	container, err := client.InspectContainer(shimContainer.ID)
  1250  	if err != nil {
  1251  		err = fmt.Errorf("Failed to inspect container %s: %s", shimContainer.ID, err)
  1252  
  1253  		// This error is always recoverable as it could
  1254  		// be caused by races between listing
  1255  		// containers and this container being removed.
  1256  		// See #2802
  1257  		return nil, nstructs.NewRecoverableError(err, true)
  1258  	}
  1259  	return container, nil
  1260  }
  1261  
  1262  // validateCommand validates that the command is non-empty and contains no
  1263  // leading or trailing whitespace, returning a user friendly error message
  1264  // otherwise.
  1265  func validateCommand(command, argField string) error {
  1266  	trimmed := strings.TrimSpace(command)
  1267  	if len(trimmed) == 0 {
  1268  		return fmt.Errorf("command empty: %q", command)
  1269  	}
  1270  
  1271  	if len(trimmed) != len(command) {
  1272  		return fmt.Errorf("command contains extra white space: %q", command)
  1273  	}
  1274  
  1275  	return nil
  1276  }
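
        // A couple of illustrative cases: validateCommand("redis-server", "args")
        // returns nil, while validateCommand(" redis-server", "args") returns the
        // "extra white space" error; only emptiness and surrounding whitespace are
        // checked here.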
  1277  
  1278  func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) {
  1279  	h, ok := d.tasks.Get(taskID)
  1280  	if !ok {
  1281  		return nil, drivers.ErrTaskNotFound
  1282  	}
  1283  	ch := make(chan *drivers.ExitResult)
  1284  	go d.handleWait(ctx, ch, h)
  1285  	return ch, nil
  1286  }
  1287  
  1288  func (d *Driver) handleWait(ctx context.Context, ch chan *drivers.ExitResult, h *taskHandle) {
  1289  	defer close(ch)
  1290  	select {
  1291  	case <-h.waitCh:
  1292  		ch <- h.ExitResult()
  1293  	case <-ctx.Done():
  1294  		ch <- &drivers.ExitResult{
  1295  			Err: ctx.Err(),
  1296  		}
  1297  	}
  1298  }
  1299  
  1300  func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error {
  1301  	h, ok := d.tasks.Get(taskID)
  1302  	if !ok {
  1303  		return drivers.ErrTaskNotFound
  1304  	}
  1305  
  1306  	if signal == "" {
  1307  		signal = "SIGINT"
  1308  	}
  1309  
  1310  	// The Windows Docker daemon does not support SIGINT; SIGTERM is the semantic
  1311  	// equivalent that allows for graceful shutdown before being followed up by a SIGKILL.
  1312  	// Supported signals:
  1313  	//   https://github.com/moby/moby/blob/0111ee70874a4947d93f64b672f66a2a35071ee2/pkg/signal/signal_windows.go#L17-L26
  1314  	if runtime.GOOS == "windows" && signal == "SIGINT" {
  1315  		signal = "SIGTERM"
  1316  	}
  1317  
  1318  	sig, err := signals.Parse(signal)
  1319  	if err != nil {
  1320  		return fmt.Errorf("failed to parse signal: %v", err)
  1321  	}
  1322  
  1323  	return h.Kill(timeout, sig)
  1324  }
  1325  
  1326  func (d *Driver) DestroyTask(taskID string, force bool) error {
  1327  	h, ok := d.tasks.Get(taskID)
  1328  	if !ok {
  1329  		return drivers.ErrTaskNotFound
  1330  	}
  1331  
  1332  	c, err := h.client.InspectContainer(h.containerID)
  1333  	if err != nil {
  1334  		switch err.(type) {
  1335  		case *docker.NoSuchContainer:
  1336  			h.logger.Info("container was removed out of band, will proceed with DestroyTask",
  1337  				"error", err)
  1338  		default:
  1339  			return fmt.Errorf("failed to inspect container state: %v", err)
  1340  		}
  1341  	} else {
  1342  		if c.State.Running {
  1343  			if !force {
  1344  				return fmt.Errorf("must call StopTask for the given task before Destroy or set force to true")
  1345  			}
  1346  			if err := h.client.StopContainer(h.containerID, 0); err != nil {
  1347  				h.logger.Warn("failed to stop container during destroy", "error", err)
  1348  			}
  1349  		}
  1350  
  1351  		if h.removeContainerOnExit {
  1352  			if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, RemoveVolumes: true, Force: true}); err != nil {
  1353  				h.logger.Error("error removing container", "error", err)
  1354  			}
  1355  		} else {
  1356  			h.logger.Debug("not removing container due to config")
  1357  		}
  1358  	}
  1359  
  1360  	if err := d.cleanupImage(h); err != nil {
  1361  		h.logger.Error("failed to cleanup image after destroying container",
  1362  			"error", err)
  1363  	}
  1364  
  1365  	d.tasks.Delete(taskID)
  1366  	return nil
  1367  }
  1368  
  1369  // cleanupImage removes a Docker image. No error is returned if the image
  1370  // doesn't exist or is still in use. Requires the global client to already be
  1371  // initialized.
  1372  func (d *Driver) cleanupImage(handle *taskHandle) error {
  1373  	if !d.config.GC.Image {
  1374  		return nil
  1375  	}
  1376  
  1377  	d.coordinator.RemoveImage(handle.containerImage, handle.task.ID)
  1378  
  1379  	return nil
  1380  }
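
// Illustrative sketch, not part of the original source: the image cleanup
// above is gated on the plugin's gc.image setting, which in a Nomad client
// configuration looks roughly like the following (values are examples only):
//
//	plugin "docker" {
//	  config {
//	    gc {
//	      image       = true
//	      image_delay = "3m"
//	    }
//	  }
//	}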
  1381  
  1382  func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) {
  1383  	h, ok := d.tasks.Get(taskID)
  1384  	if !ok {
  1385  		return nil, drivers.ErrTaskNotFound
  1386  	}
  1387  
  1388  	container, err := client.InspectContainer(h.containerID)
  1389  	if err != nil {
  1390  		return nil, fmt.Errorf("failed to inspect container %q: %v", h.containerID, err)
  1391  	}
  1392  	status := &drivers.TaskStatus{
  1393  		ID:          h.task.ID,
  1394  		Name:        h.task.Name,
  1395  		StartedAt:   container.State.StartedAt,
  1396  		CompletedAt: container.State.FinishedAt,
  1397  		DriverAttributes: map[string]string{
  1398  			"container_id": container.ID,
  1399  		},
  1400  		NetworkOverride: h.net,
  1401  		ExitResult:      h.ExitResult(),
  1402  	}
  1403  
  1404  	status.State = drivers.TaskStateUnknown
  1405  	if container.State.Running {
  1406  		status.State = drivers.TaskStateRunning
  1407  	}
  1408  	if container.State.Dead {
  1409  		status.State = drivers.TaskStateExited
  1410  	}
  1411  
  1412  	return status, nil
  1413  }
  1414  
  1415  func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) {
  1416  	h, ok := d.tasks.Get(taskID)
  1417  	if !ok {
  1418  		return nil, drivers.ErrTaskNotFound
  1419  	}
  1420  
  1421  	return h.Stats(ctx, interval)
  1422  }
  1423  
  1424  func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) {
  1425  	return d.eventer.TaskEvents(ctx)
  1426  }
  1427  
  1428  func (d *Driver) SignalTask(taskID string, signal string) error {
  1429  	h, ok := d.tasks.Get(taskID)
  1430  	if !ok {
  1431  		return drivers.ErrTaskNotFound
  1432  	}
  1433  
  1434  	sig, err := signals.Parse(signal)
  1435  	if err != nil {
  1436  		return fmt.Errorf("failed to parse signal: %v", err)
  1437  	}
  1438  
  1439  	return h.Signal(sig)
  1440  }
  1441  
  1442  func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) {
  1443  	h, ok := d.tasks.Get(taskID)
  1444  	if !ok {
  1445  		return nil, drivers.ErrTaskNotFound
  1446  	}
  1447  
  1448  	if len(cmd) == 0 {
  1449  		return nil, fmt.Errorf("cmd is required, but was empty")
  1450  	}
  1451  
  1452  	ctx, cancel := context.WithTimeout(context.Background(), timeout)
  1453  	defer cancel()
  1454  
  1455  	return h.Exec(ctx, cmd[0], cmd[1:])
  1456  }
  1457  
  1458  var _ drivers.ExecTaskStreamingDriver = (*Driver)(nil)
  1459  
  1460  func (d *Driver) ExecTaskStreaming(ctx context.Context, taskID string, opts *drivers.ExecOptions) (*drivers.ExitResult, error) {
  1461  	defer opts.Stdout.Close()
  1462  	defer opts.Stderr.Close()
  1463  
  1464  	done := make(chan interface{})
  1465  	defer close(done)
  1466  
  1467  	h, ok := d.tasks.Get(taskID)
  1468  	if !ok {
  1469  		return nil, drivers.ErrTaskNotFound
  1470  	}
  1471  
  1472  	if len(opts.Command) == 0 {
  1473  		return nil, fmt.Errorf("command is required but was empty")
  1474  	}
  1475  
  1476  	createExecOpts := docker.CreateExecOptions{
  1477  		AttachStdin:  true,
  1478  		AttachStdout: true,
  1479  		AttachStderr: true,
  1480  		Tty:          opts.Tty,
  1481  		Cmd:          opts.Command,
  1482  		Container:    h.containerID,
  1483  		Context:      ctx,
  1484  	}
  1485  	exec, err := h.client.CreateExec(createExecOpts)
  1486  	if err != nil {
  1487  		return nil, fmt.Errorf("failed to create exec object: %v", err)
  1488  	}
  1489  
  1490  	go func() {
  1491  		for {
  1492  			select {
  1493  			case <-ctx.Done():
  1494  				return
  1495  			case <-done:
  1496  				return
  1497  			case s, ok := <-opts.ResizeCh:
  1498  				if !ok {
  1499  					return
  1500  				}
  1501  				client.ResizeExecTTY(exec.ID, s.Height, s.Width)
  1502  			}
  1503  		}
  1504  	}()
  1505  
  1506  	startOpts := docker.StartExecOptions{
  1507  		Detach: false,
  1508  
  1509  		// When running with a TTY we must use a raw terminal.
  1510  		// Otherwise we set RawTerminal to false so the docker client
  1511  		// can interpret the special stdout/stderr messages.
  1512  		Tty:         opts.Tty,
  1513  		RawTerminal: opts.Tty,
  1514  
  1515  		InputStream:  opts.Stdin,
  1516  		OutputStream: opts.Stdout,
  1517  		ErrorStream:  opts.Stderr,
  1518  		Context:      ctx,
  1519  	}
  1520  	if err := client.StartExec(exec.ID, startOpts); err != nil {
  1521  		return nil, fmt.Errorf("failed to start exec: %v", err)
  1522  	}
  1523  
  1524  	// StartExec returns after the process completes, but InspectExec seems to
  1525  	// have a delay in reporting the exit status code, so poll briefly.
  1526  
  1527  	const execTerminatingTimeout = 3 * time.Second
  1528  	start := time.Now()
  1529  	var res *docker.ExecInspect
  1530  	for (res == nil || res.Running) && time.Since(start) <= execTerminatingTimeout {
  1531  		res, err = client.InspectExec(exec.ID)
  1532  		if err != nil {
  1533  			return nil, fmt.Errorf("failed to inspect exec result: %v", err)
  1534  		}
  1535  		time.Sleep(50 * time.Millisecond)
  1536  	}
  1537  
  1538  	if res == nil || res.Running {
  1539  		return nil, fmt.Errorf("failed to retrieve exec result")
  1540  	}
  1541  
  1542  	return &drivers.ExitResult{
  1543  		ExitCode: res.ExitCode,
  1544  	}, nil
  1545  }
  1546  
  1547  // dockerClients creates two *docker.Client, one for long-running operations and
  1548  // the other for shorter operations. In test/dev mode we can use env vars to
  1549  // connect to the docker daemon. In production mode we read docker.endpoint
  1550  // from the config file.
  1551  func (d *Driver) dockerClients() (*docker.Client, *docker.Client, error) {
  1552  	createClientsLock.Lock()
  1553  	defer createClientsLock.Unlock()
  1554  
  1555  	if client != nil && waitClient != nil {
  1556  		return client, waitClient, nil
  1557  	}
  1558  
  1559  	var err error
  1560  
  1561  	// Only initialize the client if it hasn't yet been done
  1562  	if client == nil {
  1563  		client, err = d.newDockerClient(dockerTimeout)
  1564  		if err != nil {
  1565  			return nil, nil, err
  1566  		}
  1567  	}
  1568  
  1569  	// Only initialize the waitClient if it hasn't yet been done
  1570  	if waitClient == nil {
  1571  		waitClient, err = d.newDockerClient(0 * time.Minute)
  1572  		if err != nil {
  1573  			return nil, nil, err
  1574  		}
  1575  	}
  1576  
  1577  	return client, waitClient, nil
  1578  }
  1579  
  1580  // newDockerClient creates a new *docker.Client with a configurable timeout
  1581  func (d *Driver) newDockerClient(timeout time.Duration) (*docker.Client, error) {
  1582  	var err error
  1583  	var merr multierror.Error
  1584  	var newClient *docker.Client
  1585  
  1586  	// Default to using whatever is configured in docker.endpoint. If this is
  1587  	// not specified we'll fall back on NewClientFromEnv which reads config from
  1588  	// the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and
  1589  	// DOCKER_CERT_PATH. This allows us to lock down the config in production
  1590  	// but also accept the standard ENV configs for dev and test.
  1591  	dockerEndpoint := d.config.Endpoint
  1592  	if dockerEndpoint != "" {
  1593  		cert := d.config.TLS.Cert
  1594  		key := d.config.TLS.Key
  1595  		ca := d.config.TLS.CA
  1596  
  1597  		if cert+key+ca != "" {
  1598  			d.logger.Debug("using TLS client connection", "endpoint", dockerEndpoint)
  1599  			newClient, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca)
  1600  			if err != nil {
  1601  				merr.Errors = append(merr.Errors, err)
  1602  			}
  1603  		} else {
  1604  			d.logger.Debug("using standard client connection", "endpoint", dockerEndpoint)
  1605  			newClient, err = docker.NewClient(dockerEndpoint)
  1606  			if err != nil {
  1607  				merr.Errors = append(merr.Errors, err)
  1608  			}
  1609  		}
  1610  	} else {
  1611  		d.logger.Debug("using client connection initialized from environment")
  1612  		newClient, err = docker.NewClientFromEnv()
  1613  		if err != nil {
  1614  			merr.Errors = append(merr.Errors, err)
  1615  		}
  1616  	}
  1617  
  1618  	if timeout != 0 && newClient != nil {
  1619  		newClient.SetTimeout(timeout)
  1620  	}
  1621  	return newClient, merr.ErrorOrNil()
  1622  }
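
// Illustrative sketch, not part of the original source: the two configuration
// paths handled above. In dev/test the standard Docker environment variables
// are enough for NewClientFromEnv, e.g.:
//
//	export DOCKER_HOST=tcp://127.0.0.1:2376
//	export DOCKER_TLS_VERIFY=1
//	export DOCKER_CERT_PATH=/etc/docker/certs
//
// while in production the endpoint (and optional TLS material) would typically
// be pinned in the plugin config; the paths below are examples only:
//
//	plugin "docker" {
//	  config {
//	    endpoint = "unix:///var/run/docker.sock"
//	    tls {
//	      cert = "/etc/nomad/docker.crt"
//	      key  = "/etc/nomad/docker.key"
//	      ca   = "/etc/nomad/docker.ca"
//	    }
//	  }
//	}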
  1623  
  1624  func sliceMergeUlimit(ulimitsRaw map[string]string) ([]docker.ULimit, error) {
  1625  	var ulimits []docker.ULimit
  1626  
  1627  	for name, ulimitRaw := range ulimitsRaw {
  1628  		if len(ulimitRaw) == 0 {
  1629  			return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %q, cannot be empty", name, ulimitRaw)
  1630  		}
  1631  		// hard limit is optional
  1632  		if !strings.Contains(ulimitRaw, ":") {
  1633  			ulimitRaw = ulimitRaw + ":" + ulimitRaw
  1634  		}
  1635  
  1636  		split := strings.SplitN(ulimitRaw, ":", 2)
  1637  		if len(split) < 2 {
  1638  			return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %v", name, ulimitRaw)
  1639  		}
  1640  		soft, err := strconv.Atoi(split[0])
  1641  		if err != nil {
  1642  			return []docker.ULimit{}, fmt.Errorf("Malformed soft ulimit %v: %v", name, ulimitRaw)
  1643  		}
  1644  		hard, err := strconv.Atoi(split[1])
  1645  		if err != nil {
  1646  			return []docker.ULimit{}, fmt.Errorf("Malformed hard ulimit %v: %v", name, ulimitRaw)
  1647  		}
  1648  
  1649  		ulimit := docker.ULimit{
  1650  			Name: name,
  1651  			Soft: int64(soft),
  1652  			Hard: int64(hard),
  1653  		}
  1654  		ulimits = append(ulimits, ulimit)
  1655  	}
  1656  	return ulimits, nil
  1657  }
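
// exampleSliceMergeUlimit is an illustrative sketch, not part of the original
// driver, showing how the "soft:hard" ulimit strings above are parsed; the map
// keys and values are made up.
func exampleSliceMergeUlimit() {
	ulimits, err := sliceMergeUlimit(map[string]string{
		"nofile": "4096:8192", // soft 4096, hard 8192
		"nproc":  "1024",      // hard limit omitted, so soft == hard == 1024
	})
	if err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", ulimits)
	// e.g. [{Name:nofile Soft:4096 Hard:8192} {Name:nproc Soft:1024 Hard:1024}]
}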
  1658  
  1659  func isDockerTransientError(err error) bool {
  1660  	if err == nil {
  1661  		return false
  1662  	}
  1663  
  1664  	errMsg := err.Error()
  1665  	for _, te := range dockerTransientErrs {
  1666  		if strings.Contains(errMsg, te) {
  1667  			return true
  1668  		}
  1669  	}
  1670  
  1671  	return false
  1672  }
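
// withTransientRetry is an illustrative sketch, not part of the original
// driver, showing how isDockerTransientError could back a simple retry loop;
// the attempt count and backoff values are made up.
func withTransientRetry(op func() error) error {
	var err error
	for attempt := 0; attempt < 3; attempt++ {
		if err = op(); err == nil || !isDockerTransientError(err) {
			return err
		}
		// Transient daemon error (timeout, EOF, API error 500): back off and retry.
		time.Sleep(time.Duration(attempt+1) * time.Second)
	}
	return err
}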