github.com/ncodes/nomad@v0.5.7-0.20170403112158-97adf4a74fb3/client/driver/docker.go (about)

     1  package driver
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"log"
     7  	"net"
     8  	"os"
     9  	"path/filepath"
    10  	"runtime"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"syscall"
    15  	"time"
    16  
    17  	docker "github.com/fsouza/go-dockerclient"
    18  
    19  	"github.com/docker/docker/cli/config/configfile"
    20  	"github.com/docker/docker/reference"
    21  	"github.com/docker/docker/registry"
    22  
    23  	"github.com/hashicorp/go-multierror"
    24  	"github.com/hashicorp/go-plugin"
    25  	"github.com/ncodes/nomad/client/allocdir"
    26  	"github.com/ncodes/nomad/client/config"
    27  	"github.com/ncodes/nomad/client/driver/env"
    28  	"github.com/ncodes/nomad/client/driver/executor"
    29  	dstructs "github.com/ncodes/nomad/client/driver/structs"
    30  	cstructs "github.com/ncodes/nomad/client/structs"
    31  	"github.com/ncodes/nomad/helper"
    32  	"github.com/ncodes/nomad/helper/fields"
    33  	shelpers "github.com/ncodes/nomad/helper/stats"
    34  	"github.com/ncodes/nomad/nomad/structs"
    35  	"github.com/mitchellh/mapstructure"
    36  )
    37  
    38  var (
    39  	// We store the clients globally to cache the connection to the docker daemon.
    40  	createClients sync.Once
    41  
    42  	// client is a docker client with a timeout of 1 minute. This is for doing
    43  	// all operations with the docker daemon besides which are not long running
    44  	// such as creating, killing containers, etc.
    45  	client *docker.Client
    46  
    47  	// waitClient is a docker client with no timeouts. This is used for long
    48  	// running operations such as waiting on containers and collect stats
    49  	waitClient *docker.Client
    50  
    51  	// The statistics the Docker driver exposes
    52  	DockerMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage"}
    53  	DockerMeasuredCpuStats = []string{"Throttled Periods", "Throttled Time", "Percent"}
    54  
    55  	// recoverableErrTimeouts returns a recoverable error if the error was due
    56  	// to timeouts
    57  	recoverableErrTimeouts = func(err error) error {
    58  		r := false
    59  		if strings.Contains(err.Error(), "Client.Timeout exceeded while awaiting headers") ||
    60  			strings.Contains(err.Error(), "EOF") {
    61  			r = true
    62  		}
    63  		return structs.NewRecoverableError(err, r)
    64  	}
    65  )
    66  
const (
	// NoSuchContainerError is returned by the docker daemon if the container
	// does not exist.
	NoSuchContainerError = "No such container"

	// The key populated in Node Attributes to indicate presence of the Docker
	// driver
	dockerDriverAttr = "driver.docker"

	// dockerSELinuxLabelConfigOption is the key for configuring the
	// SELinux label for binds.
	dockerSELinuxLabelConfigOption = "docker.volumes.selinuxlabel"

	// dockerVolumesConfigOption is the key for enabling the use of custom
	// bind volumes to arbitrary host paths.
	dockerVolumesConfigOption  = "docker.volumes.enabled"
	dockerVolumesConfigDefault = true

	// dockerPrivilegedConfigOption is the key for running containers in
	// Docker's privileged mode.
	dockerPrivilegedConfigOption = "docker.privileged.enabled"

	// dockerCleanupImageConfigOption is the key for whether or not to
	// cleanup images after the task exits.
	dockerCleanupImageConfigOption  = "docker.cleanup.image"
	dockerCleanupImageConfigDefault = true

	// dockerImageRemoveDelayConfigOption is the key for the duration handed
	// to the docker coordinator as its removeDelay (presumably how long an
	// unused image is kept before it is removed -- confirm against the
	// coordinator). dockerImageRemoveDelayConfigDefault is used when the key
	// is not set.
	dockerImageRemoveDelayConfigOption  = "docker.cleanup.image.delay"
	dockerImageRemoveDelayConfigDefault = 3 * time.Minute

	// dockerTimeout is the length of time a request can be outstanding before
	// it is timed out.
	dockerTimeout = 5 * time.Minute

	// dockerImageResKey is the CreatedResources key for docker images
	dockerImageResKey = "image"
)
   106  
// DockerDriver is the driver implementation for Docker tasks. It embeds the
// shared DriverContext and carries state computed by Prestart for later use
// by Start.
type DockerDriver struct {
	DriverContext

	// driverConfig is the parsed task configuration and imageID the ID of
	// the image to run; both are set by Prestart.
	driverConfig *DockerDriverConfig
	imageID      string

	// A tri-state boolean to know if the fingerprinting has happened and
	// whether it has been successful
	fingerprintSuccess *bool
}
   117  
// DockerDriverAuth holds the registry credentials decoded from one entry of
// the task's "auth" configuration list.
type DockerDriverAuth struct {
	Username      string `mapstructure:"username"`       // username for the registry
	Password      string `mapstructure:"password"`       // password to access the registry
	Email         string `mapstructure:"email"`          // email address of the user who is allowed to access the registry
	ServerAddress string `mapstructure:"server_address"` // server address of the registry
}
   124  
// DockerLoggingOpts describes one entry of the task's "logging" configuration
// list.
type DockerLoggingOpts struct {
	// Type is the name of the docker log driver to use.
	Type string `mapstructure:"type"`
	// ConfigRaw is the list-of-maps form decoded from "config".
	ConfigRaw []map[string]string `mapstructure:"config"`
	// Config is ConfigRaw merged into a single map; it is not decoded but
	// filled in by DockerDriverConfig.Validate.
	Config map[string]string `mapstructure:"-"`
}
   130  
// DockerDriverConfig is the docker-specific task configuration as decoded
// from the task's config map. The *Raw fields hold the decoded list-of-maps
// form; their merged counterparts are populated by Validate.
type DockerDriverConfig struct {
	ImageName        string              `mapstructure:"image"`              // Container's Image Name
	LoadImage        string              `mapstructure:"load"`               // LoadImage is a path to an image archive file
	Command          string              `mapstructure:"command"`            // The Command to run when the container starts up
	Args             []string            `mapstructure:"args"`               // The arguments to the Command
	IpcMode          string              `mapstructure:"ipc_mode"`           // The IPC mode of the container - host and none
	NetworkMode      string              `mapstructure:"network_mode"`       // The network mode of the container - host, nat and none
	NetworkAliases   []string            `mapstructure:"network_aliases"`    // The network-scoped alias for the container
	PidMode          string              `mapstructure:"pid_mode"`           // The PID mode of the container - host and none
	UTSMode          string              `mapstructure:"uts_mode"`           // The UTS mode of the container - host and none
	UsernsMode       string              `mapstructure:"userns_mode"`        // The User namespace mode of the container - host and none
	PortMapRaw       []map[string]int    `mapstructure:"port_map"`           // Decoded list-of-maps form of port_map; merged into PortMap by Validate
	PortMap          map[string]int      `mapstructure:"-"`                  // A map of host port labels and the ports exposed on the container
	Privileged       bool                `mapstructure:"privileged"`         // Flag to run the container in privileged mode
	DNSServers       []string            `mapstructure:"dns_servers"`        // DNS Server for containers
	DNSSearchDomains []string            `mapstructure:"dns_search_domains"` // DNS Search domains for containers
	Hostname         string              `mapstructure:"hostname"`           // Hostname for containers
	LabelsRaw        []map[string]string `mapstructure:"labels"`             // Decoded list-of-maps form of labels; merged into Labels by Validate
	Labels           map[string]string   `mapstructure:"-"`                  // Labels to set when the container starts up
	Auth             []DockerDriverAuth  `mapstructure:"auth"`               // Authentication credentials for a private Docker registry
	TTY              bool                `mapstructure:"tty"`                // Allocate a Pseudo-TTY
	Interactive      bool                `mapstructure:"interactive"`        // Keep STDIN open even if not attached
	ShmSize          int64               `mapstructure:"shm_size"`           // Size of /dev/shm of the container in bytes
	WorkDir          string              `mapstructure:"work_dir"`           // Working directory inside the container
	Logging          []DockerLoggingOpts `mapstructure:"logging"`            // Logging options for syslog server
	Volumes          []string            `mapstructure:"volumes"`            // Host-Volumes to mount in, syntax: /path/to/host/directory:/destination/path/in/container
	VolumeDriver     string              `mapstructure:"volume_driver"`      // Docker volume driver used for the container's volumes
	ForcePull        bool                `mapstructure:"force_pull"`         // Always force pull before running image, useful if your tags are mutable
}
   160  
   161  // Validate validates a docker driver config
   162  func (c *DockerDriverConfig) Validate() error {
   163  	if c.ImageName == "" {
   164  		return fmt.Errorf("Docker Driver needs an image name")
   165  	}
   166  
   167  	c.PortMap = mapMergeStrInt(c.PortMapRaw...)
   168  	c.Labels = mapMergeStrStr(c.LabelsRaw...)
   169  	if len(c.Logging) > 0 {
   170  		c.Logging[0].Config = mapMergeStrStr(c.Logging[0].ConfigRaw...)
   171  	}
   172  	return nil
   173  }
   174  
   175  // NewDockerDriverConfig returns a docker driver config by parsing the HCL
   176  // config
   177  func NewDockerDriverConfig(task *structs.Task, env *env.TaskEnvironment) (*DockerDriverConfig, error) {
   178  	var dconf DockerDriverConfig
   179  
   180  	if err := mapstructure.WeakDecode(task.Config, &dconf); err != nil {
   181  		return nil, err
   182  	}
   183  
   184  	// Interpolate everthing that is a string
   185  	dconf.ImageName = env.ReplaceEnv(dconf.ImageName)
   186  	dconf.Command = env.ReplaceEnv(dconf.Command)
   187  	dconf.IpcMode = env.ReplaceEnv(dconf.IpcMode)
   188  	dconf.NetworkMode = env.ReplaceEnv(dconf.NetworkMode)
   189  	dconf.NetworkAliases = env.ParseAndReplace(dconf.NetworkAliases)
   190  	dconf.PidMode = env.ReplaceEnv(dconf.PidMode)
   191  	dconf.UTSMode = env.ReplaceEnv(dconf.UTSMode)
   192  	dconf.Hostname = env.ReplaceEnv(dconf.Hostname)
   193  	dconf.WorkDir = env.ReplaceEnv(dconf.WorkDir)
   194  	dconf.LoadImage = env.ReplaceEnv(dconf.LoadImage)
   195  	dconf.Volumes = env.ParseAndReplace(dconf.Volumes)
   196  	dconf.VolumeDriver = env.ReplaceEnv(dconf.VolumeDriver)
   197  	dconf.DNSServers = env.ParseAndReplace(dconf.DNSServers)
   198  	dconf.DNSSearchDomains = env.ParseAndReplace(dconf.DNSSearchDomains)
   199  
   200  	for _, m := range dconf.LabelsRaw {
   201  		for k, v := range m {
   202  			delete(m, k)
   203  			m[env.ReplaceEnv(k)] = env.ReplaceEnv(v)
   204  		}
   205  	}
   206  
   207  	for i, a := range dconf.Auth {
   208  		dconf.Auth[i].Username = env.ReplaceEnv(a.Username)
   209  		dconf.Auth[i].Password = env.ReplaceEnv(a.Password)
   210  		dconf.Auth[i].Email = env.ReplaceEnv(a.Email)
   211  		dconf.Auth[i].ServerAddress = env.ReplaceEnv(a.ServerAddress)
   212  	}
   213  
   214  	for i, l := range dconf.Logging {
   215  		dconf.Logging[i].Type = env.ReplaceEnv(l.Type)
   216  		for _, c := range l.ConfigRaw {
   217  			for k, v := range c {
   218  				delete(c, k)
   219  				c[env.ReplaceEnv(k)] = env.ReplaceEnv(v)
   220  			}
   221  		}
   222  	}
   223  
   224  	for _, m := range dconf.PortMapRaw {
   225  		for k, v := range m {
   226  			delete(m, k)
   227  			m[env.ReplaceEnv(k)] = v
   228  		}
   229  	}
   230  
   231  	// Remove any http
   232  	if strings.Contains(dconf.ImageName, "https://") {
   233  		dconf.ImageName = strings.Replace(dconf.ImageName, "https://", "", 1)
   234  	}
   235  
   236  	if err := dconf.Validate(); err != nil {
   237  		return nil, err
   238  	}
   239  	return &dconf, nil
   240  }
   241  
// dockerPID mirrors the identifying fields of a DockerHandle in exported,
// serializable form.
// NOTE(review): presumably this is the persisted handle state used to
// re-attach to a running container -- confirm against the handle's ID/Open
// code, which is outside this view.
type dockerPID struct {
	Version        string
	Image          string
	ImageID        string
	ContainerID    string
	KillTimeout    time.Duration
	MaxKillTimeout time.Duration
	PluginConfig   *PluginReattachConfig
}
   251  
// DockerHandle is the handle DockerDriver.Start returns for a container. It
// bundles the executor plugin, the docker clients and the identifiers of the
// container and image the task runs from. Start launches the handle's
// collectStats and run goroutines.
type DockerHandle struct {
	pluginClient      *plugin.Client
	executor          executor.Executor
	client            *docker.Client
	waitClient        *docker.Client
	logger            *log.Logger
	Image             string
	ImageID           string
	containerID       string
	version           string
	clkSpeed          float64
	killTimeout       time.Duration
	maxKillTimeout    time.Duration
	resourceUsageLock sync.RWMutex
	resourceUsage     *cstructs.TaskResourceUsage
	waitCh            chan *dstructs.WaitResult
	doneCh            chan bool
}
   270  
   271  func NewDockerDriver(ctx *DriverContext) Driver {
   272  	return &DockerDriver{DriverContext: *ctx}
   273  }
   274  
   275  func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
   276  	// Initialize docker API clients
   277  	client, _, err := d.dockerClients()
   278  	if err != nil {
   279  		if d.fingerprintSuccess == nil || *d.fingerprintSuccess {
   280  			d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err)
   281  		}
   282  		delete(node.Attributes, dockerDriverAttr)
   283  		d.fingerprintSuccess = helper.BoolToPtr(false)
   284  		return false, nil
   285  	}
   286  
   287  	// This is the first operation taken on the client so we'll try to
   288  	// establish a connection to the Docker daemon. If this fails it means
   289  	// Docker isn't available so we'll simply disable the docker driver.
   290  	env, err := client.Version()
   291  	if err != nil {
   292  		delete(node.Attributes, dockerDriverAttr)
   293  		if d.fingerprintSuccess == nil || *d.fingerprintSuccess {
   294  			d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon at %s: %s", client.Endpoint(), err)
   295  		}
   296  		d.fingerprintSuccess = helper.BoolToPtr(false)
   297  		return false, nil
   298  	}
   299  
   300  	node.Attributes[dockerDriverAttr] = "1"
   301  	node.Attributes["driver.docker.version"] = env.Get("Version")
   302  
   303  	privileged := d.config.ReadBoolDefault(dockerPrivilegedConfigOption, false)
   304  	if privileged {
   305  		node.Attributes[dockerPrivilegedConfigOption] = "1"
   306  	}
   307  
   308  	// Advertise if this node supports Docker volumes
   309  	if d.config.ReadBoolDefault(dockerVolumesConfigOption, dockerVolumesConfigDefault) {
   310  		node.Attributes["driver."+dockerVolumesConfigOption] = "1"
   311  	}
   312  
   313  	d.fingerprintSuccess = helper.BoolToPtr(true)
   314  	return true, nil
   315  }
   316  
   317  // Validate is used to validate the driver configuration
   318  func (d *DockerDriver) Validate(config map[string]interface{}) error {
   319  	fd := &fields.FieldData{
   320  		Raw: config,
   321  		Schema: map[string]*fields.FieldSchema{
   322  			"image": &fields.FieldSchema{
   323  				Type:     fields.TypeString,
   324  				Required: true,
   325  			},
   326  			"load": &fields.FieldSchema{
   327  				Type: fields.TypeString,
   328  			},
   329  			"command": &fields.FieldSchema{
   330  				Type: fields.TypeString,
   331  			},
   332  			"args": &fields.FieldSchema{
   333  				Type: fields.TypeArray,
   334  			},
   335  			"ipc_mode": &fields.FieldSchema{
   336  				Type: fields.TypeString,
   337  			},
   338  			"network_mode": &fields.FieldSchema{
   339  				Type: fields.TypeString,
   340  			},
   341  			"network_aliases": &fields.FieldSchema{
   342  				Type: fields.TypeArray,
   343  			},
   344  			"pid_mode": &fields.FieldSchema{
   345  				Type: fields.TypeString,
   346  			},
   347  			"uts_mode": &fields.FieldSchema{
   348  				Type: fields.TypeString,
   349  			},
   350  			"userns_mode": &fields.FieldSchema{
   351  				Type: fields.TypeString,
   352  			},
   353  			"port_map": &fields.FieldSchema{
   354  				Type: fields.TypeArray,
   355  			},
   356  			"privileged": &fields.FieldSchema{
   357  				Type: fields.TypeBool,
   358  			},
   359  			"dns_servers": &fields.FieldSchema{
   360  				Type: fields.TypeArray,
   361  			},
   362  			"dns_search_domains": &fields.FieldSchema{
   363  				Type: fields.TypeArray,
   364  			},
   365  			"hostname": &fields.FieldSchema{
   366  				Type: fields.TypeString,
   367  			},
   368  			"labels": &fields.FieldSchema{
   369  				Type: fields.TypeArray,
   370  			},
   371  			"auth": &fields.FieldSchema{
   372  				Type: fields.TypeArray,
   373  			},
   374  			// COMPAT: Remove in 0.6.0. SSL is no longer needed
   375  			"ssl": &fields.FieldSchema{
   376  				Type: fields.TypeBool,
   377  			},
   378  			"tty": &fields.FieldSchema{
   379  				Type: fields.TypeBool,
   380  			},
   381  			"interactive": &fields.FieldSchema{
   382  				Type: fields.TypeBool,
   383  			},
   384  			"shm_size": &fields.FieldSchema{
   385  				Type: fields.TypeInt,
   386  			},
   387  			"work_dir": &fields.FieldSchema{
   388  				Type: fields.TypeString,
   389  			},
   390  			"logging": &fields.FieldSchema{
   391  				Type: fields.TypeArray,
   392  			},
   393  			"volumes": &fields.FieldSchema{
   394  				Type: fields.TypeArray,
   395  			},
   396  			"volume_driver": &fields.FieldSchema{
   397  				Type: fields.TypeString,
   398  			},
   399  			"force_pull": &fields.FieldSchema{
   400  				Type: fields.TypeBool,
   401  			},
   402  		},
   403  	}
   404  
   405  	if err := fd.Validate(); err != nil {
   406  		return err
   407  	}
   408  
   409  	return nil
   410  }
   411  
   412  func (d *DockerDriver) Abilities() DriverAbilities {
   413  	return DriverAbilities{
   414  		SendSignals: true,
   415  	}
   416  }
   417  
// FSIsolation reports the filesystem isolation mode of the docker driver,
// which is always cstructs.FSIsolationImage.
func (d *DockerDriver) FSIsolation() cstructs.FSIsolation {
	return cstructs.FSIsolationImage
}
   421  
   422  // getDockerCoordinator returns the docker coordinator and the caller ID to use when
   423  // interacting with the coordinator
   424  func (d *DockerDriver) getDockerCoordinator(client *docker.Client) (*dockerCoordinator, string) {
   425  	config := &dockerCoordinatorConfig{
   426  		client:      client,
   427  		cleanup:     d.config.ReadBoolDefault(dockerCleanupImageConfigOption, dockerCleanupImageConfigDefault),
   428  		logger:      d.logger,
   429  		removeDelay: d.config.ReadDurationDefault(dockerImageRemoveDelayConfigOption, dockerImageRemoveDelayConfigDefault),
   430  	}
   431  
   432  	return GetDockerCoordinator(config), fmt.Sprintf("%s-%s", d.DriverContext.allocID, d.DriverContext.taskName)
   433  }
   434  
   435  func (d *DockerDriver) Prestart(ctx *ExecContext, task *structs.Task) (*CreatedResources, error) {
   436  	driverConfig, err := NewDockerDriverConfig(task, d.taskEnv)
   437  	if err != nil {
   438  		return nil, err
   439  	}
   440  
   441  	// Set state needed by Start()
   442  	d.driverConfig = driverConfig
   443  
   444  	// Initialize docker API clients
   445  	client, _, err := d.dockerClients()
   446  	if err != nil {
   447  		return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
   448  	}
   449  
   450  	// Ensure the image is available
   451  	id, err := d.createImage(driverConfig, client, ctx.TaskDir)
   452  	if err != nil {
   453  		return nil, err
   454  	}
   455  
   456  	res := NewCreatedResources()
   457  	res.Add(dockerImageResKey, id)
   458  	d.imageID = id
   459  	return res, nil
   460  }
   461  
   462  func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
   463  
   464  	pluginLogFile := filepath.Join(ctx.TaskDir.Dir, "executor.out")
   465  	executorConfig := &dstructs.ExecutorConfig{
   466  		LogFile:  pluginLogFile,
   467  		LogLevel: d.config.LogLevel,
   468  	}
   469  
   470  	exec, pluginClient, err := createExecutor(d.config.LogOutput, d.config, executorConfig)
   471  	if err != nil {
   472  		return nil, err
   473  	}
   474  	executorCtx := &executor.ExecutorContext{
   475  		TaskEnv:        d.taskEnv,
   476  		Task:           task,
   477  		Driver:         "docker",
   478  		AllocID:        d.DriverContext.allocID,
   479  		LogDir:         ctx.TaskDir.LogDir,
   480  		TaskDir:        ctx.TaskDir.Dir,
   481  		PortLowerBound: d.config.ClientMinPort,
   482  		PortUpperBound: d.config.ClientMaxPort,
   483  	}
   484  	if err := exec.SetContext(executorCtx); err != nil {
   485  		pluginClient.Kill()
   486  		return nil, fmt.Errorf("failed to set executor context: %v", err)
   487  	}
   488  
   489  	// Only launch syslog server if we're going to use it!
   490  	syslogAddr := ""
   491  	if runtime.GOOS == "darwin" && len(d.driverConfig.Logging) == 0 {
   492  		d.logger.Printf("[DEBUG] driver.docker: disabling syslog driver as Docker for Mac workaround")
   493  	} else if len(d.driverConfig.Logging) == 0 || d.driverConfig.Logging[0].Type == "syslog" {
   494  		ss, err := exec.LaunchSyslogServer()
   495  		if err != nil {
   496  			pluginClient.Kill()
   497  			return nil, fmt.Errorf("failed to start syslog collector: %v", err)
   498  		}
   499  		syslogAddr = ss.Addr
   500  	}
   501  
   502  	config, err := d.createContainerConfig(ctx, task, d.driverConfig, syslogAddr)
   503  	if err != nil {
   504  		d.logger.Printf("[ERR] driver.docker: failed to create container configuration for image %q (%q): %v", d.driverConfig.ImageName, d.imageID, err)
   505  		pluginClient.Kill()
   506  		return nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", d.driverConfig.ImageName, d.imageID, err)
   507  	}
   508  
   509  	container, err := d.createContainer(config)
   510  	if err != nil {
   511  		wrapped := fmt.Sprintf("Failed to create container: %v", err)
   512  		d.logger.Printf("[ERR] driver.docker: %s", wrapped)
   513  		pluginClient.Kill()
   514  		return nil, structs.WrapRecoverable(wrapped, err)
   515  	}
   516  
   517  	d.logger.Printf("[INFO] driver.docker: created container %s", container.ID)
   518  
   519  	// We don't need to start the container if the container is already running
   520  	// since we don't create containers which are already present on the host
   521  	// and are running
   522  	if !container.State.Running {
   523  		// Start the container
   524  		if err := d.startContainer(container); err != nil {
   525  			d.logger.Printf("[ERR] driver.docker: failed to start container %s: %s", container.ID, err)
   526  			pluginClient.Kill()
   527  			return nil, fmt.Errorf("Failed to start container %s: %s", container.ID, err)
   528  		}
   529  		d.logger.Printf("[INFO] driver.docker: started container %s", container.ID)
   530  	} else {
   531  		d.logger.Printf("[DEBUG] driver.docker: re-attaching to container %s with status %q",
   532  			container.ID, container.State.String())
   533  	}
   534  
   535  	// Return a driver handle
   536  	maxKill := d.DriverContext.config.MaxKillTimeout
   537  	h := &DockerHandle{
   538  		client:         client,
   539  		waitClient:     waitClient,
   540  		executor:       exec,
   541  		pluginClient:   pluginClient,
   542  		logger:         d.logger,
   543  		Image:          d.driverConfig.ImageName,
   544  		ImageID:        d.imageID,
   545  		containerID:    container.ID,
   546  		version:        d.config.Version,
   547  		killTimeout:    GetKillTimeout(task.KillTimeout, maxKill),
   548  		maxKillTimeout: maxKill,
   549  		doneCh:         make(chan bool),
   550  		waitCh:         make(chan *dstructs.WaitResult, 1),
   551  	}
   552  	if err := exec.SyncServices(consulContext(d.config, container.ID)); err != nil {
   553  		d.logger.Printf("[ERR] driver.docker: error registering services with consul for task: %q: %v", task.Name, err)
   554  	}
   555  	go h.collectStats()
   556  	go h.run()
   557  	return h, nil
   558  }
   559  
   560  func (d *DockerDriver) Cleanup(_ *ExecContext, res *CreatedResources) error {
   561  	retry := false
   562  	var merr multierror.Error
   563  	for key, resources := range res.Resources {
   564  		switch key {
   565  		case dockerImageResKey:
   566  			for _, value := range resources {
   567  				err := d.cleanupImage(value)
   568  				if err != nil {
   569  					if structs.IsRecoverable(err) {
   570  						retry = true
   571  					}
   572  					merr.Errors = append(merr.Errors, err)
   573  					continue
   574  				}
   575  
   576  				// Remove cleaned image from resources
   577  				res.Remove(dockerImageResKey, value)
   578  			}
   579  		default:
   580  			d.logger.Printf("[ERR] driver.docker: unknown resource to cleanup: %q", key)
   581  		}
   582  	}
   583  	return structs.NewRecoverableError(merr.ErrorOrNil(), retry)
   584  }
   585  
   586  // cleanupImage removes a Docker image. No error is returned if the image
   587  // doesn't exist or is still in use. Requires the global client to already be
   588  // initialized.
   589  func (d *DockerDriver) cleanupImage(imageID string) error {
   590  	if !d.config.ReadBoolDefault(dockerCleanupImageConfigOption, dockerCleanupImageConfigDefault) {
   591  		// Config says not to cleanup
   592  		return nil
   593  	}
   594  
   595  	coordinator, callerID := d.getDockerCoordinator(client)
   596  	coordinator.RemoveImage(imageID, callerID)
   597  
   598  	return nil
   599  }
   600  
   601  // dockerClients creates two *docker.Client, one for long running operations and
   602  // the other for shorter operations. In test / dev mode we can use ENV vars to
   603  // connect to the docker daemon. In production mode we will read docker.endpoint
   604  // from the config file.
   605  func (d *DockerDriver) dockerClients() (*docker.Client, *docker.Client, error) {
   606  	if client != nil && waitClient != nil {
   607  		return client, waitClient, nil
   608  	}
   609  
   610  	var err error
   611  	var merr multierror.Error
   612  	createClients.Do(func() {
   613  		// Default to using whatever is configured in docker.endpoint. If this is
   614  		// not specified we'll fall back on NewClientFromEnv which reads config from
   615  		// the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and
   616  		// DOCKER_CERT_PATH. This allows us to lock down the config in production
   617  		// but also accept the standard ENV configs for dev and test.
   618  		dockerEndpoint := d.config.Read("docker.endpoint")
   619  		if dockerEndpoint != "" {
   620  			cert := d.config.Read("docker.tls.cert")
   621  			key := d.config.Read("docker.tls.key")
   622  			ca := d.config.Read("docker.tls.ca")
   623  
   624  			if cert+key+ca != "" {
   625  				d.logger.Printf("[DEBUG] driver.docker: using TLS client connection to %s", dockerEndpoint)
   626  				client, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca)
   627  				if err != nil {
   628  					merr.Errors = append(merr.Errors, err)
   629  				}
   630  				waitClient, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca)
   631  				if err != nil {
   632  					merr.Errors = append(merr.Errors, err)
   633  				}
   634  			} else {
   635  				d.logger.Printf("[DEBUG] driver.docker: using standard client connection to %s", dockerEndpoint)
   636  				client, err = docker.NewClient(dockerEndpoint)
   637  				if err != nil {
   638  					merr.Errors = append(merr.Errors, err)
   639  				}
   640  				waitClient, err = docker.NewClient(dockerEndpoint)
   641  				if err != nil {
   642  					merr.Errors = append(merr.Errors, err)
   643  				}
   644  			}
   645  			client.SetTimeout(dockerTimeout)
   646  			return
   647  		}
   648  
   649  		d.logger.Println("[DEBUG] driver.docker: using client connection initialized from environment")
   650  		client, err = docker.NewClientFromEnv()
   651  		if err != nil {
   652  			merr.Errors = append(merr.Errors, err)
   653  		}
   654  		client.SetTimeout(dockerTimeout)
   655  
   656  		waitClient, err = docker.NewClientFromEnv()
   657  		if err != nil {
   658  			merr.Errors = append(merr.Errors, err)
   659  		}
   660  	})
   661  	return client, waitClient, merr.ErrorOrNil()
   662  }
   663  
   664  func (d *DockerDriver) containerBinds(driverConfig *DockerDriverConfig, taskDir *allocdir.TaskDir,
   665  	task *structs.Task) ([]string, error) {
   666  
   667  	allocDirBind := fmt.Sprintf("%s:%s", taskDir.SharedAllocDir, allocdir.SharedAllocContainerPath)
   668  	taskLocalBind := fmt.Sprintf("%s:%s", taskDir.LocalDir, allocdir.TaskLocalContainerPath)
   669  	secretDirBind := fmt.Sprintf("%s:%s", taskDir.SecretsDir, allocdir.TaskSecretsContainerPath)
   670  	binds := []string{allocDirBind, taskLocalBind, secretDirBind}
   671  
   672  	volumesEnabled := d.config.ReadBoolDefault(dockerVolumesConfigOption, dockerVolumesConfigDefault)
   673  
   674  	if !volumesEnabled && driverConfig.VolumeDriver != "" {
   675  		return nil, fmt.Errorf("%s is false; cannot use volume driver %q", dockerVolumesConfigOption, driverConfig.VolumeDriver)
   676  	}
   677  
   678  	for _, userbind := range driverConfig.Volumes {
   679  		parts := strings.Split(userbind, ":")
   680  		if len(parts) < 2 {
   681  			return nil, fmt.Errorf("invalid docker volume: %q", userbind)
   682  		}
   683  
   684  		// Resolve dotted path segments
   685  		parts[0] = filepath.Clean(parts[0])
   686  
   687  		// Absolute paths aren't always supported
   688  		if filepath.IsAbs(parts[0]) {
   689  			if !volumesEnabled {
   690  				// Disallow mounting arbitrary absolute paths
   691  				return nil, fmt.Errorf("%s is false; cannot mount host paths: %+q", dockerVolumesConfigOption, userbind)
   692  			}
   693  			binds = append(binds, userbind)
   694  			continue
   695  		}
   696  
   697  		// Relative paths are always allowed as they mount within a container
   698  		// When a VolumeDriver is set, we assume we receive a binding in the format volume-name:container-dest
   699  		// Otherwise, we assume we receive a relative path binding in the format relative/to/task:/also/in/container
   700  		if driverConfig.VolumeDriver == "" {
   701  			// Expand path relative to alloc dir
   702  			parts[0] = filepath.Join(taskDir.Dir, parts[0])
   703  		}
   704  
   705  		binds = append(binds, strings.Join(parts, ":"))
   706  	}
   707  
   708  	if selinuxLabel := d.config.Read(dockerSELinuxLabelConfigOption); selinuxLabel != "" {
   709  		// Apply SELinux Label to each volume
   710  		for i := range binds {
   711  			binds[i] = fmt.Sprintf("%s:%s", binds[i], selinuxLabel)
   712  		}
   713  	}
   714  
   715  	return binds, nil
   716  }
   717  
   718  // createContainerConfig initializes a struct needed to call docker.client.CreateContainer()
   719  func (d *DockerDriver) createContainerConfig(ctx *ExecContext, task *structs.Task,
   720  	driverConfig *DockerDriverConfig, syslogAddr string) (docker.CreateContainerOptions, error) {
   721  	var c docker.CreateContainerOptions
   722  	if task.Resources == nil {
   723  		// Guard against missing resources. We should never have been able to
   724  		// schedule a job without specifying this.
   725  		d.logger.Println("[ERR] driver.docker: task.Resources is empty")
   726  		return c, fmt.Errorf("task.Resources is empty")
   727  	}
   728  
   729  	binds, err := d.containerBinds(driverConfig, ctx.TaskDir, task)
   730  	if err != nil {
   731  		return c, err
   732  	}
   733  
   734  	config := &docker.Config{
   735  		Image:     d.imageID,
   736  		Hostname:  driverConfig.Hostname,
   737  		User:      task.User,
   738  		Tty:       driverConfig.TTY,
   739  		OpenStdin: driverConfig.Interactive,
   740  	}
   741  
   742  	if driverConfig.WorkDir != "" {
   743  		config.WorkingDir = driverConfig.WorkDir
   744  	}
   745  
   746  	memLimit := int64(task.Resources.MemoryMB) * 1024 * 1024
   747  
   748  	if len(driverConfig.Logging) == 0 {
   749  		if runtime.GOOS != "darwin" {
   750  			d.logger.Printf("[DEBUG] driver.docker: Setting default logging options to syslog and %s", syslogAddr)
   751  			driverConfig.Logging = []DockerLoggingOpts{
   752  				{Type: "syslog", Config: map[string]string{"syslog-address": syslogAddr}},
   753  			}
   754  		} else {
   755  			d.logger.Printf("[DEBUG] driver.docker: deferring logging to docker on Docker for Mac")
   756  		}
   757  	}
   758  
   759  	hostConfig := &docker.HostConfig{
   760  		// Convert MB to bytes. This is an absolute value.
   761  		Memory: memLimit,
   762  		// Convert Mhz to shares. This is a relative value.
   763  		CPUShares: int64(task.Resources.CPU),
   764  
   765  		// Binds are used to mount a host volume into the container. We mount a
   766  		// local directory for storage and a shared alloc directory that can be
   767  		// used to share data between different tasks in the same task group.
   768  		Binds: binds,
   769  
   770  		VolumeDriver: driverConfig.VolumeDriver,
   771  	}
   772  
   773  	// Windows does not support MemorySwap #2193
   774  	if runtime.GOOS != "windows" {
   775  		hostConfig.MemorySwap = memLimit // MemorySwap is memory + swap.
   776  	}
   777  
   778  	if len(driverConfig.Logging) != 0 {
   779  		d.logger.Printf("[DEBUG] driver.docker: Using config for logging: %+v", driverConfig.Logging[0])
   780  		hostConfig.LogConfig = docker.LogConfig{
   781  			Type:   driverConfig.Logging[0].Type,
   782  			Config: driverConfig.Logging[0].Config,
   783  		}
   784  	}
   785  
   786  	d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Name)
   787  	d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Name)
   788  	d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Name)
   789  
   790  	//  set privileged mode
   791  	hostPrivileged := d.config.ReadBoolDefault(dockerPrivilegedConfigOption, false)
   792  	if driverConfig.Privileged && !hostPrivileged {
   793  		return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`)
   794  	}
   795  	hostConfig.Privileged = driverConfig.Privileged
   796  
   797  	// set SHM size
   798  	if driverConfig.ShmSize != 0 {
   799  		hostConfig.ShmSize = driverConfig.ShmSize
   800  	}
   801  
   802  	// set DNS servers
   803  	for _, ip := range driverConfig.DNSServers {
   804  		if net.ParseIP(ip) != nil {
   805  			hostConfig.DNS = append(hostConfig.DNS, ip)
   806  		} else {
   807  			d.logger.Printf("[ERR] driver.docker: invalid ip address for container dns server: %s", ip)
   808  		}
   809  	}
   810  
   811  	// set DNS search domains
   812  	for _, domain := range driverConfig.DNSSearchDomains {
   813  		hostConfig.DNSSearch = append(hostConfig.DNSSearch, domain)
   814  	}
   815  
   816  	hostConfig.IpcMode = driverConfig.IpcMode
   817  	hostConfig.PidMode = driverConfig.PidMode
   818  	hostConfig.UTSMode = driverConfig.UTSMode
   819  	hostConfig.UsernsMode = driverConfig.UsernsMode
   820  
   821  	hostConfig.NetworkMode = driverConfig.NetworkMode
   822  	if hostConfig.NetworkMode == "" {
   823  		// docker default
   824  		d.logger.Printf("[DEBUG] driver.docker: networking mode not specified; defaulting to %s", defaultNetworkMode)
   825  		hostConfig.NetworkMode = defaultNetworkMode
   826  	}
   827  
   828  	// Setup port mapping and exposed ports
   829  	if len(task.Resources.Networks) == 0 {
   830  		d.logger.Println("[DEBUG] driver.docker: No network interfaces are available")
   831  		if len(driverConfig.PortMap) > 0 {
   832  			return c, fmt.Errorf("Trying to map ports but no network interface is available")
   833  		}
   834  	} else {
   835  		// TODO add support for more than one network
   836  		network := task.Resources.Networks[0]
   837  		publishedPorts := map[docker.Port][]docker.PortBinding{}
   838  		exposedPorts := map[docker.Port]struct{}{}
   839  
   840  		for _, port := range network.ReservedPorts {
   841  			// By default we will map the allocated port 1:1 to the container
   842  			containerPortInt := port.Value
   843  
   844  			// If the user has mapped a port using port_map we'll change it here
   845  			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
   846  				containerPortInt = mapped
   847  			}
   848  
   849  			hostPortStr := strconv.Itoa(port.Value)
   850  			containerPort := docker.Port(strconv.Itoa(containerPortInt))
   851  
   852  			publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr)
   853  			publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr)
   854  			d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)", network.IP, port.Value, port.Value)
   855  
   856  			exposedPorts[containerPort+"/tcp"] = struct{}{}
   857  			exposedPorts[containerPort+"/udp"] = struct{}{}
   858  			d.logger.Printf("[DEBUG] driver.docker: exposed port %d", port.Value)
   859  		}
   860  
   861  		for _, port := range network.DynamicPorts {
   862  			// By default we will map the allocated port 1:1 to the container
   863  			containerPortInt := port.Value
   864  
   865  			// If the user has mapped a port using port_map we'll change it here
   866  			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
   867  				containerPortInt = mapped
   868  			}
   869  
   870  			hostPortStr := strconv.Itoa(port.Value)
   871  			containerPort := docker.Port(strconv.Itoa(containerPortInt))
   872  
   873  			publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr)
   874  			publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr)
   875  			d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (mapped)", network.IP, port.Value, containerPortInt)
   876  
   877  			exposedPorts[containerPort+"/tcp"] = struct{}{}
   878  			exposedPorts[containerPort+"/udp"] = struct{}{}
   879  			d.logger.Printf("[DEBUG] driver.docker: exposed port %s", containerPort)
   880  		}
   881  
   882  		d.taskEnv.SetPortMap(driverConfig.PortMap)
   883  
   884  		hostConfig.PortBindings = publishedPorts
   885  		config.ExposedPorts = exposedPorts
   886  	}
   887  
   888  	d.taskEnv.Build()
   889  	parsedArgs := d.taskEnv.ParseAndReplace(driverConfig.Args)
   890  
   891  	// If the user specified a custom command to run, we'll inject it here.
   892  	if driverConfig.Command != "" {
   893  		// Validate command
   894  		if err := validateCommand(driverConfig.Command, "args"); err != nil {
   895  			return c, err
   896  		}
   897  
   898  		cmd := []string{driverConfig.Command}
   899  		if len(driverConfig.Args) != 0 {
   900  			cmd = append(cmd, parsedArgs...)
   901  		}
   902  		d.logger.Printf("[DEBUG] driver.docker: setting container startup command to: %s", strings.Join(cmd, " "))
   903  		config.Cmd = cmd
   904  	} else if len(driverConfig.Args) != 0 {
   905  		config.Cmd = parsedArgs
   906  	}
   907  
   908  	if len(driverConfig.Labels) > 0 {
   909  		config.Labels = driverConfig.Labels
   910  		d.logger.Printf("[DEBUG] driver.docker: applied labels on the container: %+v", config.Labels)
   911  	}
   912  
   913  	config.Env = d.taskEnv.EnvList()
   914  
   915  	containerName := fmt.Sprintf("%s-%s", task.Name, d.DriverContext.allocID)
   916  	d.logger.Printf("[DEBUG] driver.docker: setting container name to: %s", containerName)
   917  
   918  	var networkingConfig *docker.NetworkingConfig
   919  	if len(driverConfig.NetworkAliases) > 0 {
   920  		networkingConfig = &docker.NetworkingConfig{
   921  			EndpointsConfig: map[string]*docker.EndpointConfig{
   922  				hostConfig.NetworkMode: &docker.EndpointConfig{
   923  					Aliases: driverConfig.NetworkAliases,
   924  				},
   925  			},
   926  		}
   927  
   928  		d.logger.Printf("[DEBUG] driver.docker: using network_mode %q with network aliases: %v",
   929  			hostConfig.NetworkMode, strings.Join(driverConfig.NetworkAliases, ", "))
   930  	}
   931  
   932  	return docker.CreateContainerOptions{
   933  		Name:             containerName,
   934  		Config:           config,
   935  		HostConfig:       hostConfig,
   936  		NetworkingConfig: networkingConfig,
   937  	}, nil
   938  }
   939  
   940  func (d *DockerDriver) Periodic() (bool, time.Duration) {
   941  	return true, 15 * time.Second
   942  }
   943  
   944  // createImage creates a docker image either by pulling it from a registry or by
   945  // loading it from the file system
   946  func (d *DockerDriver) createImage(driverConfig *DockerDriverConfig, client *docker.Client, taskDir *allocdir.TaskDir) (string, error) {
   947  	image := driverConfig.ImageName
   948  	repo, tag := docker.ParseRepositoryTag(image)
   949  	if tag == "" {
   950  		tag = "latest"
   951  	}
   952  
   953  	coordinator, callerID := d.getDockerCoordinator(client)
   954  
   955  	// We're going to check whether the image is already downloaded. If the tag
   956  	// is "latest", or ForcePull is set, we have to check for a new version every time so we don't
   957  	// bother to check and cache the id here. We'll download first, then cache.
   958  	if driverConfig.ForcePull {
   959  		d.logger.Printf("[DEBUG] driver.docker: force pull image '%s:%s' instead of inspecting local", repo, tag)
   960  	} else if tag != "latest" {
   961  		if dockerImage, _ := client.InspectImage(image); dockerImage != nil {
   962  			// Image exists so just increment its reference count
   963  			coordinator.IncrementImageReference(dockerImage.ID, image, callerID)
   964  			return dockerImage.ID, nil
   965  		}
   966  	}
   967  
   968  	// Load the image if specified
   969  	if driverConfig.LoadImage != "" {
   970  		return d.loadImage(driverConfig, client, taskDir)
   971  	}
   972  
   973  	// Download the image
   974  	return d.pullImage(driverConfig, client, repo, tag)
   975  }
   976  
   977  // pullImage creates an image by pulling it from a docker registry
   978  func (d *DockerDriver) pullImage(driverConfig *DockerDriverConfig, client *docker.Client, repo, tag string) (id string, err error) {
   979  	var authOptions *docker.AuthConfiguration
   980  	if len(driverConfig.Auth) != 0 {
   981  		authOptions = &docker.AuthConfiguration{
   982  			Username:      driverConfig.Auth[0].Username,
   983  			Password:      driverConfig.Auth[0].Password,
   984  			Email:         driverConfig.Auth[0].Email,
   985  			ServerAddress: driverConfig.Auth[0].ServerAddress,
   986  		}
   987  	} else if authConfigFile := d.config.Read("docker.auth.config"); authConfigFile != "" {
   988  		var err error
   989  		authOptions, err = authOptionFrom(authConfigFile, repo)
   990  		if err != nil {
   991  			d.logger.Printf("[INFO] driver.docker: failed to find docker auth for repo %q: %v", repo, err)
   992  			return "", fmt.Errorf("Failed to find docker auth for repo %q: %v", repo, err)
   993  		}
   994  
   995  		if authOptions.Email == "" && authOptions.Password == "" &&
   996  			authOptions.ServerAddress == "" && authOptions.Username == "" {
   997  			d.logger.Printf("[DEBUG] driver.docker: did not find docker auth for repo %q", repo)
   998  		}
   999  	}
  1000  
  1001  	d.emitEvent("Downloading image %s:%s", repo, tag)
  1002  	coordinator, callerID := d.getDockerCoordinator(client)
  1003  	return coordinator.PullImage(driverConfig.ImageName, authOptions, callerID)
  1004  }
  1005  
  1006  // loadImage creates an image by loading it from the file system
  1007  func (d *DockerDriver) loadImage(driverConfig *DockerDriverConfig, client *docker.Client,
  1008  	taskDir *allocdir.TaskDir) (id string, err error) {
  1009  
  1010  	archive := filepath.Join(taskDir.LocalDir, driverConfig.LoadImage)
  1011  	d.logger.Printf("[DEBUG] driver.docker: loading image from: %v", archive)
  1012  
  1013  	f, err := os.Open(archive)
  1014  	if err != nil {
  1015  		return "", fmt.Errorf("unable to open image archive: %v", err)
  1016  	}
  1017  
  1018  	if err := client.LoadImage(docker.LoadImageOptions{InputStream: f}); err != nil {
  1019  		return "", err
  1020  	}
  1021  	f.Close()
  1022  
  1023  	dockerImage, err := client.InspectImage(driverConfig.ImageName)
  1024  	if err != nil {
  1025  		return "", recoverableErrTimeouts(err)
  1026  	}
  1027  
  1028  	coordinator, callerID := d.getDockerCoordinator(client)
  1029  	coordinator.IncrementImageReference(dockerImage.ID, driverConfig.ImageName, callerID)
  1030  	return dockerImage.ID, nil
  1031  }
  1032  
  1033  // createContainer creates the container given the passed configuration. It
  1034  // attempts to handle any transient Docker errors.
  1035  func (d *DockerDriver) createContainer(config docker.CreateContainerOptions) (*docker.Container, error) {
  1036  	// Create a container
  1037  	attempted := 0
  1038  CREATE:
  1039  	container, createErr := client.CreateContainer(config)
  1040  	if createErr == nil {
  1041  		return container, nil
  1042  	}
  1043  
  1044  	d.logger.Printf("[DEBUG] driver.docker: failed to create container %q from image %q (ID: %q) (attempt %d): %v",
  1045  		config.Name, d.driverConfig.ImageName, d.imageID, attempted+1, createErr)
  1046  	if strings.Contains(strings.ToLower(createErr.Error()), "container already exists") {
  1047  		containers, err := client.ListContainers(docker.ListContainersOptions{
  1048  			All: true,
  1049  		})
  1050  		if err != nil {
  1051  			d.logger.Printf("[ERR] driver.docker: failed to query list of containers matching name:%s", config.Name)
  1052  			return nil, recoverableErrTimeouts(fmt.Errorf("Failed to query list of containers: %s", err))
  1053  		}
  1054  
  1055  		// Delete matching containers
  1056  		// Adding a / infront of the container name since Docker returns the
  1057  		// container names with a / pre-pended to the Nomad generated container names
  1058  		containerName := "/" + config.Name
  1059  		d.logger.Printf("[DEBUG] driver.docker: searching for container name %q to purge", containerName)
  1060  		for _, container := range containers {
  1061  			d.logger.Printf("[DEBUG] driver.docker: listed container %+v", container)
  1062  			found := false
  1063  			for _, name := range container.Names {
  1064  				if name == containerName {
  1065  					found = true
  1066  					break
  1067  				}
  1068  			}
  1069  
  1070  			if !found {
  1071  				continue
  1072  			}
  1073  
  1074  			// Inspect the container and if the container isn't dead then return
  1075  			// the container
  1076  			container, err := client.InspectContainer(container.ID)
  1077  			if err != nil {
  1078  				return nil, recoverableErrTimeouts(fmt.Errorf("Failed to inspect container %s: %s", container.ID, err))
  1079  			}
  1080  			if container != nil && (container.State.Running || container.State.FinishedAt.IsZero()) {
  1081  				return container, nil
  1082  			}
  1083  
  1084  			err = client.RemoveContainer(docker.RemoveContainerOptions{
  1085  				ID:    container.ID,
  1086  				Force: true,
  1087  			})
  1088  			if err != nil {
  1089  				d.logger.Printf("[ERR] driver.docker: failed to purge container %s", container.ID)
  1090  				return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err))
  1091  			} else if err == nil {
  1092  				d.logger.Printf("[INFO] driver.docker: purged container %s", container.ID)
  1093  			}
  1094  		}
  1095  
  1096  		if attempted < 5 {
  1097  			attempted++
  1098  			time.Sleep(1 * time.Second)
  1099  			goto CREATE
  1100  		}
  1101  	} else if strings.Contains(strings.ToLower(createErr.Error()), "no such image") {
  1102  		// There is still a very small chance this is possible even with the
  1103  		// coordinator so retry.
  1104  		return nil, structs.NewRecoverableError(createErr, true)
  1105  	}
  1106  
  1107  	return nil, recoverableErrTimeouts(createErr)
  1108  }
  1109  
  1110  // startContainer starts the passed container. It attempts to handle any
  1111  // transient Docker errors.
  1112  func (d *DockerDriver) startContainer(c *docker.Container) error {
  1113  	// Start a container
  1114  	attempted := 0
  1115  START:
  1116  	startErr := client.StartContainer(c.ID, c.HostConfig)
  1117  	if startErr == nil {
  1118  		return nil
  1119  	}
  1120  
  1121  	d.logger.Printf("[DEBUG] driver.docker: failed to start container %q (attempt %d): %v", c.ID, attempted+1, startErr)
  1122  
  1123  	// If it is a 500 error it is likely we can retry and be successful
  1124  	if strings.Contains(startErr.Error(), "API error (500)") {
  1125  		if attempted < 5 {
  1126  			attempted++
  1127  			time.Sleep(1 * time.Second)
  1128  			goto START
  1129  		}
  1130  	}
  1131  
  1132  	return recoverableErrTimeouts(startErr)
  1133  }
  1134  
  1135  func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
  1136  	// Split the handle
  1137  	pidBytes := []byte(strings.TrimPrefix(handleID, "DOCKER:"))
  1138  	pid := &dockerPID{}
  1139  	if err := json.Unmarshal(pidBytes, pid); err != nil {
  1140  		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
  1141  	}
  1142  	d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", pid.ContainerID)
  1143  	d.logger.Printf("[DEBUG] driver.docker: re-attached to handle: %s", handleID)
  1144  	pluginConfig := &plugin.ClientConfig{
  1145  		Reattach: pid.PluginConfig.PluginConfig(),
  1146  	}
  1147  
  1148  	client, waitClient, err := d.dockerClients()
  1149  	if err != nil {
  1150  		return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
  1151  	}
  1152  
  1153  	// Look for a running container with this ID
  1154  	containers, err := client.ListContainers(docker.ListContainersOptions{
  1155  		Filters: map[string][]string{
  1156  			"id": []string{pid.ContainerID},
  1157  		},
  1158  	})
  1159  	if err != nil {
  1160  		return nil, fmt.Errorf("Failed to query for container %s: %v", pid.ContainerID, err)
  1161  	}
  1162  
  1163  	found := false
  1164  	for _, container := range containers {
  1165  		if container.ID == pid.ContainerID {
  1166  			found = true
  1167  		}
  1168  	}
  1169  	if !found {
  1170  		return nil, fmt.Errorf("Failed to find container %s", pid.ContainerID)
  1171  	}
  1172  	exec, pluginClient, err := createExecutorWithConfig(pluginConfig, d.config.LogOutput)
  1173  	if err != nil {
  1174  		d.logger.Printf("[INFO] driver.docker: couldn't re-attach to the plugin process: %v", err)
  1175  		d.logger.Printf("[DEBUG] driver.docker: stopping container %q", pid.ContainerID)
  1176  		if e := client.StopContainer(pid.ContainerID, uint(pid.KillTimeout.Seconds())); e != nil {
  1177  			d.logger.Printf("[DEBUG] driver.docker: couldn't stop container: %v", e)
  1178  		}
  1179  		return nil, err
  1180  	}
  1181  
  1182  	ver, _ := exec.Version()
  1183  	d.logger.Printf("[DEBUG] driver.docker: version of executor: %v", ver.Version)
  1184  
  1185  	// Increment the reference count since we successfully attached to this
  1186  	// container
  1187  	coordinator, callerID := d.getDockerCoordinator(client)
  1188  	coordinator.IncrementImageReference(pid.ImageID, pid.Image, callerID)
  1189  
  1190  	// Return a driver handle
  1191  	h := &DockerHandle{
  1192  		client:         client,
  1193  		waitClient:     waitClient,
  1194  		executor:       exec,
  1195  		pluginClient:   pluginClient,
  1196  		logger:         d.logger,
  1197  		Image:          pid.Image,
  1198  		ImageID:        pid.ImageID,
  1199  		containerID:    pid.ContainerID,
  1200  		version:        pid.Version,
  1201  		killTimeout:    pid.KillTimeout,
  1202  		maxKillTimeout: pid.MaxKillTimeout,
  1203  		doneCh:         make(chan bool),
  1204  		waitCh:         make(chan *dstructs.WaitResult, 1),
  1205  	}
  1206  	if err := exec.SyncServices(consulContext(d.config, pid.ContainerID)); err != nil {
  1207  		h.logger.Printf("[ERR] driver.docker: error registering services with consul: %v", err)
  1208  	}
  1209  
  1210  	go h.collectStats()
  1211  	go h.run()
  1212  	return h, nil
  1213  }
  1214  
  1215  func (h *DockerHandle) ID() string {
  1216  	// Return a handle to the PID
  1217  	pid := dockerPID{
  1218  		Version:        h.version,
  1219  		ContainerID:    h.containerID,
  1220  		Image:          h.Image,
  1221  		ImageID:        h.ImageID,
  1222  		KillTimeout:    h.killTimeout,
  1223  		MaxKillTimeout: h.maxKillTimeout,
  1224  		PluginConfig:   NewPluginReattachConfig(h.pluginClient.ReattachConfig()),
  1225  	}
  1226  	data, err := json.Marshal(pid)
  1227  	if err != nil {
  1228  		h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err)
  1229  	}
  1230  	return fmt.Sprintf("DOCKER:%s", string(data))
  1231  }
  1232  
  1233  func (h *DockerHandle) ContainerID() string {
  1234  	return h.containerID
  1235  }
  1236  
  1237  func (h *DockerHandle) WaitCh() chan *dstructs.WaitResult {
  1238  	return h.waitCh
  1239  }
  1240  
  1241  func (h *DockerHandle) Update(task *structs.Task) error {
  1242  	// Store the updated kill timeout.
  1243  	h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout)
  1244  	if err := h.executor.UpdateTask(task); err != nil {
  1245  		h.logger.Printf("[DEBUG] driver.docker: failed to update log config: %v", err)
  1246  	}
  1247  
  1248  	// Update is not possible
  1249  	return nil
  1250  }
  1251  
  1252  func (h *DockerHandle) Signal(s os.Signal) error {
  1253  	// Convert types
  1254  	sysSig, ok := s.(syscall.Signal)
  1255  	if !ok {
  1256  		return fmt.Errorf("Failed to determine signal number")
  1257  	}
  1258  
  1259  	dockerSignal := docker.Signal(sysSig)
  1260  	opts := docker.KillContainerOptions{
  1261  		ID:     h.containerID,
  1262  		Signal: dockerSignal,
  1263  	}
  1264  	return h.client.KillContainer(opts)
  1265  
  1266  }
  1267  
  1268  // Kill is used to terminate the task. This uses `docker stop -t killTimeout`
  1269  func (h *DockerHandle) Kill() error {
  1270  	// Stop the container
  1271  	err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds()))
  1272  	if err != nil {
  1273  		h.executor.Exit()
  1274  		h.pluginClient.Kill()
  1275  
  1276  		// Container has already been removed.
  1277  		if strings.Contains(err.Error(), NoSuchContainerError) {
  1278  			h.logger.Printf("[DEBUG] driver.docker: attempted to stop non-existent container %s", h.containerID)
  1279  			return nil
  1280  		}
  1281  		h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err)
  1282  		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
  1283  	}
  1284  	h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
  1285  	return nil
  1286  }
  1287  
  1288  func (h *DockerHandle) Stats() (*cstructs.TaskResourceUsage, error) {
  1289  	h.resourceUsageLock.RLock()
  1290  	defer h.resourceUsageLock.RUnlock()
  1291  	var err error
  1292  	if h.resourceUsage == nil {
  1293  		err = fmt.Errorf("stats collection hasn't started yet")
  1294  	}
  1295  	return h.resourceUsage, err
  1296  }
  1297  
  1298  func (h *DockerHandle) run() {
  1299  	// Wait for it...
  1300  	exitCode, werr := h.waitClient.WaitContainer(h.containerID)
  1301  	if werr != nil {
  1302  		h.logger.Printf("[ERR] driver.docker: failed to wait for %s; container already terminated", h.containerID)
  1303  	}
  1304  
  1305  	if exitCode != 0 {
  1306  		werr = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode)
  1307  	}
  1308  
  1309  	close(h.doneCh)
  1310  
  1311  	// Remove services
  1312  	if err := h.executor.DeregisterServices(); err != nil {
  1313  		h.logger.Printf("[ERR] driver.docker: error deregistering services: %v", err)
  1314  	}
  1315  
  1316  	// Shutdown the syslog collector
  1317  	if err := h.executor.Exit(); err != nil {
  1318  		h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err)
  1319  	}
  1320  	h.pluginClient.Kill()
  1321  
  1322  	// Stop the container just incase the docker daemon's wait returned
  1323  	// incorrectly
  1324  	if err := h.client.StopContainer(h.containerID, 0); err != nil {
  1325  		_, noSuchContainer := err.(*docker.NoSuchContainer)
  1326  		_, containerNotRunning := err.(*docker.ContainerNotRunning)
  1327  		if !containerNotRunning && !noSuchContainer {
  1328  			h.logger.Printf("[ERR] driver.docker: error stopping container: %v", err)
  1329  		}
  1330  	}
  1331  
  1332  	// Remove the container
  1333  	if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, RemoveVolumes: true, Force: true}); err != nil {
  1334  		h.logger.Printf("[ERR] driver.docker: error removing container: %v", err)
  1335  	}
  1336  
  1337  	// Send the results
  1338  	h.waitCh <- dstructs.NewWaitResult(exitCode, 0, werr)
  1339  	close(h.waitCh)
  1340  }
  1341  
  1342  // collectStats starts collecting resource usage stats of a docker container
  1343  func (h *DockerHandle) collectStats() {
  1344  	statsCh := make(chan *docker.Stats)
  1345  	statsOpts := docker.StatsOptions{ID: h.containerID, Done: h.doneCh, Stats: statsCh, Stream: true}
  1346  	go func() {
  1347  		//TODO handle Stats error
  1348  		if err := h.waitClient.Stats(statsOpts); err != nil {
  1349  			h.logger.Printf("[DEBUG] driver.docker: error collecting stats from container %s: %v", h.containerID, err)
  1350  		}
  1351  	}()
  1352  	numCores := runtime.NumCPU()
  1353  	for {
  1354  		select {
  1355  		case s := <-statsCh:
  1356  			if s != nil {
  1357  				ms := &cstructs.MemoryStats{
  1358  					RSS:      s.MemoryStats.Stats.Rss,
  1359  					Cache:    s.MemoryStats.Stats.Cache,
  1360  					Swap:     s.MemoryStats.Stats.Swap,
  1361  					MaxUsage: s.MemoryStats.MaxUsage,
  1362  					Measured: DockerMeasuredMemStats,
  1363  				}
  1364  
  1365  				cs := &cstructs.CpuStats{
  1366  					ThrottledPeriods: s.CPUStats.ThrottlingData.ThrottledPeriods,
  1367  					ThrottledTime:    s.CPUStats.ThrottlingData.ThrottledTime,
  1368  					Measured:         DockerMeasuredCpuStats,
  1369  				}
  1370  
  1371  				// Calculate percentage
  1372  				cores := len(s.CPUStats.CPUUsage.PercpuUsage)
  1373  				cs.Percent = calculatePercent(
  1374  					s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage,
  1375  					s.CPUStats.SystemCPUUsage, s.PreCPUStats.SystemCPUUsage, cores)
  1376  				cs.SystemMode = calculatePercent(
  1377  					s.CPUStats.CPUUsage.UsageInKernelmode, s.PreCPUStats.CPUUsage.UsageInKernelmode,
  1378  					s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, cores)
  1379  				cs.UserMode = calculatePercent(
  1380  					s.CPUStats.CPUUsage.UsageInUsermode, s.PreCPUStats.CPUUsage.UsageInUsermode,
  1381  					s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, cores)
  1382  				cs.TotalTicks = (cs.Percent / 100) * shelpers.TotalTicksAvailable() / float64(numCores)
  1383  
  1384  				h.resourceUsageLock.Lock()
  1385  				h.resourceUsage = &cstructs.TaskResourceUsage{
  1386  					ResourceUsage: &cstructs.ResourceUsage{
  1387  						MemoryStats: ms,
  1388  						CpuStats:    cs,
  1389  					},
  1390  					Timestamp: s.Read.UTC().UnixNano(),
  1391  				}
  1392  				h.resourceUsageLock.Unlock()
  1393  			}
  1394  		case <-h.doneCh:
  1395  			return
  1396  		}
  1397  	}
  1398  }
  1399  
  1400  func calculatePercent(newSample, oldSample, newTotal, oldTotal uint64, cores int) float64 {
  1401  	numerator := newSample - oldSample
  1402  	denom := newTotal - oldTotal
  1403  	if numerator <= 0 || denom <= 0 {
  1404  		return 0.0
  1405  	}
  1406  
  1407  	return (float64(numerator) / float64(denom)) * float64(cores) * 100.0
  1408  }
  1409  
  1410  // authOptionFrom takes the Docker auth config file and the repo being pulled
  1411  // and returns an AuthConfiguration or an error if the file/repo could not be
  1412  // parsed or looked up.
  1413  func authOptionFrom(file, repo string) (*docker.AuthConfiguration, error) {
  1414  	name, err := reference.ParseNamed(repo)
  1415  	if err != nil {
  1416  		return nil, fmt.Errorf("Failed to parse named repo %q: %v", repo, err)
  1417  	}
  1418  
  1419  	repoInfo, err := registry.ParseRepositoryInfo(name)
  1420  	if err != nil {
  1421  		return nil, fmt.Errorf("Failed to parse repository: %v", err)
  1422  	}
  1423  
  1424  	f, err := os.Open(file)
  1425  	if err != nil {
  1426  		return nil, fmt.Errorf("Failed to open auth config file: %v, error: %v", file, err)
  1427  	}
  1428  	defer f.Close()
  1429  
  1430  	cfile := new(configfile.ConfigFile)
  1431  	if err := cfile.LoadFromReader(f); err != nil {
  1432  		return nil, fmt.Errorf("Failed to parse auth config file: %v", err)
  1433  	}
  1434  
  1435  	dockerAuthConfig := registry.ResolveAuthConfig(cfile.AuthConfigs, repoInfo.Index)
  1436  
  1437  	// Convert to Api version
  1438  	apiAuthConfig := &docker.AuthConfiguration{
  1439  		Username:      dockerAuthConfig.Username,
  1440  		Password:      dockerAuthConfig.Password,
  1441  		Email:         dockerAuthConfig.Email,
  1442  		ServerAddress: dockerAuthConfig.ServerAddress,
  1443  	}
  1444  
  1445  	return apiAuthConfig, nil
  1446  }