github.com/ryanslade/nomad@v0.2.4-0.20160128061903-fc95782f2089/client/driver/docker.go (about)

     1  package driver
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"log"
     7  	"net"
     8  	"path/filepath"
     9  	"strconv"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	docker "github.com/fsouza/go-dockerclient"
    15  
    16  	"github.com/hashicorp/nomad/client/allocdir"
    17  	"github.com/hashicorp/nomad/client/config"
    18  	cstructs "github.com/hashicorp/nomad/client/driver/structs"
    19  	"github.com/hashicorp/nomad/client/fingerprint"
    20  	"github.com/hashicorp/nomad/nomad/structs"
    21  	"github.com/mitchellh/mapstructure"
    22  )
    23  
    24  // We store the client globally to cache the connection to the docker daemon.
    25  var createClient sync.Once
    26  var client *docker.Client
    27  
    28  type DockerDriver struct {
    29  	DriverContext
    30  	fingerprint.StaticFingerprinter
    31  }
    32  
    33  type DockerDriverAuth struct {
    34  	Username      string `mapstructure:"username"`       // username for the registry
    35  	Password      string `mapstructure:"password"`       // password to access the registry
    36  	Email         string `mapstructure:"email"`          // email address of the user who is allowed to access the registry
    37  	ServerAddress string `mapstructure:"server_address"` // server address of the registry
    38  }
    39  
    40  type DockerDriverConfig struct {
    41  	ImageName        string              `mapstructure:"image"`              // Container's Image Name
    42  	Command          string              `mapstructure:"command"`            // The Command/Entrypoint to run when the container starts up
    43  	Args             []string            `mapstructure:"args"`               // The arguments to the Command/Entrypoint
    44  	IpcMode          string              `mapstructure:"ipc_mode"`           // The IPC mode of the container - host and none
    45  	NetworkMode      string              `mapstructure:"network_mode"`       // The network mode of the container - host, net and none
    46  	PidMode          string              `mapstructure:"pid_mode"`           // The PID mode of the container - host and none
    47  	UTSMode          string              `mapstructure:"uts_mode"`           // The UTS mode of the container - host and none
    48  	PortMapRaw       []map[string]int    `mapstructure:"port_map"`           //
    49  	PortMap          map[string]int      `mapstructure:"-"`                  // A map of host port labels and the ports exposed on the container
    50  	Privileged       bool                `mapstructure:"privileged"`         // Flag to run the container in priviledged mode
    51  	DNSServers       []string            `mapstructure:"dns_servers"`        // DNS Server for containers
    52  	DNSSearchDomains []string            `mapstructure:"dns_search_domains"` // DNS Search domains for containers
    53  	Hostname         string              `mapstructure:"hostname"`           // Hostname for containers
    54  	LabelsRaw        []map[string]string `mapstructure:"labels"`             //
    55  	Labels           map[string]string   `mapstructure:"-"`                  // Labels to set when the container starts up
    56  	Auth             []DockerDriverAuth  `mapstructure:"auth"`               // Authentication credentials for a private Docker registry
    57  }
    58  
    59  func (c *DockerDriverConfig) Validate() error {
    60  	if c.ImageName == "" {
    61  		return fmt.Errorf("Docker Driver needs an image name")
    62  	}
    63  
    64  	c.PortMap = mapMergeStrInt(c.PortMapRaw...)
    65  	c.Labels = mapMergeStrStr(c.LabelsRaw...)
    66  
    67  	return nil
    68  }
    69  
    70  type dockerPID struct {
    71  	ImageID     string
    72  	ContainerID string
    73  	KillTimeout time.Duration
    74  }
    75  
    76  type DockerHandle struct {
    77  	client           *docker.Client
    78  	logger           *log.Logger
    79  	cleanupContainer bool
    80  	cleanupImage     bool
    81  	imageID          string
    82  	containerID      string
    83  	killTimeout      time.Duration
    84  	waitCh           chan *cstructs.WaitResult
    85  	doneCh           chan struct{}
    86  }
    87  
    88  func NewDockerDriver(ctx *DriverContext) Driver {
    89  	return &DockerDriver{DriverContext: *ctx}
    90  }
    91  
    92  // dockerClient creates *docker.Client. In test / dev mode we can use ENV vars
    93  // to connect to the docker daemon. In production mode we will read
    94  // docker.endpoint from the config file.
    95  func (d *DockerDriver) dockerClient() (*docker.Client, error) {
    96  	if client != nil {
    97  		return client, nil
    98  	}
    99  
   100  	var err error
   101  	createClient.Do(func() {
   102  		// Default to using whatever is configured in docker.endpoint. If this is
   103  		// not specified we'll fall back on NewClientFromEnv which reads config from
   104  		// the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and
   105  		// DOCKER_CERT_PATH. This allows us to lock down the config in production
   106  		// but also accept the standard ENV configs for dev and test.
   107  		dockerEndpoint := d.config.Read("docker.endpoint")
   108  		if dockerEndpoint != "" {
   109  			cert := d.config.Read("docker.tls.cert")
   110  			key := d.config.Read("docker.tls.key")
   111  			ca := d.config.Read("docker.tls.ca")
   112  
   113  			if cert+key+ca != "" {
   114  				d.logger.Printf("[DEBUG] driver.docker: using TLS client connection to %s", dockerEndpoint)
   115  				client, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca)
   116  			} else {
   117  				d.logger.Printf("[DEBUG] driver.docker: using standard client connection to %s", dockerEndpoint)
   118  				client, err = docker.NewClient(dockerEndpoint)
   119  			}
   120  			return
   121  		}
   122  
   123  		d.logger.Println("[DEBUG] driver.docker: using client connection initialized from environment")
   124  		client, err = docker.NewClientFromEnv()
   125  	})
   126  	return client, err
   127  }
   128  
   129  func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
   130  	// Initialize docker API client
   131  	client, err := d.dockerClient()
   132  	if err != nil {
   133  		d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err)
   134  		return false, nil
   135  	}
   136  
   137  	privileged := d.config.ReadBoolDefault("docker.privileged.enabled", false)
   138  	if privileged {
   139  		d.logger.Println("[DEBUG] driver.docker: privileged containers are enabled")
   140  		node.Attributes["docker.privileged.enabled"] = "1"
   141  	} else {
   142  		d.logger.Println("[DEBUG] driver.docker: privileged containers are disabled")
   143  	}
   144  
   145  	// This is the first operation taken on the client so we'll try to
   146  	// establish a connection to the Docker daemon. If this fails it means
   147  	// Docker isn't available so we'll simply disable the docker driver.
   148  	env, err := client.Version()
   149  	if err != nil {
   150  		d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon at %s: %s", client.Endpoint(), err)
   151  		return false, nil
   152  	}
   153  	node.Attributes["driver.docker"] = "1"
   154  	node.Attributes["driver.docker.version"] = env.Get("Version")
   155  
   156  	return true, nil
   157  }
   158  
   159  func (d *DockerDriver) containerBinds(alloc *allocdir.AllocDir, task *structs.Task) ([]string, error) {
   160  	shared := alloc.SharedDir
   161  	local, ok := alloc.TaskDirs[task.Name]
   162  	if !ok {
   163  		return nil, fmt.Errorf("Failed to find task local directory: %v", task.Name)
   164  	}
   165  
   166  	return []string{
   167  		// "z" and "Z" option is to allocate directory with SELinux label.
   168  		fmt.Sprintf("%s:/%s:rw,z", shared, allocdir.SharedAllocName),
   169  		// capital "Z" will label with Multi-Category Security (MCS) labels
   170  		fmt.Sprintf("%s:/%s:rw,Z", local, allocdir.TaskLocal),
   171  	}, nil
   172  }
   173  
   174  // createContainer initializes a struct needed to call docker.client.CreateContainer()
   175  func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task, driverConfig *DockerDriverConfig) (docker.CreateContainerOptions, error) {
   176  	var c docker.CreateContainerOptions
   177  	if task.Resources == nil {
   178  		// Guard against missing resources. We should never have been able to
   179  		// schedule a job without specifying this.
   180  		d.logger.Println("[ERR] driver.docker: task.Resources is empty")
   181  		return c, fmt.Errorf("task.Resources is empty")
   182  	}
   183  
   184  	binds, err := d.containerBinds(ctx.AllocDir, task)
   185  	if err != nil {
   186  		return c, err
   187  	}
   188  
   189  	// Set environment variables.
   190  	d.taskEnv.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
   191  	d.taskEnv.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
   192  
   193  	config := &docker.Config{
   194  		Image:    driverConfig.ImageName,
   195  		Hostname: driverConfig.Hostname,
   196  	}
   197  
   198  	hostConfig := &docker.HostConfig{
   199  		// Convert MB to bytes. This is an absolute value.
   200  		//
   201  		// This value represents the total amount of memory a process can use.
   202  		// Swap is added to total memory and is managed by the OS, not docker.
   203  		// Since this may cause other processes to swap and cause system
   204  		// instability, we will simply not use swap.
   205  		//
   206  		// See: https://www.kernel.org/doc/Documentation/cgroups/memory.txt
   207  		Memory:     int64(task.Resources.MemoryMB) * 1024 * 1024,
   208  		MemorySwap: -1,
   209  		// Convert Mhz to shares. This is a relative value.
   210  		//
   211  		// There are two types of CPU limiters available: Shares and Quotas. A
   212  		// Share allows a particular process to have a proportion of CPU time
   213  		// relative to other processes; 1024 by default. A CPU Quota is enforced
   214  		// over a Period of time and is a HARD limit on the amount of CPU time a
   215  		// process can use. Processes with quotas cannot burst, while processes
   216  		// with shares can, so we'll use shares.
   217  		//
   218  		// The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any
   219  		// given process will have at least that amount of resources, but likely
   220  		// more since it is (probably) rare that the machine will run at 100%
   221  		// CPU. This scale will cease to work if a node is overprovisioned.
   222  		//
   223  		// See:
   224  		//  - https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
   225  		//  - https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt
   226  		CPUShares: int64(task.Resources.CPU),
   227  
   228  		// Binds are used to mount a host volume into the container. We mount a
   229  		// local directory for storage and a shared alloc directory that can be
   230  		// used to share data between different tasks in the same task group.
   231  		Binds: binds,
   232  	}
   233  
   234  	d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"])
   235  	d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"])
   236  	d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"])
   237  
   238  	//  set privileged mode
   239  	hostPrivileged := d.config.ReadBoolDefault("docker.privileged.enabled", false)
   240  	if driverConfig.Privileged && !hostPrivileged {
   241  		return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`)
   242  	}
   243  	hostConfig.Privileged = hostPrivileged
   244  
   245  	// set DNS servers
   246  	for _, ip := range driverConfig.DNSServers {
   247  		if net.ParseIP(ip) != nil {
   248  			hostConfig.DNS = append(hostConfig.DNS, ip)
   249  		} else {
   250  			d.logger.Printf("[ERR] driver.docker: invalid ip address for container dns server: %s", ip)
   251  		}
   252  	}
   253  
   254  	// set DNS search domains
   255  	for _, domain := range driverConfig.DNSSearchDomains {
   256  		hostConfig.DNSSearch = append(hostConfig.DNSSearch, domain)
   257  	}
   258  
   259  	if driverConfig.IpcMode != "" {
   260  		if !hostPrivileged {
   261  			return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent, setting ipc mode not allowed`)
   262  		}
   263  		d.logger.Printf("[DEBUG] driver.docker: setting ipc mode to %s", driverConfig.IpcMode)
   264  	}
   265  	hostConfig.IpcMode = driverConfig.IpcMode
   266  
   267  	if driverConfig.PidMode != "" {
   268  		if !hostPrivileged {
   269  			return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent, setting pid mode not allowed`)
   270  		}
   271  		d.logger.Printf("[DEBUG] driver.docker: setting pid mode to %s", driverConfig.PidMode)
   272  	}
   273  	hostConfig.PidMode = driverConfig.PidMode
   274  
   275  	if driverConfig.UTSMode != "" {
   276  		if !hostPrivileged {
   277  			return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent, setting UTS mode not allowed`)
   278  		}
   279  		d.logger.Printf("[DEBUG] driver.docker: setting UTS mode to %s", driverConfig.UTSMode)
   280  	}
   281  	hostConfig.UTSMode = driverConfig.UTSMode
   282  
   283  	hostConfig.NetworkMode = driverConfig.NetworkMode
   284  	if hostConfig.NetworkMode == "" {
   285  		// docker default
   286  		d.logger.Println("[DEBUG] driver.docker: networking mode not specified; defaulting to bridge")
   287  		hostConfig.NetworkMode = "bridge"
   288  	}
   289  
   290  	// Setup port mapping and exposed ports
   291  	if len(task.Resources.Networks) == 0 {
   292  		d.logger.Println("[DEBUG] driver.docker: No network interfaces are available")
   293  		if len(driverConfig.PortMap) > 0 {
   294  			return c, fmt.Errorf("Trying to map ports but no network interface is available")
   295  		}
   296  	} else {
   297  		// TODO add support for more than one network
   298  		network := task.Resources.Networks[0]
   299  		publishedPorts := map[docker.Port][]docker.PortBinding{}
   300  		exposedPorts := map[docker.Port]struct{}{}
   301  
   302  		for _, port := range network.ReservedPorts {
   303  			// By default we will map the allocated port 1:1 to the container
   304  			containerPortInt := port.Value
   305  
   306  			// If the user has mapped a port using port_map we'll change it here
   307  			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
   308  				containerPortInt = mapped
   309  			}
   310  
   311  			hostPortStr := strconv.Itoa(port.Value)
   312  			containerPort := docker.Port(strconv.Itoa(containerPortInt))
   313  
   314  			publishedPorts[containerPort+"/tcp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}}
   315  			publishedPorts[containerPort+"/udp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}}
   316  			d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)", network.IP, port.Value, port.Value)
   317  
   318  			exposedPorts[containerPort+"/tcp"] = struct{}{}
   319  			exposedPorts[containerPort+"/udp"] = struct{}{}
   320  			d.logger.Printf("[DEBUG] driver.docker: exposed port %d", port.Value)
   321  		}
   322  
   323  		for _, port := range network.DynamicPorts {
   324  			// By default we will map the allocated port 1:1 to the container
   325  			containerPortInt := port.Value
   326  
   327  			// If the user has mapped a port using port_map we'll change it here
   328  			if mapped, ok := driverConfig.PortMap[port.Label]; ok {
   329  				containerPortInt = mapped
   330  			}
   331  
   332  			hostPortStr := strconv.Itoa(port.Value)
   333  			containerPort := docker.Port(strconv.Itoa(containerPortInt))
   334  
   335  			publishedPorts[containerPort+"/tcp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}}
   336  			publishedPorts[containerPort+"/udp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}}
   337  			d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (mapped)", network.IP, port.Value, containerPortInt)
   338  
   339  			exposedPorts[containerPort+"/tcp"] = struct{}{}
   340  			exposedPorts[containerPort+"/udp"] = struct{}{}
   341  			d.logger.Printf("[DEBUG] driver.docker: exposed port %s", containerPort)
   342  		}
   343  
   344  		// This was set above in a call to GetTaskEnv but if we
   345  		// have mapped any ports we will need to override them.
   346  		//
   347  		// TODO refactor the implementation in GetTaskEnv to match
   348  		// the 0.2 ports world view. Docker seems to be the only place where
   349  		// this is actually needed, but this is kinda hacky.
   350  		if len(driverConfig.PortMap) > 0 {
   351  			d.taskEnv.SetPorts(network.MapLabelToValues(driverConfig.PortMap))
   352  		}
   353  		hostConfig.PortBindings = publishedPorts
   354  		config.ExposedPorts = exposedPorts
   355  	}
   356  
   357  	d.taskEnv.Build()
   358  	parsedArgs := d.taskEnv.ParseAndReplace(driverConfig.Args)
   359  
   360  	// If the user specified a custom command to run as their entrypoint, we'll
   361  	// inject it here.
   362  	if driverConfig.Command != "" {
   363  		cmd := []string{driverConfig.Command}
   364  		if len(driverConfig.Args) != 0 {
   365  			cmd = append(cmd, parsedArgs...)
   366  		}
   367  		d.logger.Printf("[DEBUG] driver.docker: setting container startup command to: %s", strings.Join(cmd, " "))
   368  		config.Cmd = cmd
   369  	} else if len(driverConfig.Args) != 0 {
   370  		d.logger.Println("[DEBUG] driver.docker: ignoring command arguments because command is not specified")
   371  	}
   372  
   373  	if len(driverConfig.Labels) > 0 {
   374  		config.Labels = driverConfig.Labels
   375  		d.logger.Printf("[DEBUG] driver.docker: applied labels on the container: %+v", config.Labels)
   376  	}
   377  
   378  	config.Env = d.taskEnv.EnvList()
   379  
   380  	containerName := fmt.Sprintf("%s-%s", task.Name, ctx.AllocID)
   381  	d.logger.Printf("[DEBUG] driver.docker: setting container name to: %s", containerName)
   382  
   383  	return docker.CreateContainerOptions{
   384  		Name:       containerName,
   385  		Config:     config,
   386  		HostConfig: hostConfig,
   387  	}, nil
   388  }
   389  
   390  func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
   391  	var driverConfig DockerDriverConfig
   392  	if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil {
   393  		return nil, err
   394  	}
   395  	image := driverConfig.ImageName
   396  
   397  	if err := driverConfig.Validate(); err != nil {
   398  		return nil, err
   399  	}
   400  	if task.Resources == nil {
   401  		return nil, fmt.Errorf("Resources are not specified")
   402  	}
   403  	if task.Resources.MemoryMB == 0 {
   404  		return nil, fmt.Errorf("Memory limit cannot be zero")
   405  	}
   406  	if task.Resources.CPU == 0 {
   407  		return nil, fmt.Errorf("CPU limit cannot be zero")
   408  	}
   409  
   410  	cleanupContainer := d.config.ReadBoolDefault("docker.cleanup.container", true)
   411  	cleanupImage := d.config.ReadBoolDefault("docker.cleanup.image", true)
   412  
   413  	// Initialize docker API client
   414  	client, err := d.dockerClient()
   415  	if err != nil {
   416  		return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
   417  	}
   418  
   419  	repo, tag := docker.ParseRepositoryTag(image)
   420  	// Make sure tag is always explicitly set. We'll default to "latest" if it
   421  	// isn't, which is the expected behavior.
   422  	if tag == "" {
   423  		tag = "latest"
   424  	}
   425  
   426  	var dockerImage *docker.Image
   427  	// We're going to check whether the image is already downloaded. If the tag
   428  	// is "latest" we have to check for a new version every time so we don't
   429  	// bother to check and cache the id here. We'll download first, then cache.
   430  	if tag != "latest" {
   431  		dockerImage, err = client.InspectImage(image)
   432  	}
   433  
   434  	// Download the image
   435  	if dockerImage == nil {
   436  		pullOptions := docker.PullImageOptions{
   437  			Repository: repo,
   438  			Tag:        tag,
   439  		}
   440  
   441  		authOptions := docker.AuthConfiguration{}
   442  		if len(driverConfig.Auth) != 0 {
   443  			authOptions = docker.AuthConfiguration{
   444  				Username:      driverConfig.Auth[0].Username,
   445  				Password:      driverConfig.Auth[0].Password,
   446  				Email:         driverConfig.Auth[0].Email,
   447  				ServerAddress: driverConfig.Auth[0].ServerAddress,
   448  			}
   449  		}
   450  
   451  		err = client.PullImage(pullOptions, authOptions)
   452  		if err != nil {
   453  			d.logger.Printf("[ERR] driver.docker: failed pulling container %s:%s: %s", repo, tag, err)
   454  			return nil, fmt.Errorf("Failed to pull `%s`: %s", image, err)
   455  		}
   456  		d.logger.Printf("[DEBUG] driver.docker: docker pull %s:%s succeeded", repo, tag)
   457  
   458  		// Now that we have the image we can get the image id
   459  		dockerImage, err = client.InspectImage(image)
   460  		if err != nil {
   461  			d.logger.Printf("[ERR] driver.docker: failed getting image id for %s: %s", image, err)
   462  			return nil, fmt.Errorf("Failed to determine image id for `%s`: %s", image, err)
   463  		}
   464  	}
   465  	d.logger.Printf("[DEBUG] driver.docker: identified image %s as %s", image, dockerImage.ID)
   466  
   467  	config, err := d.createContainer(ctx, task, &driverConfig)
   468  	if err != nil {
   469  		d.logger.Printf("[ERR] driver.docker: failed to create container configuration for image %s: %s", image, err)
   470  		return nil, fmt.Errorf("Failed to create container configuration for image %s: %s", image, err)
   471  	}
   472  	// Create a container
   473  	container, err := client.CreateContainer(config)
   474  	if err != nil {
   475  		// If the container already exists because of a previous failure we'll
   476  		// try to purge it and re-create it.
   477  		if strings.Contains(err.Error(), "container already exists") {
   478  			// Get the ID of the existing container so we can delete it
   479  			containers, err := client.ListContainers(docker.ListContainersOptions{
   480  				// The image might be in use by a stopped container, so check everything
   481  				All: true,
   482  				Filters: map[string][]string{
   483  					"name": []string{config.Name},
   484  				},
   485  			})
   486  			if err != nil {
   487  				d.logger.Printf("[ERR] driver.docker: failed to query list of containers matching name:%s", config.Name)
   488  				return nil, fmt.Errorf("Failed to query list of containers: %s", err)
   489  			}
   490  
   491  			// Couldn't find any matching containers
   492  			if len(containers) == 0 {
   493  				d.logger.Printf("[ERR] driver.docker: failed to get id for container %s: %#v", config.Name, containers)
   494  				return nil, fmt.Errorf("Failed to get id for container %s", config.Name)
   495  			}
   496  
   497  			// Delete matching containers
   498  			d.logger.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name)
   499  			for _, container := range containers {
   500  				err = client.RemoveContainer(docker.RemoveContainerOptions{
   501  					ID: container.ID,
   502  				})
   503  				if err != nil {
   504  					d.logger.Printf("[ERR] driver.docker: failed to purge container %s", container.ID)
   505  					return nil, fmt.Errorf("Failed to purge container %s: %s", container.ID, err)
   506  				}
   507  				d.logger.Printf("[INFO] driver.docker: purged container %s", container.ID)
   508  			}
   509  
   510  			container, err = client.CreateContainer(config)
   511  			if err != nil {
   512  				d.logger.Printf("[ERR] driver.docker: failed to re-create container %s; aborting", config.Name)
   513  				return nil, fmt.Errorf("Failed to re-create container %s; aborting", config.Name)
   514  			}
   515  		} else {
   516  			// We failed to create the container for some other reason.
   517  			d.logger.Printf("[ERR] driver.docker: failed to create container from image %s: %s", image, err)
   518  			return nil, fmt.Errorf("Failed to create container from image %s: %s", image, err)
   519  		}
   520  	}
   521  	d.logger.Printf("[INFO] driver.docker: created container %s", container.ID)
   522  
   523  	// Start the container
   524  	err = client.StartContainer(container.ID, container.HostConfig)
   525  	if err != nil {
   526  		d.logger.Printf("[ERR] driver.docker: failed to start container %s: %s", container.ID, err)
   527  		return nil, fmt.Errorf("Failed to start container %s: %s", container.ID, err)
   528  	}
   529  	d.logger.Printf("[INFO] driver.docker: started container %s", container.ID)
   530  
   531  	// Return a driver handle
   532  	h := &DockerHandle{
   533  		client:           client,
   534  		cleanupContainer: cleanupContainer,
   535  		cleanupImage:     cleanupImage,
   536  		logger:           d.logger,
   537  		imageID:          dockerImage.ID,
   538  		containerID:      container.ID,
   539  		killTimeout:      d.DriverContext.KillTimeout(task),
   540  		doneCh:           make(chan struct{}),
   541  		waitCh:           make(chan *cstructs.WaitResult, 1),
   542  	}
   543  	go h.run()
   544  	return h, nil
   545  }
   546  
   547  func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
   548  	cleanupContainer := d.config.ReadBoolDefault("docker.cleanup.container", true)
   549  	cleanupImage := d.config.ReadBoolDefault("docker.cleanup.image", true)
   550  
   551  	// Split the handle
   552  	pidBytes := []byte(strings.TrimPrefix(handleID, "DOCKER:"))
   553  	pid := &dockerPID{}
   554  	if err := json.Unmarshal(pidBytes, pid); err != nil {
   555  		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
   556  	}
   557  	d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", handleID)
   558  
   559  	// Initialize docker API client
   560  	client, err := d.dockerClient()
   561  	if err != nil {
   562  		return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
   563  	}
   564  
   565  	// Look for a running container with this ID
   566  	containers, err := client.ListContainers(docker.ListContainersOptions{
   567  		Filters: map[string][]string{
   568  			"id": []string{pid.ContainerID},
   569  		},
   570  	})
   571  	if err != nil {
   572  		return nil, fmt.Errorf("Failed to query for container %s: %v", pid.ContainerID, err)
   573  	}
   574  
   575  	found := false
   576  	for _, container := range containers {
   577  		if container.ID == pid.ContainerID {
   578  			found = true
   579  		}
   580  	}
   581  	if !found {
   582  		return nil, fmt.Errorf("Failed to find container %s: %v", pid.ContainerID, err)
   583  	}
   584  
   585  	// Return a driver handle
   586  	h := &DockerHandle{
   587  		client:           client,
   588  		cleanupContainer: cleanupContainer,
   589  		cleanupImage:     cleanupImage,
   590  		logger:           d.logger,
   591  		imageID:          pid.ImageID,
   592  		containerID:      pid.ContainerID,
   593  		killTimeout:      pid.KillTimeout,
   594  		doneCh:           make(chan struct{}),
   595  		waitCh:           make(chan *cstructs.WaitResult, 1),
   596  	}
   597  	go h.run()
   598  	return h, nil
   599  }
   600  
   601  func (h *DockerHandle) ID() string {
   602  	// Return a handle to the PID
   603  	pid := dockerPID{
   604  		ImageID:     h.imageID,
   605  		ContainerID: h.containerID,
   606  		KillTimeout: h.killTimeout,
   607  	}
   608  	data, err := json.Marshal(pid)
   609  	if err != nil {
   610  		h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err)
   611  	}
   612  	return fmt.Sprintf("DOCKER:%s", string(data))
   613  }
   614  
   615  func (h *DockerHandle) ContainerID() string {
   616  	return h.containerID
   617  }
   618  
   619  func (h *DockerHandle) WaitCh() chan *cstructs.WaitResult {
   620  	return h.waitCh
   621  }
   622  
   623  func (h *DockerHandle) Update(task *structs.Task) error {
   624  	// Update is not possible
   625  	return nil
   626  }
   627  
   628  // Kill is used to terminate the task. This uses `docker stop -t killTimeout`
   629  func (h *DockerHandle) Kill() error {
   630  	// Stop the container
   631  	err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds()))
   632  	if err != nil {
   633  		h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err)
   634  		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
   635  	}
   636  	h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
   637  
   638  	// Cleanup container
   639  	if h.cleanupContainer {
   640  		err = h.client.RemoveContainer(docker.RemoveContainerOptions{
   641  			ID:            h.containerID,
   642  			RemoveVolumes: true,
   643  		})
   644  		if err != nil {
   645  			h.logger.Printf("[ERR] driver.docker: failed to remove container %s", h.containerID)
   646  			return fmt.Errorf("Failed to remove container %s: %s", h.containerID, err)
   647  		}
   648  		h.logger.Printf("[INFO] driver.docker: removed container %s", h.containerID)
   649  	}
   650  
   651  	// Cleanup image. This operation may fail if the image is in use by another
   652  	// job. That is OK. Will we log a message but continue.
   653  	if h.cleanupImage {
   654  		err = h.client.RemoveImage(h.imageID)
   655  		if err != nil {
   656  			containers, err := h.client.ListContainers(docker.ListContainersOptions{
   657  				// The image might be in use by a stopped container, so check everything
   658  				All: true,
   659  				Filters: map[string][]string{
   660  					"image": []string{h.imageID},
   661  				},
   662  			})
   663  			if err != nil {
   664  				h.logger.Printf("[ERR] driver.docker: failed to query list of containers matching image:%s", h.imageID)
   665  				return fmt.Errorf("Failed to query list of containers: %s", err)
   666  			}
   667  			inUse := len(containers)
   668  			if inUse > 0 {
   669  				h.logger.Printf("[INFO] driver.docker: image %s is still in use by %d container(s)", h.imageID, inUse)
   670  			} else {
   671  				return fmt.Errorf("Failed to remove image %s", h.imageID)
   672  			}
   673  		} else {
   674  			h.logger.Printf("[INFO] driver.docker: removed image %s", h.imageID)
   675  		}
   676  	}
   677  	return nil
   678  }
   679  
   680  func (h *DockerHandle) run() {
   681  	// Wait for it...
   682  	exitCode, err := h.client.WaitContainer(h.containerID)
   683  	if err != nil {
   684  		h.logger.Printf("[ERR] driver.docker: failed to wait for %s; container already terminated", h.containerID)
   685  	}
   686  
   687  	if exitCode != 0 {
   688  		err = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode)
   689  	}
   690  
   691  	close(h.doneCh)
   692  	h.waitCh <- cstructs.NewWaitResult(exitCode, 0, err)
   693  	close(h.waitCh)
   694  }