github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/client/driver/docker.go (about)

     1  package driver
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"log"
     7  	"path/filepath"
     8  	"strconv"
     9  	"strings"
    10  
    11  	docker "github.com/fsouza/go-dockerclient"
    12  
    13  	"github.com/hashicorp/nomad/client/allocdir"
    14  	"github.com/hashicorp/nomad/client/config"
    15  	"github.com/hashicorp/nomad/client/driver/args"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  )
    18  
    19  type DockerDriver struct {
    20  	DriverContext
    21  }
    22  
    23  type dockerPID struct {
    24  	ImageID     string
    25  	ContainerID string
    26  }
    27  
    28  type dockerHandle struct {
    29  	client           *docker.Client
    30  	logger           *log.Logger
    31  	cleanupContainer bool
    32  	cleanupImage     bool
    33  	imageID          string
    34  	containerID      string
    35  	waitCh           chan error
    36  	doneCh           chan struct{}
    37  }
    38  
    39  func NewDockerDriver(ctx *DriverContext) Driver {
    40  	return &DockerDriver{*ctx}
    41  }
    42  
    43  // dockerClient creates *docker.Client. In test / dev mode we can use ENV vars
    44  // to connect to the docker daemon. In production mode we will read
    45  // docker.endpoint from the config file.
    46  func (d *DockerDriver) dockerClient() (*docker.Client, error) {
    47  	// In dev mode, read DOCKER_* environment variables DOCKER_HOST,
    48  	// DOCKER_TLS_VERIFY, and DOCKER_CERT_PATH. This allows you to run tests and
    49  	// demo against boot2docker or a VM on OSX and Windows. This falls back on
    50  	// the default unix socket on linux if tests are run on linux.
    51  	//
    52  	// Also note that we need to turn on DevMode in the test configs.
    53  	if d.config.DevMode {
    54  		return docker.NewClientFromEnv()
    55  	}
    56  
    57  	// In prod mode we'll read the docker.endpoint configuration and fall back
    58  	// on the host-specific default. We do not read from the environment.
    59  	defaultEndpoint, err := docker.DefaultDockerHost()
    60  	if err != nil {
    61  		return nil, fmt.Errorf("Unable to determine default docker endpoint: %s", err)
    62  	}
    63  	dockerEndpoint := d.config.ReadDefault("docker.endpoint", defaultEndpoint)
    64  
    65  	return docker.NewClient(dockerEndpoint)
    66  }
    67  
    68  func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    69  	// Initialize docker API client
    70  	client, err := d.dockerClient()
    71  	if err != nil {
    72  		d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon: %v", err)
    73  		return false, nil
    74  	}
    75  
    76  	_, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true"))
    77  	if err != nil {
    78  		return false, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err)
    79  	}
    80  	_, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true"))
    81  	if err != nil {
    82  		return false, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err)
    83  	}
    84  
    85  	env, err := client.Version()
    86  	if err != nil {
    87  		d.logger.Printf("[DEBUG] driver.docker: could not read version from daemon: %v", err)
    88  		// Check the "no such file" error if the unix file is missing
    89  		if strings.Contains(err.Error(), "no such file") {
    90  			return false, nil
    91  		}
    92  
    93  		// We connected to the daemon but couldn't read the version so something
    94  		// is broken.
    95  		return false, err
    96  	}
    97  	node.Attributes["driver.docker"] = "1"
    98  	node.Attributes["driver.docker.version"] = env.Get("Version")
    99  
   100  	return true, nil
   101  }
   102  
   103  func (d *DockerDriver) containerBinds(alloc *allocdir.AllocDir, task *structs.Task) ([]string, error) {
   104  	shared := alloc.SharedDir
   105  	local, ok := alloc.TaskDirs[task.Name]
   106  	if !ok {
   107  		return nil, fmt.Errorf("Failed to find task local directory: %v", task.Name)
   108  	}
   109  
   110  	return []string{
   111  		fmt.Sprintf("%s:%s", shared, allocdir.SharedAllocName),
   112  		fmt.Sprintf("%s:%s", local, allocdir.TaskLocal),
   113  	}, nil
   114  }
   115  
   116  // createContainer initializes a struct needed to call docker.client.CreateContainer()
   117  func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task) (docker.CreateContainerOptions, error) {
   118  	var c docker.CreateContainerOptions
   119  	if task.Resources == nil {
   120  		d.logger.Printf("[ERR] driver.docker: task.Resources is empty")
   121  		return c, fmt.Errorf("task.Resources is nil and we can't constrain resource usage. We shouldn't have been able to schedule this in the first place.")
   122  	}
   123  
   124  	binds, err := d.containerBinds(ctx.AllocDir, task)
   125  	if err != nil {
   126  		return c, err
   127  	}
   128  
   129  	hostConfig := &docker.HostConfig{
   130  		// Convert MB to bytes. This is an absolute value.
   131  		//
   132  		// This value represents the total amount of memory a process can use.
   133  		// Swap is added to total memory and is managed by the OS, not docker.
   134  		// Since this may cause other processes to swap and cause system
   135  		// instability, we will simply not use swap.
   136  		//
   137  		// See: https://www.kernel.org/doc/Documentation/cgroups/memory.txt
   138  		Memory:     int64(task.Resources.MemoryMB) * 1024 * 1024,
   139  		MemorySwap: -1,
   140  		// Convert Mhz to shares. This is a relative value.
   141  		//
   142  		// There are two types of CPU limiters available: Shares and Quotas. A
   143  		// Share allows a particular process to have a proportion of CPU time
   144  		// relative to other processes; 1024 by default. A CPU Quota is enforced
   145  		// over a Period of time and is a HARD limit on the amount of CPU time a
   146  		// process can use. Processes with quotas cannot burst, while processes
   147  		// with shares can, so we'll use shares.
   148  		//
   149  		// The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any
   150  		// given process will have at least that amount of resources, but likely
   151  		// more since it is (probably) rare that the machine will run at 100%
   152  		// CPU. This scale will cease to work if a node is overprovisioned.
   153  		//
   154  		// See:
   155  		//  - https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
   156  		//  - https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt
   157  		CPUShares: int64(task.Resources.CPU),
   158  
   159  		// Binds are used to mount a host volume into the container. We mount a
   160  		// local directory for storage and a shared alloc directory that can be
   161  		// used to share data between different tasks in the same task group.
   162  		Binds: binds,
   163  	}
   164  
   165  	d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"])
   166  	d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"])
   167  	d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"])
   168  
   169  	mode, ok := task.Config["network_mode"]
   170  	if !ok || mode == "" {
   171  		// docker default
   172  		d.logger.Printf("[WARN] driver.docker: no mode specified for networking, defaulting to bridge")
   173  		mode = "bridge"
   174  	}
   175  
   176  	// Ignore the container mode for now
   177  	switch mode {
   178  	case "default", "bridge", "none", "host":
   179  		d.logger.Printf("[DEBUG] driver.docker: using %s as network mode", mode)
   180  	default:
   181  		d.logger.Printf("[ERR] driver.docker: invalid setting for network mode: %s", mode)
   182  		return c, fmt.Errorf("Invalid setting for network mode: %s", mode)
   183  	}
   184  	hostConfig.NetworkMode = mode
   185  
   186  	// Setup port mapping (equivalent to -p on docker CLI). Ports must already be
   187  	// exposed in the container.
   188  	if len(task.Resources.Networks) == 0 {
   189  		d.logger.Print("[WARN] driver.docker: No networks are available for port mapping")
   190  	} else {
   191  		network := task.Resources.Networks[0]
   192  		dockerPorts := map[docker.Port][]docker.PortBinding{}
   193  
   194  		for _, port := range network.ListStaticPorts() {
   195  			dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   196  			dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   197  			d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)\n", network.IP, port, port)
   198  		}
   199  
   200  		for label, port := range network.MapDynamicPorts() {
   201  			// If the label is numeric we expect that there is a service
   202  			// listening on that port inside the container. In this case we'll
   203  			// setup a mapping from our random host port to the label port.
   204  			//
   205  			// Otherwise we'll setup a direct 1:1 mapping from the host port to
   206  			// the container, and assume that the process inside will read the
   207  			// environment variable and bind to the correct port.
   208  			if _, err := strconv.Atoi(label); err == nil {
   209  				dockerPorts[docker.Port(label+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   210  				dockerPorts[docker.Port(label+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   211  				d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %s (mapped)", network.IP, port, label)
   212  			} else {
   213  				dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   214  				dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   215  				d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d for label %s\n", network.IP, port, port, label)
   216  			}
   217  		}
   218  		hostConfig.PortBindings = dockerPorts
   219  	}
   220  
   221  	// Create environment variables.
   222  	env := TaskEnvironmentVariables(ctx, task)
   223  	env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName))
   224  	env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal))
   225  
   226  	config := &docker.Config{
   227  		Env:   env.List(),
   228  		Image: task.Config["image"],
   229  	}
   230  
   231  	rawArgs, hasArgs := task.Config["args"]
   232  	parsedArgs, err := args.ParseAndReplace(rawArgs, env.Map())
   233  	if err != nil {
   234  		return c, err
   235  	}
   236  
   237  	// If the user specified a custom command to run, we'll inject it here.
   238  	if command, ok := task.Config["command"]; ok {
   239  		cmd := []string{command}
   240  		if hasArgs {
   241  			cmd = append(cmd, parsedArgs...)
   242  		}
   243  		config.Cmd = cmd
   244  	} else if hasArgs {
   245  		d.logger.Println("[DEBUG] driver.docker: ignoring args because command not specified")
   246  	}
   247  
   248  	return docker.CreateContainerOptions{
   249  		Config:     config,
   250  		HostConfig: hostConfig,
   251  	}, nil
   252  }
   253  
   254  func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
   255  	// Get the image from config
   256  	image, ok := task.Config["image"]
   257  	if !ok || image == "" {
   258  		return nil, fmt.Errorf("Image not specified")
   259  	}
   260  	if task.Resources == nil {
   261  		return nil, fmt.Errorf("Resources are not specified")
   262  	}
   263  	if task.Resources.MemoryMB == 0 {
   264  		return nil, fmt.Errorf("Memory limit cannot be zero")
   265  	}
   266  	if task.Resources.CPU == 0 {
   267  		return nil, fmt.Errorf("CPU limit cannot be zero")
   268  	}
   269  
   270  	cleanupContainer, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true"))
   271  	if err != nil {
   272  		return nil, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err)
   273  	}
   274  	cleanupImage, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true"))
   275  	if err != nil {
   276  		return nil, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err)
   277  	}
   278  
   279  	// Initialize docker API client
   280  	client, err := d.dockerClient()
   281  	if err != nil {
   282  		return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
   283  	}
   284  
   285  	repo, tag := docker.ParseRepositoryTag(image)
   286  	// Make sure tag is always explicitly set. We'll default to "latest" if it
   287  	// isn't, which is the expected behavior.
   288  	if tag == "" {
   289  		tag = "latest"
   290  	}
   291  
   292  	var dockerImage *docker.Image
   293  	// We're going to check whether the image is already downloaded. If the tag
   294  	// is "latest" we have to check for a new version every time so we don't
   295  	// bother to check and cache the id here. We'll download first, then cache.
   296  	if tag != "latest" {
   297  		dockerImage, err = client.InspectImage(image)
   298  	}
   299  
   300  	// Download the image
   301  	if dockerImage == nil {
   302  		pullOptions := docker.PullImageOptions{
   303  			Repository: repo,
   304  			Tag:        tag,
   305  		}
   306  		// TODO add auth configuration for private repos
   307  		authOptions := docker.AuthConfiguration{}
   308  		err = client.PullImage(pullOptions, authOptions)
   309  		if err != nil {
   310  			d.logger.Printf("[ERR] driver.docker: pulling container %s", err)
   311  			return nil, fmt.Errorf("Failed to pull `%s`: %s", image, err)
   312  		}
   313  		d.logger.Printf("[DEBUG] driver.docker: docker pull %s:%s succeeded", repo, tag)
   314  
   315  		// Now that we have the image we can get the image id
   316  		dockerImage, err = client.InspectImage(image)
   317  		if err != nil {
   318  			d.logger.Printf("[ERR] driver.docker: getting image id for %s", image)
   319  			return nil, fmt.Errorf("Failed to determine image id for `%s`: %s", image, err)
   320  		}
   321  	}
   322  	d.logger.Printf("[DEBUG] driver.docker: using image %s", dockerImage.ID)
   323  	d.logger.Printf("[INFO] driver.docker: identified image %s as %s", image, dockerImage.ID)
   324  
   325  	config, err := d.createContainer(ctx, task)
   326  	if err != nil {
   327  		d.logger.Printf("[ERR] driver.docker: %s", err)
   328  		return nil, fmt.Errorf("Failed to create container config for image %s", image)
   329  	}
   330  	// Create a container
   331  	container, err := client.CreateContainer(config)
   332  	if err != nil {
   333  		d.logger.Printf("[ERR] driver.docker: %s", err)
   334  		return nil, fmt.Errorf("Failed to create container from image %s", image)
   335  	}
   336  	d.logger.Printf("[INFO] driver.docker: created container %s", container.ID)
   337  
   338  	// Start the container
   339  	err = client.StartContainer(container.ID, container.HostConfig)
   340  	if err != nil {
   341  		d.logger.Printf("[ERR] driver.docker: starting container %s", container.ID)
   342  		return nil, fmt.Errorf("Failed to start container %s", container.ID)
   343  	}
   344  	d.logger.Printf("[INFO] driver.docker: started container %s", container.ID)
   345  
   346  	// Return a driver handle
   347  	h := &dockerHandle{
   348  		client:           client,
   349  		cleanupContainer: cleanupContainer,
   350  		cleanupImage:     cleanupImage,
   351  		logger:           d.logger,
   352  		imageID:          dockerImage.ID,
   353  		containerID:      container.ID,
   354  		doneCh:           make(chan struct{}),
   355  		waitCh:           make(chan error, 1),
   356  	}
   357  	go h.run()
   358  	return h, nil
   359  }
   360  
   361  func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
   362  	cleanupContainer, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true"))
   363  	if err != nil {
   364  		return nil, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err)
   365  	}
   366  	cleanupImage, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true"))
   367  	if err != nil {
   368  		return nil, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err)
   369  	}
   370  
   371  	// Split the handle
   372  	pidBytes := []byte(strings.TrimPrefix(handleID, "DOCKER:"))
   373  	pid := &dockerPID{}
   374  	err = json.Unmarshal(pidBytes, pid)
   375  	if err != nil {
   376  		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
   377  	}
   378  	d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", handleID)
   379  
   380  	// Initialize docker API client
   381  	client, err := d.dockerClient()
   382  	if err != nil {
   383  		return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err)
   384  	}
   385  
   386  	// Look for a running container with this ID
   387  	containers, err := client.ListContainers(docker.ListContainersOptions{
   388  		Filters: map[string][]string{
   389  			"id": []string{pid.ContainerID},
   390  		},
   391  	})
   392  	if err != nil {
   393  		return nil, fmt.Errorf("Failed to query for container %s: %v", pid.ContainerID, err)
   394  	}
   395  
   396  	found := false
   397  	for _, container := range containers {
   398  		if container.ID == pid.ContainerID {
   399  			found = true
   400  		}
   401  	}
   402  	if !found {
   403  		return nil, fmt.Errorf("Failed to find container %s: %v", pid.ContainerID, err)
   404  	}
   405  
   406  	// Return a driver handle
   407  	h := &dockerHandle{
   408  		client:           client,
   409  		cleanupContainer: cleanupContainer,
   410  		cleanupImage:     cleanupImage,
   411  		logger:           d.logger,
   412  		imageID:          pid.ImageID,
   413  		containerID:      pid.ContainerID,
   414  		doneCh:           make(chan struct{}),
   415  		waitCh:           make(chan error, 1),
   416  	}
   417  	go h.run()
   418  	return h, nil
   419  }
   420  
   421  func (h *dockerHandle) ID() string {
   422  	// Return a handle to the PID
   423  	pid := dockerPID{
   424  		ImageID:     h.imageID,
   425  		ContainerID: h.containerID,
   426  	}
   427  	data, err := json.Marshal(pid)
   428  	if err != nil {
   429  		h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err)
   430  	}
   431  	return fmt.Sprintf("DOCKER:%s", string(data))
   432  }
   433  
   434  func (h *dockerHandle) WaitCh() chan error {
   435  	return h.waitCh
   436  }
   437  
   438  func (h *dockerHandle) Update(task *structs.Task) error {
   439  	// Update is not possible
   440  	return nil
   441  }
   442  
   443  // Kill is used to terminate the task. This uses docker stop -t 5
   444  func (h *dockerHandle) Kill() error {
   445  	// Stop the container
   446  	err := h.client.StopContainer(h.containerID, 5)
   447  	if err != nil {
   448  		log.Printf("[ERR] driver.docker: failed stopping container %s", h.containerID)
   449  		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
   450  	}
   451  	log.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
   452  
   453  	// Cleanup container
   454  	if h.cleanupContainer {
   455  		err = h.client.RemoveContainer(docker.RemoveContainerOptions{
   456  			ID:            h.containerID,
   457  			RemoveVolumes: true,
   458  		})
   459  		if err != nil {
   460  			log.Printf("[ERR] driver.docker: removing container %s", h.containerID)
   461  			return fmt.Errorf("Failed to remove container %s: %s", h.containerID, err)
   462  		}
   463  		log.Printf("[INFO] driver.docker: removed container %s", h.containerID)
   464  	}
   465  
   466  	// Cleanup image. This operation may fail if the image is in use by another
   467  	// job. That is OK. Will we log a message but continue.
   468  	if h.cleanupImage {
   469  		err = h.client.RemoveImage(h.imageID)
   470  		if err != nil {
   471  			containers, err := h.client.ListContainers(docker.ListContainersOptions{
   472  				// The image might be in use by a stopped container, so check everything
   473  				All: true,
   474  				Filters: map[string][]string{
   475  					"image": []string{h.imageID},
   476  				},
   477  			})
   478  			if err != nil {
   479  				return fmt.Errorf("Unable to query list of containers: %s", err)
   480  			}
   481  			inUse := len(containers)
   482  			if inUse > 0 {
   483  				log.Printf("[INFO] driver.docker: image %s is still in use by %d containers", h.imageID, inUse)
   484  			} else {
   485  				return fmt.Errorf("Failed to remove image %s", h.imageID)
   486  			}
   487  		} else {
   488  			log.Printf("[INFO] driver.docker: removed image %s", h.imageID)
   489  		}
   490  	}
   491  	return nil
   492  }
   493  
   494  func (h *dockerHandle) run() {
   495  	// Wait for it...
   496  	exitCode, err := h.client.WaitContainer(h.containerID)
   497  	if err != nil {
   498  		h.logger.Printf("[ERR] driver.docker: unable to wait for %s; container already terminated", h.containerID)
   499  	}
   500  
   501  	if exitCode != 0 {
   502  		err = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode)
   503  	}
   504  
   505  	close(h.doneCh)
   506  	if err != nil {
   507  		h.waitCh <- err
   508  	}
   509  	close(h.waitCh)
   510  }