github.com/jmitchell/nomad@v0.1.3-0.20151007230021-7ab84c2862d8/client/driver/docker.go (about)

     1  package driver
     2  
     3  import (
     4  	"encoding/json"
     5  	"fmt"
     6  	"log"
     7  	"strconv"
     8  	"strings"
     9  
    10  	docker "github.com/fsouza/go-dockerclient"
    11  
    12  	"github.com/hashicorp/nomad/client/config"
    13  	"github.com/hashicorp/nomad/nomad/structs"
    14  )
    15  
// DockerDriver is the task driver that runs tasks as Docker containers. It
// embeds DriverContext, which supplies the config and logger used by its
// methods.
type DockerDriver struct {
	DriverContext
}
    19  
// dockerPID is the persistent identity of a running task. It is serialized
// to JSON and embedded in the handle ID (see dockerHandle.ID) so that
// DockerDriver.Open can re-attach to the container later.
type dockerPID struct {
	ImageID     string // ID of the image the container was created from
	ContainerID string // ID of the container running the task
}
    24  
// dockerHandle is the DriverHandle returned by DockerDriver.Start and
// DockerDriver.Open. It tracks a single container and the image backing it.
type dockerHandle struct {
	client           *docker.Client // API client used for all calls about this container
	logger           *log.Logger    // logger inherited from the driver
	cleanupContainer bool           // remove the container in Kill (docker.cleanup.container)
	cleanupImage     bool           // try to remove the image in Kill (docker.cleanup.image)
	imageID          string         // ID of the image backing the container
	containerID      string         // ID of the container being tracked
	waitCh           chan error     // receives the exit error (if any), then is closed by run
	doneCh           chan struct{}  // closed by run once waiting on the container returns
}
    35  
    36  func NewDockerDriver(ctx *DriverContext) Driver {
    37  	return &DockerDriver{*ctx}
    38  }
    39  
    40  func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) {
    41  	// Initialize docker API client
    42  	dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock")
    43  	client, err := docker.NewClient(dockerEndpoint)
    44  	if err != nil {
    45  		return false, nil
    46  	}
    47  
    48  	_, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true"))
    49  	if err != nil {
    50  		return false, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err)
    51  	}
    52  	_, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true"))
    53  	if err != nil {
    54  		return false, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err)
    55  	}
    56  
    57  	env, err := client.Version()
    58  	if err != nil {
    59  		// Check the "no such file" error if the unix file is missing
    60  		if strings.Contains(err.Error(), "no such file") {
    61  			return false, nil
    62  		}
    63  
    64  		// We connected to the daemon but couldn't read the version so something
    65  		// is broken.
    66  		return false, err
    67  	}
    68  	node.Attributes["driver.docker"] = "true"
    69  	node.Attributes["driver.docker.version"] = env.Get("Version")
    70  
    71  	return true, nil
    72  }
    73  
    74  // We have to call this when we create the container AND when we start it so
    75  // we'll make a function.
    76  func createHostConfig(task *structs.Task) *docker.HostConfig {
    77  	// hostConfig holds options for the docker container that are unique to this
    78  	// machine, such as resource limits and port mappings
    79  	return &docker.HostConfig{
    80  		// Convert MB to bytes. This is an absolute value.
    81  		//
    82  		// This value represents the total amount of memory a process can use.
    83  		// Swap is added to total memory and is managed by the OS, not docker.
    84  		// Since this may cause other processes to swap and cause system
    85  		// instability, we will simply not use swap.
    86  		//
    87  		// See: https://www.kernel.org/doc/Documentation/cgroups/memory.txt
    88  		Memory:     int64(task.Resources.MemoryMB) * 1024 * 1024,
    89  		MemorySwap: -1,
    90  		// Convert Mhz to shares. This is a relative value.
    91  		//
    92  		// There are two types of CPU limiters available: Shares and Quotas. A
    93  		// Share allows a particular process to have a proportion of CPU time
    94  		// relative to other processes; 1024 by default. A CPU Quota is enforced
    95  		// over a Period of time and is a HARD limit on the amount of CPU time a
    96  		// process can use. Processes with quotas cannot burst, while processes
    97  		// with shares can, so we'll use shares.
    98  		//
    99  		// The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any
   100  		// given process will have at least that amount of resources, but likely
   101  		// more since it is (probably) rare that the machine will run at 100%
   102  		// CPU. This scale will cease to work if a node is overprovisioned.
   103  		//
   104  		// See:
   105  		//  - https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt
   106  		//  - https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt
   107  		CPUShares: int64(task.Resources.CPU),
   108  	}
   109  }
   110  
   111  // createContainer initializes a struct needed to call docker.client.CreateContainer()
   112  func createContainer(ctx *ExecContext, task *structs.Task, logger *log.Logger) docker.CreateContainerOptions {
   113  	if task.Resources == nil {
   114  		panic("task.Resources is nil and we can't constrain resource usage. We shouldn't have been able to schedule this in the first place.")
   115  	}
   116  
   117  	hostConfig := createHostConfig(task)
   118  	logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"])
   119  	logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"])
   120  
   121  	mode, ok := task.Config["network_mode"]
   122  	if !ok || mode == "" {
   123  		// docker default
   124  		logger.Printf("[WARN] driver.docker: no mode specified for networking, defaulting to bridge")
   125  		mode = "bridge"
   126  	}
   127  
   128  	// Ignore the container mode for now
   129  	switch mode {
   130  	case "default", "bridge", "none", "host":
   131  		logger.Printf("[DEBUG] driver.docker: using %s as network mode", mode)
   132  	default:
   133  		logger.Printf("[WARN] invalid setting for network mode %s, defaulting to bridge mode on docker0", mode)
   134  		mode = "bridge"
   135  	}
   136  	hostConfig.NetworkMode = mode
   137  
   138  	// Setup port mapping (equivalent to -p on docker CLI). Ports must already be
   139  	// exposed in the container.
   140  	if len(task.Resources.Networks) == 0 {
   141  		logger.Print("[WARN] driver.docker: No networks are available for port mapping")
   142  	} else {
   143  		network := task.Resources.Networks[0]
   144  		dockerPorts := map[docker.Port][]docker.PortBinding{}
   145  
   146  		for _, port := range network.ListStaticPorts() {
   147  			dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   148  			dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   149  			logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)\n", network.IP, port, port)
   150  		}
   151  
   152  		for label, port := range network.MapDynamicPorts() {
   153  			// If the label is numeric we expect that there is a service
   154  			// listening on that port inside the container. In this case we'll
   155  			// setup a mapping from our random host port to the label port.
   156  			//
   157  			// Otherwise we'll setup a direct 1:1 mapping from the host port to
   158  			// the container, and assume that the process inside will read the
   159  			// environment variable and bind to the correct port.
   160  			if _, err := strconv.Atoi(label); err == nil {
   161  				dockerPorts[docker.Port(label+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   162  				dockerPorts[docker.Port(label+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   163  				logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %s (mapped)", network.IP, port, label)
   164  			} else {
   165  				dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   166  				dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}}
   167  				logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d for label %s\n", network.IP, port, port, label)
   168  			}
   169  		}
   170  		hostConfig.PortBindings = dockerPorts
   171  	}
   172  
   173  	config := &docker.Config{
   174  		Env:   TaskEnvironmentVariables(ctx, task).List(),
   175  		Image: task.Config["image"],
   176  	}
   177  
   178  	// If the user specified a custom command to run, we'll inject it here.
   179  	if command, ok := task.Config["command"]; ok {
   180  		config.Cmd = strings.Split(command, " ")
   181  	}
   182  
   183  	return docker.CreateContainerOptions{
   184  		Config:     config,
   185  		HostConfig: hostConfig,
   186  	}
   187  }
   188  
   189  func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) {
   190  	// Get the image from config
   191  	image, ok := task.Config["image"]
   192  	if !ok || image == "" {
   193  		return nil, fmt.Errorf("Image not specified")
   194  	}
   195  	if task.Resources == nil {
   196  		return nil, fmt.Errorf("Resources are not specified")
   197  	}
   198  	if task.Resources.MemoryMB == 0 {
   199  		return nil, fmt.Errorf("Memory limit cannot be zero")
   200  	}
   201  	if task.Resources.CPU == 0 {
   202  		return nil, fmt.Errorf("CPU limit cannot be zero")
   203  	}
   204  
   205  	cleanupContainer, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true"))
   206  	if err != nil {
   207  		return nil, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err)
   208  	}
   209  	cleanupImage, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true"))
   210  	if err != nil {
   211  		return nil, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err)
   212  	}
   213  
   214  	// Initialize docker API client
   215  	dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock")
   216  	client, err := docker.NewClient(dockerEndpoint)
   217  	if err != nil {
   218  		return nil, fmt.Errorf("Failed to connect to docker.endpoint (%s): %s", dockerEndpoint, err)
   219  	}
   220  
   221  	repo, tag := docker.ParseRepositoryTag(image)
   222  	// Make sure tag is always explicitly set. We'll default to "latest" if it
   223  	// isn't, which is the expected behavior.
   224  	if tag == "" {
   225  		tag = "latest"
   226  	}
   227  
   228  	var dockerImage *docker.Image
   229  	// We're going to check whether the image is already downloaded. If the tag
   230  	// is "latest" we have to check for a new version every time so we don't
   231  	// bother to check and cache the id here. We'll download first, then cache.
   232  	if tag != "latest" {
   233  		dockerImage, err = client.InspectImage(image)
   234  	}
   235  
   236  	// Download the image
   237  	if dockerImage == nil {
   238  		pullOptions := docker.PullImageOptions{
   239  			Repository: repo,
   240  			Tag:        tag,
   241  		}
   242  		// TODO add auth configuration for private repos
   243  		authOptions := docker.AuthConfiguration{}
   244  		err = client.PullImage(pullOptions, authOptions)
   245  		if err != nil {
   246  			d.logger.Printf("[ERR] driver.docker: pulling container %s", err)
   247  			return nil, fmt.Errorf("Failed to pull `%s`: %s", image, err)
   248  		}
   249  		d.logger.Printf("[DEBUG] driver.docker: docker pull %s:%s succeeded", repo, tag)
   250  
   251  		// Now that we have the image we can get the image id
   252  		dockerImage, err = client.InspectImage(image)
   253  		if err != nil {
   254  			d.logger.Printf("[ERR] driver.docker: getting image id for %s", image)
   255  			return nil, fmt.Errorf("Failed to determine image id for `%s`: %s", image, err)
   256  		}
   257  	}
   258  	d.logger.Printf("[DEBUG] driver.docker: using image %s", dockerImage.ID)
   259  	d.logger.Printf("[INFO] driver.docker: identified image %s as %s", image, dockerImage.ID)
   260  
   261  	// Create a container
   262  	container, err := client.CreateContainer(createContainer(ctx, task, d.logger))
   263  	if err != nil {
   264  		d.logger.Printf("[ERR] driver.docker: %s", err)
   265  		return nil, fmt.Errorf("Failed to create container from image %s", image)
   266  	}
   267  	d.logger.Printf("[INFO] driver.docker: created container %s", container.ID)
   268  
   269  	// Start the container
   270  	err = client.StartContainer(container.ID, container.HostConfig)
   271  	if err != nil {
   272  		d.logger.Printf("[ERR] driver.docker: starting container %s", container.ID)
   273  		return nil, fmt.Errorf("Failed to start container %s", container.ID)
   274  	}
   275  	d.logger.Printf("[INFO] driver.docker: started container %s", container.ID)
   276  
   277  	// Return a driver handle
   278  	h := &dockerHandle{
   279  		client:           client,
   280  		cleanupContainer: cleanupContainer,
   281  		cleanupImage:     cleanupImage,
   282  		logger:           d.logger,
   283  		imageID:          dockerImage.ID,
   284  		containerID:      container.ID,
   285  		doneCh:           make(chan struct{}),
   286  		waitCh:           make(chan error, 1),
   287  	}
   288  	go h.run()
   289  	return h, nil
   290  }
   291  
   292  func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) {
   293  	cleanupContainer, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true"))
   294  	if err != nil {
   295  		return nil, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err)
   296  	}
   297  	cleanupImage, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true"))
   298  	if err != nil {
   299  		return nil, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err)
   300  	}
   301  
   302  	// Split the handle
   303  	pidBytes := []byte(strings.TrimPrefix(handleID, "DOCKER:"))
   304  	pid := &dockerPID{}
   305  	err = json.Unmarshal(pidBytes, pid)
   306  	if err != nil {
   307  		return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err)
   308  	}
   309  	d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", handleID)
   310  
   311  	// Initialize docker API client
   312  	dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock")
   313  	client, err := docker.NewClient(dockerEndpoint)
   314  	if err != nil {
   315  		return nil, fmt.Errorf("Failed to connect to docker.endpoint (%s): %s", dockerEndpoint, err)
   316  	}
   317  
   318  	// Look for a running container with this ID
   319  	containers, err := client.ListContainers(docker.ListContainersOptions{
   320  		Filters: map[string][]string{
   321  			"id": []string{pid.ContainerID},
   322  		},
   323  	})
   324  	if err != nil {
   325  		return nil, fmt.Errorf("Failed to query for container %s: %v", pid.ContainerID, err)
   326  	}
   327  
   328  	found := false
   329  	for _, container := range containers {
   330  		if container.ID == pid.ContainerID {
   331  			found = true
   332  		}
   333  	}
   334  	if !found {
   335  		return nil, fmt.Errorf("Failed to find container %s: %v", pid.ContainerID, err)
   336  	}
   337  
   338  	// Return a driver handle
   339  	h := &dockerHandle{
   340  		client:           client,
   341  		cleanupContainer: cleanupContainer,
   342  		cleanupImage:     cleanupImage,
   343  		logger:           d.logger,
   344  		imageID:          pid.ImageID,
   345  		containerID:      pid.ContainerID,
   346  		doneCh:           make(chan struct{}),
   347  		waitCh:           make(chan error, 1),
   348  	}
   349  	go h.run()
   350  	return h, nil
   351  }
   352  
   353  func (h *dockerHandle) ID() string {
   354  	// Return a handle to the PID
   355  	pid := dockerPID{
   356  		ImageID:     h.imageID,
   357  		ContainerID: h.containerID,
   358  	}
   359  	data, err := json.Marshal(pid)
   360  	if err != nil {
   361  		h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err)
   362  	}
   363  	return fmt.Sprintf("DOCKER:%s", string(data))
   364  }
   365  
// WaitCh returns the channel on which run delivers the container's exit
// error (if any) before closing it.
func (h *dockerHandle) WaitCh() chan error {
	return h.waitCh
}
   369  
// Update is a no-op for this driver: the task argument is ignored and nil is
// always returned.
func (h *dockerHandle) Update(task *structs.Task) error {
	// Update is not possible
	return nil
}
   374  
   375  // Kill is used to terminate the task. This uses docker stop -t 5
   376  func (h *dockerHandle) Kill() error {
   377  	// Stop the container
   378  	err := h.client.StopContainer(h.containerID, 5)
   379  	if err != nil {
   380  		log.Printf("[ERR] driver.docker: failed stopping container %s", h.containerID)
   381  		return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err)
   382  	}
   383  	log.Printf("[INFO] driver.docker: stopped container %s", h.containerID)
   384  
   385  	// Cleanup container
   386  	if h.cleanupContainer {
   387  		err = h.client.RemoveContainer(docker.RemoveContainerOptions{
   388  			ID:            h.containerID,
   389  			RemoveVolumes: true,
   390  		})
   391  		if err != nil {
   392  			log.Printf("[ERR] driver.docker: removing container %s", h.containerID)
   393  			return fmt.Errorf("Failed to remove container %s: %s", h.containerID, err)
   394  		}
   395  		log.Printf("[INFO] driver.docker: removed container %s", h.containerID)
   396  	}
   397  
   398  	// Cleanup image. This operation may fail if the image is in use by another
   399  	// job. That is OK. Will we log a message but continue.
   400  	if h.cleanupImage {
   401  		err = h.client.RemoveImage(h.imageID)
   402  		if err != nil {
   403  			containers, err := h.client.ListContainers(docker.ListContainersOptions{
   404  				All: true,
   405  				Filters: map[string][]string{
   406  					"image": []string{h.imageID},
   407  				},
   408  			})
   409  			if err != nil {
   410  				return fmt.Errorf("Unable to query list of containers: %s", err)
   411  			}
   412  			inUse := len(containers)
   413  			if inUse > 0 {
   414  				log.Printf("[INFO] driver.docker: image %s is still in use by %d containers", h.imageID, inUse)
   415  			} else {
   416  				return fmt.Errorf("Failed to remove image %s", h.imageID)
   417  			}
   418  		} else {
   419  			log.Printf("[INFO] driver.docker: removed image %s", h.imageID)
   420  		}
   421  	}
   422  	return nil
   423  }
   424  
   425  func (h *dockerHandle) run() {
   426  	// Wait for it...
   427  	exitCode, err := h.client.WaitContainer(h.containerID)
   428  	if err != nil {
   429  		h.logger.Printf("[ERR] driver.docker: unable to wait for %s; container already terminated", h.containerID)
   430  	}
   431  
   432  	if exitCode != 0 {
   433  		err = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode)
   434  	}
   435  
   436  	close(h.doneCh)
   437  	if err != nil {
   438  		h.waitCh <- err
   439  	}
   440  	close(h.waitCh)
   441  }