github.com/jmitchell/nomad@v0.1.3-0.20151007230021-7ab84c2862d8/client/driver/docker.go (about) 1 package driver 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "log" 7 "strconv" 8 "strings" 9 10 docker "github.com/fsouza/go-dockerclient" 11 12 "github.com/hashicorp/nomad/client/config" 13 "github.com/hashicorp/nomad/nomad/structs" 14 ) 15 16 type DockerDriver struct { 17 DriverContext 18 } 19 20 type dockerPID struct { 21 ImageID string 22 ContainerID string 23 } 24 25 type dockerHandle struct { 26 client *docker.Client 27 logger *log.Logger 28 cleanupContainer bool 29 cleanupImage bool 30 imageID string 31 containerID string 32 waitCh chan error 33 doneCh chan struct{} 34 } 35 36 func NewDockerDriver(ctx *DriverContext) Driver { 37 return &DockerDriver{*ctx} 38 } 39 40 func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { 41 // Initialize docker API client 42 dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock") 43 client, err := docker.NewClient(dockerEndpoint) 44 if err != nil { 45 return false, nil 46 } 47 48 _, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true")) 49 if err != nil { 50 return false, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err) 51 } 52 _, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true")) 53 if err != nil { 54 return false, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err) 55 } 56 57 env, err := client.Version() 58 if err != nil { 59 // Check the "no such file" error if the unix file is missing 60 if strings.Contains(err.Error(), "no such file") { 61 return false, nil 62 } 63 64 // We connected to the daemon but couldn't read the version so something 65 // is broken. 66 return false, err 67 } 68 node.Attributes["driver.docker"] = "true" 69 node.Attributes["driver.docker.version"] = env.Get("Version") 70 71 return true, nil 72 } 73 74 // We have to call this when we create the container AND when we start it so 75 // we'll make a function. 76 func createHostConfig(task *structs.Task) *docker.HostConfig { 77 // hostConfig holds options for the docker container that are unique to this 78 // machine, such as resource limits and port mappings 79 return &docker.HostConfig{ 80 // Convert MB to bytes. This is an absolute value. 81 // 82 // This value represents the total amount of memory a process can use. 83 // Swap is added to total memory and is managed by the OS, not docker. 84 // Since this may cause other processes to swap and cause system 85 // instability, we will simply not use swap. 86 // 87 // See: https://www.kernel.org/doc/Documentation/cgroups/memory.txt 88 Memory: int64(task.Resources.MemoryMB) * 1024 * 1024, 89 MemorySwap: -1, 90 // Convert Mhz to shares. This is a relative value. 91 // 92 // There are two types of CPU limiters available: Shares and Quotas. A 93 // Share allows a particular process to have a proportion of CPU time 94 // relative to other processes; 1024 by default. A CPU Quota is enforced 95 // over a Period of time and is a HARD limit on the amount of CPU time a 96 // process can use. Processes with quotas cannot burst, while processes 97 // with shares can, so we'll use shares. 98 // 99 // The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any 100 // given process will have at least that amount of resources, but likely 101 // more since it is (probably) rare that the machine will run at 100% 102 // CPU. This scale will cease to work if a node is overprovisioned. 103 // 104 // See: 105 // - https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 106 // - https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt 107 CPUShares: int64(task.Resources.CPU), 108 } 109 } 110 111 // createContainer initializes a struct needed to call docker.client.CreateContainer() 112 func createContainer(ctx *ExecContext, task *structs.Task, logger *log.Logger) docker.CreateContainerOptions { 113 if task.Resources == nil { 114 panic("task.Resources is nil and we can't constrain resource usage. We shouldn't have been able to schedule this in the first place.") 115 } 116 117 hostConfig := createHostConfig(task) 118 logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"]) 119 logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"]) 120 121 mode, ok := task.Config["network_mode"] 122 if !ok || mode == "" { 123 // docker default 124 logger.Printf("[WARN] driver.docker: no mode specified for networking, defaulting to bridge") 125 mode = "bridge" 126 } 127 128 // Ignore the container mode for now 129 switch mode { 130 case "default", "bridge", "none", "host": 131 logger.Printf("[DEBUG] driver.docker: using %s as network mode", mode) 132 default: 133 logger.Printf("[WARN] invalid setting for network mode %s, defaulting to bridge mode on docker0", mode) 134 mode = "bridge" 135 } 136 hostConfig.NetworkMode = mode 137 138 // Setup port mapping (equivalent to -p on docker CLI). Ports must already be 139 // exposed in the container. 140 if len(task.Resources.Networks) == 0 { 141 logger.Print("[WARN] driver.docker: No networks are available for port mapping") 142 } else { 143 network := task.Resources.Networks[0] 144 dockerPorts := map[docker.Port][]docker.PortBinding{} 145 146 for _, port := range network.ListStaticPorts() { 147 dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 148 dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 149 logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)\n", network.IP, port, port) 150 } 151 152 for label, port := range network.MapDynamicPorts() { 153 // If the label is numeric we expect that there is a service 154 // listening on that port inside the container. In this case we'll 155 // setup a mapping from our random host port to the label port. 156 // 157 // Otherwise we'll setup a direct 1:1 mapping from the host port to 158 // the container, and assume that the process inside will read the 159 // environment variable and bind to the correct port. 160 if _, err := strconv.Atoi(label); err == nil { 161 dockerPorts[docker.Port(label+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 162 dockerPorts[docker.Port(label+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 163 logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %s (mapped)", network.IP, port, label) 164 } else { 165 dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 166 dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 167 logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d for label %s\n", network.IP, port, port, label) 168 } 169 } 170 hostConfig.PortBindings = dockerPorts 171 } 172 173 config := &docker.Config{ 174 Env: TaskEnvironmentVariables(ctx, task).List(), 175 Image: task.Config["image"], 176 } 177 178 // If the user specified a custom command to run, we'll inject it here. 179 if command, ok := task.Config["command"]; ok { 180 config.Cmd = strings.Split(command, " ") 181 } 182 183 return docker.CreateContainerOptions{ 184 Config: config, 185 HostConfig: hostConfig, 186 } 187 } 188 189 func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { 190 // Get the image from config 191 image, ok := task.Config["image"] 192 if !ok || image == "" { 193 return nil, fmt.Errorf("Image not specified") 194 } 195 if task.Resources == nil { 196 return nil, fmt.Errorf("Resources are not specified") 197 } 198 if task.Resources.MemoryMB == 0 { 199 return nil, fmt.Errorf("Memory limit cannot be zero") 200 } 201 if task.Resources.CPU == 0 { 202 return nil, fmt.Errorf("CPU limit cannot be zero") 203 } 204 205 cleanupContainer, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true")) 206 if err != nil { 207 return nil, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err) 208 } 209 cleanupImage, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true")) 210 if err != nil { 211 return nil, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err) 212 } 213 214 // Initialize docker API client 215 dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock") 216 client, err := docker.NewClient(dockerEndpoint) 217 if err != nil { 218 return nil, fmt.Errorf("Failed to connect to docker.endpoint (%s): %s", dockerEndpoint, err) 219 } 220 221 repo, tag := docker.ParseRepositoryTag(image) 222 // Make sure tag is always explicitly set. We'll default to "latest" if it 223 // isn't, which is the expected behavior. 224 if tag == "" { 225 tag = "latest" 226 } 227 228 var dockerImage *docker.Image 229 // We're going to check whether the image is already downloaded. If the tag 230 // is "latest" we have to check for a new version every time so we don't 231 // bother to check and cache the id here. We'll download first, then cache. 232 if tag != "latest" { 233 dockerImage, err = client.InspectImage(image) 234 } 235 236 // Download the image 237 if dockerImage == nil { 238 pullOptions := docker.PullImageOptions{ 239 Repository: repo, 240 Tag: tag, 241 } 242 // TODO add auth configuration for private repos 243 authOptions := docker.AuthConfiguration{} 244 err = client.PullImage(pullOptions, authOptions) 245 if err != nil { 246 d.logger.Printf("[ERR] driver.docker: pulling container %s", err) 247 return nil, fmt.Errorf("Failed to pull `%s`: %s", image, err) 248 } 249 d.logger.Printf("[DEBUG] driver.docker: docker pull %s:%s succeeded", repo, tag) 250 251 // Now that we have the image we can get the image id 252 dockerImage, err = client.InspectImage(image) 253 if err != nil { 254 d.logger.Printf("[ERR] driver.docker: getting image id for %s", image) 255 return nil, fmt.Errorf("Failed to determine image id for `%s`: %s", image, err) 256 } 257 } 258 d.logger.Printf("[DEBUG] driver.docker: using image %s", dockerImage.ID) 259 d.logger.Printf("[INFO] driver.docker: identified image %s as %s", image, dockerImage.ID) 260 261 // Create a container 262 container, err := client.CreateContainer(createContainer(ctx, task, d.logger)) 263 if err != nil { 264 d.logger.Printf("[ERR] driver.docker: %s", err) 265 return nil, fmt.Errorf("Failed to create container from image %s", image) 266 } 267 d.logger.Printf("[INFO] driver.docker: created container %s", container.ID) 268 269 // Start the container 270 err = client.StartContainer(container.ID, container.HostConfig) 271 if err != nil { 272 d.logger.Printf("[ERR] driver.docker: starting container %s", container.ID) 273 return nil, fmt.Errorf("Failed to start container %s", container.ID) 274 } 275 d.logger.Printf("[INFO] driver.docker: started container %s", container.ID) 276 277 // Return a driver handle 278 h := &dockerHandle{ 279 client: client, 280 cleanupContainer: cleanupContainer, 281 cleanupImage: cleanupImage, 282 logger: d.logger, 283 imageID: dockerImage.ID, 284 containerID: container.ID, 285 doneCh: make(chan struct{}), 286 waitCh: make(chan error, 1), 287 } 288 go h.run() 289 return h, nil 290 } 291 292 func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { 293 cleanupContainer, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true")) 294 if err != nil { 295 return nil, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err) 296 } 297 cleanupImage, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true")) 298 if err != nil { 299 return nil, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err) 300 } 301 302 // Split the handle 303 pidBytes := []byte(strings.TrimPrefix(handleID, "DOCKER:")) 304 pid := &dockerPID{} 305 err = json.Unmarshal(pidBytes, pid) 306 if err != nil { 307 return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) 308 } 309 d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", handleID) 310 311 // Initialize docker API client 312 dockerEndpoint := d.config.ReadDefault("docker.endpoint", "unix:///var/run/docker.sock") 313 client, err := docker.NewClient(dockerEndpoint) 314 if err != nil { 315 return nil, fmt.Errorf("Failed to connect to docker.endpoint (%s): %s", dockerEndpoint, err) 316 } 317 318 // Look for a running container with this ID 319 containers, err := client.ListContainers(docker.ListContainersOptions{ 320 Filters: map[string][]string{ 321 "id": []string{pid.ContainerID}, 322 }, 323 }) 324 if err != nil { 325 return nil, fmt.Errorf("Failed to query for container %s: %v", pid.ContainerID, err) 326 } 327 328 found := false 329 for _, container := range containers { 330 if container.ID == pid.ContainerID { 331 found = true 332 } 333 } 334 if !found { 335 return nil, fmt.Errorf("Failed to find container %s: %v", pid.ContainerID, err) 336 } 337 338 // Return a driver handle 339 h := &dockerHandle{ 340 client: client, 341 cleanupContainer: cleanupContainer, 342 cleanupImage: cleanupImage, 343 logger: d.logger, 344 imageID: pid.ImageID, 345 containerID: pid.ContainerID, 346 doneCh: make(chan struct{}), 347 waitCh: make(chan error, 1), 348 } 349 go h.run() 350 return h, nil 351 } 352 353 func (h *dockerHandle) ID() string { 354 // Return a handle to the PID 355 pid := dockerPID{ 356 ImageID: h.imageID, 357 ContainerID: h.containerID, 358 } 359 data, err := json.Marshal(pid) 360 if err != nil { 361 h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err) 362 } 363 return fmt.Sprintf("DOCKER:%s", string(data)) 364 } 365 366 func (h *dockerHandle) WaitCh() chan error { 367 return h.waitCh 368 } 369 370 func (h *dockerHandle) Update(task *structs.Task) error { 371 // Update is not possible 372 return nil 373 } 374 375 // Kill is used to terminate the task. This uses docker stop -t 5 376 func (h *dockerHandle) Kill() error { 377 // Stop the container 378 err := h.client.StopContainer(h.containerID, 5) 379 if err != nil { 380 log.Printf("[ERR] driver.docker: failed stopping container %s", h.containerID) 381 return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err) 382 } 383 log.Printf("[INFO] driver.docker: stopped container %s", h.containerID) 384 385 // Cleanup container 386 if h.cleanupContainer { 387 err = h.client.RemoveContainer(docker.RemoveContainerOptions{ 388 ID: h.containerID, 389 RemoveVolumes: true, 390 }) 391 if err != nil { 392 log.Printf("[ERR] driver.docker: removing container %s", h.containerID) 393 return fmt.Errorf("Failed to remove container %s: %s", h.containerID, err) 394 } 395 log.Printf("[INFO] driver.docker: removed container %s", h.containerID) 396 } 397 398 // Cleanup image. This operation may fail if the image is in use by another 399 // job. That is OK. Will we log a message but continue. 400 if h.cleanupImage { 401 err = h.client.RemoveImage(h.imageID) 402 if err != nil { 403 containers, err := h.client.ListContainers(docker.ListContainersOptions{ 404 All: true, 405 Filters: map[string][]string{ 406 "image": []string{h.imageID}, 407 }, 408 }) 409 if err != nil { 410 return fmt.Errorf("Unable to query list of containers: %s", err) 411 } 412 inUse := len(containers) 413 if inUse > 0 { 414 log.Printf("[INFO] driver.docker: image %s is still in use by %d containers", h.imageID, inUse) 415 } else { 416 return fmt.Errorf("Failed to remove image %s", h.imageID) 417 } 418 } else { 419 log.Printf("[INFO] driver.docker: removed image %s", h.imageID) 420 } 421 } 422 return nil 423 } 424 425 func (h *dockerHandle) run() { 426 // Wait for it... 427 exitCode, err := h.client.WaitContainer(h.containerID) 428 if err != nil { 429 h.logger.Printf("[ERR] driver.docker: unable to wait for %s; container already terminated", h.containerID) 430 } 431 432 if exitCode != 0 { 433 err = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode) 434 } 435 436 close(h.doneCh) 437 if err != nil { 438 h.waitCh <- err 439 } 440 close(h.waitCh) 441 }