github.com/kardianos/nomad@v0.1.3-0.20151022182107-b13df73ee850/client/driver/docker.go (about) 1 package driver 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "log" 7 "path/filepath" 8 "strconv" 9 "strings" 10 11 docker "github.com/fsouza/go-dockerclient" 12 13 "github.com/hashicorp/nomad/client/allocdir" 14 "github.com/hashicorp/nomad/client/config" 15 "github.com/hashicorp/nomad/client/driver/args" 16 "github.com/hashicorp/nomad/nomad/structs" 17 ) 18 19 type DockerDriver struct { 20 DriverContext 21 } 22 23 type dockerPID struct { 24 ImageID string 25 ContainerID string 26 } 27 28 type dockerHandle struct { 29 client *docker.Client 30 logger *log.Logger 31 cleanupContainer bool 32 cleanupImage bool 33 imageID string 34 containerID string 35 waitCh chan error 36 doneCh chan struct{} 37 } 38 39 func NewDockerDriver(ctx *DriverContext) Driver { 40 return &DockerDriver{*ctx} 41 } 42 43 // dockerClient creates *docker.Client. In test / dev mode we can use ENV vars 44 // to connect to the docker daemon. In production mode we will read 45 // docker.endpoint from the config file. 46 func (d *DockerDriver) dockerClient() (*docker.Client, error) { 47 // In dev mode, read DOCKER_* environment variables DOCKER_HOST, 48 // DOCKER_TLS_VERIFY, and DOCKER_CERT_PATH. This allows you to run tests and 49 // demo against boot2docker or a VM on OSX and Windows. This falls back on 50 // the default unix socket on linux if tests are run on linux. 51 // 52 // Also note that we need to turn on DevMode in the test configs. 53 if d.config.DevMode { 54 return docker.NewClientFromEnv() 55 } 56 57 // In prod mode we'll read the docker.endpoint configuration and fall back 58 // on the host-specific default. We do not read from the environment. 59 defaultEndpoint, err := docker.DefaultDockerHost() 60 if err != nil { 61 return nil, fmt.Errorf("Unable to determine default docker endpoint: %s", err) 62 } 63 dockerEndpoint := d.config.ReadDefault("docker.endpoint", defaultEndpoint) 64 65 return docker.NewClient(dockerEndpoint) 66 } 67 68 func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { 69 // Initialize docker API client 70 client, err := d.dockerClient() 71 if err != nil { 72 d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon: %v", err) 73 return false, nil 74 } 75 76 _, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true")) 77 if err != nil { 78 return false, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err) 79 } 80 _, err = strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true")) 81 if err != nil { 82 return false, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err) 83 } 84 85 env, err := client.Version() 86 if err != nil { 87 d.logger.Printf("[DEBUG] driver.docker: could not read version from daemon: %v", err) 88 // Check the "no such file" error if the unix file is missing 89 if strings.Contains(err.Error(), "no such file") { 90 return false, nil 91 } 92 93 // We connected to the daemon but couldn't read the version so something 94 // is broken. 95 return false, err 96 } 97 node.Attributes["driver.docker"] = "1" 98 node.Attributes["driver.docker.version"] = env.Get("Version") 99 100 return true, nil 101 } 102 103 func (d *DockerDriver) containerBinds(alloc *allocdir.AllocDir, task *structs.Task) ([]string, error) { 104 shared := alloc.SharedDir 105 local, ok := alloc.TaskDirs[task.Name] 106 if !ok { 107 return nil, fmt.Errorf("Failed to find task local directory: %v", task.Name) 108 } 109 110 return []string{ 111 fmt.Sprintf("%s:%s", shared, allocdir.SharedAllocName), 112 fmt.Sprintf("%s:%s", local, allocdir.TaskLocal), 113 }, nil 114 } 115 116 // createContainer initializes a struct needed to call docker.client.CreateContainer() 117 func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task) (docker.CreateContainerOptions, error) { 118 var c docker.CreateContainerOptions 119 if task.Resources == nil { 120 d.logger.Printf("[ERR] driver.docker: task.Resources is empty") 121 return c, fmt.Errorf("task.Resources is nil and we can't constrain resource usage. We shouldn't have been able to schedule this in the first place.") 122 } 123 124 binds, err := d.containerBinds(ctx.AllocDir, task) 125 if err != nil { 126 return c, err 127 } 128 129 hostConfig := &docker.HostConfig{ 130 // Convert MB to bytes. This is an absolute value. 131 // 132 // This value represents the total amount of memory a process can use. 133 // Swap is added to total memory and is managed by the OS, not docker. 134 // Since this may cause other processes to swap and cause system 135 // instability, we will simply not use swap. 136 // 137 // See: https://www.kernel.org/doc/Documentation/cgroups/memory.txt 138 Memory: int64(task.Resources.MemoryMB) * 1024 * 1024, 139 MemorySwap: -1, 140 // Convert Mhz to shares. This is a relative value. 141 // 142 // There are two types of CPU limiters available: Shares and Quotas. A 143 // Share allows a particular process to have a proportion of CPU time 144 // relative to other processes; 1024 by default. A CPU Quota is enforced 145 // over a Period of time and is a HARD limit on the amount of CPU time a 146 // process can use. Processes with quotas cannot burst, while processes 147 // with shares can, so we'll use shares. 148 // 149 // The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any 150 // given process will have at least that amount of resources, but likely 151 // more since it is (probably) rare that the machine will run at 100% 152 // CPU. This scale will cease to work if a node is overprovisioned. 153 // 154 // See: 155 // - https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 156 // - https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt 157 CPUShares: int64(task.Resources.CPU), 158 159 // Binds are used to mount a host volume into the container. We mount a 160 // local directory for storage and a shared alloc directory that can be 161 // used to share data between different tasks in the same task group. 162 Binds: binds, 163 } 164 165 d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"]) 166 d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"]) 167 d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"]) 168 169 mode, ok := task.Config["network_mode"] 170 if !ok || mode == "" { 171 // docker default 172 d.logger.Printf("[WARN] driver.docker: no mode specified for networking, defaulting to bridge") 173 mode = "bridge" 174 } 175 176 // Ignore the container mode for now 177 switch mode { 178 case "default", "bridge", "none", "host": 179 d.logger.Printf("[DEBUG] driver.docker: using %s as network mode", mode) 180 default: 181 d.logger.Printf("[ERR] driver.docker: invalid setting for network mode: %s", mode) 182 return c, fmt.Errorf("Invalid setting for network mode: %s", mode) 183 } 184 hostConfig.NetworkMode = mode 185 186 // Setup port mapping (equivalent to -p on docker CLI). Ports must already be 187 // exposed in the container. 188 if len(task.Resources.Networks) == 0 { 189 d.logger.Print("[WARN] driver.docker: No networks are available for port mapping") 190 } else { 191 network := task.Resources.Networks[0] 192 dockerPorts := map[docker.Port][]docker.PortBinding{} 193 194 for _, port := range network.ListStaticPorts() { 195 dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 196 dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 197 d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)\n", network.IP, port, port) 198 } 199 200 for label, port := range network.MapDynamicPorts() { 201 // If the label is numeric we expect that there is a service 202 // listening on that port inside the container. In this case we'll 203 // setup a mapping from our random host port to the label port. 204 // 205 // Otherwise we'll setup a direct 1:1 mapping from the host port to 206 // the container, and assume that the process inside will read the 207 // environment variable and bind to the correct port. 208 if _, err := strconv.Atoi(label); err == nil { 209 dockerPorts[docker.Port(label+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 210 dockerPorts[docker.Port(label+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 211 d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %s (mapped)", network.IP, port, label) 212 } else { 213 dockerPorts[docker.Port(strconv.Itoa(port)+"/tcp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 214 dockerPorts[docker.Port(strconv.Itoa(port)+"/udp")] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: strconv.Itoa(port)}} 215 d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d for label %s\n", network.IP, port, port, label) 216 } 217 } 218 hostConfig.PortBindings = dockerPorts 219 } 220 221 // Create environment variables. 222 env := TaskEnvironmentVariables(ctx, task) 223 env.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) 224 env.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) 225 226 config := &docker.Config{ 227 Env: env.List(), 228 Image: task.Config["image"], 229 } 230 231 rawArgs, hasArgs := task.Config["args"] 232 parsedArgs, err := args.ParseAndReplace(rawArgs, env.Map()) 233 if err != nil { 234 return c, err 235 } 236 237 // If the user specified a custom command to run, we'll inject it here. 238 if command, ok := task.Config["command"]; ok { 239 cmd := []string{command} 240 if hasArgs { 241 cmd = append(cmd, parsedArgs...) 242 } 243 config.Cmd = cmd 244 } else if hasArgs { 245 d.logger.Println("[DEBUG] driver.docker: ignoring args because command not specified") 246 } 247 248 return docker.CreateContainerOptions{ 249 Config: config, 250 HostConfig: hostConfig, 251 }, nil 252 } 253 254 func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { 255 // Get the image from config 256 image, ok := task.Config["image"] 257 if !ok || image == "" { 258 return nil, fmt.Errorf("Image not specified") 259 } 260 if task.Resources == nil { 261 return nil, fmt.Errorf("Resources are not specified") 262 } 263 if task.Resources.MemoryMB == 0 { 264 return nil, fmt.Errorf("Memory limit cannot be zero") 265 } 266 if task.Resources.CPU == 0 { 267 return nil, fmt.Errorf("CPU limit cannot be zero") 268 } 269 270 cleanupContainer, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true")) 271 if err != nil { 272 return nil, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err) 273 } 274 cleanupImage, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true")) 275 if err != nil { 276 return nil, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err) 277 } 278 279 // Initialize docker API client 280 client, err := d.dockerClient() 281 if err != nil { 282 return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 283 } 284 285 repo, tag := docker.ParseRepositoryTag(image) 286 // Make sure tag is always explicitly set. We'll default to "latest" if it 287 // isn't, which is the expected behavior. 288 if tag == "" { 289 tag = "latest" 290 } 291 292 var dockerImage *docker.Image 293 // We're going to check whether the image is already downloaded. If the tag 294 // is "latest" we have to check for a new version every time so we don't 295 // bother to check and cache the id here. We'll download first, then cache. 296 if tag != "latest" { 297 dockerImage, err = client.InspectImage(image) 298 } 299 300 // Download the image 301 if dockerImage == nil { 302 pullOptions := docker.PullImageOptions{ 303 Repository: repo, 304 Tag: tag, 305 } 306 // TODO add auth configuration for private repos 307 authOptions := docker.AuthConfiguration{} 308 err = client.PullImage(pullOptions, authOptions) 309 if err != nil { 310 d.logger.Printf("[ERR] driver.docker: pulling container %s", err) 311 return nil, fmt.Errorf("Failed to pull `%s`: %s", image, err) 312 } 313 d.logger.Printf("[DEBUG] driver.docker: docker pull %s:%s succeeded", repo, tag) 314 315 // Now that we have the image we can get the image id 316 dockerImage, err = client.InspectImage(image) 317 if err != nil { 318 d.logger.Printf("[ERR] driver.docker: getting image id for %s", image) 319 return nil, fmt.Errorf("Failed to determine image id for `%s`: %s", image, err) 320 } 321 } 322 d.logger.Printf("[DEBUG] driver.docker: using image %s", dockerImage.ID) 323 d.logger.Printf("[INFO] driver.docker: identified image %s as %s", image, dockerImage.ID) 324 325 config, err := d.createContainer(ctx, task) 326 if err != nil { 327 d.logger.Printf("[ERR] driver.docker: %s", err) 328 return nil, fmt.Errorf("Failed to create container config for image %s", image) 329 } 330 // Create a container 331 container, err := client.CreateContainer(config) 332 if err != nil { 333 d.logger.Printf("[ERR] driver.docker: %s", err) 334 return nil, fmt.Errorf("Failed to create container from image %s", image) 335 } 336 d.logger.Printf("[INFO] driver.docker: created container %s", container.ID) 337 338 // Start the container 339 err = client.StartContainer(container.ID, container.HostConfig) 340 if err != nil { 341 d.logger.Printf("[ERR] driver.docker: starting container %s", container.ID) 342 return nil, fmt.Errorf("Failed to start container %s", container.ID) 343 } 344 d.logger.Printf("[INFO] driver.docker: started container %s", container.ID) 345 346 // Return a driver handle 347 h := &dockerHandle{ 348 client: client, 349 cleanupContainer: cleanupContainer, 350 cleanupImage: cleanupImage, 351 logger: d.logger, 352 imageID: dockerImage.ID, 353 containerID: container.ID, 354 doneCh: make(chan struct{}), 355 waitCh: make(chan error, 1), 356 } 357 go h.run() 358 return h, nil 359 } 360 361 func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { 362 cleanupContainer, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.container", "true")) 363 if err != nil { 364 return nil, fmt.Errorf("Unable to parse docker.cleanup.container: %s", err) 365 } 366 cleanupImage, err := strconv.ParseBool(d.config.ReadDefault("docker.cleanup.image", "true")) 367 if err != nil { 368 return nil, fmt.Errorf("Unable to parse docker.cleanup.image: %s", err) 369 } 370 371 // Split the handle 372 pidBytes := []byte(strings.TrimPrefix(handleID, "DOCKER:")) 373 pid := &dockerPID{} 374 err = json.Unmarshal(pidBytes, pid) 375 if err != nil { 376 return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) 377 } 378 d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", handleID) 379 380 // Initialize docker API client 381 client, err := d.dockerClient() 382 if err != nil { 383 return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 384 } 385 386 // Look for a running container with this ID 387 containers, err := client.ListContainers(docker.ListContainersOptions{ 388 Filters: map[string][]string{ 389 "id": []string{pid.ContainerID}, 390 }, 391 }) 392 if err != nil { 393 return nil, fmt.Errorf("Failed to query for container %s: %v", pid.ContainerID, err) 394 } 395 396 found := false 397 for _, container := range containers { 398 if container.ID == pid.ContainerID { 399 found = true 400 } 401 } 402 if !found { 403 return nil, fmt.Errorf("Failed to find container %s: %v", pid.ContainerID, err) 404 } 405 406 // Return a driver handle 407 h := &dockerHandle{ 408 client: client, 409 cleanupContainer: cleanupContainer, 410 cleanupImage: cleanupImage, 411 logger: d.logger, 412 imageID: pid.ImageID, 413 containerID: pid.ContainerID, 414 doneCh: make(chan struct{}), 415 waitCh: make(chan error, 1), 416 } 417 go h.run() 418 return h, nil 419 } 420 421 func (h *dockerHandle) ID() string { 422 // Return a handle to the PID 423 pid := dockerPID{ 424 ImageID: h.imageID, 425 ContainerID: h.containerID, 426 } 427 data, err := json.Marshal(pid) 428 if err != nil { 429 h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err) 430 } 431 return fmt.Sprintf("DOCKER:%s", string(data)) 432 } 433 434 func (h *dockerHandle) WaitCh() chan error { 435 return h.waitCh 436 } 437 438 func (h *dockerHandle) Update(task *structs.Task) error { 439 // Update is not possible 440 return nil 441 } 442 443 // Kill is used to terminate the task. This uses docker stop -t 5 444 func (h *dockerHandle) Kill() error { 445 // Stop the container 446 err := h.client.StopContainer(h.containerID, 5) 447 if err != nil { 448 log.Printf("[ERR] driver.docker: failed stopping container %s", h.containerID) 449 return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err) 450 } 451 log.Printf("[INFO] driver.docker: stopped container %s", h.containerID) 452 453 // Cleanup container 454 if h.cleanupContainer { 455 err = h.client.RemoveContainer(docker.RemoveContainerOptions{ 456 ID: h.containerID, 457 RemoveVolumes: true, 458 }) 459 if err != nil { 460 log.Printf("[ERR] driver.docker: removing container %s", h.containerID) 461 return fmt.Errorf("Failed to remove container %s: %s", h.containerID, err) 462 } 463 log.Printf("[INFO] driver.docker: removed container %s", h.containerID) 464 } 465 466 // Cleanup image. This operation may fail if the image is in use by another 467 // job. That is OK. Will we log a message but continue. 468 if h.cleanupImage { 469 err = h.client.RemoveImage(h.imageID) 470 if err != nil { 471 containers, err := h.client.ListContainers(docker.ListContainersOptions{ 472 // The image might be in use by a stopped container, so check everything 473 All: true, 474 Filters: map[string][]string{ 475 "image": []string{h.imageID}, 476 }, 477 }) 478 if err != nil { 479 return fmt.Errorf("Unable to query list of containers: %s", err) 480 } 481 inUse := len(containers) 482 if inUse > 0 { 483 log.Printf("[INFO] driver.docker: image %s is still in use by %d containers", h.imageID, inUse) 484 } else { 485 return fmt.Errorf("Failed to remove image %s", h.imageID) 486 } 487 } else { 488 log.Printf("[INFO] driver.docker: removed image %s", h.imageID) 489 } 490 } 491 return nil 492 } 493 494 func (h *dockerHandle) run() { 495 // Wait for it... 496 exitCode, err := h.client.WaitContainer(h.containerID) 497 if err != nil { 498 h.logger.Printf("[ERR] driver.docker: unable to wait for %s; container already terminated", h.containerID) 499 } 500 501 if exitCode != 0 { 502 err = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode) 503 } 504 505 close(h.doneCh) 506 if err != nil { 507 h.waitCh <- err 508 } 509 close(h.waitCh) 510 }