github.com/ranjib/nomad@v0.1.1-0.20160225204057-97751b02f70b/client/driver/docker.go (about) 1 package driver 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "log" 7 "net" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "strconv" 12 "strings" 13 "sync" 14 "time" 15 16 docker "github.com/fsouza/go-dockerclient" 17 18 "github.com/hashicorp/go-plugin" 19 20 "github.com/hashicorp/nomad/client/allocdir" 21 "github.com/hashicorp/nomad/client/config" 22 "github.com/hashicorp/nomad/client/driver/logging" 23 cstructs "github.com/hashicorp/nomad/client/driver/structs" 24 "github.com/hashicorp/nomad/client/fingerprint" 25 "github.com/hashicorp/nomad/helper/discover" 26 "github.com/hashicorp/nomad/nomad/structs" 27 "github.com/mitchellh/mapstructure" 28 ) 29 30 // We store the client globally to cache the connection to the docker daemon. 31 var createClient sync.Once 32 var client *docker.Client 33 34 type DockerDriver struct { 35 DriverContext 36 fingerprint.StaticFingerprinter 37 } 38 39 type DockerDriverAuth struct { 40 Username string `mapstructure:"username"` // username for the registry 41 Password string `mapstructure:"password"` // password to access the registry 42 Email string `mapstructure:"email"` // email address of the user who is allowed to access the registry 43 ServerAddress string `mapstructure:"server_address"` // server address of the registry 44 } 45 46 type DockerDriverConfig struct { 47 ImageName string `mapstructure:"image"` // Container's Image Name 48 Command string `mapstructure:"command"` // The Command/Entrypoint to run when the container starts up 49 Args []string `mapstructure:"args"` // The arguments to the Command/Entrypoint 50 IpcMode string `mapstructure:"ipc_mode"` // The IPC mode of the container - host and none 51 NetworkMode string `mapstructure:"network_mode"` // The network mode of the container - host, net and none 52 PidMode string `mapstructure:"pid_mode"` // The PID mode of the container - host and none 53 UTSMode string `mapstructure:"uts_mode"` // The UTS mode of the container - host and none 54 PortMapRaw []map[string]int `mapstructure:"port_map"` // 55 PortMap map[string]int `mapstructure:"-"` // A map of host port labels and the ports exposed on the container 56 Privileged bool `mapstructure:"privileged"` // Flag to run the container in priviledged mode 57 DNSServers []string `mapstructure:"dns_servers"` // DNS Server for containers 58 DNSSearchDomains []string `mapstructure:"dns_search_domains"` // DNS Search domains for containers 59 Hostname string `mapstructure:"hostname"` // Hostname for containers 60 LabelsRaw []map[string]string `mapstructure:"labels"` // 61 Labels map[string]string `mapstructure:"-"` // Labels to set when the container starts up 62 Auth []DockerDriverAuth `mapstructure:"auth"` // Authentication credentials for a private Docker registry 63 } 64 65 func (c *DockerDriverConfig) Validate() error { 66 if c.ImageName == "" { 67 return fmt.Errorf("Docker Driver needs an image name") 68 } 69 70 c.PortMap = mapMergeStrInt(c.PortMapRaw...) 71 c.Labels = mapMergeStrStr(c.LabelsRaw...) 72 73 return nil 74 } 75 76 type dockerPID struct { 77 Version string 78 ImageID string 79 ContainerID string 80 KillTimeout time.Duration 81 PluginConfig *PluginReattachConfig 82 } 83 84 type DockerHandle struct { 85 pluginClient *plugin.Client 86 logCollector logging.LogCollector 87 client *docker.Client 88 logger *log.Logger 89 cleanupContainer bool 90 cleanupImage bool 91 imageID string 92 containerID string 93 version string 94 killTimeout time.Duration 95 waitCh chan *cstructs.WaitResult 96 doneCh chan struct{} 97 } 98 99 func NewDockerDriver(ctx *DriverContext) Driver { 100 return &DockerDriver{DriverContext: *ctx} 101 } 102 103 // dockerClient creates *docker.Client. In test / dev mode we can use ENV vars 104 // to connect to the docker daemon. In production mode we will read 105 // docker.endpoint from the config file. 106 func (d *DockerDriver) dockerClient() (*docker.Client, error) { 107 if client != nil { 108 return client, nil 109 } 110 111 var err error 112 createClient.Do(func() { 113 // Default to using whatever is configured in docker.endpoint. If this is 114 // not specified we'll fall back on NewClientFromEnv which reads config from 115 // the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and 116 // DOCKER_CERT_PATH. This allows us to lock down the config in production 117 // but also accept the standard ENV configs for dev and test. 118 dockerEndpoint := d.config.Read("docker.endpoint") 119 if dockerEndpoint != "" { 120 cert := d.config.Read("docker.tls.cert") 121 key := d.config.Read("docker.tls.key") 122 ca := d.config.Read("docker.tls.ca") 123 124 if cert+key+ca != "" { 125 d.logger.Printf("[DEBUG] driver.docker: using TLS client connection to %s", dockerEndpoint) 126 client, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca) 127 } else { 128 d.logger.Printf("[DEBUG] driver.docker: using standard client connection to %s", dockerEndpoint) 129 client, err = docker.NewClient(dockerEndpoint) 130 } 131 return 132 } 133 134 d.logger.Println("[DEBUG] driver.docker: using client connection initialized from environment") 135 client, err = docker.NewClientFromEnv() 136 }) 137 return client, err 138 } 139 140 func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { 141 // Initialize docker API client 142 client, err := d.dockerClient() 143 if err != nil { 144 d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err) 145 return false, nil 146 } 147 148 privileged := d.config.ReadBoolDefault("docker.privileged.enabled", false) 149 if privileged { 150 d.logger.Println("[DEBUG] driver.docker: privileged containers are enabled") 151 node.Attributes["docker.privileged.enabled"] = "1" 152 } else { 153 d.logger.Println("[DEBUG] driver.docker: privileged containers are disabled") 154 } 155 156 // This is the first operation taken on the client so we'll try to 157 // establish a connection to the Docker daemon. If this fails it means 158 // Docker isn't available so we'll simply disable the docker driver. 159 env, err := client.Version() 160 if err != nil { 161 d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon at %s: %s", client.Endpoint(), err) 162 return false, nil 163 } 164 node.Attributes["driver.docker"] = "1" 165 node.Attributes["driver.docker.version"] = env.Get("Version") 166 167 return true, nil 168 } 169 170 func (d *DockerDriver) containerBinds(alloc *allocdir.AllocDir, task *structs.Task) ([]string, error) { 171 shared := alloc.SharedDir 172 local, ok := alloc.TaskDirs[task.Name] 173 if !ok { 174 return nil, fmt.Errorf("Failed to find task local directory: %v", task.Name) 175 } 176 177 return []string{ 178 // "z" and "Z" option is to allocate directory with SELinux label. 179 fmt.Sprintf("%s:/%s:rw,z", shared, allocdir.SharedAllocName), 180 // capital "Z" will label with Multi-Category Security (MCS) labels 181 fmt.Sprintf("%s:/%s:rw,Z", local, allocdir.TaskLocal), 182 }, nil 183 } 184 185 // createContainer initializes a struct needed to call docker.client.CreateContainer() 186 func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task, 187 driverConfig *DockerDriverConfig, syslogAddr string) (docker.CreateContainerOptions, error) { 188 var c docker.CreateContainerOptions 189 if task.Resources == nil { 190 // Guard against missing resources. We should never have been able to 191 // schedule a job without specifying this. 192 d.logger.Println("[ERR] driver.docker: task.Resources is empty") 193 return c, fmt.Errorf("task.Resources is empty") 194 } 195 196 binds, err := d.containerBinds(ctx.AllocDir, task) 197 if err != nil { 198 return c, err 199 } 200 201 // Set environment variables. 202 d.taskEnv.SetAllocDir(filepath.Join("/", allocdir.SharedAllocName)) 203 d.taskEnv.SetTaskLocalDir(filepath.Join("/", allocdir.TaskLocal)) 204 205 config := &docker.Config{ 206 Image: driverConfig.ImageName, 207 Hostname: driverConfig.Hostname, 208 } 209 210 hostConfig := &docker.HostConfig{ 211 // Convert MB to bytes. This is an absolute value. 212 // 213 // This value represents the total amount of memory a process can use. 214 // Swap is added to total memory and is managed by the OS, not docker. 215 // Since this may cause other processes to swap and cause system 216 // instability, we will simply not use swap. 217 // 218 // See: https://www.kernel.org/doc/Documentation/cgroups/memory.txt 219 Memory: int64(task.Resources.MemoryMB) * 1024 * 1024, 220 MemorySwap: -1, 221 // Convert Mhz to shares. This is a relative value. 222 // 223 // There are two types of CPU limiters available: Shares and Quotas. A 224 // Share allows a particular process to have a proportion of CPU time 225 // relative to other processes; 1024 by default. A CPU Quota is enforced 226 // over a Period of time and is a HARD limit on the amount of CPU time a 227 // process can use. Processes with quotas cannot burst, while processes 228 // with shares can, so we'll use shares. 229 // 230 // The simplest scale is 1 share to 1 MHz so 1024 = 1GHz. This means any 231 // given process will have at least that amount of resources, but likely 232 // more since it is (probably) rare that the machine will run at 100% 233 // CPU. This scale will cease to work if a node is overprovisioned. 234 // 235 // See: 236 // - https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt 237 // - https://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt 238 CPUShares: int64(task.Resources.CPU), 239 240 // Binds are used to mount a host volume into the container. We mount a 241 // local directory for storage and a shared alloc directory that can be 242 // used to share data between different tasks in the same task group. 243 Binds: binds, 244 LogConfig: docker.LogConfig{ 245 Type: "syslog", 246 Config: map[string]string{ 247 "syslog-address": fmt.Sprintf("tcp://%v", syslogAddr), 248 }, 249 }, 250 } 251 252 d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Config["image"]) 253 d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Config["image"]) 254 d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Config["image"]) 255 256 // set privileged mode 257 hostPrivileged := d.config.ReadBoolDefault("docker.privileged.enabled", false) 258 if driverConfig.Privileged && !hostPrivileged { 259 return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`) 260 } 261 hostConfig.Privileged = hostPrivileged 262 263 // set DNS servers 264 for _, ip := range driverConfig.DNSServers { 265 if net.ParseIP(ip) != nil { 266 hostConfig.DNS = append(hostConfig.DNS, ip) 267 } else { 268 d.logger.Printf("[ERR] driver.docker: invalid ip address for container dns server: %s", ip) 269 } 270 } 271 272 // set DNS search domains 273 for _, domain := range driverConfig.DNSSearchDomains { 274 hostConfig.DNSSearch = append(hostConfig.DNSSearch, domain) 275 } 276 277 if driverConfig.IpcMode != "" { 278 if !hostPrivileged { 279 return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent, setting ipc mode not allowed`) 280 } 281 d.logger.Printf("[DEBUG] driver.docker: setting ipc mode to %s", driverConfig.IpcMode) 282 } 283 hostConfig.IpcMode = driverConfig.IpcMode 284 285 if driverConfig.PidMode != "" { 286 if !hostPrivileged { 287 return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent, setting pid mode not allowed`) 288 } 289 d.logger.Printf("[DEBUG] driver.docker: setting pid mode to %s", driverConfig.PidMode) 290 } 291 hostConfig.PidMode = driverConfig.PidMode 292 293 if driverConfig.UTSMode != "" { 294 if !hostPrivileged { 295 return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent, setting UTS mode not allowed`) 296 } 297 d.logger.Printf("[DEBUG] driver.docker: setting UTS mode to %s", driverConfig.UTSMode) 298 } 299 hostConfig.UTSMode = driverConfig.UTSMode 300 301 hostConfig.NetworkMode = driverConfig.NetworkMode 302 if hostConfig.NetworkMode == "" { 303 // docker default 304 d.logger.Println("[DEBUG] driver.docker: networking mode not specified; defaulting to bridge") 305 hostConfig.NetworkMode = "bridge" 306 } 307 308 // Setup port mapping and exposed ports 309 if len(task.Resources.Networks) == 0 { 310 d.logger.Println("[DEBUG] driver.docker: No network interfaces are available") 311 if len(driverConfig.PortMap) > 0 { 312 return c, fmt.Errorf("Trying to map ports but no network interface is available") 313 } 314 } else { 315 // TODO add support for more than one network 316 network := task.Resources.Networks[0] 317 publishedPorts := map[docker.Port][]docker.PortBinding{} 318 exposedPorts := map[docker.Port]struct{}{} 319 320 for _, port := range network.ReservedPorts { 321 // By default we will map the allocated port 1:1 to the container 322 containerPortInt := port.Value 323 324 // If the user has mapped a port using port_map we'll change it here 325 if mapped, ok := driverConfig.PortMap[port.Label]; ok { 326 containerPortInt = mapped 327 } 328 329 hostPortStr := strconv.Itoa(port.Value) 330 containerPort := docker.Port(strconv.Itoa(containerPortInt)) 331 332 publishedPorts[containerPort+"/tcp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} 333 publishedPorts[containerPort+"/udp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} 334 d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)", network.IP, port.Value, port.Value) 335 336 exposedPorts[containerPort+"/tcp"] = struct{}{} 337 exposedPorts[containerPort+"/udp"] = struct{}{} 338 d.logger.Printf("[DEBUG] driver.docker: exposed port %d", port.Value) 339 } 340 341 for _, port := range network.DynamicPorts { 342 // By default we will map the allocated port 1:1 to the container 343 containerPortInt := port.Value 344 345 // If the user has mapped a port using port_map we'll change it here 346 if mapped, ok := driverConfig.PortMap[port.Label]; ok { 347 containerPortInt = mapped 348 } 349 350 hostPortStr := strconv.Itoa(port.Value) 351 containerPort := docker.Port(strconv.Itoa(containerPortInt)) 352 353 publishedPorts[containerPort+"/tcp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} 354 publishedPorts[containerPort+"/udp"] = []docker.PortBinding{docker.PortBinding{HostIP: network.IP, HostPort: hostPortStr}} 355 d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (mapped)", network.IP, port.Value, containerPortInt) 356 357 exposedPorts[containerPort+"/tcp"] = struct{}{} 358 exposedPorts[containerPort+"/udp"] = struct{}{} 359 d.logger.Printf("[DEBUG] driver.docker: exposed port %s", containerPort) 360 } 361 362 d.taskEnv.SetPortMap(driverConfig.PortMap) 363 364 hostConfig.PortBindings = publishedPorts 365 config.ExposedPorts = exposedPorts 366 } 367 368 d.taskEnv.Build() 369 parsedArgs := d.taskEnv.ParseAndReplace(driverConfig.Args) 370 371 // If the user specified a custom command to run as their entrypoint, we'll 372 // inject it here. 373 if driverConfig.Command != "" { 374 // Validate command 375 if err := validateCommand(driverConfig.Command, "args"); err != nil { 376 return c, err 377 } 378 379 cmd := []string{driverConfig.Command} 380 if len(driverConfig.Args) != 0 { 381 cmd = append(cmd, parsedArgs...) 382 } 383 d.logger.Printf("[DEBUG] driver.docker: setting container startup command to: %s", strings.Join(cmd, " ")) 384 config.Cmd = cmd 385 } else if len(driverConfig.Args) != 0 { 386 d.logger.Println("[DEBUG] driver.docker: ignoring command arguments because command is not specified") 387 } 388 389 if len(driverConfig.Labels) > 0 { 390 config.Labels = driverConfig.Labels 391 d.logger.Printf("[DEBUG] driver.docker: applied labels on the container: %+v", config.Labels) 392 } 393 394 config.Env = d.taskEnv.EnvList() 395 396 containerName := fmt.Sprintf("%s-%s", task.Name, ctx.AllocID) 397 d.logger.Printf("[DEBUG] driver.docker: setting container name to: %s", containerName) 398 399 return docker.CreateContainerOptions{ 400 Name: containerName, 401 Config: config, 402 HostConfig: hostConfig, 403 }, nil 404 } 405 406 func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { 407 var driverConfig DockerDriverConfig 408 if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil { 409 return nil, err 410 } 411 image := driverConfig.ImageName 412 413 if err := driverConfig.Validate(); err != nil { 414 return nil, err 415 } 416 if task.Resources == nil { 417 return nil, fmt.Errorf("Resources are not specified") 418 } 419 if task.Resources.MemoryMB == 0 { 420 return nil, fmt.Errorf("Memory limit cannot be zero") 421 } 422 if task.Resources.CPU == 0 { 423 return nil, fmt.Errorf("CPU limit cannot be zero") 424 } 425 426 cleanupContainer := d.config.ReadBoolDefault("docker.cleanup.container", true) 427 cleanupImage := d.config.ReadBoolDefault("docker.cleanup.image", true) 428 429 // Initialize docker API client 430 client, err := d.dockerClient() 431 if err != nil { 432 return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 433 } 434 435 repo, tag := docker.ParseRepositoryTag(image) 436 // Make sure tag is always explicitly set. We'll default to "latest" if it 437 // isn't, which is the expected behavior. 438 if tag == "" { 439 tag = "latest" 440 } 441 442 var dockerImage *docker.Image 443 // We're going to check whether the image is already downloaded. If the tag 444 // is "latest" we have to check for a new version every time so we don't 445 // bother to check and cache the id here. We'll download first, then cache. 446 if tag != "latest" { 447 dockerImage, err = client.InspectImage(image) 448 } 449 450 // Download the image 451 if dockerImage == nil { 452 pullOptions := docker.PullImageOptions{ 453 Repository: repo, 454 Tag: tag, 455 } 456 457 authOptions := docker.AuthConfiguration{} 458 if len(driverConfig.Auth) != 0 { 459 authOptions = docker.AuthConfiguration{ 460 Username: driverConfig.Auth[0].Username, 461 Password: driverConfig.Auth[0].Password, 462 Email: driverConfig.Auth[0].Email, 463 ServerAddress: driverConfig.Auth[0].ServerAddress, 464 } 465 } 466 467 if authConfigFile := d.config.Read("docker.auth.config"); authConfigFile != "" { 468 if f, err := os.Open(authConfigFile); err == nil { 469 defer f.Close() 470 var authConfigurations *docker.AuthConfigurations 471 if authConfigurations, err = docker.NewAuthConfigurations(f); err != nil { 472 return nil, fmt.Errorf("Failed to create docker auth object: %v", err) 473 } 474 if authConfiguration, ok := authConfigurations.Configs[repo]; ok { 475 authOptions = authConfiguration 476 } 477 } else { 478 return nil, fmt.Errorf("Failed to open auth config file: %v, error: %v", authConfigFile, err) 479 } 480 } 481 482 err = client.PullImage(pullOptions, authOptions) 483 if err != nil { 484 d.logger.Printf("[ERR] driver.docker: failed pulling container %s:%s: %s", repo, tag, err) 485 return nil, fmt.Errorf("Failed to pull `%s`: %s", image, err) 486 } 487 d.logger.Printf("[DEBUG] driver.docker: docker pull %s:%s succeeded", repo, tag) 488 489 // Now that we have the image we can get the image id 490 dockerImage, err = client.InspectImage(image) 491 if err != nil { 492 d.logger.Printf("[ERR] driver.docker: failed getting image id for %s: %s", image, err) 493 return nil, fmt.Errorf("Failed to determine image id for `%s`: %s", image, err) 494 } 495 } 496 497 taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName] 498 if !ok { 499 return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) 500 } 501 502 d.logger.Printf("[DEBUG] driver.docker: identified image %s as %s", image, dockerImage.ID) 503 504 bin, err := discover.NomadExecutable() 505 if err != nil { 506 return nil, fmt.Errorf("unable to find the nomad binary: %v", err) 507 } 508 pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-syslog-collector.out", task.Name)) 509 pluginConfig := &plugin.ClientConfig{ 510 Cmd: exec.Command(bin, "syslog", pluginLogFile), 511 } 512 513 logCollector, pluginClient, err := createLogCollector(pluginConfig, d.config.LogOutput, d.config) 514 if err != nil { 515 return nil, err 516 } 517 logCollectorCtx := &logging.LogCollectorContext{ 518 TaskName: task.Name, 519 AllocDir: ctx.AllocDir, 520 LogConfig: task.LogConfig, 521 PortLowerBound: d.config.ClientMinPort, 522 PortUpperBound: d.config.ClientMaxPort, 523 } 524 ss, err := logCollector.LaunchCollector(logCollectorCtx) 525 if err != nil { 526 return nil, fmt.Errorf("failed to start syslog collector: %v", err) 527 } 528 529 config, err := d.createContainer(ctx, task, &driverConfig, ss.Addr) 530 if err != nil { 531 d.logger.Printf("[ERR] driver.docker: failed to create container configuration for image %s: %s", image, err) 532 pluginClient.Kill() 533 return nil, fmt.Errorf("Failed to create container configuration for image %s: %s", image, err) 534 } 535 // Create a container 536 container, err := client.CreateContainer(config) 537 if err != nil { 538 // If the container already exists because of a previous failure we'll 539 // try to purge it and re-create it. 540 if strings.Contains(err.Error(), "container already exists") { 541 // Get the ID of the existing container so we can delete it 542 containers, err := client.ListContainers(docker.ListContainersOptions{ 543 // The image might be in use by a stopped container, so check everything 544 All: true, 545 Filters: map[string][]string{ 546 "name": []string{config.Name}, 547 }, 548 }) 549 if err != nil { 550 d.logger.Printf("[ERR] driver.docker: failed to query list of containers matching name:%s", config.Name) 551 pluginClient.Kill() 552 return nil, fmt.Errorf("Failed to query list of containers: %s", err) 553 } 554 555 // Couldn't find any matching containers 556 if len(containers) == 0 { 557 d.logger.Printf("[ERR] driver.docker: failed to get id for container %s: %#v", config.Name, containers) 558 pluginClient.Kill() 559 return nil, fmt.Errorf("Failed to get id for container %s", config.Name) 560 } 561 562 // Delete matching containers 563 d.logger.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name) 564 for _, container := range containers { 565 err = client.RemoveContainer(docker.RemoveContainerOptions{ 566 ID: container.ID, 567 }) 568 if err != nil { 569 d.logger.Printf("[ERR] driver.docker: failed to purge container %s", container.ID) 570 pluginClient.Kill() 571 return nil, fmt.Errorf("Failed to purge container %s: %s", container.ID, err) 572 } 573 d.logger.Printf("[INFO] driver.docker: purged container %s", container.ID) 574 } 575 576 container, err = client.CreateContainer(config) 577 if err != nil { 578 d.logger.Printf("[ERR] driver.docker: failed to re-create container %s; aborting", config.Name) 579 pluginClient.Kill() 580 return nil, fmt.Errorf("Failed to re-create container %s; aborting", config.Name) 581 } 582 } else { 583 // We failed to create the container for some other reason. 584 d.logger.Printf("[ERR] driver.docker: failed to create container from image %s: %s", image, err) 585 pluginClient.Kill() 586 return nil, fmt.Errorf("Failed to create container from image %s: %s", image, err) 587 } 588 } 589 d.logger.Printf("[INFO] driver.docker: created container %s", container.ID) 590 591 // Start the container 592 err = client.StartContainer(container.ID, container.HostConfig) 593 if err != nil { 594 d.logger.Printf("[ERR] driver.docker: failed to start container %s: %s", container.ID, err) 595 pluginClient.Kill() 596 return nil, fmt.Errorf("Failed to start container %s: %s", container.ID, err) 597 } 598 d.logger.Printf("[INFO] driver.docker: started container %s", container.ID) 599 600 // Return a driver handle 601 h := &DockerHandle{ 602 client: client, 603 logCollector: logCollector, 604 pluginClient: pluginClient, 605 cleanupContainer: cleanupContainer, 606 cleanupImage: cleanupImage, 607 logger: d.logger, 608 imageID: dockerImage.ID, 609 containerID: container.ID, 610 version: d.config.Version, 611 killTimeout: d.DriverContext.KillTimeout(task), 612 doneCh: make(chan struct{}), 613 waitCh: make(chan *cstructs.WaitResult, 1), 614 } 615 go h.run() 616 return h, nil 617 } 618 619 func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { 620 cleanupContainer := d.config.ReadBoolDefault("docker.cleanup.container", true) 621 cleanupImage := d.config.ReadBoolDefault("docker.cleanup.image", true) 622 623 // Split the handle 624 pidBytes := []byte(strings.TrimPrefix(handleID, "DOCKER:")) 625 pid := &dockerPID{} 626 if err := json.Unmarshal(pidBytes, pid); err != nil { 627 return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) 628 } 629 d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", pid.ContainerID) 630 d.logger.Printf("[DEBUG] driver.docker: re-attached to handle: %s", handleID) 631 pluginConfig := &plugin.ClientConfig{ 632 Reattach: pid.PluginConfig.PluginConfig(), 633 } 634 635 client, err := d.dockerClient() 636 if err != nil { 637 return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 638 } 639 640 // Look for a running container with this ID 641 containers, err := client.ListContainers(docker.ListContainersOptions{ 642 Filters: map[string][]string{ 643 "id": []string{pid.ContainerID}, 644 }, 645 }) 646 if err != nil { 647 return nil, fmt.Errorf("Failed to query for container %s: %v", pid.ContainerID, err) 648 } 649 650 found := false 651 for _, container := range containers { 652 if container.ID == pid.ContainerID { 653 found = true 654 } 655 } 656 if !found { 657 return nil, fmt.Errorf("Failed to find container %s: %v", pid.ContainerID, err) 658 } 659 logCollector, pluginClient, err := createLogCollector(pluginConfig, d.config.LogOutput, d.config) 660 if err != nil { 661 d.logger.Printf("[INFO] driver.docker: couldn't re-attach to the plugin process: %v", err) 662 if e := client.StopContainer(pid.ContainerID, uint(pid.KillTimeout*time.Second)); e != nil { 663 d.logger.Printf("[DEBUG] driver.docker: couldn't stop container: %v", e) 664 } 665 return nil, err 666 } 667 668 // Return a driver handle 669 h := &DockerHandle{ 670 client: client, 671 logCollector: logCollector, 672 pluginClient: pluginClient, 673 cleanupContainer: cleanupContainer, 674 cleanupImage: cleanupImage, 675 logger: d.logger, 676 imageID: pid.ImageID, 677 containerID: pid.ContainerID, 678 version: pid.Version, 679 killTimeout: pid.KillTimeout, 680 doneCh: make(chan struct{}), 681 waitCh: make(chan *cstructs.WaitResult, 1), 682 } 683 go h.run() 684 return h, nil 685 } 686 687 func (h *DockerHandle) ID() string { 688 // Return a handle to the PID 689 pid := dockerPID{ 690 Version: h.version, 691 ImageID: h.imageID, 692 ContainerID: h.containerID, 693 KillTimeout: h.killTimeout, 694 PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()), 695 } 696 data, err := json.Marshal(pid) 697 if err != nil { 698 h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err) 699 } 700 return fmt.Sprintf("DOCKER:%s", string(data)) 701 } 702 703 func (h *DockerHandle) ContainerID() string { 704 return h.containerID 705 } 706 707 func (h *DockerHandle) WaitCh() chan *cstructs.WaitResult { 708 return h.waitCh 709 } 710 711 func (h *DockerHandle) Update(task *structs.Task) error { 712 // Store the updated kill timeout. 713 h.killTimeout = task.KillTimeout 714 if err := h.logCollector.UpdateLogConfig(task.LogConfig); err != nil { 715 h.logger.Printf("[DEBUG] driver.docker: failed to update log config: %v", err) 716 } 717 718 // Update is not possible 719 return nil 720 } 721 722 // Kill is used to terminate the task. This uses `docker stop -t killTimeout` 723 func (h *DockerHandle) Kill() error { 724 // Stop the container 725 err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds())) 726 if err != nil { 727 h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err) 728 return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err) 729 } 730 h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID) 731 732 // Cleanup container 733 if h.cleanupContainer { 734 err = h.client.RemoveContainer(docker.RemoveContainerOptions{ 735 ID: h.containerID, 736 RemoveVolumes: true, 737 }) 738 if err != nil { 739 h.logger.Printf("[ERR] driver.docker: failed to remove container %s", h.containerID) 740 return fmt.Errorf("Failed to remove container %s: %s", h.containerID, err) 741 } 742 h.logger.Printf("[INFO] driver.docker: removed container %s", h.containerID) 743 } 744 745 // Cleanup image. This operation may fail if the image is in use by another 746 // job. That is OK. Will we log a message but continue. 747 if h.cleanupImage { 748 err = h.client.RemoveImage(h.imageID) 749 if err != nil { 750 containers, err := h.client.ListContainers(docker.ListContainersOptions{ 751 // The image might be in use by a stopped container, so check everything 752 All: true, 753 Filters: map[string][]string{ 754 "image": []string{h.imageID}, 755 }, 756 }) 757 if err != nil { 758 h.logger.Printf("[ERR] driver.docker: failed to query list of containers matching image:%s", h.imageID) 759 return fmt.Errorf("Failed to query list of containers: %s", err) 760 } 761 inUse := len(containers) 762 if inUse > 0 { 763 h.logger.Printf("[INFO] driver.docker: image %s is still in use by %d container(s)", h.imageID, inUse) 764 } else { 765 return fmt.Errorf("Failed to remove image %s", h.imageID) 766 } 767 } else { 768 h.logger.Printf("[INFO] driver.docker: removed image %s", h.imageID) 769 } 770 } 771 return nil 772 } 773 774 func (h *DockerHandle) run() { 775 // Wait for it... 776 exitCode, err := h.client.WaitContainer(h.containerID) 777 if err != nil { 778 h.logger.Printf("[ERR] driver.docker: failed to wait for %s; container already terminated", h.containerID) 779 } 780 781 if exitCode != 0 { 782 err = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode) 783 } 784 785 close(h.doneCh) 786 h.waitCh <- cstructs.NewWaitResult(exitCode, 0, err) 787 close(h.waitCh) 788 789 // Shutdown the syslog collector 790 if err := h.logCollector.Exit(); err != nil { 791 h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err) 792 } 793 h.pluginClient.Kill() 794 }