github.com/mattyr/nomad@v0.3.3-0.20160919021406-3485a065154a/client/driver/docker.go (about) 1 package driver 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "log" 7 "net" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "regexp" 12 "runtime" 13 "strconv" 14 "strings" 15 "sync" 16 "time" 17 18 docker "github.com/fsouza/go-dockerclient" 19 20 "github.com/hashicorp/go-multierror" 21 "github.com/hashicorp/go-plugin" 22 "github.com/hashicorp/nomad/client/allocdir" 23 "github.com/hashicorp/nomad/client/config" 24 "github.com/hashicorp/nomad/client/driver/executor" 25 dstructs "github.com/hashicorp/nomad/client/driver/structs" 26 cstructs "github.com/hashicorp/nomad/client/structs" 27 "github.com/hashicorp/nomad/helper/discover" 28 "github.com/hashicorp/nomad/helper/fields" 29 shelpers "github.com/hashicorp/nomad/helper/stats" 30 "github.com/hashicorp/nomad/nomad/structs" 31 "github.com/mitchellh/mapstructure" 32 ) 33 34 var ( 35 // We store the clients globally to cache the connection to the docker daemon. 36 createClients sync.Once 37 38 // client is a docker client with a timeout of 1 minute. This is for doing 39 // all operations with the docker daemon besides which are not long running 40 // such as creating, killing containers, etc. 41 client *docker.Client 42 43 // waitClient is a docker client with no timeouts. This is used for long 44 // running operations such as waiting on containers and collect stats 45 waitClient *docker.Client 46 47 // The statistics the Docker driver exposes 48 DockerMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage"} 49 DockerMeasuredCpuStats = []string{"Throttled Periods", "Throttled Time", "Percent"} 50 ) 51 52 const ( 53 // NoSuchContainerError is returned by the docker daemon if the container 54 // does not exist. 55 NoSuchContainerError = "No such container" 56 57 // The key populated in Node Attributes to indicate presence of the Docker 58 // driver 59 dockerDriverAttr = "driver.docker" 60 61 // dockerTimeout is the length of time a request can be outstanding before 62 // it is timed out. 63 dockerTimeout = 1 * time.Minute 64 ) 65 66 type DockerDriver struct { 67 DriverContext 68 } 69 70 type DockerDriverAuth struct { 71 Username string `mapstructure:"username"` // username for the registry 72 Password string `mapstructure:"password"` // password to access the registry 73 Email string `mapstructure:"email"` // email address of the user who is allowed to access the registry 74 ServerAddress string `mapstructure:"server_address"` // server address of the registry 75 } 76 77 type DockerDriverConfig struct { 78 ImageName string `mapstructure:"image"` // Container's Image Name 79 LoadImages []string `mapstructure:"load"` // LoadImage is array of paths to image archive files 80 Command string `mapstructure:"command"` // The Command/Entrypoint to run when the container starts up 81 Args []string `mapstructure:"args"` // The arguments to the Command/Entrypoint 82 IpcMode string `mapstructure:"ipc_mode"` // The IPC mode of the container - host and none 83 NetworkMode string `mapstructure:"network_mode"` // The network mode of the container - host, nat and none 84 PidMode string `mapstructure:"pid_mode"` // The PID mode of the container - host and none 85 UTSMode string `mapstructure:"uts_mode"` // The UTS mode of the container - host and none 86 PortMapRaw []map[string]int `mapstructure:"port_map"` // 87 PortMap map[string]int `mapstructure:"-"` // A map of host port labels and the ports exposed on the container 88 Privileged bool `mapstructure:"privileged"` // Flag to run the container in privileged mode 89 DNSServers []string `mapstructure:"dns_servers"` // DNS Server for containers 90 DNSSearchDomains []string `mapstructure:"dns_search_domains"` // DNS Search domains for containers 91 Hostname string `mapstructure:"hostname"` // Hostname for containers 92 LabelsRaw []map[string]string `mapstructure:"labels"` // 93 Labels map[string]string `mapstructure:"-"` // Labels to set when the container starts up 94 Auth []DockerDriverAuth `mapstructure:"auth"` // Authentication credentials for a private Docker registry 95 SSL bool `mapstructure:"ssl"` // Flag indicating repository is served via https 96 TTY bool `mapstructure:"tty"` // Allocate a Pseudo-TTY 97 Interactive bool `mapstructure:"interactive"` // Keep STDIN open even if not attached 98 AttachStdin bool `mapstructure:"attach_stdin"` // Attach to STDIN 99 AttachStdout bool `mapstructure:"attach_stdout"` // Attach to STDOUT 100 AttachStderr bool `mapstructure:"attach_stderr"` // Attach to STDERR 101 ShmSize int64 `mapstructure:"shm_size"` // Size of /dev/shm of the container in bytes 102 WorkDir string `mapstructure:"work_dir"` // Working directory inside the container 103 } 104 105 // Validate validates a docker driver config 106 func (c *DockerDriverConfig) Validate() error { 107 if c.ImageName == "" { 108 return fmt.Errorf("Docker Driver needs an image name") 109 } 110 111 c.PortMap = mapMergeStrInt(c.PortMapRaw...) 112 c.Labels = mapMergeStrStr(c.LabelsRaw...) 113 114 return nil 115 } 116 117 // NewDockerDriverConfig returns a docker driver config by parsing the HCL 118 // config 119 func NewDockerDriverConfig(task *structs.Task) (*DockerDriverConfig, error) { 120 var driverConfig DockerDriverConfig 121 driverConfig.SSL = true 122 if err := mapstructure.WeakDecode(task.Config, &driverConfig); err != nil { 123 return nil, err 124 } 125 if strings.Contains(driverConfig.ImageName, "https://") { 126 driverConfig.ImageName = strings.Replace(driverConfig.ImageName, "https://", "", 1) 127 } 128 129 if err := driverConfig.Validate(); err != nil { 130 return nil, err 131 } 132 return &driverConfig, nil 133 } 134 135 type dockerPID struct { 136 Version string 137 ImageID string 138 ContainerID string 139 KillTimeout time.Duration 140 MaxKillTimeout time.Duration 141 PluginConfig *PluginReattachConfig 142 } 143 144 type DockerHandle struct { 145 pluginClient *plugin.Client 146 executor executor.Executor 147 client *docker.Client 148 waitClient *docker.Client 149 logger *log.Logger 150 cleanupImage bool 151 imageID string 152 containerID string 153 version string 154 clkSpeed float64 155 killTimeout time.Duration 156 maxKillTimeout time.Duration 157 resourceUsageLock sync.RWMutex 158 resourceUsage *cstructs.TaskResourceUsage 159 waitCh chan *dstructs.WaitResult 160 doneCh chan bool 161 } 162 163 func NewDockerDriver(ctx *DriverContext) Driver { 164 return &DockerDriver{DriverContext: *ctx} 165 } 166 167 // Validate is used to validate the driver configuration 168 func (d *DockerDriver) Validate(config map[string]interface{}) error { 169 fd := &fields.FieldData{ 170 Raw: config, 171 Schema: map[string]*fields.FieldSchema{ 172 "image": &fields.FieldSchema{ 173 Type: fields.TypeString, 174 Required: true, 175 }, 176 "load": &fields.FieldSchema{ 177 Type: fields.TypeArray, 178 }, 179 "command": &fields.FieldSchema{ 180 Type: fields.TypeString, 181 }, 182 "args": &fields.FieldSchema{ 183 Type: fields.TypeArray, 184 }, 185 "ipc_mode": &fields.FieldSchema{ 186 Type: fields.TypeString, 187 }, 188 "network_mode": &fields.FieldSchema{ 189 Type: fields.TypeString, 190 }, 191 "pid_mode": &fields.FieldSchema{ 192 Type: fields.TypeString, 193 }, 194 "uts_mode": &fields.FieldSchema{ 195 Type: fields.TypeString, 196 }, 197 "port_map": &fields.FieldSchema{ 198 Type: fields.TypeArray, 199 }, 200 "privileged": &fields.FieldSchema{ 201 Type: fields.TypeBool, 202 }, 203 "dns_servers": &fields.FieldSchema{ 204 Type: fields.TypeArray, 205 }, 206 "dns_search_domains": &fields.FieldSchema{ 207 Type: fields.TypeArray, 208 }, 209 "hostname": &fields.FieldSchema{ 210 Type: fields.TypeString, 211 }, 212 "labels": &fields.FieldSchema{ 213 Type: fields.TypeArray, 214 }, 215 "auth": &fields.FieldSchema{ 216 Type: fields.TypeArray, 217 }, 218 "ssl": &fields.FieldSchema{ 219 Type: fields.TypeBool, 220 }, 221 "tty": &fields.FieldSchema{ 222 Type: fields.TypeBool, 223 }, 224 "interactive": &fields.FieldSchema{ 225 Type: fields.TypeBool, 226 }, 227 "attach_stdin": &fields.FieldSchema{ 228 Type: fields.TypeBool, 229 }, 230 "attach_stdout": &fields.FieldSchema{ 231 Type: fields.TypeBool, 232 }, 233 "attach_stderr": &fields.FieldSchema{ 234 Type: fields.TypeBool, 235 }, 236 "shm_size": &fields.FieldSchema{ 237 Type: fields.TypeInt, 238 }, 239 "work_dir": &fields.FieldSchema{ 240 Type: fields.TypeString, 241 }, 242 }, 243 } 244 245 if err := fd.Validate(); err != nil { 246 return err 247 } 248 249 return nil 250 } 251 252 // dockerClients creates two *docker.Client, one for long running operations and 253 // the other for shorter operations. In test / dev mode we can use ENV vars to 254 // connect to the docker daemon. In production mode we will read docker.endpoint 255 // from the config file. 256 func (d *DockerDriver) dockerClients() (*docker.Client, *docker.Client, error) { 257 if client != nil && waitClient != nil { 258 return client, waitClient, nil 259 } 260 261 var err error 262 var merr multierror.Error 263 createClients.Do(func() { 264 if err = shelpers.Init(); err != nil { 265 d.logger.Printf("[FATAL] driver.docker: unable to initialize stats: %v", err) 266 return 267 } 268 269 // Default to using whatever is configured in docker.endpoint. If this is 270 // not specified we'll fall back on NewClientFromEnv which reads config from 271 // the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and 272 // DOCKER_CERT_PATH. This allows us to lock down the config in production 273 // but also accept the standard ENV configs for dev and test. 274 dockerEndpoint := d.config.Read("docker.endpoint") 275 if dockerEndpoint != "" { 276 cert := d.config.Read("docker.tls.cert") 277 key := d.config.Read("docker.tls.key") 278 ca := d.config.Read("docker.tls.ca") 279 280 if cert+key+ca != "" { 281 d.logger.Printf("[DEBUG] driver.docker: using TLS client connection to %s", dockerEndpoint) 282 client, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca) 283 if err != nil { 284 merr.Errors = append(merr.Errors, err) 285 } 286 waitClient, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca) 287 if err != nil { 288 merr.Errors = append(merr.Errors, err) 289 } 290 } else { 291 d.logger.Printf("[DEBUG] driver.docker: using standard client connection to %s", dockerEndpoint) 292 client, err = docker.NewClient(dockerEndpoint) 293 if err != nil { 294 merr.Errors = append(merr.Errors, err) 295 } 296 waitClient, err = docker.NewClient(dockerEndpoint) 297 if err != nil { 298 merr.Errors = append(merr.Errors, err) 299 } 300 } 301 client.SetTimeout(dockerTimeout) 302 return 303 } 304 305 d.logger.Println("[DEBUG] driver.docker: using client connection initialized from environment") 306 client, err = docker.NewClientFromEnv() 307 if err != nil { 308 merr.Errors = append(merr.Errors, err) 309 } 310 client.SetTimeout(dockerTimeout) 311 312 waitClient, err = docker.NewClientFromEnv() 313 if err != nil { 314 merr.Errors = append(merr.Errors, err) 315 } 316 }) 317 return client, waitClient, merr.ErrorOrNil() 318 } 319 320 func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { 321 // Get the current status so that we can log any debug messages only if the 322 // state changes 323 _, currentlyEnabled := node.Attributes[dockerDriverAttr] 324 325 // Initialize docker API clients 326 client, _, err := d.dockerClients() 327 if err != nil { 328 delete(node.Attributes, dockerDriverAttr) 329 if currentlyEnabled { 330 d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err) 331 } 332 return false, nil 333 } 334 335 privileged := d.config.ReadBoolDefault("docker.privileged.enabled", false) 336 if privileged { 337 node.Attributes["docker.privileged.enabled"] = "1" 338 } 339 340 // This is the first operation taken on the client so we'll try to 341 // establish a connection to the Docker daemon. If this fails it means 342 // Docker isn't available so we'll simply disable the docker driver. 343 env, err := client.Version() 344 if err != nil { 345 if currentlyEnabled { 346 d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon at %s: %s", client.Endpoint(), err) 347 } 348 delete(node.Attributes, dockerDriverAttr) 349 return false, nil 350 } 351 352 node.Attributes[dockerDriverAttr] = "1" 353 node.Attributes["driver.docker.version"] = env.Get("Version") 354 return true, nil 355 } 356 357 func (d *DockerDriver) containerBinds(alloc *allocdir.AllocDir, task *structs.Task) ([]string, error) { 358 shared := alloc.SharedDir 359 local, ok := alloc.TaskDirs[task.Name] 360 if !ok { 361 return nil, fmt.Errorf("Failed to find task local directory: %v", task.Name) 362 } 363 364 allocDirBind := fmt.Sprintf("%s:%s", shared, allocdir.SharedAllocContainerPath) 365 taskLocalBind := fmt.Sprintf("%s:%s", local, allocdir.TaskLocalContainerPath) 366 367 if selinuxLabel := d.config.Read("docker.volumes.selinuxlabel"); selinuxLabel != "" { 368 allocDirBind = fmt.Sprintf("%s:%s", allocDirBind, selinuxLabel) 369 taskLocalBind = fmt.Sprintf("%s:%s", taskLocalBind, selinuxLabel) 370 } 371 return []string{ 372 allocDirBind, 373 taskLocalBind, 374 }, nil 375 } 376 377 // createContainer initializes a struct needed to call docker.client.CreateContainer() 378 func (d *DockerDriver) createContainer(ctx *ExecContext, task *structs.Task, 379 driverConfig *DockerDriverConfig, syslogAddr string) (docker.CreateContainerOptions, error) { 380 var c docker.CreateContainerOptions 381 if task.Resources == nil { 382 // Guard against missing resources. We should never have been able to 383 // schedule a job without specifying this. 384 d.logger.Println("[ERR] driver.docker: task.Resources is empty") 385 return c, fmt.Errorf("task.Resources is empty") 386 } 387 388 binds, err := d.containerBinds(ctx.AllocDir, task) 389 if err != nil { 390 return c, err 391 } 392 393 // Set environment variables. 394 d.taskEnv.SetAllocDir(allocdir.SharedAllocContainerPath) 395 d.taskEnv.SetTaskLocalDir(allocdir.TaskLocalContainerPath) 396 d.taskEnv.SetTaskLocalDir(allocdir.TaskSecretsContainerPath) 397 398 config := &docker.Config{ 399 Image: driverConfig.ImageName, 400 Hostname: driverConfig.Hostname, 401 User: task.User, 402 Tty: driverConfig.TTY, 403 OpenStdin: driverConfig.Interactive, 404 AttachStdin: driverConfig.AttachStdin, 405 AttachStdout: driverConfig.AttachStdout, 406 AttachStderr: driverConfig.AttachStderr, 407 } 408 409 // When allocating stdin in attached mode, close stdin at client disconnect 410 if config.OpenStdin && config.AttachStdin { 411 config.StdinOnce = true 412 } 413 414 if driverConfig.WorkDir != "" { 415 config.WorkingDir = driverConfig.WorkDir 416 } 417 418 memLimit := int64(task.Resources.MemoryMB) * 1024 * 1024 419 hostConfig := &docker.HostConfig{ 420 // Convert MB to bytes. This is an absolute value. 421 Memory: memLimit, 422 MemorySwap: memLimit, // MemorySwap is memory + swap. 423 // Convert Mhz to shares. This is a relative value. 424 CPUShares: int64(task.Resources.CPU), 425 426 // Binds are used to mount a host volume into the container. We mount a 427 // local directory for storage and a shared alloc directory that can be 428 // used to share data between different tasks in the same task group. 429 Binds: binds, 430 LogConfig: docker.LogConfig{ 431 Type: "syslog", 432 Config: map[string]string{ 433 "syslog-address": syslogAddr, 434 }, 435 }, 436 } 437 438 d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Name) 439 d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Name) 440 d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Name) 441 442 // set privileged mode 443 hostPrivileged := d.config.ReadBoolDefault("docker.privileged.enabled", false) 444 if driverConfig.Privileged && !hostPrivileged { 445 return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`) 446 } 447 hostConfig.Privileged = driverConfig.Privileged 448 449 // set SHM size 450 if driverConfig.ShmSize != 0 { 451 hostConfig.ShmSize = driverConfig.ShmSize 452 } 453 454 // set DNS servers 455 for _, ip := range driverConfig.DNSServers { 456 if net.ParseIP(ip) != nil { 457 hostConfig.DNS = append(hostConfig.DNS, ip) 458 } else { 459 d.logger.Printf("[ERR] driver.docker: invalid ip address for container dns server: %s", ip) 460 } 461 } 462 463 // set DNS search domains 464 for _, domain := range driverConfig.DNSSearchDomains { 465 hostConfig.DNSSearch = append(hostConfig.DNSSearch, domain) 466 } 467 468 hostConfig.IpcMode = driverConfig.IpcMode 469 hostConfig.PidMode = driverConfig.PidMode 470 hostConfig.UTSMode = driverConfig.UTSMode 471 472 hostConfig.NetworkMode = driverConfig.NetworkMode 473 if hostConfig.NetworkMode == "" { 474 // docker default 475 d.logger.Printf("[DEBUG] driver.docker: networking mode not specified; defaulting to %s", defaultNetworkMode) 476 hostConfig.NetworkMode = defaultNetworkMode 477 } 478 479 // Setup port mapping and exposed ports 480 if len(task.Resources.Networks) == 0 { 481 d.logger.Println("[DEBUG] driver.docker: No network interfaces are available") 482 if len(driverConfig.PortMap) > 0 { 483 return c, fmt.Errorf("Trying to map ports but no network interface is available") 484 } 485 } else { 486 // TODO add support for more than one network 487 network := task.Resources.Networks[0] 488 publishedPorts := map[docker.Port][]docker.PortBinding{} 489 exposedPorts := map[docker.Port]struct{}{} 490 491 for _, port := range network.ReservedPorts { 492 // By default we will map the allocated port 1:1 to the container 493 containerPortInt := port.Value 494 495 // If the user has mapped a port using port_map we'll change it here 496 if mapped, ok := driverConfig.PortMap[port.Label]; ok { 497 containerPortInt = mapped 498 } 499 500 hostPortStr := strconv.Itoa(port.Value) 501 containerPort := docker.Port(strconv.Itoa(containerPortInt)) 502 503 publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr) 504 publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr) 505 d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)", network.IP, port.Value, port.Value) 506 507 exposedPorts[containerPort+"/tcp"] = struct{}{} 508 exposedPorts[containerPort+"/udp"] = struct{}{} 509 d.logger.Printf("[DEBUG] driver.docker: exposed port %d", port.Value) 510 } 511 512 for _, port := range network.DynamicPorts { 513 // By default we will map the allocated port 1:1 to the container 514 containerPortInt := port.Value 515 516 // If the user has mapped a port using port_map we'll change it here 517 if mapped, ok := driverConfig.PortMap[port.Label]; ok { 518 containerPortInt = mapped 519 } 520 521 hostPortStr := strconv.Itoa(port.Value) 522 containerPort := docker.Port(strconv.Itoa(containerPortInt)) 523 524 publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr) 525 publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr) 526 d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (mapped)", network.IP, port.Value, containerPortInt) 527 528 exposedPorts[containerPort+"/tcp"] = struct{}{} 529 exposedPorts[containerPort+"/udp"] = struct{}{} 530 d.logger.Printf("[DEBUG] driver.docker: exposed port %s", containerPort) 531 } 532 533 d.taskEnv.SetPortMap(driverConfig.PortMap) 534 535 hostConfig.PortBindings = publishedPorts 536 config.ExposedPorts = exposedPorts 537 } 538 539 d.taskEnv.Build() 540 parsedArgs := d.taskEnv.ParseAndReplace(driverConfig.Args) 541 542 // If the user specified a custom command to run as their entrypoint, we'll 543 // inject it here. 544 if driverConfig.Command != "" { 545 // Validate command 546 if err := validateCommand(driverConfig.Command, "args"); err != nil { 547 return c, err 548 } 549 550 cmd := []string{driverConfig.Command} 551 if len(driverConfig.Args) != 0 { 552 cmd = append(cmd, parsedArgs...) 553 } 554 d.logger.Printf("[DEBUG] driver.docker: setting container startup command to: %s", strings.Join(cmd, " ")) 555 config.Cmd = cmd 556 } else if len(driverConfig.Args) != 0 { 557 config.Cmd = parsedArgs 558 } 559 560 if len(driverConfig.Labels) > 0 { 561 config.Labels = driverConfig.Labels 562 d.logger.Printf("[DEBUG] driver.docker: applied labels on the container: %+v", config.Labels) 563 } 564 565 config.Env = d.taskEnv.EnvList() 566 567 containerName := fmt.Sprintf("%s-%s", task.Name, ctx.AllocID) 568 d.logger.Printf("[DEBUG] driver.docker: setting container name to: %s", containerName) 569 570 return docker.CreateContainerOptions{ 571 Name: containerName, 572 Config: config, 573 HostConfig: hostConfig, 574 }, nil 575 } 576 577 var ( 578 // imageNotFoundMatcher is a regex expression that matches the image not 579 // found error Docker returns. 580 imageNotFoundMatcher = regexp.MustCompile(`Error: image .+ not found`) 581 ) 582 583 // recoverablePullError wraps the error gotten when trying to pull and image if 584 // the error is recoverable. 585 func (d *DockerDriver) recoverablePullError(err error, image string) error { 586 recoverable := true 587 if imageNotFoundMatcher.MatchString(err.Error()) { 588 recoverable = false 589 } 590 return dstructs.NewRecoverableError(fmt.Errorf("Failed to pull `%s`: %s", image, err), recoverable) 591 } 592 593 func (d *DockerDriver) Periodic() (bool, time.Duration) { 594 return true, 15 * time.Second 595 } 596 597 // createImage creates a docker image either by pulling it from a registry or by 598 // loading it from the file system 599 func (d *DockerDriver) createImage(driverConfig *DockerDriverConfig, client *docker.Client, taskDir string) error { 600 image := driverConfig.ImageName 601 repo, tag := docker.ParseRepositoryTag(image) 602 if tag == "" { 603 tag = "latest" 604 } 605 606 var dockerImage *docker.Image 607 var err error 608 // We're going to check whether the image is already downloaded. If the tag 609 // is "latest" we have to check for a new version every time so we don't 610 // bother to check and cache the id here. We'll download first, then cache. 611 if tag != "latest" { 612 dockerImage, err = client.InspectImage(image) 613 } 614 615 // Download the image 616 if dockerImage == nil { 617 if len(driverConfig.LoadImages) > 0 { 618 return d.loadImage(driverConfig, client, taskDir) 619 } 620 621 return d.pullImage(driverConfig, client, repo, tag) 622 } 623 return err 624 } 625 626 // pullImage creates an image by pulling it from a docker registry 627 func (d *DockerDriver) pullImage(driverConfig *DockerDriverConfig, client *docker.Client, repo string, tag string) error { 628 pullOptions := docker.PullImageOptions{ 629 Repository: repo, 630 Tag: tag, 631 } 632 633 authOptions := docker.AuthConfiguration{} 634 if len(driverConfig.Auth) != 0 { 635 authOptions = docker.AuthConfiguration{ 636 Username: driverConfig.Auth[0].Username, 637 Password: driverConfig.Auth[0].Password, 638 Email: driverConfig.Auth[0].Email, 639 ServerAddress: driverConfig.Auth[0].ServerAddress, 640 } 641 } 642 643 if authConfigFile := d.config.Read("docker.auth.config"); authConfigFile != "" { 644 if f, err := os.Open(authConfigFile); err == nil { 645 defer f.Close() 646 var authConfigurations *docker.AuthConfigurations 647 if authConfigurations, err = docker.NewAuthConfigurations(f); err != nil { 648 return fmt.Errorf("Failed to create docker auth object: %v", err) 649 } 650 651 authConfigurationKey := "" 652 if driverConfig.SSL { 653 authConfigurationKey += "https://" 654 } 655 656 authConfigurationKey += strings.Split(driverConfig.ImageName, "/")[0] 657 if authConfiguration, ok := authConfigurations.Configs[authConfigurationKey]; ok { 658 authOptions = authConfiguration 659 } 660 } else { 661 return fmt.Errorf("Failed to open auth config file: %v, error: %v", authConfigFile, err) 662 } 663 } 664 665 err := client.PullImage(pullOptions, authOptions) 666 if err != nil { 667 d.logger.Printf("[ERR] driver.docker: failed pulling container %s:%s: %s", repo, tag, err) 668 return d.recoverablePullError(err, driverConfig.ImageName) 669 } 670 d.logger.Printf("[DEBUG] driver.docker: docker pull %s:%s succeeded", repo, tag) 671 return nil 672 } 673 674 // loadImage creates an image by loading it from the file system 675 func (d *DockerDriver) loadImage(driverConfig *DockerDriverConfig, client *docker.Client, taskDir string) error { 676 var errors multierror.Error 677 for _, image := range driverConfig.LoadImages { 678 archive := filepath.Join(taskDir, allocdir.TaskLocal, image) 679 d.logger.Printf("[DEBUG] driver.docker: loading image from: %v", archive) 680 f, err := os.Open(archive) 681 if err != nil { 682 errors.Errors = append(errors.Errors, fmt.Errorf("unable to open image archive: %v", err)) 683 continue 684 } 685 if err := client.LoadImage(docker.LoadImageOptions{InputStream: f}); err != nil { 686 errors.Errors = append(errors.Errors, err) 687 } 688 f.Close() 689 } 690 return errors.ErrorOrNil() 691 } 692 693 func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (DriverHandle, error) { 694 driverConfig, err := NewDockerDriverConfig(task) 695 if err != nil { 696 return nil, err 697 } 698 699 cleanupImage := d.config.ReadBoolDefault("docker.cleanup.image", true) 700 701 taskDir, ok := ctx.AllocDir.TaskDirs[d.DriverContext.taskName] 702 if !ok { 703 return nil, fmt.Errorf("Could not find task directory for task: %v", d.DriverContext.taskName) 704 } 705 706 // Initialize docker API clients 707 client, waitClient, err := d.dockerClients() 708 if err != nil { 709 return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 710 } 711 712 if err := d.createImage(driverConfig, client, taskDir); err != nil { 713 return nil, fmt.Errorf("failed to create image: %v", err) 714 } 715 716 image := driverConfig.ImageName 717 // Now that we have the image we can get the image id 718 dockerImage, err := client.InspectImage(image) 719 if err != nil { 720 d.logger.Printf("[ERR] driver.docker: failed getting image id for %s: %s", image, err) 721 return nil, fmt.Errorf("Failed to determine image id for `%s`: %s", image, err) 722 } 723 d.logger.Printf("[DEBUG] driver.docker: identified image %s as %s", image, dockerImage.ID) 724 725 bin, err := discover.NomadExecutable() 726 if err != nil { 727 return nil, fmt.Errorf("unable to find the nomad binary: %v", err) 728 } 729 pluginLogFile := filepath.Join(taskDir, fmt.Sprintf("%s-executor.out", task.Name)) 730 pluginConfig := &plugin.ClientConfig{ 731 Cmd: exec.Command(bin, "executor", pluginLogFile), 732 } 733 734 exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config) 735 if err != nil { 736 return nil, err 737 } 738 executorCtx := &executor.ExecutorContext{ 739 TaskEnv: d.taskEnv, 740 Task: task, 741 Driver: "docker", 742 AllocDir: ctx.AllocDir, 743 AllocID: ctx.AllocID, 744 PortLowerBound: d.config.ClientMinPort, 745 PortUpperBound: d.config.ClientMaxPort, 746 } 747 ss, err := exec.LaunchSyslogServer(executorCtx) 748 if err != nil { 749 return nil, fmt.Errorf("failed to start syslog collector: %v", err) 750 } 751 752 config, err := d.createContainer(ctx, task, driverConfig, ss.Addr) 753 if err != nil { 754 d.logger.Printf("[ERR] driver.docker: failed to create container configuration for image %s: %s", image, err) 755 pluginClient.Kill() 756 return nil, fmt.Errorf("Failed to create container configuration for image %s: %s", image, err) 757 } 758 // Create a container 759 container, err := client.CreateContainer(config) 760 if err != nil { 761 // If the container already exists because of a previous failure we'll 762 // try to purge it and re-create it. 763 if strings.Contains(err.Error(), "container already exists") { 764 // Get the ID of the existing container so we can delete it 765 containers, err := client.ListContainers(docker.ListContainersOptions{ 766 // The image might be in use by a stopped container, so check everything 767 All: true, 768 Filters: map[string][]string{ 769 "name": []string{config.Name}, 770 }, 771 }) 772 if err != nil { 773 d.logger.Printf("[ERR] driver.docker: failed to query list of containers matching name:%s", config.Name) 774 pluginClient.Kill() 775 return nil, fmt.Errorf("Failed to query list of containers: %s", err) 776 } 777 778 // Couldn't find any matching containers 779 if len(containers) == 0 { 780 d.logger.Printf("[ERR] driver.docker: failed to get id for container %s: %#v", config.Name, containers) 781 pluginClient.Kill() 782 return nil, fmt.Errorf("Failed to get id for container %s", config.Name) 783 } 784 785 // Delete matching containers 786 d.logger.Printf("[INFO] driver.docker: a container with the name %s already exists; will attempt to purge and re-create", config.Name) 787 for _, container := range containers { 788 err = client.RemoveContainer(docker.RemoveContainerOptions{ 789 ID: container.ID, 790 }) 791 if err != nil { 792 d.logger.Printf("[ERR] driver.docker: failed to purge container %s", container.ID) 793 pluginClient.Kill() 794 return nil, fmt.Errorf("Failed to purge container %s: %s", container.ID, err) 795 } 796 d.logger.Printf("[INFO] driver.docker: purged container %s", container.ID) 797 } 798 799 container, err = client.CreateContainer(config) 800 if err != nil { 801 d.logger.Printf("[ERR] driver.docker: failed to re-create container %s; aborting", config.Name) 802 pluginClient.Kill() 803 return nil, fmt.Errorf("Failed to re-create container %s; aborting", config.Name) 804 } 805 } else { 806 // We failed to create the container for some other reason. 807 d.logger.Printf("[ERR] driver.docker: failed to create container from image %s: %s", image, err) 808 pluginClient.Kill() 809 return nil, fmt.Errorf("Failed to create container from image %s: %s", image, err) 810 } 811 } 812 d.logger.Printf("[INFO] driver.docker: created container %s", container.ID) 813 814 // Start the container 815 err = client.StartContainer(container.ID, container.HostConfig) 816 if err != nil { 817 d.logger.Printf("[ERR] driver.docker: failed to start container %s: %s", container.ID, err) 818 pluginClient.Kill() 819 return nil, fmt.Errorf("Failed to start container %s: %s", container.ID, err) 820 } 821 d.logger.Printf("[INFO] driver.docker: started container %s", container.ID) 822 823 // Return a driver handle 824 maxKill := d.DriverContext.config.MaxKillTimeout 825 h := &DockerHandle{ 826 client: client, 827 waitClient: waitClient, 828 executor: exec, 829 pluginClient: pluginClient, 830 cleanupImage: cleanupImage, 831 logger: d.logger, 832 imageID: dockerImage.ID, 833 containerID: container.ID, 834 version: d.config.Version, 835 killTimeout: GetKillTimeout(task.KillTimeout, maxKill), 836 maxKillTimeout: maxKill, 837 doneCh: make(chan bool), 838 waitCh: make(chan *dstructs.WaitResult, 1), 839 } 840 if err := exec.SyncServices(consulContext(d.config, container.ID)); err != nil { 841 d.logger.Printf("[ERR] driver.docker: error registering services with consul for task: %q: %v", task.Name, err) 842 } 843 go h.collectStats() 844 go h.run() 845 return h, nil 846 } 847 848 func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { 849 cleanupImage := d.config.ReadBoolDefault("docker.cleanup.image", true) 850 851 // Split the handle 852 pidBytes := []byte(strings.TrimPrefix(handleID, "DOCKER:")) 853 pid := &dockerPID{} 854 if err := json.Unmarshal(pidBytes, pid); err != nil { 855 return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) 856 } 857 d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", pid.ContainerID) 858 d.logger.Printf("[DEBUG] driver.docker: re-attached to handle: %s", handleID) 859 pluginConfig := &plugin.ClientConfig{ 860 Reattach: pid.PluginConfig.PluginConfig(), 861 } 862 863 client, waitClient, err := d.dockerClients() 864 if err != nil { 865 return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 866 } 867 868 // Look for a running container with this ID 869 containers, err := client.ListContainers(docker.ListContainersOptions{ 870 Filters: map[string][]string{ 871 "id": []string{pid.ContainerID}, 872 }, 873 }) 874 if err != nil { 875 return nil, fmt.Errorf("Failed to query for container %s: %v", pid.ContainerID, err) 876 } 877 878 found := false 879 for _, container := range containers { 880 if container.ID == pid.ContainerID { 881 found = true 882 } 883 } 884 if !found { 885 return nil, fmt.Errorf("Failed to find container %s", pid.ContainerID) 886 } 887 exec, pluginClient, err := createExecutor(pluginConfig, d.config.LogOutput, d.config) 888 if err != nil { 889 d.logger.Printf("[INFO] driver.docker: couldn't re-attach to the plugin process: %v", err) 890 d.logger.Printf("[DEBUG] driver.docker: stopping container %q", pid.ContainerID) 891 if e := client.StopContainer(pid.ContainerID, uint(pid.KillTimeout.Seconds())); e != nil { 892 d.logger.Printf("[DEBUG] driver.docker: couldn't stop container: %v", e) 893 } 894 return nil, err 895 } 896 897 ver, _ := exec.Version() 898 d.logger.Printf("[DEBUG] driver.docker: version of executor: %v", ver.Version) 899 900 // Return a driver handle 901 h := &DockerHandle{ 902 client: client, 903 waitClient: waitClient, 904 executor: exec, 905 pluginClient: pluginClient, 906 cleanupImage: cleanupImage, 907 logger: d.logger, 908 imageID: pid.ImageID, 909 containerID: pid.ContainerID, 910 version: pid.Version, 911 killTimeout: pid.KillTimeout, 912 maxKillTimeout: pid.MaxKillTimeout, 913 doneCh: make(chan bool), 914 waitCh: make(chan *dstructs.WaitResult, 1), 915 } 916 if err := exec.SyncServices(consulContext(d.config, pid.ContainerID)); err != nil { 917 h.logger.Printf("[ERR] driver.docker: error registering services with consul: %v", err) 918 } 919 920 go h.collectStats() 921 go h.run() 922 return h, nil 923 } 924 925 func (h *DockerHandle) ID() string { 926 // Return a handle to the PID 927 pid := dockerPID{ 928 Version: h.version, 929 ImageID: h.imageID, 930 ContainerID: h.containerID, 931 KillTimeout: h.killTimeout, 932 MaxKillTimeout: h.maxKillTimeout, 933 PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()), 934 } 935 data, err := json.Marshal(pid) 936 if err != nil { 937 h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err) 938 } 939 return fmt.Sprintf("DOCKER:%s", string(data)) 940 } 941 942 func (h *DockerHandle) ContainerID() string { 943 return h.containerID 944 } 945 946 func (h *DockerHandle) WaitCh() chan *dstructs.WaitResult { 947 return h.waitCh 948 } 949 950 func (h *DockerHandle) Update(task *structs.Task) error { 951 // Store the updated kill timeout. 952 h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout) 953 if err := h.executor.UpdateTask(task); err != nil { 954 h.logger.Printf("[DEBUG] driver.docker: failed to update log config: %v", err) 955 } 956 957 // Update is not possible 958 return nil 959 } 960 961 // Kill is used to terminate the task. This uses `docker stop -t killTimeout` 962 func (h *DockerHandle) Kill() error { 963 // Stop the container 964 err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds())) 965 if err != nil { 966 h.executor.Exit() 967 h.pluginClient.Kill() 968 969 // Container has already been removed. 970 if strings.Contains(err.Error(), NoSuchContainerError) { 971 h.logger.Printf("[DEBUG] driver.docker: attempted to stop non-existent container %s", h.containerID) 972 return nil 973 } 974 h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err) 975 return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err) 976 } 977 h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID) 978 return nil 979 } 980 981 func (h *DockerHandle) Stats() (*cstructs.TaskResourceUsage, error) { 982 h.resourceUsageLock.RLock() 983 defer h.resourceUsageLock.RUnlock() 984 var err error 985 if h.resourceUsage == nil { 986 err = fmt.Errorf("stats collection hasn't started yet") 987 } 988 return h.resourceUsage, err 989 } 990 991 func (h *DockerHandle) run() { 992 // Wait for it... 993 exitCode, err := h.waitClient.WaitContainer(h.containerID) 994 if err != nil { 995 h.logger.Printf("[ERR] driver.docker: failed to wait for %s; container already terminated", h.containerID) 996 } 997 998 if exitCode != 0 { 999 err = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode) 1000 } 1001 1002 close(h.doneCh) 1003 h.waitCh <- dstructs.NewWaitResult(exitCode, 0, err) 1004 close(h.waitCh) 1005 1006 // Remove services 1007 if err := h.executor.DeregisterServices(); err != nil { 1008 h.logger.Printf("[ERR] driver.docker: error deregistering services: %v", err) 1009 } 1010 1011 // Shutdown the syslog collector 1012 if err := h.executor.Exit(); err != nil { 1013 h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err) 1014 } 1015 h.pluginClient.Kill() 1016 1017 // Stop the container just incase the docker daemon's wait returned 1018 // incorrectly 1019 if err := h.client.StopContainer(h.containerID, 0); err != nil { 1020 _, noSuchContainer := err.(*docker.NoSuchContainer) 1021 _, containerNotRunning := err.(*docker.ContainerNotRunning) 1022 if !containerNotRunning && !noSuchContainer { 1023 h.logger.Printf("[ERR] driver.docker: error stopping container: %v", err) 1024 } 1025 } 1026 1027 // Remove the container 1028 if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, RemoveVolumes: true, Force: true}); err != nil { 1029 h.logger.Printf("[ERR] driver.docker: error removing container: %v", err) 1030 } 1031 1032 // Cleanup the image 1033 if h.cleanupImage { 1034 if err := h.client.RemoveImage(h.imageID); err != nil { 1035 h.logger.Printf("[DEBUG] driver.docker: error removing image: %v", err) 1036 } 1037 } 1038 } 1039 1040 // collectStats starts collecting resource usage stats of a docker container 1041 func (h *DockerHandle) collectStats() { 1042 statsCh := make(chan *docker.Stats) 1043 statsOpts := docker.StatsOptions{ID: h.containerID, Done: h.doneCh, Stats: statsCh, Stream: true} 1044 go func() { 1045 //TODO handle Stats error 1046 if err := h.waitClient.Stats(statsOpts); err != nil { 1047 h.logger.Printf("[DEBUG] driver.docker: error collecting stats from container %s: %v", h.containerID, err) 1048 } 1049 }() 1050 numCores := runtime.NumCPU() 1051 for { 1052 select { 1053 case s := <-statsCh: 1054 if s != nil { 1055 ms := &cstructs.MemoryStats{ 1056 RSS: s.MemoryStats.Stats.Rss, 1057 Cache: s.MemoryStats.Stats.Cache, 1058 Swap: s.MemoryStats.Stats.Swap, 1059 MaxUsage: s.MemoryStats.MaxUsage, 1060 Measured: DockerMeasuredMemStats, 1061 } 1062 1063 cs := &cstructs.CpuStats{ 1064 ThrottledPeriods: s.CPUStats.ThrottlingData.ThrottledPeriods, 1065 ThrottledTime: s.CPUStats.ThrottlingData.ThrottledTime, 1066 Measured: DockerMeasuredCpuStats, 1067 } 1068 1069 // Calculate percentage 1070 cores := len(s.CPUStats.CPUUsage.PercpuUsage) 1071 cs.Percent = calculatePercent( 1072 s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, 1073 s.CPUStats.SystemCPUUsage, s.PreCPUStats.SystemCPUUsage, cores) 1074 cs.SystemMode = calculatePercent( 1075 s.CPUStats.CPUUsage.UsageInKernelmode, s.PreCPUStats.CPUUsage.UsageInKernelmode, 1076 s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, cores) 1077 cs.UserMode = calculatePercent( 1078 s.CPUStats.CPUUsage.UsageInUsermode, s.PreCPUStats.CPUUsage.UsageInUsermode, 1079 s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, cores) 1080 cs.TotalTicks = (cs.Percent / 100) * shelpers.TotalTicksAvailable() / float64(numCores) 1081 1082 h.resourceUsageLock.Lock() 1083 h.resourceUsage = &cstructs.TaskResourceUsage{ 1084 ResourceUsage: &cstructs.ResourceUsage{ 1085 MemoryStats: ms, 1086 CpuStats: cs, 1087 }, 1088 Timestamp: s.Read.UTC().UnixNano(), 1089 } 1090 h.resourceUsageLock.Unlock() 1091 } 1092 case <-h.doneCh: 1093 return 1094 } 1095 } 1096 } 1097 1098 func calculatePercent(newSample, oldSample, newTotal, oldTotal uint64, cores int) float64 { 1099 numerator := newSample - oldSample 1100 denom := newTotal - oldTotal 1101 if numerator <= 0 || denom <= 0 { 1102 return 0.0 1103 } 1104 1105 return (float64(numerator) / float64(denom)) * float64(cores) * 100.0 1106 }