github.com/hhrutter/nomad@v0.6.0-rc2.0.20170723054333-80c4b03f0705/client/driver/docker.go (about) 1 package driver 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "log" 8 "net" 9 "os" 10 "os/exec" 11 "path/filepath" 12 "runtime" 13 "strconv" 14 "strings" 15 "sync" 16 "syscall" 17 "time" 18 19 "github.com/armon/circbuf" 20 docker "github.com/fsouza/go-dockerclient" 21 22 "github.com/docker/docker/cli/config/configfile" 23 "github.com/docker/docker/reference" 24 "github.com/docker/docker/registry" 25 26 "github.com/hashicorp/go-multierror" 27 "github.com/hashicorp/go-plugin" 28 "github.com/hashicorp/nomad/client/allocdir" 29 "github.com/hashicorp/nomad/client/config" 30 "github.com/hashicorp/nomad/client/driver/env" 31 "github.com/hashicorp/nomad/client/driver/executor" 32 dstructs "github.com/hashicorp/nomad/client/driver/structs" 33 cstructs "github.com/hashicorp/nomad/client/structs" 34 "github.com/hashicorp/nomad/helper" 35 "github.com/hashicorp/nomad/helper/fields" 36 shelpers "github.com/hashicorp/nomad/helper/stats" 37 "github.com/hashicorp/nomad/nomad/structs" 38 "github.com/mitchellh/mapstructure" 39 ) 40 41 var ( 42 // We store the clients globally to cache the connection to the docker daemon. 43 createClients sync.Once 44 45 // client is a docker client with a timeout of 1 minute. This is for doing 46 // all operations with the docker daemon besides which are not long running 47 // such as creating, killing containers, etc. 48 client *docker.Client 49 50 // waitClient is a docker client with no timeouts. This is used for long 51 // running operations such as waiting on containers and collect stats 52 waitClient *docker.Client 53 54 // The statistics the Docker driver exposes 55 DockerMeasuredMemStats = []string{"RSS", "Cache", "Swap", "Max Usage"} 56 DockerMeasuredCpuStats = []string{"Throttled Periods", "Throttled Time", "Percent"} 57 58 // recoverableErrTimeouts returns a recoverable error if the error was due 59 // to timeouts 60 recoverableErrTimeouts = func(err error) error { 61 r := false 62 if strings.Contains(err.Error(), "Client.Timeout exceeded while awaiting headers") || 63 strings.Contains(err.Error(), "EOF") { 64 r = true 65 } 66 return structs.NewRecoverableError(err, r) 67 } 68 ) 69 70 const ( 71 // NoSuchContainerError is returned by the docker daemon if the container 72 // does not exist. 73 NoSuchContainerError = "No such container" 74 75 // The key populated in Node Attributes to indicate presence of the Docker 76 // driver 77 dockerDriverAttr = "driver.docker" 78 79 // dockerSELinuxLabelConfigOption is the key for configuring the 80 // SELinux label for binds. 81 dockerSELinuxLabelConfigOption = "docker.volumes.selinuxlabel" 82 83 // dockerVolumesConfigOption is the key for enabling the use of custom 84 // bind volumes to arbitrary host paths. 85 dockerVolumesConfigOption = "docker.volumes.enabled" 86 dockerVolumesConfigDefault = true 87 88 // dockerPrivilegedConfigOption is the key for running containers in 89 // Docker's privileged mode. 90 dockerPrivilegedConfigOption = "docker.privileged.enabled" 91 92 // dockerCleanupImageConfigOption is the key for whether or not to 93 // cleanup images after the task exits. 94 dockerCleanupImageConfigOption = "docker.cleanup.image" 95 dockerCleanupImageConfigDefault = true 96 97 // dockerPullTimeoutConfigOption is the key for setting an images pull 98 // timeout 99 dockerImageRemoveDelayConfigOption = "docker.cleanup.image.delay" 100 dockerImageRemoveDelayConfigDefault = 3 * time.Minute 101 102 // dockerTimeout is the length of time a request can be outstanding before 103 // it is timed out. 104 dockerTimeout = 5 * time.Minute 105 106 // dockerImageResKey is the CreatedResources key for docker images 107 dockerImageResKey = "image" 108 109 // dockerAuthHelperPrefix is the prefix to attach to the credential helper 110 // and should be found in the $PATH. Example: ${prefix-}${helper-name} 111 dockerAuthHelperPrefix = "docker-credential-" 112 ) 113 114 type DockerDriver struct { 115 DriverContext 116 117 driverConfig *DockerDriverConfig 118 imageID string 119 120 // A tri-state boolean to know if the fingerprinting has happened and 121 // whether it has been successful 122 fingerprintSuccess *bool 123 } 124 125 type DockerDriverAuth struct { 126 Username string `mapstructure:"username"` // username for the registry 127 Password string `mapstructure:"password"` // password to access the registry 128 Email string `mapstructure:"email"` // email address of the user who is allowed to access the registry 129 ServerAddress string `mapstructure:"server_address"` // server address of the registry 130 } 131 132 type DockerLoggingOpts struct { 133 Type string `mapstructure:"type"` 134 ConfigRaw []map[string]string `mapstructure:"config"` 135 Config map[string]string `mapstructure:"-"` 136 } 137 138 type DockerDriverConfig struct { 139 ImageName string `mapstructure:"image"` // Container's Image Name 140 LoadImage string `mapstructure:"load"` // LoadImage is a path to an image archive file 141 Command string `mapstructure:"command"` // The Command to run when the container starts up 142 Args []string `mapstructure:"args"` // The arguments to the Command 143 IpcMode string `mapstructure:"ipc_mode"` // The IPC mode of the container - host and none 144 NetworkMode string `mapstructure:"network_mode"` // The network mode of the container - host, nat and none 145 NetworkAliases []string `mapstructure:"network_aliases"` // The network-scoped alias for the container 146 IPv4Address string `mapstructure:"ipv4_address"` // The container ipv4 address 147 IPv6Address string `mapstructure:"ipv6_address"` // the container ipv6 address 148 PidMode string `mapstructure:"pid_mode"` // The PID mode of the container - host and none 149 UTSMode string `mapstructure:"uts_mode"` // The UTS mode of the container - host and none 150 UsernsMode string `mapstructure:"userns_mode"` // The User namespace mode of the container - host and none 151 PortMapRaw []map[string]string `mapstructure:"port_map"` // 152 PortMap map[string]int `mapstructure:"-"` // A map of host port labels and the ports exposed on the container 153 Privileged bool `mapstructure:"privileged"` // Flag to run the container in privileged mode 154 DNSServers []string `mapstructure:"dns_servers"` // DNS Server for containers 155 DNSSearchDomains []string `mapstructure:"dns_search_domains"` // DNS Search domains for containers 156 ExtraHosts []string `mapstructure:"extra_hosts"` // Add host to /etc/hosts (host:IP) 157 Hostname string `mapstructure:"hostname"` // Hostname for containers 158 LabelsRaw []map[string]string `mapstructure:"labels"` // 159 Labels map[string]string `mapstructure:"-"` // Labels to set when the container starts up 160 Auth []DockerDriverAuth `mapstructure:"auth"` // Authentication credentials for a private Docker registry 161 AuthSoftFail bool `mapstructure:"auth_soft_fail"` // Soft-fail if auth creds are provided but fail 162 TTY bool `mapstructure:"tty"` // Allocate a Pseudo-TTY 163 Interactive bool `mapstructure:"interactive"` // Keep STDIN open even if not attached 164 ShmSize int64 `mapstructure:"shm_size"` // Size of /dev/shm of the container in bytes 165 WorkDir string `mapstructure:"work_dir"` // Working directory inside the container 166 Logging []DockerLoggingOpts `mapstructure:"logging"` // Logging options for syslog server 167 Volumes []string `mapstructure:"volumes"` // Host-Volumes to mount in, syntax: /path/to/host/directory:/destination/path/in/container 168 VolumeDriver string `mapstructure:"volume_driver"` // Docker volume driver used for the container's volumes 169 ForcePull bool `mapstructure:"force_pull"` // Always force pull before running image, useful if your tags are mutable 170 MacAddress string `mapstructure:"mac_address"` // Pin mac address to container 171 SecurityOpt []string `mapstructure:"security_opt"` // Flags to pass directly to security-opt 172 } 173 174 // Validate validates a docker driver config 175 func (c *DockerDriverConfig) Validate() error { 176 if c.ImageName == "" { 177 return fmt.Errorf("Docker Driver needs an image name") 178 } 179 return nil 180 } 181 182 // NewDockerDriverConfig returns a docker driver config by parsing the HCL 183 // config 184 func NewDockerDriverConfig(task *structs.Task, env *env.TaskEnv) (*DockerDriverConfig, error) { 185 var dconf DockerDriverConfig 186 187 if err := mapstructure.WeakDecode(task.Config, &dconf); err != nil { 188 return nil, err 189 } 190 191 // Interpolate everthing that is a string 192 dconf.ImageName = env.ReplaceEnv(dconf.ImageName) 193 dconf.Command = env.ReplaceEnv(dconf.Command) 194 dconf.IpcMode = env.ReplaceEnv(dconf.IpcMode) 195 dconf.NetworkMode = env.ReplaceEnv(dconf.NetworkMode) 196 dconf.NetworkAliases = env.ParseAndReplace(dconf.NetworkAliases) 197 dconf.IPv4Address = env.ReplaceEnv(dconf.IPv4Address) 198 dconf.IPv6Address = env.ReplaceEnv(dconf.IPv6Address) 199 dconf.PidMode = env.ReplaceEnv(dconf.PidMode) 200 dconf.UTSMode = env.ReplaceEnv(dconf.UTSMode) 201 dconf.Hostname = env.ReplaceEnv(dconf.Hostname) 202 dconf.WorkDir = env.ReplaceEnv(dconf.WorkDir) 203 dconf.LoadImage = env.ReplaceEnv(dconf.LoadImage) 204 dconf.Volumes = env.ParseAndReplace(dconf.Volumes) 205 dconf.VolumeDriver = env.ReplaceEnv(dconf.VolumeDriver) 206 dconf.DNSServers = env.ParseAndReplace(dconf.DNSServers) 207 dconf.DNSSearchDomains = env.ParseAndReplace(dconf.DNSSearchDomains) 208 dconf.ExtraHosts = env.ParseAndReplace(dconf.ExtraHosts) 209 dconf.MacAddress = env.ReplaceEnv(dconf.MacAddress) 210 dconf.SecurityOpt = env.ParseAndReplace(dconf.SecurityOpt) 211 212 for _, m := range dconf.LabelsRaw { 213 for k, v := range m { 214 delete(m, k) 215 m[env.ReplaceEnv(k)] = env.ReplaceEnv(v) 216 } 217 } 218 dconf.Labels = mapMergeStrStr(dconf.LabelsRaw...) 219 220 for i, a := range dconf.Auth { 221 dconf.Auth[i].Username = env.ReplaceEnv(a.Username) 222 dconf.Auth[i].Password = env.ReplaceEnv(a.Password) 223 dconf.Auth[i].Email = env.ReplaceEnv(a.Email) 224 dconf.Auth[i].ServerAddress = env.ReplaceEnv(a.ServerAddress) 225 } 226 227 for i, l := range dconf.Logging { 228 dconf.Logging[i].Type = env.ReplaceEnv(l.Type) 229 for _, c := range l.ConfigRaw { 230 for k, v := range c { 231 delete(c, k) 232 c[env.ReplaceEnv(k)] = env.ReplaceEnv(v) 233 } 234 } 235 } 236 237 if len(dconf.Logging) > 0 { 238 dconf.Logging[0].Config = mapMergeStrStr(dconf.Logging[0].ConfigRaw...) 239 } 240 241 portMap := make(map[string]int) 242 for _, m := range dconf.PortMapRaw { 243 for k, v := range m { 244 ki, vi := env.ReplaceEnv(k), env.ReplaceEnv(v) 245 p, err := strconv.Atoi(vi) 246 if err != nil { 247 return nil, fmt.Errorf("failed to parse port map value %v to %v: %v", ki, vi, err) 248 } 249 portMap[ki] = p 250 } 251 } 252 dconf.PortMap = portMap 253 254 // Remove any http 255 if strings.Contains(dconf.ImageName, "https://") { 256 dconf.ImageName = strings.Replace(dconf.ImageName, "https://", "", 1) 257 } 258 259 if err := dconf.Validate(); err != nil { 260 return nil, err 261 } 262 return &dconf, nil 263 } 264 265 type dockerPID struct { 266 Version string 267 Image string 268 ImageID string 269 ContainerID string 270 KillTimeout time.Duration 271 MaxKillTimeout time.Duration 272 PluginConfig *PluginReattachConfig 273 } 274 275 type DockerHandle struct { 276 pluginClient *plugin.Client 277 executor executor.Executor 278 client *docker.Client 279 waitClient *docker.Client 280 logger *log.Logger 281 Image string 282 ImageID string 283 containerID string 284 version string 285 clkSpeed float64 286 killTimeout time.Duration 287 maxKillTimeout time.Duration 288 resourceUsageLock sync.RWMutex 289 resourceUsage *cstructs.TaskResourceUsage 290 waitCh chan *dstructs.WaitResult 291 doneCh chan bool 292 } 293 294 func NewDockerDriver(ctx *DriverContext) Driver { 295 return &DockerDriver{DriverContext: *ctx} 296 } 297 298 func (d *DockerDriver) Fingerprint(cfg *config.Config, node *structs.Node) (bool, error) { 299 // Initialize docker API clients 300 client, _, err := d.dockerClients() 301 if err != nil { 302 if d.fingerprintSuccess == nil || *d.fingerprintSuccess { 303 d.logger.Printf("[INFO] driver.docker: failed to initialize client: %s", err) 304 } 305 delete(node.Attributes, dockerDriverAttr) 306 d.fingerprintSuccess = helper.BoolToPtr(false) 307 return false, nil 308 } 309 310 // This is the first operation taken on the client so we'll try to 311 // establish a connection to the Docker daemon. If this fails it means 312 // Docker isn't available so we'll simply disable the docker driver. 313 env, err := client.Version() 314 if err != nil { 315 delete(node.Attributes, dockerDriverAttr) 316 if d.fingerprintSuccess == nil || *d.fingerprintSuccess { 317 d.logger.Printf("[DEBUG] driver.docker: could not connect to docker daemon at %s: %s", client.Endpoint(), err) 318 } 319 d.fingerprintSuccess = helper.BoolToPtr(false) 320 return false, nil 321 } 322 323 node.Attributes[dockerDriverAttr] = "1" 324 node.Attributes["driver.docker.version"] = env.Get("Version") 325 326 privileged := d.config.ReadBoolDefault(dockerPrivilegedConfigOption, false) 327 if privileged { 328 node.Attributes[dockerPrivilegedConfigOption] = "1" 329 } 330 331 // Advertise if this node supports Docker volumes 332 if d.config.ReadBoolDefault(dockerVolumesConfigOption, dockerVolumesConfigDefault) { 333 node.Attributes["driver."+dockerVolumesConfigOption] = "1" 334 } 335 336 // Detect bridge IP address - #2785 337 if nets, err := client.ListNetworks(); err != nil { 338 d.logger.Printf("[WARN] driver.docker: error discovering bridge IP: %v", err) 339 } else { 340 for _, n := range nets { 341 if n.Name != "bridge" { 342 continue 343 } 344 345 if len(n.IPAM.Config) == 0 { 346 d.logger.Printf("[WARN] driver.docker: no IPAM config for bridge network") 347 break 348 } 349 350 node.Attributes["driver.docker.bridge_ip"] = n.IPAM.Config[0].Gateway 351 } 352 } 353 354 d.fingerprintSuccess = helper.BoolToPtr(true) 355 return true, nil 356 } 357 358 // Validate is used to validate the driver configuration 359 func (d *DockerDriver) Validate(config map[string]interface{}) error { 360 fd := &fields.FieldData{ 361 Raw: config, 362 Schema: map[string]*fields.FieldSchema{ 363 "image": &fields.FieldSchema{ 364 Type: fields.TypeString, 365 Required: true, 366 }, 367 "load": &fields.FieldSchema{ 368 Type: fields.TypeString, 369 }, 370 "command": &fields.FieldSchema{ 371 Type: fields.TypeString, 372 }, 373 "args": &fields.FieldSchema{ 374 Type: fields.TypeArray, 375 }, 376 "ipc_mode": &fields.FieldSchema{ 377 Type: fields.TypeString, 378 }, 379 "network_mode": &fields.FieldSchema{ 380 Type: fields.TypeString, 381 }, 382 "network_aliases": &fields.FieldSchema{ 383 Type: fields.TypeArray, 384 }, 385 "ipv4_address": &fields.FieldSchema{ 386 Type: fields.TypeString, 387 }, 388 "ipv6_address": &fields.FieldSchema{ 389 Type: fields.TypeString, 390 }, 391 "mac_address": &fields.FieldSchema{ 392 Type: fields.TypeString, 393 }, 394 "pid_mode": &fields.FieldSchema{ 395 Type: fields.TypeString, 396 }, 397 "uts_mode": &fields.FieldSchema{ 398 Type: fields.TypeString, 399 }, 400 "userns_mode": &fields.FieldSchema{ 401 Type: fields.TypeString, 402 }, 403 "port_map": &fields.FieldSchema{ 404 Type: fields.TypeArray, 405 }, 406 "privileged": &fields.FieldSchema{ 407 Type: fields.TypeBool, 408 }, 409 "dns_servers": &fields.FieldSchema{ 410 Type: fields.TypeArray, 411 }, 412 "dns_search_domains": &fields.FieldSchema{ 413 Type: fields.TypeArray, 414 }, 415 "extra_hosts": &fields.FieldSchema{ 416 Type: fields.TypeArray, 417 }, 418 "hostname": &fields.FieldSchema{ 419 Type: fields.TypeString, 420 }, 421 "labels": &fields.FieldSchema{ 422 Type: fields.TypeArray, 423 }, 424 "auth": &fields.FieldSchema{ 425 Type: fields.TypeArray, 426 }, 427 "auth_soft_fail": &fields.FieldSchema{ 428 Type: fields.TypeBool, 429 }, 430 // COMPAT: Remove in 0.6.0. SSL is no longer needed 431 "ssl": &fields.FieldSchema{ 432 Type: fields.TypeBool, 433 }, 434 "tty": &fields.FieldSchema{ 435 Type: fields.TypeBool, 436 }, 437 "interactive": &fields.FieldSchema{ 438 Type: fields.TypeBool, 439 }, 440 "shm_size": &fields.FieldSchema{ 441 Type: fields.TypeInt, 442 }, 443 "work_dir": &fields.FieldSchema{ 444 Type: fields.TypeString, 445 }, 446 "logging": &fields.FieldSchema{ 447 Type: fields.TypeArray, 448 }, 449 "volumes": &fields.FieldSchema{ 450 Type: fields.TypeArray, 451 }, 452 "volume_driver": &fields.FieldSchema{ 453 Type: fields.TypeString, 454 }, 455 "force_pull": &fields.FieldSchema{ 456 Type: fields.TypeBool, 457 }, 458 "security_opt": &fields.FieldSchema{ 459 Type: fields.TypeArray, 460 }, 461 }, 462 } 463 464 if err := fd.Validate(); err != nil { 465 return err 466 } 467 468 return nil 469 } 470 471 func (d *DockerDriver) Abilities() DriverAbilities { 472 return DriverAbilities{ 473 SendSignals: true, 474 Exec: true, 475 } 476 } 477 478 func (d *DockerDriver) FSIsolation() cstructs.FSIsolation { 479 return cstructs.FSIsolationImage 480 } 481 482 // getDockerCoordinator returns the docker coordinator and the caller ID to use when 483 // interacting with the coordinator 484 func (d *DockerDriver) getDockerCoordinator(client *docker.Client) (*dockerCoordinator, string) { 485 config := &dockerCoordinatorConfig{ 486 client: client, 487 cleanup: d.config.ReadBoolDefault(dockerCleanupImageConfigOption, dockerCleanupImageConfigDefault), 488 logger: d.logger, 489 removeDelay: d.config.ReadDurationDefault(dockerImageRemoveDelayConfigOption, dockerImageRemoveDelayConfigDefault), 490 } 491 492 return GetDockerCoordinator(config), fmt.Sprintf("%s-%s", d.DriverContext.allocID, d.DriverContext.taskName) 493 } 494 495 func (d *DockerDriver) Prestart(ctx *ExecContext, task *structs.Task) (*PrestartResponse, error) { 496 driverConfig, err := NewDockerDriverConfig(task, ctx.TaskEnv) 497 if err != nil { 498 return nil, err 499 } 500 501 // Set state needed by Start 502 d.driverConfig = driverConfig 503 504 // Initialize docker API clients 505 client, _, err := d.dockerClients() 506 if err != nil { 507 return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 508 } 509 510 // Ensure the image is available 511 id, err := d.createImage(driverConfig, client, ctx.TaskDir) 512 if err != nil { 513 return nil, err 514 } 515 d.imageID = id 516 517 resp := NewPrestartResponse() 518 resp.CreatedResources.Add(dockerImageResKey, id) 519 520 // Return the PortMap if it's set 521 if len(driverConfig.PortMap) > 0 { 522 resp.Network = &cstructs.DriverNetwork{ 523 PortMap: driverConfig.PortMap, 524 } 525 } 526 return resp, nil 527 } 528 529 func (d *DockerDriver) Start(ctx *ExecContext, task *structs.Task) (*StartResponse, error) { 530 531 pluginLogFile := filepath.Join(ctx.TaskDir.Dir, "executor.out") 532 executorConfig := &dstructs.ExecutorConfig{ 533 LogFile: pluginLogFile, 534 LogLevel: d.config.LogLevel, 535 } 536 537 exec, pluginClient, err := createExecutor(d.config.LogOutput, d.config, executorConfig) 538 if err != nil { 539 return nil, err 540 } 541 executorCtx := &executor.ExecutorContext{ 542 TaskEnv: ctx.TaskEnv, 543 Task: task, 544 Driver: "docker", 545 AllocID: d.DriverContext.allocID, 546 LogDir: ctx.TaskDir.LogDir, 547 TaskDir: ctx.TaskDir.Dir, 548 PortLowerBound: d.config.ClientMinPort, 549 PortUpperBound: d.config.ClientMaxPort, 550 } 551 if err := exec.SetContext(executorCtx); err != nil { 552 pluginClient.Kill() 553 return nil, fmt.Errorf("failed to set executor context: %v", err) 554 } 555 556 // Only launch syslog server if we're going to use it! 557 syslogAddr := "" 558 if runtime.GOOS == "darwin" && len(d.driverConfig.Logging) == 0 { 559 d.logger.Printf("[DEBUG] driver.docker: disabling syslog driver as Docker for Mac workaround") 560 } else if len(d.driverConfig.Logging) == 0 || d.driverConfig.Logging[0].Type == "syslog" { 561 ss, err := exec.LaunchSyslogServer() 562 if err != nil { 563 pluginClient.Kill() 564 return nil, fmt.Errorf("failed to start syslog collector: %v", err) 565 } 566 syslogAddr = ss.Addr 567 } 568 569 config, err := d.createContainerConfig(ctx, task, d.driverConfig, syslogAddr) 570 if err != nil { 571 d.logger.Printf("[ERR] driver.docker: failed to create container configuration for image %q (%q): %v", d.driverConfig.ImageName, d.imageID, err) 572 pluginClient.Kill() 573 return nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", d.driverConfig.ImageName, d.imageID, err) 574 } 575 576 container, err := d.createContainer(config) 577 if err != nil { 578 wrapped := fmt.Sprintf("Failed to create container: %v", err) 579 d.logger.Printf("[ERR] driver.docker: %s", wrapped) 580 pluginClient.Kill() 581 return nil, structs.WrapRecoverable(wrapped, err) 582 } 583 584 d.logger.Printf("[INFO] driver.docker: created container %s", container.ID) 585 586 // We don't need to start the container if the container is already running 587 // since we don't create containers which are already present on the host 588 // and are running 589 if !container.State.Running { 590 // Start the container 591 if err := d.startContainer(container); err != nil { 592 d.logger.Printf("[ERR] driver.docker: failed to start container %s: %s", container.ID, err) 593 pluginClient.Kill() 594 return nil, fmt.Errorf("Failed to start container %s: %s", container.ID, err) 595 } 596 597 // InspectContainer to get all of the container metadata as 598 // much of the metadata (eg networking) isn't populated until 599 // the container is started 600 runningContainer, err := client.InspectContainer(container.ID) 601 if err != nil { 602 err = fmt.Errorf("failed to inspect started container %s: %s", container.ID, err) 603 d.logger.Printf("[ERR] driver.docker: %v", err) 604 pluginClient.Kill() 605 return nil, structs.NewRecoverableError(err, true) 606 } 607 container = runningContainer 608 d.logger.Printf("[INFO] driver.docker: started container %s", container.ID) 609 } else { 610 d.logger.Printf("[DEBUG] driver.docker: re-attaching to container %s with status %q", 611 container.ID, container.State.String()) 612 } 613 614 // Return a driver handle 615 maxKill := d.DriverContext.config.MaxKillTimeout 616 h := &DockerHandle{ 617 client: client, 618 waitClient: waitClient, 619 executor: exec, 620 pluginClient: pluginClient, 621 logger: d.logger, 622 Image: d.driverConfig.ImageName, 623 ImageID: d.imageID, 624 containerID: container.ID, 625 version: d.config.Version, 626 killTimeout: GetKillTimeout(task.KillTimeout, maxKill), 627 maxKillTimeout: maxKill, 628 doneCh: make(chan bool), 629 waitCh: make(chan *dstructs.WaitResult, 1), 630 } 631 go h.collectStats() 632 go h.run() 633 634 // Detect container address 635 ip, autoUse := d.detectIP(container) 636 637 // Create a response with the driver handle and container network metadata 638 resp := &StartResponse{ 639 Handle: h, 640 Network: &cstructs.DriverNetwork{ 641 PortMap: d.driverConfig.PortMap, 642 IP: ip, 643 AutoAdvertise: autoUse, 644 }, 645 } 646 return resp, nil 647 } 648 649 // detectIP of Docker container. Returns the first IP found as well as true if 650 // the IP should be advertised (bridge network IPs return false). Returns an 651 // empty string and false if no IP could be found. 652 func (d *DockerDriver) detectIP(c *docker.Container) (string, bool) { 653 if c.NetworkSettings == nil { 654 // This should only happen if there's been a coding error (such 655 // as not calling InspetContainer after CreateContainer). Code 656 // defensively in case the Docker API changes subtly. 657 d.logger.Printf("[ERROR] driver.docker: no network settings for container %s", c.ID) 658 return "", false 659 } 660 661 ip, ipName := "", "" 662 auto := false 663 for name, net := range c.NetworkSettings.Networks { 664 if net.IPAddress == "" { 665 // Ignore networks without an IP address 666 continue 667 } 668 669 ip = net.IPAddress 670 ipName = name 671 672 // Don't auto-advertise bridge IPs 673 if name != "bridge" { 674 auto = true 675 } 676 677 break 678 } 679 680 if n := len(c.NetworkSettings.Networks); n > 1 { 681 d.logger.Printf("[WARN] driver.docker: multiple (%d) Docker networks for container %q but Nomad only supports 1: choosing %q", n, c.ID, ipName) 682 } 683 684 return ip, auto 685 } 686 687 func (d *DockerDriver) Cleanup(_ *ExecContext, res *CreatedResources) error { 688 retry := false 689 var merr multierror.Error 690 for key, resources := range res.Resources { 691 switch key { 692 case dockerImageResKey: 693 for _, value := range resources { 694 err := d.cleanupImage(value) 695 if err != nil { 696 if structs.IsRecoverable(err) { 697 retry = true 698 } 699 merr.Errors = append(merr.Errors, err) 700 continue 701 } 702 703 // Remove cleaned image from resources 704 res.Remove(dockerImageResKey, value) 705 } 706 default: 707 d.logger.Printf("[ERR] driver.docker: unknown resource to cleanup: %q", key) 708 } 709 } 710 return structs.NewRecoverableError(merr.ErrorOrNil(), retry) 711 } 712 713 // cleanupImage removes a Docker image. No error is returned if the image 714 // doesn't exist or is still in use. Requires the global client to already be 715 // initialized. 716 func (d *DockerDriver) cleanupImage(imageID string) error { 717 if !d.config.ReadBoolDefault(dockerCleanupImageConfigOption, dockerCleanupImageConfigDefault) { 718 // Config says not to cleanup 719 return nil 720 } 721 722 coordinator, callerID := d.getDockerCoordinator(client) 723 coordinator.RemoveImage(imageID, callerID) 724 725 return nil 726 } 727 728 // dockerClients creates two *docker.Client, one for long running operations and 729 // the other for shorter operations. In test / dev mode we can use ENV vars to 730 // connect to the docker daemon. In production mode we will read docker.endpoint 731 // from the config file. 732 func (d *DockerDriver) dockerClients() (*docker.Client, *docker.Client, error) { 733 if client != nil && waitClient != nil { 734 return client, waitClient, nil 735 } 736 737 var err error 738 var merr multierror.Error 739 createClients.Do(func() { 740 // Default to using whatever is configured in docker.endpoint. If this is 741 // not specified we'll fall back on NewClientFromEnv which reads config from 742 // the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and 743 // DOCKER_CERT_PATH. This allows us to lock down the config in production 744 // but also accept the standard ENV configs for dev and test. 745 dockerEndpoint := d.config.Read("docker.endpoint") 746 if dockerEndpoint != "" { 747 cert := d.config.Read("docker.tls.cert") 748 key := d.config.Read("docker.tls.key") 749 ca := d.config.Read("docker.tls.ca") 750 751 if cert+key+ca != "" { 752 d.logger.Printf("[DEBUG] driver.docker: using TLS client connection to %s", dockerEndpoint) 753 client, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca) 754 if err != nil { 755 merr.Errors = append(merr.Errors, err) 756 } 757 waitClient, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca) 758 if err != nil { 759 merr.Errors = append(merr.Errors, err) 760 } 761 } else { 762 d.logger.Printf("[DEBUG] driver.docker: using standard client connection to %s", dockerEndpoint) 763 client, err = docker.NewClient(dockerEndpoint) 764 if err != nil { 765 merr.Errors = append(merr.Errors, err) 766 } 767 waitClient, err = docker.NewClient(dockerEndpoint) 768 if err != nil { 769 merr.Errors = append(merr.Errors, err) 770 } 771 } 772 client.SetTimeout(dockerTimeout) 773 return 774 } 775 776 d.logger.Println("[DEBUG] driver.docker: using client connection initialized from environment") 777 client, err = docker.NewClientFromEnv() 778 if err != nil { 779 merr.Errors = append(merr.Errors, err) 780 } 781 client.SetTimeout(dockerTimeout) 782 783 waitClient, err = docker.NewClientFromEnv() 784 if err != nil { 785 merr.Errors = append(merr.Errors, err) 786 } 787 }) 788 return client, waitClient, merr.ErrorOrNil() 789 } 790 791 func (d *DockerDriver) containerBinds(driverConfig *DockerDriverConfig, taskDir *allocdir.TaskDir, 792 task *structs.Task) ([]string, error) { 793 794 allocDirBind := fmt.Sprintf("%s:%s", taskDir.SharedAllocDir, allocdir.SharedAllocContainerPath) 795 taskLocalBind := fmt.Sprintf("%s:%s", taskDir.LocalDir, allocdir.TaskLocalContainerPath) 796 secretDirBind := fmt.Sprintf("%s:%s", taskDir.SecretsDir, allocdir.TaskSecretsContainerPath) 797 binds := []string{allocDirBind, taskLocalBind, secretDirBind} 798 799 volumesEnabled := d.config.ReadBoolDefault(dockerVolumesConfigOption, dockerVolumesConfigDefault) 800 801 if !volumesEnabled && driverConfig.VolumeDriver != "" { 802 return nil, fmt.Errorf("%s is false; cannot use volume driver %q", dockerVolumesConfigOption, driverConfig.VolumeDriver) 803 } 804 805 for _, userbind := range driverConfig.Volumes { 806 parts := strings.Split(userbind, ":") 807 if len(parts) < 2 { 808 return nil, fmt.Errorf("invalid docker volume: %q", userbind) 809 } 810 811 // Resolve dotted path segments 812 parts[0] = filepath.Clean(parts[0]) 813 814 // Absolute paths aren't always supported 815 if filepath.IsAbs(parts[0]) { 816 if !volumesEnabled { 817 // Disallow mounting arbitrary absolute paths 818 return nil, fmt.Errorf("%s is false; cannot mount host paths: %+q", dockerVolumesConfigOption, userbind) 819 } 820 binds = append(binds, userbind) 821 continue 822 } 823 824 // Relative paths are always allowed as they mount within a container 825 // When a VolumeDriver is set, we assume we receive a binding in the format volume-name:container-dest 826 // Otherwise, we assume we receive a relative path binding in the format relative/to/task:/also/in/container 827 if driverConfig.VolumeDriver == "" { 828 // Expand path relative to alloc dir 829 parts[0] = filepath.Join(taskDir.Dir, parts[0]) 830 } 831 832 binds = append(binds, strings.Join(parts, ":")) 833 } 834 835 if selinuxLabel := d.config.Read(dockerSELinuxLabelConfigOption); selinuxLabel != "" { 836 // Apply SELinux Label to each volume 837 for i := range binds { 838 binds[i] = fmt.Sprintf("%s:%s", binds[i], selinuxLabel) 839 } 840 } 841 842 return binds, nil 843 } 844 845 // createContainerConfig initializes a struct needed to call docker.client.CreateContainer() 846 func (d *DockerDriver) createContainerConfig(ctx *ExecContext, task *structs.Task, 847 driverConfig *DockerDriverConfig, syslogAddr string) (docker.CreateContainerOptions, error) { 848 var c docker.CreateContainerOptions 849 if task.Resources == nil { 850 // Guard against missing resources. We should never have been able to 851 // schedule a job without specifying this. 852 d.logger.Println("[ERR] driver.docker: task.Resources is empty") 853 return c, fmt.Errorf("task.Resources is empty") 854 } 855 856 binds, err := d.containerBinds(driverConfig, ctx.TaskDir, task) 857 if err != nil { 858 return c, err 859 } 860 861 config := &docker.Config{ 862 Image: d.imageID, 863 Hostname: driverConfig.Hostname, 864 User: task.User, 865 Tty: driverConfig.TTY, 866 OpenStdin: driverConfig.Interactive, 867 } 868 869 if driverConfig.WorkDir != "" { 870 config.WorkingDir = driverConfig.WorkDir 871 } 872 873 memLimit := int64(task.Resources.MemoryMB) * 1024 * 1024 874 875 if len(driverConfig.Logging) == 0 { 876 if runtime.GOOS != "darwin" { 877 d.logger.Printf("[DEBUG] driver.docker: Setting default logging options to syslog and %s", syslogAddr) 878 driverConfig.Logging = []DockerLoggingOpts{ 879 {Type: "syslog", Config: map[string]string{"syslog-address": syslogAddr}}, 880 } 881 } else { 882 d.logger.Printf("[DEBUG] driver.docker: deferring logging to docker on Docker for Mac") 883 } 884 } 885 886 hostConfig := &docker.HostConfig{ 887 // Convert MB to bytes. This is an absolute value. 888 Memory: memLimit, 889 // Convert Mhz to shares. This is a relative value. 890 CPUShares: int64(task.Resources.CPU), 891 892 // Binds are used to mount a host volume into the container. We mount a 893 // local directory for storage and a shared alloc directory that can be 894 // used to share data between different tasks in the same task group. 895 Binds: binds, 896 897 VolumeDriver: driverConfig.VolumeDriver, 898 } 899 900 // Windows does not support MemorySwap #2193 901 if runtime.GOOS != "windows" { 902 hostConfig.MemorySwap = memLimit // MemorySwap is memory + swap. 903 } 904 905 if len(driverConfig.Logging) != 0 { 906 d.logger.Printf("[DEBUG] driver.docker: Using config for logging: %+v", driverConfig.Logging[0]) 907 hostConfig.LogConfig = docker.LogConfig{ 908 Type: driverConfig.Logging[0].Type, 909 Config: driverConfig.Logging[0].Config, 910 } 911 } 912 913 d.logger.Printf("[DEBUG] driver.docker: using %d bytes memory for %s", hostConfig.Memory, task.Name) 914 d.logger.Printf("[DEBUG] driver.docker: using %d cpu shares for %s", hostConfig.CPUShares, task.Name) 915 d.logger.Printf("[DEBUG] driver.docker: binding directories %#v for %s", hostConfig.Binds, task.Name) 916 917 // set privileged mode 918 hostPrivileged := d.config.ReadBoolDefault(dockerPrivilegedConfigOption, false) 919 if driverConfig.Privileged && !hostPrivileged { 920 return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`) 921 } 922 hostConfig.Privileged = driverConfig.Privileged 923 924 // set SHM size 925 if driverConfig.ShmSize != 0 { 926 hostConfig.ShmSize = driverConfig.ShmSize 927 } 928 929 // set DNS servers 930 for _, ip := range driverConfig.DNSServers { 931 if net.ParseIP(ip) != nil { 932 hostConfig.DNS = append(hostConfig.DNS, ip) 933 } else { 934 d.logger.Printf("[ERR] driver.docker: invalid ip address for container dns server: %s", ip) 935 } 936 } 937 938 // set DNS search domains and extra hosts 939 hostConfig.DNSSearch = driverConfig.DNSSearchDomains 940 hostConfig.ExtraHosts = driverConfig.ExtraHosts 941 942 hostConfig.IpcMode = driverConfig.IpcMode 943 hostConfig.PidMode = driverConfig.PidMode 944 hostConfig.UTSMode = driverConfig.UTSMode 945 hostConfig.UsernsMode = driverConfig.UsernsMode 946 hostConfig.SecurityOpt = driverConfig.SecurityOpt 947 948 hostConfig.NetworkMode = driverConfig.NetworkMode 949 if hostConfig.NetworkMode == "" { 950 // docker default 951 d.logger.Printf("[DEBUG] driver.docker: networking mode not specified; defaulting to %s", defaultNetworkMode) 952 hostConfig.NetworkMode = defaultNetworkMode 953 } 954 955 // Setup port mapping and exposed ports 956 if len(task.Resources.Networks) == 0 { 957 d.logger.Println("[DEBUG] driver.docker: No network interfaces are available") 958 if len(driverConfig.PortMap) > 0 { 959 return c, fmt.Errorf("Trying to map ports but no network interface is available") 960 } 961 } else { 962 // TODO add support for more than one network 963 network := task.Resources.Networks[0] 964 publishedPorts := map[docker.Port][]docker.PortBinding{} 965 exposedPorts := map[docker.Port]struct{}{} 966 967 for _, port := range network.ReservedPorts { 968 // By default we will map the allocated port 1:1 to the container 969 containerPortInt := port.Value 970 971 // If the user has mapped a port using port_map we'll change it here 972 if mapped, ok := driverConfig.PortMap[port.Label]; ok { 973 containerPortInt = mapped 974 } 975 976 hostPortStr := strconv.Itoa(port.Value) 977 containerPort := docker.Port(strconv.Itoa(containerPortInt)) 978 979 publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr) 980 publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr) 981 d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (static)", network.IP, port.Value, port.Value) 982 983 exposedPorts[containerPort+"/tcp"] = struct{}{} 984 exposedPorts[containerPort+"/udp"] = struct{}{} 985 d.logger.Printf("[DEBUG] driver.docker: exposed port %d", port.Value) 986 } 987 988 for _, port := range network.DynamicPorts { 989 // By default we will map the allocated port 1:1 to the container 990 containerPortInt := port.Value 991 992 // If the user has mapped a port using port_map we'll change it here 993 if mapped, ok := driverConfig.PortMap[port.Label]; ok { 994 containerPortInt = mapped 995 } 996 997 hostPortStr := strconv.Itoa(port.Value) 998 containerPort := docker.Port(strconv.Itoa(containerPortInt)) 999 1000 publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr) 1001 publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr) 1002 d.logger.Printf("[DEBUG] driver.docker: allocated port %s:%d -> %d (mapped)", network.IP, port.Value, containerPortInt) 1003 1004 exposedPorts[containerPort+"/tcp"] = struct{}{} 1005 exposedPorts[containerPort+"/udp"] = struct{}{} 1006 d.logger.Printf("[DEBUG] driver.docker: exposed port %s", containerPort) 1007 } 1008 1009 hostConfig.PortBindings = publishedPorts 1010 config.ExposedPorts = exposedPorts 1011 } 1012 1013 parsedArgs := ctx.TaskEnv.ParseAndReplace(driverConfig.Args) 1014 1015 // If the user specified a custom command to run, we'll inject it here. 1016 if driverConfig.Command != "" { 1017 // Validate command 1018 if err := validateCommand(driverConfig.Command, "args"); err != nil { 1019 return c, err 1020 } 1021 1022 cmd := []string{driverConfig.Command} 1023 if len(driverConfig.Args) != 0 { 1024 cmd = append(cmd, parsedArgs...) 1025 } 1026 d.logger.Printf("[DEBUG] driver.docker: setting container startup command to: %s", strings.Join(cmd, " ")) 1027 config.Cmd = cmd 1028 } else if len(driverConfig.Args) != 0 { 1029 config.Cmd = parsedArgs 1030 } 1031 1032 if len(driverConfig.Labels) > 0 { 1033 config.Labels = driverConfig.Labels 1034 d.logger.Printf("[DEBUG] driver.docker: applied labels on the container: %+v", config.Labels) 1035 } 1036 1037 config.Env = ctx.TaskEnv.List() 1038 1039 containerName := fmt.Sprintf("%s-%s", task.Name, d.DriverContext.allocID) 1040 d.logger.Printf("[DEBUG] driver.docker: setting container name to: %s", containerName) 1041 1042 var networkingConfig *docker.NetworkingConfig 1043 if len(driverConfig.NetworkAliases) > 0 || driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" { 1044 networkingConfig = &docker.NetworkingConfig{ 1045 EndpointsConfig: map[string]*docker.EndpointConfig{ 1046 hostConfig.NetworkMode: &docker.EndpointConfig{}, 1047 }, 1048 } 1049 } 1050 1051 if len(driverConfig.NetworkAliases) > 0 { 1052 networkingConfig.EndpointsConfig[hostConfig.NetworkMode].Aliases = driverConfig.NetworkAliases 1053 d.logger.Printf("[DEBUG] driver.docker: using network_mode %q with network aliases: %v", 1054 hostConfig.NetworkMode, strings.Join(driverConfig.NetworkAliases, ", ")) 1055 } 1056 1057 if driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" { 1058 networkingConfig.EndpointsConfig[hostConfig.NetworkMode].IPAMConfig = &docker.EndpointIPAMConfig{ 1059 IPv4Address: driverConfig.IPv4Address, 1060 IPv6Address: driverConfig.IPv6Address, 1061 } 1062 d.logger.Printf("[DEBUG] driver.docker: using network_mode %q with ipv4: %q and ipv6: %q", 1063 hostConfig.NetworkMode, driverConfig.IPv4Address, driverConfig.IPv6Address) 1064 } 1065 1066 if driverConfig.MacAddress != "" { 1067 config.MacAddress = driverConfig.MacAddress 1068 d.logger.Printf("[DEBUG] driver.docker: using pinned mac address: %q", config.MacAddress) 1069 } 1070 1071 return docker.CreateContainerOptions{ 1072 Name: containerName, 1073 Config: config, 1074 HostConfig: hostConfig, 1075 NetworkingConfig: networkingConfig, 1076 }, nil 1077 } 1078 1079 func (d *DockerDriver) Periodic() (bool, time.Duration) { 1080 return true, 15 * time.Second 1081 } 1082 1083 // createImage creates a docker image either by pulling it from a registry or by 1084 // loading it from the file system 1085 func (d *DockerDriver) createImage(driverConfig *DockerDriverConfig, client *docker.Client, taskDir *allocdir.TaskDir) (string, error) { 1086 image := driverConfig.ImageName 1087 repo, tag := docker.ParseRepositoryTag(image) 1088 if tag == "" { 1089 tag = "latest" 1090 } 1091 1092 coordinator, callerID := d.getDockerCoordinator(client) 1093 1094 // We're going to check whether the image is already downloaded. If the tag 1095 // is "latest", or ForcePull is set, we have to check for a new version every time so we don't 1096 // bother to check and cache the id here. We'll download first, then cache. 1097 if driverConfig.ForcePull { 1098 d.logger.Printf("[DEBUG] driver.docker: force pull image '%s:%s' instead of inspecting local", repo, tag) 1099 } else if tag != "latest" { 1100 if dockerImage, _ := client.InspectImage(image); dockerImage != nil { 1101 // Image exists so just increment its reference count 1102 coordinator.IncrementImageReference(dockerImage.ID, image, callerID) 1103 return dockerImage.ID, nil 1104 } 1105 } 1106 1107 // Load the image if specified 1108 if driverConfig.LoadImage != "" { 1109 return d.loadImage(driverConfig, client, taskDir) 1110 } 1111 1112 // Download the image 1113 return d.pullImage(driverConfig, client, repo, tag) 1114 } 1115 1116 // pullImage creates an image by pulling it from a docker registry 1117 func (d *DockerDriver) pullImage(driverConfig *DockerDriverConfig, client *docker.Client, repo, tag string) (id string, err error) { 1118 authOptions, err := d.resolveRegistryAuthentication(driverConfig, repo) 1119 if err != nil { 1120 if d.driverConfig.AuthSoftFail { 1121 d.logger.Printf("[WARN] Failed to find docker auth for repo %q: %v", repo, err) 1122 } else { 1123 return "", fmt.Errorf("Failed to find docker auth for repo %q: %v", repo, err) 1124 } 1125 } 1126 1127 if authIsEmpty(authOptions) { 1128 d.logger.Printf("[DEBUG] driver.docker: did not find docker auth for repo %q", repo) 1129 } 1130 1131 d.emitEvent("Downloading image %s:%s", repo, tag) 1132 coordinator, callerID := d.getDockerCoordinator(client) 1133 return coordinator.PullImage(driverConfig.ImageName, authOptions, callerID) 1134 } 1135 1136 // authBackend encapsulates a function that resolves registry credentials. 1137 type authBackend func(string) (*docker.AuthConfiguration, error) 1138 1139 // resolveRegistryAuthentication attempts to retrieve auth credentials for the 1140 // repo, trying all authentication-backends possible. 1141 func (d *DockerDriver) resolveRegistryAuthentication(driverConfig *DockerDriverConfig, repo string) (*docker.AuthConfiguration, error) { 1142 return firstValidAuth(repo, []authBackend{ 1143 authFromTaskConfig(driverConfig), 1144 authFromDockerConfig(d.config.Read("docker.auth.config")), 1145 authFromHelper(d.config.Read("docker.auth.helper")), 1146 }) 1147 } 1148 1149 // loadImage creates an image by loading it from the file system 1150 func (d *DockerDriver) loadImage(driverConfig *DockerDriverConfig, client *docker.Client, 1151 taskDir *allocdir.TaskDir) (id string, err error) { 1152 1153 archive := filepath.Join(taskDir.LocalDir, driverConfig.LoadImage) 1154 d.logger.Printf("[DEBUG] driver.docker: loading image from: %v", archive) 1155 1156 f, err := os.Open(archive) 1157 if err != nil { 1158 return "", fmt.Errorf("unable to open image archive: %v", err) 1159 } 1160 1161 if err := client.LoadImage(docker.LoadImageOptions{InputStream: f}); err != nil { 1162 return "", err 1163 } 1164 f.Close() 1165 1166 dockerImage, err := client.InspectImage(driverConfig.ImageName) 1167 if err != nil { 1168 return "", recoverableErrTimeouts(err) 1169 } 1170 1171 coordinator, callerID := d.getDockerCoordinator(client) 1172 coordinator.IncrementImageReference(dockerImage.ID, driverConfig.ImageName, callerID) 1173 return dockerImage.ID, nil 1174 } 1175 1176 // createContainer creates the container given the passed configuration. It 1177 // attempts to handle any transient Docker errors. 1178 func (d *DockerDriver) createContainer(config docker.CreateContainerOptions) (*docker.Container, error) { 1179 // Create a container 1180 attempted := 0 1181 CREATE: 1182 container, createErr := client.CreateContainer(config) 1183 if createErr == nil { 1184 return container, nil 1185 } 1186 1187 d.logger.Printf("[DEBUG] driver.docker: failed to create container %q from image %q (ID: %q) (attempt %d): %v", 1188 config.Name, d.driverConfig.ImageName, d.imageID, attempted+1, createErr) 1189 if strings.Contains(strings.ToLower(createErr.Error()), "container already exists") { 1190 containers, err := client.ListContainers(docker.ListContainersOptions{ 1191 All: true, 1192 }) 1193 if err != nil { 1194 d.logger.Printf("[ERR] driver.docker: failed to query list of containers matching name:%s", config.Name) 1195 return nil, recoverableErrTimeouts(fmt.Errorf("Failed to query list of containers: %s", err)) 1196 } 1197 1198 // Delete matching containers 1199 // Adding a / infront of the container name since Docker returns the 1200 // container names with a / pre-pended to the Nomad generated container names 1201 containerName := "/" + config.Name 1202 d.logger.Printf("[DEBUG] driver.docker: searching for container name %q to purge", containerName) 1203 for _, shimContainer := range containers { 1204 d.logger.Printf("[DEBUG] driver.docker: listed container %+v", container) 1205 found := false 1206 for _, name := range shimContainer.Names { 1207 if name == containerName { 1208 found = true 1209 break 1210 } 1211 } 1212 1213 if !found { 1214 continue 1215 } 1216 1217 // Inspect the container and if the container isn't dead then return 1218 // the container 1219 container, err := client.InspectContainer(shimContainer.ID) 1220 if err != nil { 1221 err = fmt.Errorf("Failed to inspect container %s: %s", shimContainer.ID, err) 1222 1223 // This error is always recoverable as it could 1224 // be caused by races between listing 1225 // containers and this container being removed. 1226 // See #2802 1227 return nil, structs.NewRecoverableError(err, true) 1228 } 1229 if container != nil && (container.State.Running || container.State.FinishedAt.IsZero()) { 1230 return container, nil 1231 } 1232 1233 err = client.RemoveContainer(docker.RemoveContainerOptions{ 1234 ID: container.ID, 1235 Force: true, 1236 }) 1237 if err != nil { 1238 d.logger.Printf("[ERR] driver.docker: failed to purge container %s", container.ID) 1239 return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err)) 1240 } else if err == nil { 1241 d.logger.Printf("[INFO] driver.docker: purged container %s", container.ID) 1242 } 1243 } 1244 1245 if attempted < 5 { 1246 attempted++ 1247 time.Sleep(1 * time.Second) 1248 goto CREATE 1249 } 1250 } else if strings.Contains(strings.ToLower(createErr.Error()), "no such image") { 1251 // There is still a very small chance this is possible even with the 1252 // coordinator so retry. 1253 return nil, structs.NewRecoverableError(createErr, true) 1254 } 1255 1256 return nil, recoverableErrTimeouts(createErr) 1257 } 1258 1259 // startContainer starts the passed container. It attempts to handle any 1260 // transient Docker errors. 1261 func (d *DockerDriver) startContainer(c *docker.Container) error { 1262 // Start a container 1263 attempted := 0 1264 START: 1265 startErr := client.StartContainer(c.ID, c.HostConfig) 1266 if startErr == nil { 1267 return nil 1268 } 1269 1270 d.logger.Printf("[DEBUG] driver.docker: failed to start container %q (attempt %d): %v", c.ID, attempted+1, startErr) 1271 1272 // If it is a 500 error it is likely we can retry and be successful 1273 if strings.Contains(startErr.Error(), "API error (500)") { 1274 if attempted < 5 { 1275 attempted++ 1276 time.Sleep(1 * time.Second) 1277 goto START 1278 } 1279 } 1280 1281 return recoverableErrTimeouts(startErr) 1282 } 1283 1284 func (d *DockerDriver) Open(ctx *ExecContext, handleID string) (DriverHandle, error) { 1285 // Split the handle 1286 pidBytes := []byte(strings.TrimPrefix(handleID, "DOCKER:")) 1287 pid := &dockerPID{} 1288 if err := json.Unmarshal(pidBytes, pid); err != nil { 1289 return nil, fmt.Errorf("Failed to parse handle '%s': %v", handleID, err) 1290 } 1291 d.logger.Printf("[INFO] driver.docker: re-attaching to docker process: %s", pid.ContainerID) 1292 d.logger.Printf("[DEBUG] driver.docker: re-attached to handle: %s", handleID) 1293 pluginConfig := &plugin.ClientConfig{ 1294 Reattach: pid.PluginConfig.PluginConfig(), 1295 } 1296 1297 client, waitClient, err := d.dockerClients() 1298 if err != nil { 1299 return nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 1300 } 1301 1302 // Look for a running container with this ID 1303 containers, err := client.ListContainers(docker.ListContainersOptions{ 1304 Filters: map[string][]string{ 1305 "id": []string{pid.ContainerID}, 1306 }, 1307 }) 1308 if err != nil { 1309 return nil, fmt.Errorf("Failed to query for container %s: %v", pid.ContainerID, err) 1310 } 1311 1312 found := false 1313 for _, container := range containers { 1314 if container.ID == pid.ContainerID { 1315 found = true 1316 } 1317 } 1318 if !found { 1319 return nil, fmt.Errorf("Failed to find container %s", pid.ContainerID) 1320 } 1321 exec, pluginClient, err := createExecutorWithConfig(pluginConfig, d.config.LogOutput) 1322 if err != nil { 1323 d.logger.Printf("[INFO] driver.docker: couldn't re-attach to the plugin process: %v", err) 1324 d.logger.Printf("[DEBUG] driver.docker: stopping container %q", pid.ContainerID) 1325 if e := client.StopContainer(pid.ContainerID, uint(pid.KillTimeout.Seconds())); e != nil { 1326 d.logger.Printf("[DEBUG] driver.docker: couldn't stop container: %v", e) 1327 } 1328 return nil, err 1329 } 1330 1331 ver, _ := exec.Version() 1332 d.logger.Printf("[DEBUG] driver.docker: version of executor: %v", ver.Version) 1333 1334 // Increment the reference count since we successfully attached to this 1335 // container 1336 coordinator, callerID := d.getDockerCoordinator(client) 1337 coordinator.IncrementImageReference(pid.ImageID, pid.Image, callerID) 1338 1339 // Return a driver handle 1340 h := &DockerHandle{ 1341 client: client, 1342 waitClient: waitClient, 1343 executor: exec, 1344 pluginClient: pluginClient, 1345 logger: d.logger, 1346 Image: pid.Image, 1347 ImageID: pid.ImageID, 1348 containerID: pid.ContainerID, 1349 version: pid.Version, 1350 killTimeout: pid.KillTimeout, 1351 maxKillTimeout: pid.MaxKillTimeout, 1352 doneCh: make(chan bool), 1353 waitCh: make(chan *dstructs.WaitResult, 1), 1354 } 1355 go h.collectStats() 1356 go h.run() 1357 return h, nil 1358 } 1359 1360 func (h *DockerHandle) ID() string { 1361 // Return a handle to the PID 1362 pid := dockerPID{ 1363 Version: h.version, 1364 ContainerID: h.containerID, 1365 Image: h.Image, 1366 ImageID: h.ImageID, 1367 KillTimeout: h.killTimeout, 1368 MaxKillTimeout: h.maxKillTimeout, 1369 PluginConfig: NewPluginReattachConfig(h.pluginClient.ReattachConfig()), 1370 } 1371 data, err := json.Marshal(pid) 1372 if err != nil { 1373 h.logger.Printf("[ERR] driver.docker: failed to marshal docker PID to JSON: %s", err) 1374 } 1375 return fmt.Sprintf("DOCKER:%s", string(data)) 1376 } 1377 1378 func (h *DockerHandle) ContainerID() string { 1379 return h.containerID 1380 } 1381 1382 func (h *DockerHandle) WaitCh() chan *dstructs.WaitResult { 1383 return h.waitCh 1384 } 1385 1386 func (h *DockerHandle) Update(task *structs.Task) error { 1387 // Store the updated kill timeout. 1388 h.killTimeout = GetKillTimeout(task.KillTimeout, h.maxKillTimeout) 1389 if err := h.executor.UpdateTask(task); err != nil { 1390 h.logger.Printf("[DEBUG] driver.docker: failed to update log config: %v", err) 1391 } 1392 1393 // Update is not possible 1394 return nil 1395 } 1396 1397 func (h *DockerHandle) Exec(ctx context.Context, cmd string, args []string) ([]byte, int, error) { 1398 fullCmd := make([]string, len(args)+1) 1399 fullCmd[0] = cmd 1400 copy(fullCmd[1:], args) 1401 createExecOpts := docker.CreateExecOptions{ 1402 AttachStdin: false, 1403 AttachStdout: true, 1404 AttachStderr: true, 1405 Tty: false, 1406 Cmd: fullCmd, 1407 Container: h.containerID, 1408 Context: ctx, 1409 } 1410 exec, err := h.client.CreateExec(createExecOpts) 1411 if err != nil { 1412 return nil, 0, err 1413 } 1414 1415 output, _ := circbuf.NewBuffer(int64(dstructs.CheckBufSize)) 1416 startOpts := docker.StartExecOptions{ 1417 Detach: false, 1418 Tty: false, 1419 OutputStream: output, 1420 ErrorStream: output, 1421 Context: ctx, 1422 } 1423 if err := client.StartExec(exec.ID, startOpts); err != nil { 1424 return nil, 0, err 1425 } 1426 res, err := client.InspectExec(exec.ID) 1427 if err != nil { 1428 return output.Bytes(), 0, err 1429 } 1430 return output.Bytes(), res.ExitCode, nil 1431 } 1432 1433 func (h *DockerHandle) Signal(s os.Signal) error { 1434 // Convert types 1435 sysSig, ok := s.(syscall.Signal) 1436 if !ok { 1437 return fmt.Errorf("Failed to determine signal number") 1438 } 1439 1440 dockerSignal := docker.Signal(sysSig) 1441 opts := docker.KillContainerOptions{ 1442 ID: h.containerID, 1443 Signal: dockerSignal, 1444 } 1445 return h.client.KillContainer(opts) 1446 1447 } 1448 1449 // Kill is used to terminate the task. This uses `docker stop -t killTimeout` 1450 func (h *DockerHandle) Kill() error { 1451 // Stop the container 1452 err := h.client.StopContainer(h.containerID, uint(h.killTimeout.Seconds())) 1453 if err != nil { 1454 h.executor.Exit() 1455 h.pluginClient.Kill() 1456 1457 // Container has already been removed. 1458 if strings.Contains(err.Error(), NoSuchContainerError) { 1459 h.logger.Printf("[DEBUG] driver.docker: attempted to stop non-existent container %s", h.containerID) 1460 return nil 1461 } 1462 h.logger.Printf("[ERR] driver.docker: failed to stop container %s: %v", h.containerID, err) 1463 return fmt.Errorf("Failed to stop container %s: %s", h.containerID, err) 1464 } 1465 h.logger.Printf("[INFO] driver.docker: stopped container %s", h.containerID) 1466 return nil 1467 } 1468 1469 func (h *DockerHandle) Stats() (*cstructs.TaskResourceUsage, error) { 1470 h.resourceUsageLock.RLock() 1471 defer h.resourceUsageLock.RUnlock() 1472 var err error 1473 if h.resourceUsage == nil { 1474 err = fmt.Errorf("stats collection hasn't started yet") 1475 } 1476 return h.resourceUsage, err 1477 } 1478 1479 func (h *DockerHandle) run() { 1480 // Wait for it... 1481 exitCode, werr := h.waitClient.WaitContainer(h.containerID) 1482 if werr != nil { 1483 h.logger.Printf("[ERR] driver.docker: failed to wait for %s; container already terminated", h.containerID) 1484 } 1485 1486 if exitCode != 0 { 1487 werr = fmt.Errorf("Docker container exited with non-zero exit code: %d", exitCode) 1488 } 1489 1490 close(h.doneCh) 1491 1492 // Shutdown the syslog collector 1493 if err := h.executor.Exit(); err != nil { 1494 h.logger.Printf("[ERR] driver.docker: failed to kill the syslog collector: %v", err) 1495 } 1496 h.pluginClient.Kill() 1497 1498 // Stop the container just incase the docker daemon's wait returned 1499 // incorrectly 1500 if err := h.client.StopContainer(h.containerID, 0); err != nil { 1501 _, noSuchContainer := err.(*docker.NoSuchContainer) 1502 _, containerNotRunning := err.(*docker.ContainerNotRunning) 1503 if !containerNotRunning && !noSuchContainer { 1504 h.logger.Printf("[ERR] driver.docker: error stopping container: %v", err) 1505 } 1506 } 1507 1508 // Remove the container 1509 if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, RemoveVolumes: true, Force: true}); err != nil { 1510 h.logger.Printf("[ERR] driver.docker: error removing container: %v", err) 1511 } 1512 1513 // Send the results 1514 h.waitCh <- dstructs.NewWaitResult(exitCode, 0, werr) 1515 close(h.waitCh) 1516 } 1517 1518 // collectStats starts collecting resource usage stats of a docker container 1519 func (h *DockerHandle) collectStats() { 1520 statsCh := make(chan *docker.Stats) 1521 statsOpts := docker.StatsOptions{ID: h.containerID, Done: h.doneCh, Stats: statsCh, Stream: true} 1522 go func() { 1523 //TODO handle Stats error 1524 if err := h.waitClient.Stats(statsOpts); err != nil { 1525 h.logger.Printf("[DEBUG] driver.docker: error collecting stats from container %s: %v", h.containerID, err) 1526 } 1527 }() 1528 numCores := runtime.NumCPU() 1529 for { 1530 select { 1531 case s := <-statsCh: 1532 if s != nil { 1533 ms := &cstructs.MemoryStats{ 1534 RSS: s.MemoryStats.Stats.Rss, 1535 Cache: s.MemoryStats.Stats.Cache, 1536 Swap: s.MemoryStats.Stats.Swap, 1537 MaxUsage: s.MemoryStats.MaxUsage, 1538 Measured: DockerMeasuredMemStats, 1539 } 1540 1541 cs := &cstructs.CpuStats{ 1542 ThrottledPeriods: s.CPUStats.ThrottlingData.ThrottledPeriods, 1543 ThrottledTime: s.CPUStats.ThrottlingData.ThrottledTime, 1544 Measured: DockerMeasuredCpuStats, 1545 } 1546 1547 // Calculate percentage 1548 cores := len(s.CPUStats.CPUUsage.PercpuUsage) 1549 cs.Percent = calculatePercent( 1550 s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, 1551 s.CPUStats.SystemCPUUsage, s.PreCPUStats.SystemCPUUsage, cores) 1552 cs.SystemMode = calculatePercent( 1553 s.CPUStats.CPUUsage.UsageInKernelmode, s.PreCPUStats.CPUUsage.UsageInKernelmode, 1554 s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, cores) 1555 cs.UserMode = calculatePercent( 1556 s.CPUStats.CPUUsage.UsageInUsermode, s.PreCPUStats.CPUUsage.UsageInUsermode, 1557 s.CPUStats.CPUUsage.TotalUsage, s.PreCPUStats.CPUUsage.TotalUsage, cores) 1558 cs.TotalTicks = (cs.Percent / 100) * shelpers.TotalTicksAvailable() / float64(numCores) 1559 1560 h.resourceUsageLock.Lock() 1561 h.resourceUsage = &cstructs.TaskResourceUsage{ 1562 ResourceUsage: &cstructs.ResourceUsage{ 1563 MemoryStats: ms, 1564 CpuStats: cs, 1565 }, 1566 Timestamp: s.Read.UTC().UnixNano(), 1567 } 1568 h.resourceUsageLock.Unlock() 1569 } 1570 case <-h.doneCh: 1571 return 1572 } 1573 } 1574 } 1575 1576 func calculatePercent(newSample, oldSample, newTotal, oldTotal uint64, cores int) float64 { 1577 numerator := newSample - oldSample 1578 denom := newTotal - oldTotal 1579 if numerator <= 0 || denom <= 0 { 1580 return 0.0 1581 } 1582 1583 return (float64(numerator) / float64(denom)) * float64(cores) * 100.0 1584 } 1585 1586 // loadDockerConfig loads the docker config at the specified path, returning an 1587 // error if it couldn't be read. 1588 func loadDockerConfig(file string) (*configfile.ConfigFile, error) { 1589 f, err := os.Open(file) 1590 if err != nil { 1591 return nil, fmt.Errorf("Failed to open auth config file: %v, error: %v", file, err) 1592 } 1593 defer f.Close() 1594 1595 cfile := new(configfile.ConfigFile) 1596 if err = cfile.LoadFromReader(f); err != nil { 1597 return nil, fmt.Errorf("Failed to parse auth config file: %v", err) 1598 } 1599 return cfile, nil 1600 } 1601 1602 // parseRepositoryInfo takes a repo and returns the Docker RepositoryInfo. This 1603 // is useful for interacting with a Docker config object. 1604 func parseRepositoryInfo(repo string) (*registry.RepositoryInfo, error) { 1605 name, err := reference.ParseNamed(repo) 1606 if err != nil { 1607 return nil, fmt.Errorf("Failed to parse named repo %q: %v", repo, err) 1608 } 1609 1610 repoInfo, err := registry.ParseRepositoryInfo(name) 1611 if err != nil { 1612 return nil, fmt.Errorf("Failed to parse repository: %v", err) 1613 } 1614 1615 return repoInfo, nil 1616 } 1617 1618 // firstValidAuth tries a list of auth backends, returning first error or AuthConfiguration 1619 func firstValidAuth(repo string, backends []authBackend) (*docker.AuthConfiguration, error) { 1620 for _, backend := range backends { 1621 auth, err := backend(repo) 1622 if auth != nil || err != nil { 1623 return auth, err 1624 } 1625 } 1626 return nil, nil 1627 } 1628 1629 // authFromTaskConfig generates an authBackend for any auth given in the task-configuration 1630 func authFromTaskConfig(driverConfig *DockerDriverConfig) authBackend { 1631 return func(string) (*docker.AuthConfiguration, error) { 1632 if len(driverConfig.Auth) == 0 { 1633 return nil, nil 1634 } 1635 auth := driverConfig.Auth[0] 1636 return &docker.AuthConfiguration{ 1637 Username: auth.Username, 1638 Password: auth.Password, 1639 Email: auth.Email, 1640 ServerAddress: auth.ServerAddress, 1641 }, nil 1642 } 1643 } 1644 1645 // authFromDockerConfig generate an authBackend for a dockercfg-compatible file. 1646 // The authBacken can either be from explicit auth definitions or via credential 1647 // helpers 1648 func authFromDockerConfig(file string) authBackend { 1649 return func(repo string) (*docker.AuthConfiguration, error) { 1650 if file == "" { 1651 return nil, nil 1652 } 1653 repoInfo, err := parseRepositoryInfo(repo) 1654 if err != nil { 1655 return nil, err 1656 } 1657 1658 cfile, err := loadDockerConfig(file) 1659 if err != nil { 1660 return nil, err 1661 } 1662 1663 return firstValidAuth(repo, []authBackend{ 1664 func(string) (*docker.AuthConfiguration, error) { 1665 dockerAuthConfig := registry.ResolveAuthConfig(cfile.AuthConfigs, repoInfo.Index) 1666 auth := &docker.AuthConfiguration{ 1667 Username: dockerAuthConfig.Username, 1668 Password: dockerAuthConfig.Password, 1669 Email: dockerAuthConfig.Email, 1670 ServerAddress: dockerAuthConfig.ServerAddress, 1671 } 1672 if authIsEmpty(auth) { 1673 return nil, nil 1674 } 1675 return auth, nil 1676 }, 1677 authFromHelper(cfile.CredentialHelpers[registry.GetAuthConfigKey(repoInfo.Index)]), 1678 authFromHelper(cfile.CredentialsStore), 1679 }) 1680 } 1681 } 1682 1683 // authFromHelper generates an authBackend for a docker-credentials-helper; 1684 // A script taking the requested domain on input, outputting JSON with 1685 // "Username" and "Secret" 1686 func authFromHelper(helperName string) authBackend { 1687 return func(repo string) (*docker.AuthConfiguration, error) { 1688 if helperName == "" { 1689 return nil, nil 1690 } 1691 helper := dockerAuthHelperPrefix + helperName 1692 cmd := exec.Command(helper, "get") 1693 cmd.Stdin = strings.NewReader(repo) 1694 1695 output, err := cmd.Output() 1696 if err != nil { 1697 switch e := err.(type) { 1698 default: 1699 return nil, err 1700 case *exec.ExitError: 1701 return nil, fmt.Errorf("%s failed with stderr: %s", helper, string(e.Stderr)) 1702 } 1703 } 1704 1705 var response map[string]string 1706 if err := json.Unmarshal(output, &response); err != nil { 1707 return nil, err 1708 } 1709 1710 auth := &docker.AuthConfiguration{ 1711 Username: response["Username"], 1712 Password: response["Secret"], 1713 } 1714 1715 if authIsEmpty(auth) { 1716 return nil, nil 1717 } 1718 return auth, nil 1719 } 1720 } 1721 1722 // authIsEmpty returns if auth is nil or an empty structure 1723 func authIsEmpty(auth *docker.AuthConfiguration) bool { 1724 if auth == nil { 1725 return false 1726 } 1727 return auth.Username == "" && 1728 auth.Password == "" && 1729 auth.Email == "" && 1730 auth.ServerAddress == "" 1731 }