github.com/bigcommerce/nomad@v0.9.3-bc/drivers/docker/driver.go (about) 1 package docker 2 3 import ( 4 "context" 5 "fmt" 6 "net" 7 "os" 8 "path/filepath" 9 "runtime" 10 "strconv" 11 "strings" 12 "sync" 13 "time" 14 15 docker "github.com/fsouza/go-dockerclient" 16 "github.com/hashicorp/consul-template/signals" 17 hclog "github.com/hashicorp/go-hclog" 18 multierror "github.com/hashicorp/go-multierror" 19 plugin "github.com/hashicorp/go-plugin" 20 "github.com/hashicorp/nomad/client/taskenv" 21 "github.com/hashicorp/nomad/drivers/docker/docklog" 22 "github.com/hashicorp/nomad/drivers/shared/eventer" 23 nstructs "github.com/hashicorp/nomad/nomad/structs" 24 "github.com/hashicorp/nomad/plugins/base" 25 "github.com/hashicorp/nomad/plugins/drivers" 26 "github.com/hashicorp/nomad/plugins/shared/structs" 27 pstructs "github.com/hashicorp/nomad/plugins/shared/structs" 28 ) 29 30 var ( 31 // createClientsLock is a lock that protects reading/writing global client 32 // variables 33 createClientsLock sync.Mutex 34 35 // client is a docker client with a timeout of 5 minutes. This is for doing 36 // all operations with the docker daemon besides which are not long running 37 // such as creating, killing containers, etc. 38 client *docker.Client 39 40 // waitClient is a docker client with no timeouts. This is used for long 41 // running operations such as waiting on containers and collect stats 42 waitClient *docker.Client 43 44 // recoverableErrTimeouts returns a recoverable error if the error was due 45 // to timeouts 46 recoverableErrTimeouts = func(err error) error { 47 r := false 48 if strings.Contains(err.Error(), "Client.Timeout exceeded while awaiting headers") || 49 strings.Contains(err.Error(), "EOF") { 50 r = true 51 } 52 return nstructs.NewRecoverableError(err, r) 53 } 54 55 // taskHandleVersion is the version of task handle which this driver sets 56 // and understands how to decode driver state 57 taskHandleVersion = 1 58 59 // Nvidia-container-runtime environment variable names 60 nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES" 61 ) 62 63 type Driver struct { 64 // eventer is used to handle multiplexing of TaskEvents calls such that an 65 // event can be broadcast to all callers 66 eventer *eventer.Eventer 67 68 // config contains the runtime configuration for the driver set by the 69 // SetConfig RPC 70 config *DriverConfig 71 72 // clientConfig contains a driver specific subset of the Nomad client 73 // configuration 74 clientConfig *base.ClientDriverConfig 75 76 // ctx is the context for the driver. It is passed to other subsystems to 77 // coordinate shutdown 78 ctx context.Context 79 80 // signalShutdown is called when the driver is shutting down and cancels the 81 // ctx passed to any subsystems 82 signalShutdown context.CancelFunc 83 84 // tasks is the in memory datastore mapping taskIDs to taskHandles 85 tasks *taskStore 86 87 // coordinator is what tracks multiple image pulls against the same docker image 88 coordinator *dockerCoordinator 89 90 // logger will log to the Nomad agent 91 logger hclog.Logger 92 93 // gpuRuntime indicates nvidia-docker runtime availability 94 gpuRuntime bool 95 96 // A tri-state boolean to know if the fingerprinting has happened and 97 // whether it has been successful 98 fingerprintSuccess *bool 99 fingerprintLock sync.RWMutex 100 101 // A boolean to know if the docker driver has ever been correctly detected 102 // for use during fingerprinting. 103 detected bool 104 detectedLock sync.RWMutex 105 } 106 107 // NewDockerDriver returns a docker implementation of a driver plugin 108 func NewDockerDriver(logger hclog.Logger) drivers.DriverPlugin { 109 ctx, cancel := context.WithCancel(context.Background()) 110 logger = logger.Named(pluginName) 111 return &Driver{ 112 eventer: eventer.NewEventer(ctx, logger), 113 config: &DriverConfig{}, 114 tasks: newTaskStore(), 115 ctx: ctx, 116 signalShutdown: cancel, 117 logger: logger, 118 } 119 } 120 121 func (d *Driver) reattachToDockerLogger(reattachConfig *structs.ReattachConfig) (docklog.DockerLogger, *plugin.Client, error) { 122 reattach, err := pstructs.ReattachConfigToGoPlugin(reattachConfig) 123 if err != nil { 124 return nil, nil, err 125 } 126 127 dlogger, dloggerPluginClient, err := docklog.ReattachDockerLogger(reattach) 128 if err != nil { 129 return nil, nil, fmt.Errorf("failed to reattach to docker logger process: %v", err) 130 } 131 132 return dlogger, dloggerPluginClient, nil 133 } 134 135 func (d *Driver) setupNewDockerLogger(container *docker.Container, cfg *drivers.TaskConfig, startTime time.Time) (docklog.DockerLogger, *plugin.Client, error) { 136 dlogger, pluginClient, err := docklog.LaunchDockerLogger(d.logger) 137 if err != nil { 138 if pluginClient != nil { 139 pluginClient.Kill() 140 } 141 return nil, nil, fmt.Errorf("failed to launch docker logger plugin: %v", err) 142 } 143 144 if err := dlogger.Start(&docklog.StartOpts{ 145 Endpoint: d.config.Endpoint, 146 ContainerID: container.ID, 147 TTY: container.Config.Tty, 148 Stdout: cfg.StdoutPath, 149 Stderr: cfg.StderrPath, 150 TLSCert: d.config.TLS.Cert, 151 TLSKey: d.config.TLS.Key, 152 TLSCA: d.config.TLS.CA, 153 StartTime: startTime.Unix(), 154 }); err != nil { 155 pluginClient.Kill() 156 return nil, nil, fmt.Errorf("failed to launch docker logger process %s: %v", container.ID, err) 157 } 158 159 return dlogger, pluginClient, nil 160 } 161 162 func (d *Driver) RecoverTask(handle *drivers.TaskHandle) error { 163 if _, ok := d.tasks.Get(handle.Config.ID); ok { 164 return nil 165 } 166 167 // COMPAT(0.10): pre 0.9 upgrade path check 168 if handle.Version == 0 { 169 return d.recoverPre09Task(handle) 170 } 171 172 var handleState taskHandleState 173 if err := handle.GetDriverState(&handleState); err != nil { 174 return fmt.Errorf("failed to decode driver task state: %v", err) 175 } 176 177 client, _, err := d.dockerClients() 178 if err != nil { 179 return fmt.Errorf("failed to get docker client: %v", err) 180 } 181 182 container, err := client.InspectContainer(handleState.ContainerID) 183 if err != nil { 184 return fmt.Errorf("failed to inspect container for id %q: %v", handleState.ContainerID, err) 185 } 186 187 h := &taskHandle{ 188 client: client, 189 waitClient: waitClient, 190 logger: d.logger.With("container_id", container.ID), 191 task: handle.Config, 192 containerID: container.ID, 193 containerImage: container.Image, 194 doneCh: make(chan bool), 195 waitCh: make(chan struct{}), 196 removeContainerOnExit: d.config.GC.Container, 197 net: handleState.DriverNetwork, 198 } 199 200 h.dlogger, h.dloggerPluginClient, err = d.reattachToDockerLogger(handleState.ReattachConfig) 201 if err != nil { 202 d.logger.Warn("failed to reattach to docker logger process", "error", err) 203 204 h.dlogger, h.dloggerPluginClient, err = d.setupNewDockerLogger(container, handle.Config, time.Now()) 205 if err != nil { 206 if err := client.StopContainer(handleState.ContainerID, 0); err != nil { 207 d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err) 208 } 209 return fmt.Errorf("failed to setup replacement docker logger: %v", err) 210 } 211 212 if err := handle.SetDriverState(h.buildState()); err != nil { 213 if err := client.StopContainer(handleState.ContainerID, 0); err != nil { 214 d.logger.Warn("failed to stop container during cleanup", "container_id", handleState.ContainerID, "error", err) 215 } 216 return fmt.Errorf("failed to store driver state: %v", err) 217 } 218 } 219 220 d.tasks.Set(handle.Config.ID, h) 221 go h.run() 222 223 return nil 224 } 225 226 func (d *Driver) StartTask(cfg *drivers.TaskConfig) (*drivers.TaskHandle, *drivers.DriverNetwork, error) { 227 if _, ok := d.tasks.Get(cfg.ID); ok { 228 return nil, nil, fmt.Errorf("task with ID %q already started", cfg.ID) 229 } 230 231 var driverConfig TaskConfig 232 233 if err := cfg.DecodeDriverConfig(&driverConfig); err != nil { 234 return nil, nil, fmt.Errorf("failed to decode driver config: %v", err) 235 } 236 237 if driverConfig.Image == "" { 238 return nil, nil, fmt.Errorf("image name required for docker driver") 239 } 240 241 // Remove any http 242 if strings.HasPrefix(driverConfig.Image, "https://") { 243 driverConfig.Image = strings.Replace(driverConfig.Image, "https://", "", 1) 244 } 245 246 handle := drivers.NewTaskHandle(taskHandleVersion) 247 handle.Config = cfg 248 249 // Initialize docker API clients 250 client, _, err := d.dockerClients() 251 if err != nil { 252 return nil, nil, fmt.Errorf("Failed to connect to docker daemon: %s", err) 253 } 254 255 id, err := d.createImage(cfg, &driverConfig, client) 256 if err != nil { 257 return nil, nil, err 258 } 259 260 containerCfg, err := d.createContainerConfig(cfg, &driverConfig, driverConfig.Image) 261 if err != nil { 262 d.logger.Error("failed to create container configuration", "image_name", driverConfig.Image, 263 "image_id", id, "error", err) 264 return nil, nil, fmt.Errorf("Failed to create container configuration for image %q (%q): %v", driverConfig.Image, id, err) 265 } 266 267 startAttempts := 0 268 CREATE: 269 container, err := d.createContainer(client, containerCfg, &driverConfig) 270 if err != nil { 271 d.logger.Error("failed to create container", "error", err) 272 return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("failed to create container: %v", err), err) 273 } 274 275 d.logger.Info("created container", "container_id", container.ID) 276 277 // We don't need to start the container if the container is already running 278 // since we don't create containers which are already present on the host 279 // and are running 280 if !container.State.Running { 281 // Start the container 282 if err := d.startContainer(container); err != nil { 283 d.logger.Error("failed to start container", "container_id", container.ID, "error", err) 284 client.RemoveContainer(docker.RemoveContainerOptions{ 285 ID: container.ID, 286 Force: true, 287 }) 288 // Some sort of docker race bug, recreating the container usually works 289 if strings.Contains(err.Error(), "OCI runtime create failed: container with id exists:") && startAttempts < 5 { 290 startAttempts++ 291 d.logger.Debug("reattempting container create/start sequence", "attempt", startAttempts, "container_id", id) 292 goto CREATE 293 } 294 return nil, nil, nstructs.WrapRecoverable(fmt.Sprintf("Failed to start container %s: %s", container.ID, err), err) 295 } 296 297 // InspectContainer to get all of the container metadata as 298 // much of the metadata (eg networking) isn't populated until 299 // the container is started 300 runningContainer, err := client.InspectContainer(container.ID) 301 if err != nil { 302 msg := "failed to inspect started container" 303 d.logger.Error(msg, "error", err) 304 return nil, nil, nstructs.NewRecoverableError(fmt.Errorf("%s %s: %s", msg, container.ID, err), true) 305 } 306 container = runningContainer 307 d.logger.Info("started container", "container_id", container.ID) 308 } else { 309 d.logger.Debug("re-attaching to container", "container_id", 310 container.ID, "container_state", container.State.String()) 311 } 312 313 dlogger, pluginClient, err := d.setupNewDockerLogger(container, cfg, time.Unix(0, 0)) 314 if err != nil { 315 d.logger.Error("an error occurred after container startup, terminating container", "container_id", container.ID) 316 client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true}) 317 return nil, nil, err 318 } 319 320 // Detect container address 321 ip, autoUse := d.detectIP(container, &driverConfig) 322 323 net := &drivers.DriverNetwork{ 324 PortMap: driverConfig.PortMap, 325 IP: ip, 326 AutoAdvertise: autoUse, 327 } 328 329 // Return a driver handle 330 h := &taskHandle{ 331 client: client, 332 waitClient: waitClient, 333 dlogger: dlogger, 334 dloggerPluginClient: pluginClient, 335 logger: d.logger.With("container_id", container.ID), 336 task: cfg, 337 containerID: container.ID, 338 containerImage: container.Image, 339 doneCh: make(chan bool), 340 waitCh: make(chan struct{}), 341 removeContainerOnExit: d.config.GC.Container, 342 net: net, 343 } 344 345 if err := handle.SetDriverState(h.buildState()); err != nil { 346 d.logger.Error("error encoding container occurred after startup, terminating container", "container_id", container.ID, "error", err) 347 dlogger.Stop() 348 pluginClient.Kill() 349 client.RemoveContainer(docker.RemoveContainerOptions{ID: container.ID, Force: true}) 350 return nil, nil, err 351 } 352 353 d.tasks.Set(cfg.ID, h) 354 go h.run() 355 356 return handle, net, nil 357 } 358 359 // createContainerClient is the subset of Docker Client methods used by the 360 // createContainer method to ease testing subtle error conditions. 361 type createContainerClient interface { 362 CreateContainer(docker.CreateContainerOptions) (*docker.Container, error) 363 InspectContainer(id string) (*docker.Container, error) 364 ListContainers(docker.ListContainersOptions) ([]docker.APIContainers, error) 365 RemoveContainer(opts docker.RemoveContainerOptions) error 366 } 367 368 // createContainer creates the container given the passed configuration. It 369 // attempts to handle any transient Docker errors. 370 func (d *Driver) createContainer(client createContainerClient, config docker.CreateContainerOptions, 371 driverConfig *TaskConfig) (*docker.Container, error) { 372 // Create a container 373 attempted := 0 374 CREATE: 375 container, createErr := client.CreateContainer(config) 376 if createErr == nil { 377 return container, nil 378 } 379 380 d.logger.Debug("failed to create container", "container_name", 381 config.Name, "image_name", driverConfig.Image, "image_id", config.Config.Image, 382 "attempt", attempted+1, "error", createErr) 383 384 // Volume management tools like Portworx may not have detached a volume 385 // from a previous node before Nomad started a task replacement task. 386 // Treat these errors as recoverable so we retry. 387 if strings.Contains(strings.ToLower(createErr.Error()), "volume is attached on another node") { 388 return nil, nstructs.NewRecoverableError(createErr, true) 389 } 390 391 // If the container already exists determine whether it's already 392 // running or if it's dead and needs to be recreated. 393 if strings.Contains(strings.ToLower(createErr.Error()), "container already exists") { 394 containers, err := client.ListContainers(docker.ListContainersOptions{ 395 All: true, 396 }) 397 if err != nil { 398 d.logger.Error("failed to query list of containers matching name", "container_name", config.Name) 399 return nil, recoverableErrTimeouts(fmt.Errorf("Failed to query list of containers: %s", err)) 400 } 401 402 // Delete matching containers 403 // Adding a / infront of the container name since Docker returns the 404 // container names with a / pre-pended to the Nomad generated container names 405 containerName := "/" + config.Name 406 d.logger.Debug("searching for container to purge", "container_name", containerName) 407 for _, shimContainer := range containers { 408 d.logger.Debug("listed container", "names", hclog.Fmt("%+v", shimContainer.Names)) 409 found := false 410 for _, name := range shimContainer.Names { 411 if name == containerName { 412 d.logger.Debug("Found container", "containter_name", containerName, "container_id", shimContainer.ID) 413 found = true 414 break 415 } 416 } 417 418 if !found { 419 continue 420 } 421 422 // Inspect the container and if the container isn't dead then return 423 // the container 424 container, err := client.InspectContainer(shimContainer.ID) 425 if err != nil { 426 err = fmt.Errorf("Failed to inspect container %s: %s", shimContainer.ID, err) 427 428 // This error is always recoverable as it could 429 // be caused by races between listing 430 // containers and this container being removed. 431 // See #2802 432 return nil, nstructs.NewRecoverableError(err, true) 433 } 434 if container != nil && container.State.Running { 435 return container, nil 436 } 437 438 err = client.RemoveContainer(docker.RemoveContainerOptions{ 439 ID: container.ID, 440 Force: true, 441 }) 442 if err != nil { 443 d.logger.Error("failed to purge container", "container_id", container.ID) 444 return nil, recoverableErrTimeouts(fmt.Errorf("Failed to purge container %s: %s", container.ID, err)) 445 } else if err == nil { 446 d.logger.Info("purged container", "container_id", container.ID) 447 } 448 } 449 450 if attempted < 5 { 451 attempted++ 452 time.Sleep(1 * time.Second) 453 goto CREATE 454 } 455 } else if strings.Contains(strings.ToLower(createErr.Error()), "no such image") { 456 // There is still a very small chance this is possible even with the 457 // coordinator so retry. 458 return nil, nstructs.NewRecoverableError(createErr, true) 459 } 460 461 return nil, recoverableErrTimeouts(createErr) 462 } 463 464 // startContainer starts the passed container. It attempts to handle any 465 // transient Docker errors. 466 func (d *Driver) startContainer(c *docker.Container) error { 467 // Start a container 468 attempted := 0 469 START: 470 startErr := client.StartContainer(c.ID, c.HostConfig) 471 if startErr == nil { 472 return nil 473 } 474 475 d.logger.Debug("failed to start container", "container_id", c.ID, "attempt", attempted+1, "error", startErr) 476 477 // If it is a 500 error it is likely we can retry and be successful 478 if strings.Contains(startErr.Error(), "API error (500)") { 479 if attempted < 5 { 480 attempted++ 481 time.Sleep(1 * time.Second) 482 goto START 483 } 484 return nstructs.NewRecoverableError(startErr, true) 485 } 486 487 return recoverableErrTimeouts(startErr) 488 } 489 490 // createImage creates a docker image either by pulling it from a registry or by 491 // loading it from the file system 492 func (d *Driver) createImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (string, error) { 493 image := driverConfig.Image 494 repo, tag := parseDockerImage(image) 495 496 callerID := fmt.Sprintf("%s-%s", task.ID, task.Name) 497 498 // We're going to check whether the image is already downloaded. If the tag 499 // is "latest", or ForcePull is set, we have to check for a new version every time so we don't 500 // bother to check and cache the id here. We'll download first, then cache. 501 if driverConfig.ForcePull { 502 d.logger.Debug("force pulling image instead of inspecting local", "image_ref", dockerImageRef(repo, tag)) 503 } else if tag != "latest" { 504 if dockerImage, _ := client.InspectImage(image); dockerImage != nil { 505 // Image exists so just increment its reference count 506 d.coordinator.IncrementImageReference(dockerImage.ID, image, callerID) 507 return dockerImage.ID, nil 508 } 509 } 510 511 // Load the image if specified 512 if driverConfig.LoadImage != "" { 513 return d.loadImage(task, driverConfig, client) 514 } 515 516 // Download the image 517 return d.pullImage(task, driverConfig, client, repo, tag) 518 } 519 520 // pullImage creates an image by pulling it from a docker registry 521 func (d *Driver) pullImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client, repo, tag string) (id string, err error) { 522 authOptions, err := d.resolveRegistryAuthentication(driverConfig, repo) 523 if err != nil { 524 if driverConfig.AuthSoftFail { 525 d.logger.Warn("Failed to find docker repo auth", "repo", repo, "error", err) 526 } else { 527 return "", fmt.Errorf("Failed to find docker auth for repo %q: %v", repo, err) 528 } 529 } 530 531 if authIsEmpty(authOptions) { 532 d.logger.Debug("did not find docker auth for repo", "repo", repo) 533 } 534 535 d.eventer.EmitEvent(&drivers.TaskEvent{ 536 TaskID: task.ID, 537 AllocID: task.AllocID, 538 TaskName: task.Name, 539 Timestamp: time.Now(), 540 Message: "Downloading image", 541 Annotations: map[string]string{ 542 "image": dockerImageRef(repo, tag), 543 }, 544 }) 545 546 return d.coordinator.PullImage(driverConfig.Image, authOptions, task.ID, d.emitEventFunc(task)) 547 } 548 549 func (d *Driver) emitEventFunc(task *drivers.TaskConfig) LogEventFn { 550 return func(msg string, annotations map[string]string) { 551 d.eventer.EmitEvent(&drivers.TaskEvent{ 552 TaskID: task.ID, 553 AllocID: task.AllocID, 554 TaskName: task.Name, 555 Timestamp: time.Now(), 556 Message: msg, 557 Annotations: annotations, 558 }) 559 } 560 } 561 562 // authBackend encapsulates a function that resolves registry credentials. 563 type authBackend func(string) (*docker.AuthConfiguration, error) 564 565 // resolveRegistryAuthentication attempts to retrieve auth credentials for the 566 // repo, trying all authentication-backends possible. 567 func (d *Driver) resolveRegistryAuthentication(driverConfig *TaskConfig, repo string) (*docker.AuthConfiguration, error) { 568 return firstValidAuth(repo, []authBackend{ 569 authFromTaskConfig(driverConfig), 570 authFromDockerConfig(d.config.Auth.Config), 571 authFromHelper(d.config.Auth.Helper), 572 }) 573 } 574 575 // loadImage creates an image by loading it from the file system 576 func (d *Driver) loadImage(task *drivers.TaskConfig, driverConfig *TaskConfig, client *docker.Client) (id string, err error) { 577 578 archive := filepath.Join(task.TaskDir().LocalDir, driverConfig.LoadImage) 579 d.logger.Debug("loading image from disk", "archive", archive) 580 581 f, err := os.Open(archive) 582 if err != nil { 583 return "", fmt.Errorf("unable to open image archive: %v", err) 584 } 585 586 if err := client.LoadImage(docker.LoadImageOptions{InputStream: f}); err != nil { 587 return "", err 588 } 589 f.Close() 590 591 dockerImage, err := client.InspectImage(driverConfig.Image) 592 if err != nil { 593 return "", recoverableErrTimeouts(err) 594 } 595 596 d.coordinator.IncrementImageReference(dockerImage.ID, driverConfig.Image, task.ID) 597 return dockerImage.ID, nil 598 } 599 600 func (d *Driver) containerBinds(task *drivers.TaskConfig, driverConfig *TaskConfig) ([]string, error) { 601 602 allocDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SharedAllocDir, task.Env[taskenv.AllocDir]) 603 taskLocalBind := fmt.Sprintf("%s:%s", task.TaskDir().LocalDir, task.Env[taskenv.TaskLocalDir]) 604 secretDirBind := fmt.Sprintf("%s:%s", task.TaskDir().SecretsDir, task.Env[taskenv.SecretsDir]) 605 binds := []string{allocDirBind, taskLocalBind, secretDirBind} 606 607 taskLocalBindVolume := driverConfig.VolumeDriver == "" 608 609 if !d.config.Volumes.Enabled && !taskLocalBindVolume { 610 return nil, fmt.Errorf("volumes are not enabled; cannot use volume driver %q", driverConfig.VolumeDriver) 611 } 612 613 for _, userbind := range driverConfig.Volumes { 614 // This assumes host OS = docker container OS. 615 // Not true, when we support Linux containers on Windows 616 src, dst, mode, err := parseVolumeSpec(userbind, runtime.GOOS) 617 if err != nil { 618 return nil, fmt.Errorf("invalid docker volume %q: %v", userbind, err) 619 } 620 621 // Paths inside task dir are always allowed when using the default driver, 622 // Relative paths are always allowed as they mount within a container 623 // When a VolumeDriver is set, we assume we receive a binding in the format 624 // volume-name:container-dest 625 // Otherwise, we assume we receive a relative path binding in the format 626 // relative/to/task:/also/in/container 627 if taskLocalBindVolume { 628 src = expandPath(task.TaskDir().Dir, src) 629 } else { 630 // Resolve dotted path segments 631 src = filepath.Clean(src) 632 } 633 634 if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, src) { 635 return nil, fmt.Errorf("volumes are not enabled; cannot mount host paths: %+q", userbind) 636 } 637 638 bind := src + ":" + dst 639 if mode != "" { 640 bind += ":" + mode 641 } 642 binds = append(binds, bind) 643 } 644 645 if selinuxLabel := d.config.Volumes.SelinuxLabel; selinuxLabel != "" { 646 // Apply SELinux Label to each volume 647 for i := range binds { 648 binds[i] = fmt.Sprintf("%s:%s", binds[i], selinuxLabel) 649 } 650 } 651 652 return binds, nil 653 } 654 655 func (d *Driver) createContainerConfig(task *drivers.TaskConfig, driverConfig *TaskConfig, 656 imageID string) (docker.CreateContainerOptions, error) { 657 658 logger := d.logger.With("task_name", task.Name) 659 var c docker.CreateContainerOptions 660 if task.Resources == nil { 661 // Guard against missing resources. We should never have been able to 662 // schedule a job without specifying this. 663 logger.Error("task.Resources is empty") 664 return c, fmt.Errorf("task.Resources is empty") 665 } 666 667 binds, err := d.containerBinds(task, driverConfig) 668 if err != nil { 669 return c, err 670 } 671 logger.Trace("binding volumes", "volumes", binds) 672 673 // create the config block that will later be consumed by go-dockerclient 674 config := &docker.Config{ 675 Image: imageID, 676 Entrypoint: driverConfig.Entrypoint, 677 Hostname: driverConfig.Hostname, 678 User: task.User, 679 Tty: driverConfig.TTY, 680 OpenStdin: driverConfig.Interactive, 681 } 682 683 if driverConfig.WorkDir != "" { 684 config.WorkingDir = driverConfig.WorkDir 685 } 686 687 hostConfig := &docker.HostConfig{ 688 Memory: task.Resources.LinuxResources.MemoryLimitBytes, 689 CPUShares: task.Resources.LinuxResources.CPUShares, 690 691 // Binds are used to mount a host volume into the container. We mount a 692 // local directory for storage and a shared alloc directory that can be 693 // used to share data between different tasks in the same task group. 694 Binds: binds, 695 696 StorageOpt: driverConfig.StorageOpt, 697 VolumeDriver: driverConfig.VolumeDriver, 698 699 PidsLimit: driverConfig.PidsLimit, 700 } 701 702 if _, ok := task.DeviceEnv[nvidiaVisibleDevices]; ok { 703 if !d.gpuRuntime { 704 return c, fmt.Errorf("requested docker-runtime %q was not found", d.config.GPURuntimeName) 705 } 706 hostConfig.Runtime = d.config.GPURuntimeName 707 } 708 709 // Calculate CPU Quota 710 // cfs_quota_us is the time per core, so we must 711 // multiply the time by the number of cores available 712 // See https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu 713 if driverConfig.CPUHardLimit { 714 numCores := runtime.NumCPU() 715 if driverConfig.CPUCFSPeriod < 0 || driverConfig.CPUCFSPeriod > 1000000 { 716 return c, fmt.Errorf("invalid value for cpu_cfs_period") 717 } 718 if driverConfig.CPUCFSPeriod == 0 { 719 driverConfig.CPUCFSPeriod = task.Resources.LinuxResources.CPUPeriod 720 } 721 hostConfig.CPUPeriod = driverConfig.CPUCFSPeriod 722 hostConfig.CPUQuota = int64(task.Resources.LinuxResources.PercentTicks*float64(driverConfig.CPUCFSPeriod)) * int64(numCores) 723 } 724 725 // Windows does not support MemorySwap/MemorySwappiness #2193 726 if runtime.GOOS == "windows" { 727 hostConfig.MemorySwap = 0 728 hostConfig.MemorySwappiness = -1 729 } else { 730 hostConfig.MemorySwap = task.Resources.LinuxResources.MemoryLimitBytes // MemorySwap is memory + swap. 731 } 732 733 loggingDriver := driverConfig.Logging.Type 734 if loggingDriver == "" { 735 loggingDriver = driverConfig.Logging.Driver 736 } 737 738 hostConfig.LogConfig = docker.LogConfig{ 739 Type: loggingDriver, 740 Config: driverConfig.Logging.Config, 741 } 742 743 logger.Debug("configured resources", "memory", hostConfig.Memory, 744 "cpu_shares", hostConfig.CPUShares, "cpu_quota", hostConfig.CPUQuota, 745 "cpu_period", hostConfig.CPUPeriod) 746 logger.Debug("binding directories", "binds", hclog.Fmt("%#v", hostConfig.Binds)) 747 748 // set privileged mode 749 if driverConfig.Privileged && !d.config.AllowPrivileged { 750 return c, fmt.Errorf(`Docker privileged mode is disabled on this Nomad agent`) 751 } 752 hostConfig.Privileged = driverConfig.Privileged 753 754 // set capabilities 755 hostCapsWhitelistConfig := d.config.AllowCaps 756 hostCapsWhitelist := make(map[string]struct{}) 757 for _, cap := range hostCapsWhitelistConfig { 758 cap = strings.ToLower(strings.TrimSpace(cap)) 759 hostCapsWhitelist[cap] = struct{}{} 760 } 761 762 if _, ok := hostCapsWhitelist["all"]; !ok { 763 effectiveCaps, err := tweakCapabilities( 764 strings.Split(dockerBasicCaps, ","), 765 driverConfig.CapAdd, 766 driverConfig.CapDrop, 767 ) 768 if err != nil { 769 return c, err 770 } 771 var missingCaps []string 772 for _, cap := range effectiveCaps { 773 cap = strings.ToLower(cap) 774 if _, ok := hostCapsWhitelist[cap]; !ok { 775 missingCaps = append(missingCaps, cap) 776 } 777 } 778 if len(missingCaps) > 0 { 779 return c, fmt.Errorf("Docker driver doesn't have the following caps whitelisted on this Nomad agent: %s", missingCaps) 780 } 781 } 782 783 hostConfig.CapAdd = driverConfig.CapAdd 784 hostConfig.CapDrop = driverConfig.CapDrop 785 786 // set SHM size 787 if driverConfig.ShmSize != 0 { 788 hostConfig.ShmSize = driverConfig.ShmSize 789 } 790 791 // set DNS servers 792 for _, ip := range driverConfig.DNSServers { 793 if net.ParseIP(ip) != nil { 794 hostConfig.DNS = append(hostConfig.DNS, ip) 795 } else { 796 logger.Error("invalid ip address for container dns server", "ip", ip) 797 } 798 } 799 800 // Setup devices 801 for _, device := range driverConfig.Devices { 802 dd, err := device.toDockerDevice() 803 if err != nil { 804 return c, err 805 } 806 hostConfig.Devices = append(hostConfig.Devices, dd) 807 } 808 for _, device := range task.Devices { 809 hostConfig.Devices = append(hostConfig.Devices, docker.Device{ 810 PathOnHost: device.HostPath, 811 PathInContainer: device.TaskPath, 812 CgroupPermissions: device.Permissions, 813 }) 814 } 815 816 // Setup mounts 817 for _, m := range driverConfig.Mounts { 818 hm, err := m.toDockerHostMount() 819 if err != nil { 820 return c, err 821 } 822 823 if hm.Type == "bind" { 824 hm.Source = expandPath(task.TaskDir().Dir, hm.Source) 825 826 // paths inside alloc dir are always allowed as they mount within a container, and treated as relative to task dir 827 if !d.config.Volumes.Enabled && !isParentPath(task.AllocDir, hm.Source) { 828 return c, fmt.Errorf("volumes are not enabled; cannot mount host path: %q %q", hm.Source, task.AllocDir) 829 } 830 } 831 832 hostConfig.Mounts = append(hostConfig.Mounts, hm) 833 } 834 for _, m := range task.Mounts { 835 hostConfig.Mounts = append(hostConfig.Mounts, docker.HostMount{ 836 Type: "bind", 837 Target: m.TaskPath, 838 Source: m.HostPath, 839 ReadOnly: m.Readonly, 840 }) 841 } 842 843 // set DNS search domains and extra hosts 844 hostConfig.DNSSearch = driverConfig.DNSSearchDomains 845 hostConfig.DNSOptions = driverConfig.DNSOptions 846 hostConfig.ExtraHosts = driverConfig.ExtraHosts 847 848 hostConfig.IpcMode = driverConfig.IPCMode 849 hostConfig.PidMode = driverConfig.PidMode 850 hostConfig.UTSMode = driverConfig.UTSMode 851 hostConfig.UsernsMode = driverConfig.UsernsMode 852 hostConfig.SecurityOpt = driverConfig.SecurityOpt 853 hostConfig.Sysctls = driverConfig.Sysctl 854 855 ulimits, err := sliceMergeUlimit(driverConfig.Ulimit) 856 if err != nil { 857 return c, fmt.Errorf("failed to parse ulimit configuration: %v", err) 858 } 859 hostConfig.Ulimits = ulimits 860 861 hostConfig.ReadonlyRootfs = driverConfig.ReadonlyRootfs 862 863 hostConfig.NetworkMode = driverConfig.NetworkMode 864 if hostConfig.NetworkMode == "" { 865 // docker default 866 logger.Debug("networking mode not specified; using default", "network_mode", defaultNetworkMode) 867 hostConfig.NetworkMode = defaultNetworkMode 868 } 869 870 // Setup port mapping and exposed ports 871 if len(task.Resources.NomadResources.Networks) == 0 { 872 logger.Debug("no network interfaces are available") 873 if len(driverConfig.PortMap) > 0 { 874 return c, fmt.Errorf("Trying to map ports but no network interface is available") 875 } 876 } else { 877 // TODO add support for more than one network 878 network := task.Resources.NomadResources.Networks[0] 879 publishedPorts := map[docker.Port][]docker.PortBinding{} 880 exposedPorts := map[docker.Port]struct{}{} 881 882 for _, port := range network.ReservedPorts { 883 // By default we will map the allocated port 1:1 to the container 884 containerPortInt := port.Value 885 886 // If the user has mapped a port using port_map we'll change it here 887 if mapped, ok := driverConfig.PortMap[port.Label]; ok { 888 containerPortInt = mapped 889 } 890 891 hostPortStr := strconv.Itoa(port.Value) 892 containerPort := docker.Port(strconv.Itoa(containerPortInt)) 893 894 publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr) 895 publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr) 896 logger.Debug("allocated static port", "ip", network.IP, "port", port.Value) 897 898 exposedPorts[containerPort+"/tcp"] = struct{}{} 899 exposedPorts[containerPort+"/udp"] = struct{}{} 900 logger.Debug("exposed port", "port", port.Value) 901 } 902 903 for _, port := range network.DynamicPorts { 904 // By default we will map the allocated port 1:1 to the container 905 containerPortInt := port.Value 906 907 // If the user has mapped a port using port_map we'll change it here 908 if mapped, ok := driverConfig.PortMap[port.Label]; ok { 909 containerPortInt = mapped 910 } 911 912 hostPortStr := strconv.Itoa(port.Value) 913 containerPort := docker.Port(strconv.Itoa(containerPortInt)) 914 915 publishedPorts[containerPort+"/tcp"] = getPortBinding(network.IP, hostPortStr) 916 publishedPorts[containerPort+"/udp"] = getPortBinding(network.IP, hostPortStr) 917 logger.Debug("allocated mapped port", "ip", network.IP, "port", port.Value) 918 919 exposedPorts[containerPort+"/tcp"] = struct{}{} 920 exposedPorts[containerPort+"/udp"] = struct{}{} 921 logger.Debug("exposed port", "port", containerPort) 922 } 923 924 hostConfig.PortBindings = publishedPorts 925 config.ExposedPorts = exposedPorts 926 } 927 928 // If the user specified a custom command to run, we'll inject it here. 929 if driverConfig.Command != "" { 930 // Validate command 931 if err := validateCommand(driverConfig.Command, "args"); err != nil { 932 return c, err 933 } 934 935 cmd := []string{driverConfig.Command} 936 if len(driverConfig.Args) != 0 { 937 cmd = append(cmd, driverConfig.Args...) 938 } 939 logger.Debug("setting container startup command", "command", strings.Join(cmd, " ")) 940 config.Cmd = cmd 941 } else if len(driverConfig.Args) != 0 { 942 config.Cmd = driverConfig.Args 943 } 944 945 if len(driverConfig.Labels) > 0 { 946 config.Labels = driverConfig.Labels 947 logger.Debug("applied labels on the container", "labels", config.Labels) 948 } 949 950 config.Env = task.EnvList() 951 952 containerName := fmt.Sprintf("%s-%s", strings.Replace(task.Name, "/", "_", -1), task.AllocID) 953 logger.Debug("setting container name", "container_name", containerName) 954 955 var networkingConfig *docker.NetworkingConfig 956 if len(driverConfig.NetworkAliases) > 0 || driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" { 957 networkingConfig = &docker.NetworkingConfig{ 958 EndpointsConfig: map[string]*docker.EndpointConfig{ 959 hostConfig.NetworkMode: {}, 960 }, 961 } 962 } 963 964 if len(driverConfig.NetworkAliases) > 0 { 965 networkingConfig.EndpointsConfig[hostConfig.NetworkMode].Aliases = driverConfig.NetworkAliases 966 logger.Debug("setting container network aliases", "network_mode", hostConfig.NetworkMode, 967 "network_aliases", strings.Join(driverConfig.NetworkAliases, ", ")) 968 } 969 970 if driverConfig.IPv4Address != "" || driverConfig.IPv6Address != "" { 971 networkingConfig.EndpointsConfig[hostConfig.NetworkMode].IPAMConfig = &docker.EndpointIPAMConfig{ 972 IPv4Address: driverConfig.IPv4Address, 973 IPv6Address: driverConfig.IPv6Address, 974 } 975 logger.Debug("setting container network configuration", "network_mode", hostConfig.NetworkMode, 976 "ipv4_address", driverConfig.IPv4Address, "ipv6_address", driverConfig.IPv6Address) 977 } 978 979 if driverConfig.MacAddress != "" { 980 config.MacAddress = driverConfig.MacAddress 981 logger.Debug("setting container mac address", "mac_address", config.MacAddress) 982 } 983 984 return docker.CreateContainerOptions{ 985 Name: containerName, 986 Config: config, 987 HostConfig: hostConfig, 988 NetworkingConfig: networkingConfig, 989 }, nil 990 } 991 992 // detectIP of Docker container. Returns the first IP found as well as true if 993 // the IP should be advertised (bridge network IPs return false). Returns an 994 // empty string and false if no IP could be found. 995 func (d *Driver) detectIP(c *docker.Container, driverConfig *TaskConfig) (string, bool) { 996 if c.NetworkSettings == nil { 997 // This should only happen if there's been a coding error (such 998 // as not calling InspectContainer after CreateContainer). Code 999 // defensively in case the Docker API changes subtly. 1000 d.logger.Error("no network settings for container", "container_id", c.ID) 1001 return "", false 1002 } 1003 1004 ip, ipName := "", "" 1005 auto := false 1006 for name, net := range c.NetworkSettings.Networks { 1007 if net.IPAddress == "" { 1008 // Ignore networks without an IP address 1009 continue 1010 } 1011 1012 ip = net.IPAddress 1013 if driverConfig.AdvertiseIPv6Addr { 1014 ip = net.GlobalIPv6Address 1015 auto = true 1016 } 1017 ipName = name 1018 1019 // Don't auto-advertise IPs for default networks (bridge on 1020 // Linux, nat on Windows) 1021 if name != "bridge" && name != "nat" { 1022 auto = true 1023 } 1024 1025 break 1026 } 1027 1028 if n := len(c.NetworkSettings.Networks); n > 1 { 1029 d.logger.Warn("multiple Docker networks for container found but Nomad only supports 1", 1030 "total_networks", n, 1031 "container_id", c.ID, 1032 "container_network", ipName) 1033 } 1034 1035 return ip, auto 1036 } 1037 1038 // validateCommand validates that the command only has a single value and 1039 // returns a user friendly error message telling them to use the passed 1040 // argField. 1041 func validateCommand(command, argField string) error { 1042 trimmed := strings.TrimSpace(command) 1043 if len(trimmed) == 0 { 1044 return fmt.Errorf("command empty: %q", command) 1045 } 1046 1047 if len(trimmed) != len(command) { 1048 return fmt.Errorf("command contains extra white space: %q", command) 1049 } 1050 1051 return nil 1052 } 1053 1054 func (d *Driver) WaitTask(ctx context.Context, taskID string) (<-chan *drivers.ExitResult, error) { 1055 h, ok := d.tasks.Get(taskID) 1056 if !ok { 1057 return nil, drivers.ErrTaskNotFound 1058 } 1059 ch := make(chan *drivers.ExitResult) 1060 go d.handleWait(ctx, ch, h) 1061 return ch, nil 1062 } 1063 1064 func (d *Driver) handleWait(ctx context.Context, ch chan *drivers.ExitResult, h *taskHandle) { 1065 defer close(ch) 1066 select { 1067 case <-h.waitCh: 1068 ch <- h.ExitResult() 1069 case <-ctx.Done(): 1070 ch <- &drivers.ExitResult{ 1071 Err: ctx.Err(), 1072 } 1073 } 1074 } 1075 1076 func (d *Driver) StopTask(taskID string, timeout time.Duration, signal string) error { 1077 h, ok := d.tasks.Get(taskID) 1078 if !ok { 1079 return drivers.ErrTaskNotFound 1080 } 1081 1082 if signal == "" { 1083 signal = "SIGINT" 1084 } 1085 1086 // Windows Docker daemon does not support SIGINT, SIGTERM is the semantic equivalent that 1087 // allows for graceful shutdown before being followed up by a SIGKILL. 1088 // Supported signals: 1089 // https://github.com/moby/moby/blob/0111ee70874a4947d93f64b672f66a2a35071ee2/pkg/signal/signal_windows.go#L17-L26 1090 if runtime.GOOS == "windows" && signal == "SIGINT" { 1091 signal = "SIGTERM" 1092 } 1093 1094 sig, err := signals.Parse(signal) 1095 if err != nil { 1096 return fmt.Errorf("failed to parse signal: %v", err) 1097 } 1098 1099 return h.Kill(timeout, sig) 1100 } 1101 1102 func (d *Driver) DestroyTask(taskID string, force bool) error { 1103 h, ok := d.tasks.Get(taskID) 1104 if !ok { 1105 return drivers.ErrTaskNotFound 1106 } 1107 1108 c, err := h.client.InspectContainer(h.containerID) 1109 if err != nil { 1110 switch err.(type) { 1111 case *docker.NoSuchContainer: 1112 h.logger.Info("container was removed out of band, will proceed with DestroyTask", 1113 "error", err) 1114 default: 1115 return fmt.Errorf("failed to inspect container state: %v", err) 1116 } 1117 } else { 1118 if c.State.Running { 1119 if !force { 1120 return fmt.Errorf("must call StopTask for the given task before Destroy or set force to true") 1121 } 1122 if err := h.client.StopContainer(h.containerID, 0); err != nil { 1123 h.logger.Warn("failed to stop container during destroy", "error", err) 1124 } 1125 } 1126 1127 if h.removeContainerOnExit { 1128 if err := h.client.RemoveContainer(docker.RemoveContainerOptions{ID: h.containerID, RemoveVolumes: true, Force: true}); err != nil { 1129 h.logger.Error("error removing container", "error", err) 1130 } 1131 } else { 1132 h.logger.Debug("not removing container due to config") 1133 } 1134 } 1135 1136 if err := d.cleanupImage(h); err != nil { 1137 h.logger.Error("failed to cleanup image after destroying container", 1138 "error", err) 1139 } 1140 1141 d.tasks.Delete(taskID) 1142 return nil 1143 } 1144 1145 // cleanupImage removes a Docker image. No error is returned if the image 1146 // doesn't exist or is still in use. Requires the global client to already be 1147 // initialized. 1148 func (d *Driver) cleanupImage(handle *taskHandle) error { 1149 if !d.config.GC.Image { 1150 return nil 1151 } 1152 1153 d.coordinator.RemoveImage(handle.containerImage, handle.task.ID) 1154 1155 return nil 1156 } 1157 1158 func (d *Driver) InspectTask(taskID string) (*drivers.TaskStatus, error) { 1159 h, ok := d.tasks.Get(taskID) 1160 if !ok { 1161 return nil, drivers.ErrTaskNotFound 1162 } 1163 1164 container, err := client.InspectContainer(h.containerID) 1165 if err != nil { 1166 return nil, fmt.Errorf("failed to inspect container %q: %v", h.containerID, err) 1167 } 1168 status := &drivers.TaskStatus{ 1169 ID: h.task.ID, 1170 Name: h.task.Name, 1171 StartedAt: container.State.StartedAt, 1172 CompletedAt: container.State.FinishedAt, 1173 DriverAttributes: map[string]string{ 1174 "container_id": container.ID, 1175 }, 1176 NetworkOverride: h.net, 1177 ExitResult: h.ExitResult(), 1178 } 1179 1180 status.State = drivers.TaskStateUnknown 1181 if container.State.Running { 1182 status.State = drivers.TaskStateRunning 1183 } 1184 if container.State.Dead { 1185 status.State = drivers.TaskStateExited 1186 } 1187 1188 return status, nil 1189 } 1190 1191 func (d *Driver) TaskStats(ctx context.Context, taskID string, interval time.Duration) (<-chan *drivers.TaskResourceUsage, error) { 1192 h, ok := d.tasks.Get(taskID) 1193 if !ok { 1194 return nil, drivers.ErrTaskNotFound 1195 } 1196 1197 return h.Stats(ctx, interval) 1198 } 1199 1200 func (d *Driver) TaskEvents(ctx context.Context) (<-chan *drivers.TaskEvent, error) { 1201 return d.eventer.TaskEvents(ctx) 1202 } 1203 1204 func (d *Driver) SignalTask(taskID string, signal string) error { 1205 h, ok := d.tasks.Get(taskID) 1206 if !ok { 1207 return drivers.ErrTaskNotFound 1208 } 1209 1210 sig, err := signals.Parse(signal) 1211 if err != nil { 1212 return fmt.Errorf("failed to parse signal: %v", err) 1213 } 1214 1215 return h.Signal(sig) 1216 } 1217 1218 func (d *Driver) ExecTask(taskID string, cmd []string, timeout time.Duration) (*drivers.ExecTaskResult, error) { 1219 h, ok := d.tasks.Get(taskID) 1220 if !ok { 1221 return nil, drivers.ErrTaskNotFound 1222 } 1223 1224 if len(cmd) == 0 { 1225 return nil, fmt.Errorf("cmd is required, but was empty") 1226 } 1227 1228 ctx, cancel := context.WithTimeout(context.Background(), timeout) 1229 defer cancel() 1230 1231 return h.Exec(ctx, cmd[0], cmd[1:]) 1232 } 1233 1234 var _ drivers.ExecTaskStreamingDriver = (*Driver)(nil) 1235 1236 func (d *Driver) ExecTaskStreaming(ctx context.Context, taskID string, opts *drivers.ExecOptions) (*drivers.ExitResult, error) { 1237 defer opts.Stdout.Close() 1238 defer opts.Stderr.Close() 1239 1240 done := make(chan interface{}) 1241 defer close(done) 1242 1243 h, ok := d.tasks.Get(taskID) 1244 if !ok { 1245 return nil, drivers.ErrTaskNotFound 1246 } 1247 1248 if len(opts.Command) == 0 { 1249 return nil, fmt.Errorf("command is required but was empty") 1250 } 1251 1252 createExecOpts := docker.CreateExecOptions{ 1253 AttachStdin: true, 1254 AttachStdout: true, 1255 AttachStderr: true, 1256 Tty: opts.Tty, 1257 Cmd: opts.Command, 1258 Container: h.containerID, 1259 Context: ctx, 1260 } 1261 exec, err := h.client.CreateExec(createExecOpts) 1262 if err != nil { 1263 return nil, fmt.Errorf("failed to create exec object: %v", err) 1264 } 1265 1266 go func() { 1267 for { 1268 select { 1269 case <-ctx.Done(): 1270 return 1271 case <-done: 1272 return 1273 case s, ok := <-opts.ResizeCh: 1274 if !ok { 1275 return 1276 } 1277 client.ResizeExecTTY(exec.ID, s.Height, s.Width) 1278 } 1279 } 1280 }() 1281 1282 startOpts := docker.StartExecOptions{ 1283 Detach: false, 1284 1285 // When running in TTY, we must use a raw terminal. 1286 // If not, we set RawTerminal to false to allow docker client 1287 // to interpret special stdout/stderr messages 1288 Tty: opts.Tty, 1289 RawTerminal: opts.Tty, 1290 1291 InputStream: opts.Stdin, 1292 OutputStream: opts.Stdout, 1293 ErrorStream: opts.Stderr, 1294 Context: ctx, 1295 } 1296 if err := client.StartExec(exec.ID, startOpts); err != nil { 1297 return nil, fmt.Errorf("failed to start exec: %v", err) 1298 } 1299 1300 // StartExec returns after process completes, but InspectExec seems to have a delay 1301 // get in getting status code 1302 1303 const execTerminatingTimeout = 3 * time.Second 1304 start := time.Now() 1305 var res *docker.ExecInspect 1306 for res == nil || res.Running || time.Since(start) > execTerminatingTimeout { 1307 res, err = client.InspectExec(exec.ID) 1308 if err != nil { 1309 return nil, fmt.Errorf("failed to inspect exec result: %v", err) 1310 } 1311 time.Sleep(50 * time.Millisecond) 1312 } 1313 1314 if res == nil || res.Running { 1315 return nil, fmt.Errorf("failed to retrieve exec result") 1316 } 1317 1318 return &drivers.ExitResult{ 1319 ExitCode: res.ExitCode, 1320 }, nil 1321 } 1322 1323 // dockerClients creates two *docker.Client, one for long running operations and 1324 // the other for shorter operations. In test / dev mode we can use ENV vars to 1325 // connect to the docker daemon. In production mode we will read docker.endpoint 1326 // from the config file. 1327 func (d *Driver) dockerClients() (*docker.Client, *docker.Client, error) { 1328 createClientsLock.Lock() 1329 defer createClientsLock.Unlock() 1330 1331 if client != nil && waitClient != nil { 1332 return client, waitClient, nil 1333 } 1334 1335 var err error 1336 1337 // Onlt initialize the client if it hasn't yet been done 1338 if client == nil { 1339 client, err = d.newDockerClient(dockerTimeout) 1340 if err != nil { 1341 return nil, nil, err 1342 } 1343 } 1344 1345 // Only initialize the waitClient if it hasn't yet been done 1346 if waitClient == nil { 1347 waitClient, err = d.newDockerClient(0 * time.Minute) 1348 if err != nil { 1349 return nil, nil, err 1350 } 1351 } 1352 1353 return client, waitClient, nil 1354 } 1355 1356 // newDockerClient creates a new *docker.Client with a configurable timeout 1357 func (d *Driver) newDockerClient(timeout time.Duration) (*docker.Client, error) { 1358 var err error 1359 var merr multierror.Error 1360 var newClient *docker.Client 1361 1362 // Default to using whatever is configured in docker.endpoint. If this is 1363 // not specified we'll fall back on NewClientFromEnv which reads config from 1364 // the DOCKER_* environment variables DOCKER_HOST, DOCKER_TLS_VERIFY, and 1365 // DOCKER_CERT_PATH. This allows us to lock down the config in production 1366 // but also accept the standard ENV configs for dev and test. 1367 dockerEndpoint := d.config.Endpoint 1368 if dockerEndpoint != "" { 1369 cert := d.config.TLS.Cert 1370 key := d.config.TLS.Key 1371 ca := d.config.TLS.CA 1372 1373 if cert+key+ca != "" { 1374 d.logger.Debug("using TLS client connection", "endpoint", dockerEndpoint) 1375 newClient, err = docker.NewTLSClient(dockerEndpoint, cert, key, ca) 1376 if err != nil { 1377 merr.Errors = append(merr.Errors, err) 1378 } 1379 } else { 1380 d.logger.Debug("using standard client connection", "endpoint", dockerEndpoint) 1381 newClient, err = docker.NewClient(dockerEndpoint) 1382 if err != nil { 1383 merr.Errors = append(merr.Errors, err) 1384 } 1385 } 1386 } else { 1387 d.logger.Debug("using client connection initialized from environment") 1388 newClient, err = docker.NewClientFromEnv() 1389 if err != nil { 1390 merr.Errors = append(merr.Errors, err) 1391 } 1392 } 1393 1394 if timeout != 0 && newClient != nil { 1395 newClient.SetTimeout(timeout) 1396 } 1397 return newClient, merr.ErrorOrNil() 1398 } 1399 1400 func sliceMergeUlimit(ulimitsRaw map[string]string) ([]docker.ULimit, error) { 1401 var ulimits []docker.ULimit 1402 1403 for name, ulimitRaw := range ulimitsRaw { 1404 if len(ulimitRaw) == 0 { 1405 return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %q, cannot be empty", name, ulimitRaw) 1406 } 1407 // hard limit is optional 1408 if strings.Contains(ulimitRaw, ":") == false { 1409 ulimitRaw = ulimitRaw + ":" + ulimitRaw 1410 } 1411 1412 splitted := strings.SplitN(ulimitRaw, ":", 2) 1413 if len(splitted) < 2 { 1414 return []docker.ULimit{}, fmt.Errorf("Malformed ulimit specification %v: %v", name, ulimitRaw) 1415 } 1416 soft, err := strconv.Atoi(splitted[0]) 1417 if err != nil { 1418 return []docker.ULimit{}, fmt.Errorf("Malformed soft ulimit %v: %v", name, ulimitRaw) 1419 } 1420 hard, err := strconv.Atoi(splitted[1]) 1421 if err != nil { 1422 return []docker.ULimit{}, fmt.Errorf("Malformed hard ulimit %v: %v", name, ulimitRaw) 1423 } 1424 1425 ulimit := docker.ULimit{ 1426 Name: name, 1427 Soft: int64(soft), 1428 Hard: int64(hard), 1429 } 1430 ulimits = append(ulimits, ulimit) 1431 } 1432 return ulimits, nil 1433 } 1434 1435 func (d *Driver) Shutdown() { 1436 d.signalShutdown() 1437 }